├── .dvc
├── .gitignore
└── config
├── .dvcignore
├── .gitignore
├── .pre-commit-config.yaml
├── .python-version
├── .vscode
└── settings.json
├── Makefile
├── README.md
├── dvc.lock
├── dvc.yaml
├── generate_readme.py
├── images
├── y2022
│ ├── bar_plot_w_custom_cmap.png
│ ├── binary_outcome_variable.png
│ ├── box_plot_w_scatter_distributions.png
│ ├── default_plot.png
│ ├── histogram_with_two_variables.png
│ ├── line_plot_fill_between.png
│ ├── meaningless_points.png
│ ├── opinium_barchart.png
│ ├── pandas_stacked_bars_with_values.png
│ ├── pos_neg_split_hbar.png
│ ├── scatter_distributions.png
│ ├── scatter_matrix_w_kde_on_diag.png
│ ├── scatter_w_outlined_text_insert.png
│ ├── sns_violin_plot_custom.png
│ ├── split_x_axis_custom_legend.png
│ ├── stacked_bar_with_single_bars_layout.png
│ └── uk_hexmap.png
└── y2024
│ └── cat_weight.png
├── mypy.ini
├── plotting_examples
├── __init__.py
├── dvc_entry.py
├── extract_year_name.py
├── rc.mplstyle
├── save_plot_output.py
├── y2022
│ ├── __init__.py
│ ├── bar_plot_w_custom_cmap
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── binary_outcome_variable
│ │ ├── __init__.py
│ │ ├── data.csv
│ │ └── plot.py
│ ├── box_plot_w_scatter_distributions
│ │ ├── __init__.py
│ │ ├── data.parquet
│ │ └── plot.py
│ ├── default_plot
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── histogram_with_two_variables
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── line_plot_fill_between
│ │ ├── __init__.py
│ │ ├── data.csv
│ │ └── plot.py
│ ├── meaningless_points
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── metadata.py
│ ├── opinium_barchart
│ │ ├── __init__.py
│ │ ├── opinium.png
│ │ ├── opinium_barchart_example.png
│ │ └── plot.py
│ ├── pandas_stacked_bars_with_values
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── pos_neg_split_hbar
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── scatter_distributions
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── scatter_matrix_w_kde_on_diag
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── scatter_w_outlined_text_insert
│ │ ├── __init__.py
│ │ ├── data.parquet
│ │ └── plot.py
│ ├── sns_violin_plot_custom
│ │ ├── __init__.py
│ │ ├── data.parquet
│ │ └── plot.py
│ ├── split_x_axis_custom_legend
│ │ ├── __init__.py
│ │ └── plot.py
│ ├── stacked_bar_with_single_bars_layout
│ │ ├── __init__.py
│ │ ├── data
│ │ │ └── lab.png
│ │ └── plot.py
│ └── uk_hexmap
│ │ ├── __init__.py
│ │ ├── data
│ │ ├── gb_hex_cartogram
│ │ │ ├── GB_Hex_Cartogram_Const.cpg
│ │ │ ├── GB_Hex_Cartogram_Const.dbf
│ │ │ ├── GB_Hex_Cartogram_Const.prj
│ │ │ ├── GB_Hex_Cartogram_Const.sbn
│ │ │ ├── GB_Hex_Cartogram_Const.sbx
│ │ │ ├── GB_Hex_Cartogram_Const.shp
│ │ │ ├── GB_Hex_Cartogram_Const.shp.xml
│ │ │ ├── GB_Hex_Cartogram_Const.shx
│ │ │ └── GB_Hex_Cartogram_Const.zip
│ │ ├── petition_data.csv
│ │ └── petition_data.json
│ │ ├── plot.py
│ │ ├── uk_hex_example.png
│ │ └── uk_hex_example.py
└── y2024
│ ├── __init__.py
│ ├── cat_weight
│ ├── __init__.py
│ ├── data
│ │ ├── cat_looking_to_side.jpeg
│ │ └── weight_data.parquet
│ └── plot.py
│ └── metadata.py
├── poetry.lock
├── pyproject.toml
├── setup.cfg
└── work.sh
/.dvc/.gitignore:
--------------------------------------------------------------------------------
1 | /config.local
2 | /tmp
3 | /cache
4 |
--------------------------------------------------------------------------------
/.dvc/config:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/.dvc/config
--------------------------------------------------------------------------------
/.dvcignore:
--------------------------------------------------------------------------------
1 | # Add patterns of files dvc should ignore, which could improve
2 | # the performance. Learn more at
3 | # https://dvc.org/doc/user-guide/dvcignore
4 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Caches
2 | **/.ipynb_checkpoints/*
3 | **/__pycache__/*
4 | *.pyc
5 | .Rhistory
6 | .venv/
7 | venv/
8 |
9 | # Data
10 | data/**/*
11 | !data/**/*.gitkeep
12 |
13 | # IDE config
14 | .idea/
15 | ipython_config.py
16 | profile_default/
17 |
18 | # Other
19 | .DS_Store
20 | config/config.py
21 | .Rproj.user
22 | .Rproj.user/
23 | *.Rproj
24 |
25 | # VisualStudioCode
26 | .vscode/*
27 | !.vscode/settings.json
28 | !.vscode/tasks.json
29 | !.vscode/launch.json
30 | !.vscode/extensions.json
31 |
32 | # https://www.gitignore.io/
33 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/pre-commit/pre-commit-hooks
3 | rev: v4.4.0
4 | hooks:
5 | - id: end-of-file-fixer
6 | - id: trailing-whitespace
7 | - id: check-builtin-literals
8 | - id: check-byte-order-marker
9 | - id: check-case-conflict
10 | - id: check-merge-conflict
11 | - id: check-symlinks
12 | - id: check-toml
13 | - id: check-vcs-permalinks
14 | - id: check-xml
15 | - id: debug-statements
16 | - id: detect-private-key
17 | - id: mixed-line-ending
18 | - id: fix-encoding-pragma
19 | args: ["--remove"]
20 | - id: check-yaml
21 |
--------------------------------------------------------------------------------
/.python-version:
--------------------------------------------------------------------------------
1 | 3.10.2
2 |
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.linting.pylintEnabled": true,
3 | "python.linting.enabled": true,
4 | "python.formatting.provider": "black",
5 | "editor.formatOnSave": true,
6 | "rewrap.wrappingColumn": 87,
7 | "editor.rulers": [
8 | 88
9 | ],
10 | // "peacock.color": "#42b883",
11 | "workbench.colorCustomizations": {
12 | "activityBar.activeBackground": "#65c89b",
13 | "activityBar.activeBorder": "#945bc4",
14 | "activityBar.background": "#65c89b",
15 | "activityBar.foreground": "#15202b",
16 | "activityBar.inactiveForeground": "#15202b99",
17 | "activityBarBadge.background": "#945bc4",
18 | "activityBarBadge.foreground": "#e7e7e7",
19 | "sash.hoverBorder": "#65c89b",
20 | "statusBar.background": "#42b883",
21 | "statusBar.foreground": "#15202b",
22 | "statusBarItem.hoverBackground": "#359268",
23 | "statusBarItem.remoteBackground": "#42b883",
24 | "statusBarItem.remoteForeground": "#15202b",
25 | "titleBar.activeBackground": "#42b883",
26 | "titleBar.activeForeground": "#15202b",
27 | "titleBar.inactiveBackground": "#42b88399",
28 | "titleBar.inactiveForeground": "#15202b99"
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: clean requirements
2 | .PHONY: git-stats git-log cloc clean-git
3 | .PHONY: deploy
4 | .PHONY: test
5 | .PHONY: requirements
6 | .PHONY: help
7 |
8 | GIT := git
9 | CLOC := cloc
10 |
11 | #########
12 | # UTILS #
13 | #########
14 |
15 | help:
16 | @awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST) | sort
17 |
18 | clean:
19 | @echo "Cleaning up temporary and cache files"
20 | @find . -type f -name "*.pyc" -delete
21 | @find . -type d -name "__pycache__" -exec rm -rf {} +
22 | @find . -type d -name ".pytest_cache" -exec rm -rf {} +
23 | @find . -type d -name ".mypy_cache" -exec rm -rf {} +
24 | @find . -type d -name ".ipynb_checkpoints" -exec rm -rf {} +
25 |
26 | cloc:
27 | @echo "Code statistics using cloc:"
28 | $(CLOC) --exclude-dir=venv .
29 |
30 | ######################
31 | # WORKING ON PROJECT #
32 | ######################
33 |
34 | pre-commit-run:
35 | poetry run pre-commit run --all-files
36 |
37 | readme: ## Generate README file.
38 | poetry run python generate_readme.py
39 |
40 | # This'll just run through all the plots.
41 | repro: ## run dvc repro
42 | poetry run dvc repro dvc.yaml
43 |
44 |
45 | ########
46 | # LINT #
47 | ########
48 |
49 | mypy:
50 | poetry run mypy . --strict
51 |
52 | lint: mypy ## run linting - mypy,ruff
53 | poetry run ruff check .
54 | poetry run ruff format . --check
55 | @$(MAKE) --no-print-directory clean
56 |
57 | # Using this as format & lint really...
58 | format: pre-commit-run ## run formatters - pre-commit,ruff
59 | poetry run ruff format .
60 | poetry run ruff check . --fix --unsafe-fixes
61 | @$(MAKE) --no-print-directory clean
62 |
63 |
64 | ##########
65 | # POETRY #
66 | ##########
67 |
68 | poetry.lock:
69 | poetry lock --no-update
70 |
71 | install: poetry.lock
72 | poetry install
73 | @$(MAKE) --no-print-directory clean
74 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Example plots
2 |
3 | Example plots, typically using matplotlib. Mainly for personal use / so I have somewhere to remind
4 | myself of some things, but if they're of any use to anyone else then ace. Code/visuals often aren't
5 | great as they're mainly just scratch work, often copied straight over from a notebook with little
6 | cleanup.
7 |
8 | ----
9 |
10 | [comment]: # (Automate plots beneath this.)
11 |
12 | # Plots
13 |
14 | * [`bar_plot_w_custom_cmap`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#bar_plot_w_custom_cmap)
15 | * [`binary_outcome_variable`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#binary_outcome_variable)
16 | * [`box_plot_w_scatter_distributions`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#box_plot_w_scatter_distributions)
17 | * [`cat_weight`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#cat_weight)
18 | * [`line_plot_fill_between`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#line_plot_fill_between)
19 | * [`meaningless_points`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#meaningless_points)
20 | * [`opinium_barchart`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#opinium_barchart)
21 | * [`pandas_stacked_bars_with_values`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#pandas_stacked_bars_with_values)
22 | * [`pos_neg_split_hbar`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#pos_neg_split_hbar)
23 | * [`scatter_distributions`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_distributions)
24 | * [`scatter_matrix_w_kde_on_diag`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_matrix_w_kde_on_diag)
25 | * [`scatter_w_outlined_text_insert`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#scatter_w_outlined_text_insert)
26 | * [`split_x_axis_custom_legend`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#split_x_axis_custom_legend)
27 | * [`stacked_bar_with_single_bars_layout`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#stacked_bar_with_single_bars_layout)
28 | * [`uk_hexmap`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#uk_hexmap)
29 |
30 |
31 |
32 | ## [`bar_plot_w_custom_cmap`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py)
33 |
34 | Bar plot with custom cmap.
35 |
36 | Based on this tweet: https://twitter.com/ryanburge/status/1505602885215834112 - wanted
37 | to create something with a similar effect using mpl.
38 |
39 | Example of:
40 |
41 | - Different font types (using monospace font)
42 | - using different colours for bars depending on their values (custom cmap).
43 | - padding around the axis using rc parameters
44 |
45 | 
46 |
47 | ## [`binary_outcome_variable`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/binary_outcome_variable/plot.py)
48 |
49 | Plot dichotomous variable.
50 |
51 | Simple dots with median lines - might be nice to add a kde to this as well.
52 |
53 | The y-axis is redundant here as there are only two options (`0.6` doesn't make any
54 | sense).
55 |
56 | 
57 |
58 | ## [`box_plot_w_scatter_distributions`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py)
59 |
60 | Bar plot with distributions.
61 |
62 | Thought I'd create a bar plot with scatter plots of the distributions adjacent to the
63 | bars, it was based off something else but I can't remember what. Bar plots are created
64 | from scratch using hlines etc, for no particular reason.
65 |
66 | Data was from tidy tuesday.
67 |
68 | 
69 |
70 | ## [`cat_weight`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2024/cat_weight/plot.py)
71 |
72 | Time series of the cat's diet.
73 |
74 | Cat was getting a little chunky towards the end of 2023 so had a resolution made for
75 | them to lose a bit of weight. Data collection is just a daily weigh-in; the average of
76 | this is taken (as there are sometimes multiple entries in a day) and then plotted along
77 | with a ten-day rolling average. Most days were covered; where there are missing days
78 | they're imputed using the average of the days either side, e.g. `(a, nan, b) -> (a,
79 | (a+b)/2, b)`, though this is just a plot.
80 |
81 | 
82 |
83 | ## [`line_plot_fill_between`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/line_plot_fill_between/plot.py)
84 |
85 | Visualise time tracking, how much over/under time.
86 |
87 | Mainly serves as an example of plotting with dates, and filling above / below
88 | particular values on a plot.
89 |
90 | Example of:
91 |
92 | - plotting with dates
93 | - different fonts
94 | - filling between lines
95 |
96 | 
97 |
98 | ## [`meaningless_points`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/meaningless_points/plot.py)
99 |
100 | Some random points.
101 |
102 | No real meaning to this - was messing about with some bokeh style bits (the effect, not
103 | the python library), so dumping here. Not sure I'm mad on the output - it's also slow
104 | as hell.
105 |
106 | 
107 |
108 | ## [`opinium_barchart`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/opinium_barchart/plot.py)
109 |
110 | Bar chart style copied from Opinium.
111 |
112 | Saw this on Twitter (I think) and thought I'd recreate it in mpl.
113 |
114 | 
115 |
116 | ## [`pandas_stacked_bars_with_values`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py)
117 |
118 | Horizontal stacked bars, based off of pandas.
119 |
120 | Could do these from scratch - pandas makes things a bit more straightforward though.
121 |
122 | Example of:
123 |
124 | - fixed formatting - setting categorical ticks at particular positions.
125 |
126 | 
127 |
128 | ## [`pos_neg_split_hbar`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/pos_neg_split_hbar/plot.py)
129 |
130 | Create split horizontal bar chart.
131 |
132 | Split by dichotomous variable, with bar classifications.
133 |
134 | Can be a bit messy - not sure I'm much of a fan - but wanted to re-create anyway.
135 |
136 | 
137 |
138 | ## [`scatter_distributions`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_distributions/plot.py)
139 |
140 | Distributions of multiple variables.
141 |
142 | For a set of variables, each with an accompanying continuous variable on the same scale,
143 | plot the distributions of the continuous variable. Might be useful to have a kde
144 | overlaid here.
145 |
146 | Example of:
147 |
148 | - fixed formatting
149 | - setting categorical ticks at particular positions.
150 |
151 | 
152 |
153 | ## [`scatter_matrix_w_kde_on_diag`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py)
154 |
155 | Scatter matrix with kde instead of histogram on the diagonal.
156 |
157 | Could probably adapt pd.scatter_matrix instead of doing it from scratch. Though with
158 | this approach the non-diagonal plots could be whatever instead of a scatter plot I
159 | guess...
160 |
161 | Would be good to make the upper diagonals differ from the lower diagonals a bit... maybe
162 | some sort of table from pd.cut on the others or whatever.
163 |
164 | I'd probably just use subplot_mosaic as well now - that's grown on me a lot since this.
165 |
166 | 
167 |
168 | ## [`scatter_w_outlined_text_insert`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py)
169 |
170 | Scatter plot with text inserted to scatter points.
171 |
172 | Data was taken from a tidy tuesday.
173 |
174 | Example of:
175 |
176 | - Outlining text elements in a plot.
177 |
178 | 
179 |
180 | ## [`split_x_axis_custom_legend`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/split_x_axis_custom_legend/plot.py)
181 |
182 | Example of creating multiple x-axes in order to plot years / months.
183 |
184 | The fig size needs to be pretty large in order to squeeze all the month names etc in
185 | here. Generated data looks a mess on these plots.
186 |
187 | Example of:
188 |
189 | - Custom legend
190 | - generating random date data
191 | - multiple x-axes to display years / months
192 |
193 | 
194 |
195 | ## [`stacked_bar_with_single_bars_layout`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py)
196 |
197 | Layout containing two bar plots and a bivariate plot between them.
198 |
199 | In this case it's a silly example of some data containing the social grade of
200 | Labradors, as well as the education group. The main plot is a stacked bar containing
201 | the breakdown of education group for each social grade.
202 |
203 | Don't think I'm too keen on the code for this plot - though it's not always so clear
204 | (to me) how to make "nice" code with a lot of matplotlib stuff.
205 |
206 | Obviously, the data is made up.
207 |
208 | 
209 |
210 | ## [`uk_hexmap`](https://github.com/geo7/plotting_examples/blob/main/plotting_examples/y2022/uk_hexmap/plot.py)
211 |
212 | Hex map for the UK constituencies.
213 |
214 | Some meaningless generated data - small multiples with hex maps can be useful sometimes
215 | though. Could be good to add in the geographically accurate version as well.
216 |
217 | 
218 |
--------------------------------------------------------------------------------
/dvc.lock:
--------------------------------------------------------------------------------
1 | schema: '2.0'
2 | stages:
3 | sav_to_csv_and_json:
4 | cmd: python -m plotting_examples.binary_outcome_variable.plot
5 | deps:
6 | - path: plotting_examples/binary_outcome_variable/plot.py
7 | md5: 1427c5fcadda1f47e11a817d2f55e61e
8 | size: 2109
9 | binary_outcome:
10 | cmd: python -m plotting_examples.binary_outcome_variable.plot
11 | deps:
12 | - path: plotting_examples/binary_outcome_variable/plot.py
13 | md5: b8b284298598c316bd0c661e7705ffda
14 | size: 2101
15 | outs:
16 | - path:
17 | images/GSL_projects_plotting_examples_plotting_examples_binary_outcome_variable_plot.png
18 | md5: 3e15bc2bdba0ea318625e341fc600adc
19 | size: 68657
20 | histogram_with_two_variables:
21 | cmd: python -m plotting_examples.histogram_with_two_variables.plot
22 | deps:
23 | - path: plotting_examples/histogram_with_two_variables/plot.py
24 | md5: f176e507379760e333fd3d21a0a03f66
25 | size: 4958
26 | outs:
27 | - path:
28 | images/GSL_projects_plotting_examples_plotting_examples_histogram_with_two_variables_plot.png
29 | md5: a42aff20a7970bd5a593dbb5e4a72083
30 | size: 121044
31 | pandas_stacked_bars_with_values:
32 | cmd: python -m plotting_examples.pandas_stacked_bars_with_values.plot
33 | deps:
34 | - path: plotting_examples/pandas_stacked_bars_with_values/plot.py
35 | md5: 41f46cb867b53e3d1fa2bd9ce9a7e59b
36 | size: 4276
37 | outs:
38 | - path:
39 | images/GSL_projects_plotting_examples_plotting_examples_pandas_stacked_bars_with_values_plot.png
40 | md5: b444b9c40539997369b306794dd55383
41 | size: 109980
42 | scatter_distributions:
43 | cmd: python -m plotting_examples.scatter_distributions.plot
44 | deps:
45 | - path: plotting_examples/scatter_distributions/plot.py
46 | md5: ca9e0719035769cb951018cdc60cae5a
47 | size: 6101
48 | outs:
49 | - path:
50 | images/GSL_projects_plotting_examples_plotting_examples_scatter_distributions_plot.png
51 | md5: ff1ea112d284d6995f21d1555da17868
52 | size: 1048818
53 | split_x_axis_custom_legend:
54 | cmd: python -m plotting_examples.split_x_axis_custom_legend.plot
55 | deps:
56 | - path: plotting_examples/split_x_axis_custom_legend/plot.py
57 | md5: 4cbef0469f1542bbba40f26080fa8147
58 | size: 6216
59 | outs:
60 | - path:
61 | images/GSL_projects_plotting_examples_plotting_examples_split_x_axis_custom_legend_plot.png
62 | md5: e31a09751ac8a7cb68c33f573888c04c
63 | size: 2059481
64 | trump_church_votes_2020:
65 | cmd: python -m plotting_examples.trump_church_votes_2020.plot
66 | deps:
67 | - path: plotting_examples/trump_church_votes_2020/plot.py
68 | md5: df8965d04acf1363a89adc51dbf8d823
69 | size: 5092
70 | outs:
71 | - path:
72 | images/GSL_projects_plotting_examples_plotting_examples_trump_church_votes_2020_plot.png
73 | md5: 79f742eb79b7cbcac22518634984fefe
74 | size: 442400
75 | work_time_tracking_plot:
76 | cmd: python -m plotting_examples.work_time_tracking.plot
77 | deps:
78 | - path: plotting_examples/work_time_tracking/plot.py
79 | md5: 295be3a691582e23266edcfdf75abe9f
80 | size: 4618
81 | outs:
82 | - path:
83 | images/GSL_projects_plotting_examples_plotting_examples_work_time_tracking_plot.png
84 | md5: 022854ffd33d0c1052ad561d3b0a29e4
85 | size: 420434
86 | bar_plot_w_custom_cmap:
87 | cmd: python -m plotting_examples.2022.bar_plot_w_custom_cmap.plot
88 | deps:
89 | - path: plotting_examples/2022/bar_plot_w_custom_cmap/plot.py
90 | md5: 672de9f4bd9f4e7c2095b572b58e1b9e
91 | size: 5697
92 | outs:
93 | - path:
94 | images/GSL_projects_plotting_examples_plotting_examples_2022_bar_plot_w_custom_cmap_plot.png
95 | md5: 149eaf7e57549fb2119ef508746f653d
96 | size: 324973
97 | sns_violin_plot_custom:
98 | cmd: python -m plotting_examples.sns_violin_plot_custom.plot
99 | deps:
100 | - path: plotting_examples/sns_violin_plot_custom/plot.py
101 | md5: 01e55cb3dd5935e2f317fdc90dbe64a4
102 | size: 1523
103 | outs:
104 | - path:
105 | images/GSL_projects_plotting_examples_plotting_examples_sns_violin_plot_custom_plot.png
106 | md5: 54134457d7681189e81518fc6214f4ec
107 | size: 100869
108 | scatter_matrix_w_kde_on_diag:
109 | cmd: python -m plotting_examples.scatter_matrix_w_kde_on_diag.plot
110 | deps:
111 | - path: plotting_examples/scatter_matrix_w_kde_on_diag/plot.py
112 | md5: 5f9b69d8a3762e617653d044b3fec13a
113 | size: 2216
114 | outs:
115 | - path:
116 | images/GSL_projects_plotting_examples_plotting_examples_scatter_matrix_w_kde_on_diag_plot.png
117 | md5: 7a6dec845e9bb9fe0a667b8b7937ae9c
118 | size: 504189
119 | pos_neg_split_hbar:
120 | cmd: python -m plotting_examples.pos_neg_split_hbar.plot
121 | deps:
122 | - path: plotting_examples/pos_neg_split_hbar/plot.py
123 | md5: 882938cd457ffdec5c72e67f2235c181
124 | size: 5017
125 | outs:
126 | - path:
127 | images/GSL_projects_plotting_examples_plotting_examples_pos_neg_split_hbar_plot.png
128 | md5: ea865e0df1293cc02d7843aa4140a387
129 | size: 160025
130 | default_plot:
131 | cmd: python -m plotting_examples.default_plot.plot
132 | deps:
133 | - path: plotting_examples/default_plot/plot.py
134 | md5: 7b305baa02cf195d47332f3b2586265a
135 | size: 865
136 | outs:
137 | - path: images/GSL_projects_plotting_examples_plotting_examples_default_plot_plot.png
138 | md5: 215cc189594c56704b2ab62ea6983b6d
139 | size: 54181
140 | 2022_default_plot:
141 | cmd: python -m plotting_examples.2022.default_plot.plot
142 | deps:
143 | - path: plotting_examples/2022/default_plot/plot.py
144 | md5: 3006f0a7bbbea7a22224095255f4bb5e
145 | size: 838
146 | outs:
147 | - path: images/2022/default_plot.png
148 | md5: 63f2d738e25563eff978e01bc834e480
149 | size: 25121
150 | 2022_binary_outcome_variable:
151 | cmd: python -m plotting_examples.2022.binary_outcome_variable.plot
152 | deps:
153 | - path: plotting_examples/2022/binary_outcome_variable/plot.py
154 | md5: 97ac99d8c9299f723f4e730fbfea90e2
155 | size: 2182
156 | outs:
157 | - path: images/2022/binary_outcome_variable.png
158 | md5: 605d409516acb29e92ba53afd5203a94
159 | size: 36620
160 | 2022_pandas_stacked_bars_with_values:
161 | cmd: python -m plotting_examples.2022.pandas_stacked_bars_with_values.plot
162 | deps:
163 | - path: plotting_examples/2022/pandas_stacked_bars_with_values/plot.py
164 | md5: 51afea4e6b06813f61887aad963b2b5d
165 | size: 4357
166 | outs:
167 | - path: images/2022/pandas_stacked_bars_with_values.png
168 | md5: 187f7f8d78b2a7bee60517b0bca5f463
169 | size: 57606
170 | 2022_pos_neg_split_hbar:
171 | cmd: python -m plotting_examples.2022.pos_neg_split_hbar.plot
172 | deps:
173 | - path: plotting_examples/2022/pos_neg_split_hbar/plot.py
174 | md5: 606148fb6a0601d3a2828b24a62f8614
175 | size: 5097
176 | outs:
177 | - path: images/2022/pos_neg_split_hbar.png
178 | md5: 7102b82280627073c912a37cbb5bddfe
179 | size: 82422
180 | 2022_histogram_with_two_variables:
181 | cmd: python -m plotting_examples.2022.histogram_with_two_variables.plot
182 | deps:
183 | - path: plotting_examples/2022/histogram_with_two_variables/plot.py
184 | md5: f1a1d50c1b05b9927b749d0acaad438e
185 | size: 5248
186 | outs:
187 | - path: images/2022/histogram_with_two_variables.png
188 | md5: 5ee46a399d1635bbca0acbb38baa417d
189 | size: 67845
190 | 2022_split_x_axis_custom_legend:
191 | cmd: python -m plotting_examples.2022.split_x_axis_custom_legend.plot
192 | deps:
193 | - path: plotting_examples/2022/split_x_axis_custom_legend/plot.py
194 | md5: d96a9320c4b7009e9f110bb891d1182d
195 | size: 6297
196 | outs:
197 | - path: images/2022/split_x_axis_custom_legend.png
198 | md5: da1c3324e4d037a49696bdb1bae75bd5
199 | size: 1080487
200 | 2022_scatter_matrix_w_kde_on_diag:
201 | cmd: python -m plotting_examples.2022.scatter_matrix_w_kde_on_diag.plot
202 | deps:
203 | - path: plotting_examples/2022/scatter_matrix_w_kde_on_diag/plot.py
204 | md5: 5f0e7e326606d868b8c4411c288e00cb
205 | size: 2522
206 | outs:
207 | - path: images/2022/scatter_matrix_w_kde_on_diag.png
208 | md5: 14a39a1df810953354cde040c1cf10fb
209 | size: 217990
210 | 2022_sns_violin_plot_custom:
211 | cmd: python -m plotting_examples.2022.sns_violin_plot_custom.plot
212 | deps:
213 | - path: plotting_examples/2022/sns_violin_plot_custom/plot.py
214 | md5: e5aa38595903134348a4b97e12af4b51
215 | size: 1604
216 | outs:
217 | - path: images/2022/sns_violin_plot_custom.png
218 | md5: 7a451fc7c780d5e165f7325327ded3ce
219 | size: 56996
220 | 2022_work_time_tracking:
221 | cmd: python -m plotting_examples.2022.work_time_tracking.plot
222 | deps:
223 | - path: plotting_examples/2022/work_time_tracking/plot.py
224 | md5: 41af922980ad6d06a0813c3f6dff37af
225 | size: 4704
226 | outs:
227 | - path: images/2022/work_time_tracking.png
228 | md5: 5e468011855755d5a8238e7e8b51063c
229 | size: 227174
230 | 2022_scatter_distributions:
231 | cmd: python -m plotting_examples.2022.scatter_distributions.plot
232 | deps:
233 | - path: plotting_examples/2022/scatter_distributions/plot.py
234 | md5: 0be18ddf32bdd5a0a5768d214d49c5c2
235 | size: 6181
236 | outs:
237 | - path: images/2022/scatter_distributions.png
238 | md5: d102a1f3534a68d11439103b85f7b4b1
239 | size: 520358
240 | 2022_bar_plot_w_custom_cmap:
241 | cmd: python -m plotting_examples.2022.bar_plot_w_custom_cmap.plot
242 | deps:
243 | - path: plotting_examples/2022/bar_plot_w_custom_cmap/plot.py
244 | md5: d4e5771aeb11a5c78cf16bc01681b014
245 | size: 5778
246 | outs:
247 | - path: images/2022/bar_plot_w_custom_cmap.png
248 | md5: f90122b668c246a8ef62d4c8302daf68
249 | size: 174070
250 | 2022_week42:
251 | cmd: python -m plotting_examples.2022.week42.plot
252 | deps:
253 | - path: plotting_examples/2022/week42/plot.py
254 | md5: 1abbfdf49c2abe180492a606e4760c98
255 | size: 12120
256 | outs:
257 | - path: images/2022/week42.png
258 | md5: f6bcc5fd9c521a9a8b20c8a11659004d
259 | size: 1146056
260 | 2022_box_plot_w_scatter_distributions:
261 | cmd: python -m plotting_examples.2022.box_plot_w_scatter_distributions.plot
262 | deps:
263 | - path: plotting_examples/2022/box_plot_w_scatter_distributions/plot.py
264 | md5: 0ac3c9c54beeac3e1e5395ab8c6a04b0
265 | size: 12148
266 | outs:
267 | - path: images/2022/box_plot_w_scatter_distributions.png
268 | md5: a4db1dd69327dcf7166701d7dce61ef6
269 | size: 478450
270 | 2022_line_plot_fill_between:
271 | cmd: python -m plotting_examples.2022.line_plot_fill_between.plot
272 | deps:
273 | - path: plotting_examples/2022/line_plot_fill_between/plot.py
274 | md5: d4de88d563946fdd04daa65fa4e46280
275 | size: 5016
276 | outs:
277 | - path: images/2022/line_plot_fill_between.png
278 | md5: 31ccdfaec1719946aaaa86fe69e01626
279 | size: 231583
280 | y2022_box_plot_w_scatter_distributions:
281 | cmd: poetry run python -m plotting_examples.y2022.box_plot_w_scatter_distributions.plot
282 | deps:
283 | - path: plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py
284 | hash: md5
285 | md5: 75dc92ce6ff62d836d8d1cb15377579c
286 | size: 11629
287 | outs:
288 | - path: images/y2022/box_plot_w_scatter_distributions.png
289 | hash: md5
290 | md5: 33dc283243d8df737f43f92f990c62ca
291 | size: 469728
292 | y2022_sns_violin_plot_custom:
293 | cmd: poetry run python -m plotting_examples.y2022.sns_violin_plot_custom.plot
294 | deps:
295 | - path: plotting_examples/y2022/sns_violin_plot_custom/plot.py
296 | hash: md5
297 | md5: f1613352c0eabb1b8a7e40714b7ee8c3
298 | size: 1975
299 | outs:
300 | - path: images/y2022/sns_violin_plot_custom.png
301 | md5: 05e5eca57ef23097e878e8c603a3f22e
302 | size: 67429
303 | y2022_bar_plot_w_custom_cmap:
304 | cmd: poetry run python -m plotting_examples.y2022.bar_plot_w_custom_cmap.plot
305 | deps:
306 | - path: plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py
307 | hash: md5
308 | md5: f7d8241b1ed31f2754eed7cc64442423
309 | size: 6031
310 | outs:
311 | - path: images/y2022/bar_plot_w_custom_cmap.png
312 | hash: md5
313 | md5: 6be8b259164e4992a3e62f5255199e02
314 | size: 174412
315 | y2022_histogram_with_two_variables:
316 | cmd: poetry run python -m plotting_examples.y2022.histogram_with_two_variables.plot
317 | deps:
318 | - path: plotting_examples/y2022/histogram_with_two_variables/plot.py
319 | hash: md5
320 | md5: 16ab57c6b359fdee3d75e5dabc93d4ce
321 | size: 5849
322 | outs:
323 | - path: images/y2022/histogram_with_two_variables.png
324 | md5: 2e0d94432170ec007918cead8ddb08cb
325 | size: 58637
326 | y2022_binary_outcome_variable:
327 | cmd: poetry run python -m plotting_examples.y2022.binary_outcome_variable.plot
328 | deps:
329 | - path: plotting_examples/y2022/binary_outcome_variable/plot.py
330 | hash: md5
331 | md5: f8468c0554abebb6a4fc31f9ea511457
332 | size: 2790
333 | outs:
334 | - path: images/y2022/binary_outcome_variable.png
335 | hash: md5
336 | md5: 55b0cfcab7573baa0f5363f7f516e075
337 | size: 36069
338 | y2022_pos_neg_split_hbar:
339 | cmd: poetry run python -m plotting_examples.y2022.pos_neg_split_hbar.plot
340 | deps:
341 | - path: plotting_examples/y2022/pos_neg_split_hbar/plot.py
342 | hash: md5
343 | md5: 6fe3dcb9f263d6d5aa331ab522434de7
344 | size: 5744
345 | outs:
346 | - path: images/y2022/pos_neg_split_hbar.png
347 | md5: 09185fc58c132ebac7555ecf30463f81
348 | size: 73712
349 | y2022_scatter_matrix_w_kde_on_diag:
350 | cmd: poetry run python -m plotting_examples.y2022.scatter_matrix_w_kde_on_diag.plot
351 | deps:
352 | - path: plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py
353 | hash: md5
354 | md5: 3b183d57dce913fb84246f4dc10569be
355 | size: 3363
356 | outs:
357 | - path: images/y2022/scatter_matrix_w_kde_on_diag.png
358 | hash: md5
359 | md5: 9cbe0ed36d6b99d3832fe5d60b710cdd
360 | size: 325647
361 | y2022_line_plot_fill_between:
362 | cmd: poetry run python -m plotting_examples.y2022.line_plot_fill_between.plot
363 | deps:
364 | - path: plotting_examples/y2022/line_plot_fill_between/plot.py
365 | hash: md5
366 | md5: e1c730d690f93eb35b930ec00454fe08
367 | size: 5136
368 | outs:
369 | - path: images/y2022/line_plot_fill_between.png
370 | md5: 9dcd62a428039331ee813626f2c53089
371 | size: 231583
372 | y2022_pandas_stacked_bars_with_values:
373 | cmd: poetry run python -m plotting_examples.y2022.pandas_stacked_bars_with_values.plot
374 | deps:
375 | - path: plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py
376 | hash: md5
377 | md5: d13b08351135216105931d12ce145b3c
378 | size: 4375
379 | outs:
380 | - path: images/y2022/pandas_stacked_bars_with_values.png
381 | md5: 199cbc2e9ac00928ff31306bb0f4ab5e
382 | size: 61398
383 | y2022_split_x_axis_custom_legend:
384 | cmd: poetry run python -m plotting_examples.y2022.split_x_axis_custom_legend.plot
385 | deps:
386 | - path: plotting_examples/y2022/split_x_axis_custom_legend/plot.py
387 | hash: md5
388 | md5: e45ab4f896af74c62953922a2f03d39e
389 | size: 6806
390 | outs:
391 | - path: images/y2022/split_x_axis_custom_legend.png
392 | hash: md5
393 | md5: ad997f3edd4f6e0e3c68f3f6d51f6051
394 | size: 1691509
395 | y2022_scatter_distributions:
396 | cmd: poetry run python -m plotting_examples.y2022.scatter_distributions.plot
397 | deps:
398 | - path: plotting_examples/y2022/scatter_distributions/plot.py
399 | hash: md5
400 | md5: 7d668af948c9c04578842e57d5c1e0cd
401 | size: 7129
402 | outs:
403 | - path: images/y2022/scatter_distributions.png
404 | hash: md5
405 | md5: 78dcd94a72b9a63beb8c7f755d1aeb4a
406 | size: 549714
407 | y2022_default_plot:
408 | cmd: poetry run python -m plotting_examples.y2022.default_plot.plot
409 | deps:
410 | - path: plotting_examples/y2022/default_plot/plot.py
411 | hash: md5
412 | md5: 29d823025843b30bfc00b615c5c23edb
413 | size: 1182
414 | outs:
415 | - path: images/y2022/default_plot.png
416 | md5: 1d81f2b1567c55dec832acd4ac5dca60
417 | size: 24778
418 | y2022_scatter_w_outlined_text_insert:
419 | cmd: poetry run python -m plotting_examples.y2022.scatter_w_outlined_text_insert.plot
420 | deps:
421 | - path: plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py
422 | hash: md5
423 | md5: e775b5e81d6ef7ccb89058e5c78a1922
424 | size: 4853
425 | outs:
426 | - path: images/y2022/scatter_w_outlined_text_insert.png
427 | md5: 6de49552e5ea5e034d052d98943f89b9
428 | size: 481441
429 | y2022_opinium_barchart:
430 | cmd: poetry run python -m plotting_examples.y2022.opinium_barchart.plot
431 | deps:
432 | - path: plotting_examples/y2022/opinium_barchart/plot.py
433 | hash: md5
434 | md5: d464e5a4363c14d09582154a6f5392c8
435 | size: 5707
436 | outs:
437 | - path: images/y2022/opinium_barchart.png
438 | md5: 0f840e7d0a449d057ff8a5d5c27eda48
439 | size: 65128
440 | y2022_uk_hexmap:
441 | cmd: poetry run python -m plotting_examples.y2022.uk_hexmap.plot
442 | deps:
443 | - path: plotting_examples/y2022/uk_hexmap/plot.py
444 | hash: md5
445 | md5: 2f043cde045257f9a78e99e19989bc23
446 | size: 3956
447 | outs:
448 | - path: images/y2022/uk_hexmap.png
449 | hash: md5
450 | md5: e7fe06a9f73be99214870928b9768d3a
451 | size: 536769
452 | y2022_meaningless_points:
453 | cmd: poetry run python -m plotting_examples.y2022.meaningless_points.plot
454 | deps:
455 | - path: plotting_examples/y2022/meaningless_points/plot.py
456 | hash: md5
457 | md5: 93c3a423474a8309f9004adfcc4effda
458 | size: 2630
459 | outs:
460 | - path: images/y2022/meaningless_points.png
461 | md5: 0abf95fc710641a8564d5df871600131
462 | size: 273061
463 | y2022_stacked_bar_with_single_bars_layout:
464 | cmd: poetry run python -m plotting_examples.y2022.stacked_bar_with_single_bars_layout.plot
465 | deps:
466 | - path: plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py
467 | hash: md5
468 | md5: 101d705bcfee29021b8772ece52d8bd9
469 | size: 15024
470 | outs:
471 | - path: images/y2022/stacked_bar_with_single_bars_layout.png
472 | md5: a0f95c280b1fe5edf4f2946654be8ed6
473 | size: 229895
474 | y2024_stacked_bar_with_single_bars_layout:
475 | cmd: poetry run python -m plotting_examples.y2024.stacked_bar_with_single_bars_layout.plot
476 | deps:
477 | - path: plotting_examples/y2024/stacked_bar_with_single_bars_layout/plot.py
478 | hash: md5
479 | md5: 42d5d36996dcebf858d3dc20de2170e7
480 | size: 15223
481 | outs:
482 | - path: images/y2024/stacked_bar_with_single_bars_layout.png
483 | md5: a0f95c280b1fe5edf4f2946654be8ed6
484 | size: 229895
485 | y2024_mish_weight:
486 | cmd: poetry run python -m plotting_examples.y2024.mish_weight.plot
487 | deps:
488 | - path: plotting_examples/y2024/mish_weight/plot.py
489 | hash: md5
490 | md5: c23fddaa7cda28671d084731a63dab0e
491 | size: 15199
492 | outs:
493 | - path: images/y2024/mish_weight.png
494 | hash: md5
495 | md5: a0f95c280b1fe5edf4f2946654be8ed6
496 | size: 229895
497 | y2024_cat_weight:
498 | cmd: poetry run python -m plotting_examples.y2024.cat_weight.plot
499 | deps:
500 | - path: plotting_examples/y2024/cat_weight/plot.py
501 | hash: md5
502 | md5: ffb3af869133e167f72b1b9e234991c1
503 | size: 11067
504 | outs:
505 | - path: images/y2024/cat_weight.png
506 | hash: md5
507 | md5: 7ac39610e8a8e23a8582c232da7fc7c5
508 | size: 1563392
509 |
--------------------------------------------------------------------------------
/dvc.yaml:
--------------------------------------------------------------------------------
1 | stages:
2 | y2022_bar_plot_w_custom_cmap:
3 | cmd: poetry run python -m plotting_examples.y2022.bar_plot_w_custom_cmap.plot
4 | deps:
5 | - plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py
6 | outs:
7 | - images/y2022/bar_plot_w_custom_cmap.png:
8 | cache: false
9 | wdir: .
10 | y2022_binary_outcome_variable:
11 | cmd: poetry run python -m plotting_examples.y2022.binary_outcome_variable.plot
12 | deps:
13 | - plotting_examples/y2022/binary_outcome_variable/plot.py
14 | outs:
15 | - images/y2022/binary_outcome_variable.png:
16 | cache: false
17 | wdir: .
18 | y2022_box_plot_w_scatter_distributions:
19 | cmd: poetry run python -m plotting_examples.y2022.box_plot_w_scatter_distributions.plot
20 | deps:
21 | - plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py
22 | outs:
23 | - images/y2022/box_plot_w_scatter_distributions.png:
24 | cache: false
25 | wdir: .
26 | y2022_default_plot:
27 | cmd: poetry run python -m plotting_examples.y2022.default_plot.plot
28 | deps:
29 | - plotting_examples/y2022/default_plot/plot.py
30 | outs:
31 | - images/y2022/default_plot.png:
32 | cache: false
33 | wdir: .
34 | y2022_histogram_with_two_variables:
35 | cmd: poetry run python -m plotting_examples.y2022.histogram_with_two_variables.plot
36 | deps:
37 | - plotting_examples/y2022/histogram_with_two_variables/plot.py
38 | outs:
39 | - images/y2022/histogram_with_two_variables.png:
40 | cache: false
41 | wdir: .
42 | y2022_line_plot_fill_between:
43 | cmd: poetry run python -m plotting_examples.y2022.line_plot_fill_between.plot
44 | deps:
45 | - plotting_examples/y2022/line_plot_fill_between/plot.py
46 | outs:
47 | - images/y2022/line_plot_fill_between.png:
48 | cache: false
49 | wdir: .
50 | y2022_meaningless_points:
51 | cmd: poetry run python -m plotting_examples.y2022.meaningless_points.plot
52 | deps:
53 | - plotting_examples/y2022/meaningless_points/plot.py
54 | outs:
55 | - images/y2022/meaningless_points.png:
56 | cache: false
57 | wdir: .
58 | y2022_opinium_barchart:
59 | cmd: poetry run python -m plotting_examples.y2022.opinium_barchart.plot
60 | deps:
61 | - plotting_examples/y2022/opinium_barchart/plot.py
62 | outs:
63 | - images/y2022/opinium_barchart.png:
64 | cache: false
65 | wdir: .
66 | y2022_pandas_stacked_bars_with_values:
67 | cmd: poetry run python -m plotting_examples.y2022.pandas_stacked_bars_with_values.plot
68 | deps:
69 | - plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py
70 | outs:
71 | - images/y2022/pandas_stacked_bars_with_values.png:
72 | cache: false
73 | wdir: .
74 | y2022_pos_neg_split_hbar:
75 | cmd: poetry run python -m plotting_examples.y2022.pos_neg_split_hbar.plot
76 | deps:
77 | - plotting_examples/y2022/pos_neg_split_hbar/plot.py
78 | outs:
79 | - images/y2022/pos_neg_split_hbar.png:
80 | cache: false
81 | wdir: .
82 | y2022_scatter_distributions:
83 | cmd: poetry run python -m plotting_examples.y2022.scatter_distributions.plot
84 | deps:
85 | - plotting_examples/y2022/scatter_distributions/plot.py
86 | outs:
87 | - images/y2022/scatter_distributions.png:
88 | cache: false
89 | wdir: .
90 | y2022_scatter_matrix_w_kde_on_diag:
91 | cmd: poetry run python -m plotting_examples.y2022.scatter_matrix_w_kde_on_diag.plot
92 | deps:
93 | - plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py
94 | outs:
95 | - images/y2022/scatter_matrix_w_kde_on_diag.png:
96 | cache: false
97 | wdir: .
98 | y2022_scatter_w_outlined_text_insert:
99 | cmd: poetry run python -m plotting_examples.y2022.scatter_w_outlined_text_insert.plot
100 | deps:
101 | - plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py
102 | outs:
103 | - images/y2022/scatter_w_outlined_text_insert.png:
104 | cache: false
105 | wdir: .
106 | y2022_sns_violin_plot_custom:
107 | cmd: poetry run python -m plotting_examples.y2022.sns_violin_plot_custom.plot
108 | deps:
109 | - plotting_examples/y2022/sns_violin_plot_custom/plot.py
110 | outs:
111 | - images/y2022/sns_violin_plot_custom.png:
112 | cache: false
113 | wdir: .
114 | y2022_split_x_axis_custom_legend:
115 | cmd: poetry run python -m plotting_examples.y2022.split_x_axis_custom_legend.plot
116 | deps:
117 | - plotting_examples/y2022/split_x_axis_custom_legend/plot.py
118 | outs:
119 | - images/y2022/split_x_axis_custom_legend.png:
120 | cache: false
121 | wdir: .
122 | y2022_stacked_bar_with_single_bars_layout:
123 | cmd: poetry run python -m plotting_examples.y2022.stacked_bar_with_single_bars_layout.plot
124 | deps:
125 | - plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py
126 | outs:
127 | - images/y2022/stacked_bar_with_single_bars_layout.png:
128 | cache: false
129 | wdir: .
130 | y2022_uk_hexmap:
131 | cmd: poetry run python -m plotting_examples.y2022.uk_hexmap.plot
132 | deps:
133 | - plotting_examples/y2022/uk_hexmap/plot.py
134 | outs:
135 | - images/y2022/uk_hexmap.png:
136 | cache: false
137 | wdir: .
138 | y2024_cat_weight:
139 | cmd: poetry run python -m plotting_examples.y2024.cat_weight.plot
140 | deps:
141 | - plotting_examples/y2024/cat_weight/plot.py
142 | outs:
143 | - images/y2024/cat_weight.png:
144 | cache: false
145 | wdir: .
146 |
--------------------------------------------------------------------------------
/generate_readme.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate plots at the end of the README.
3 |
4 | Bit of a hack - but works for now, this is mainly just to display all the created plots
5 | in the README.
6 | """
7 |
8 | from __future__ import annotations
9 |
10 | import ast
11 | import re
12 | from pathlib import Path
13 |
14 | from PIL import Image
15 |
# URL template for a plot's source on GitHub; `{}` is filled with
# "<year>/<plot_name>", e.g. "y2022/default_plot".
CODE = (
    "https://github.com/geo7/plotting_examples/blob/main/"
    "plotting_examples/{}/plot.py"
)
19 |
20 |
def resize_image_if_needed(
    *,
    im: str,
    max_height: int = 500,
) -> None:
    """
    Shrink an image in place so that it is at most `max_height` pixels tall.

    The width is scaled by the same factor as the height, so the aspect ratio
    is preserved (up to integer truncation). Images that are already within
    the limit are left untouched. When a resize does happen the file at `im`
    is overwritten, so the original is lost.

    Args:
    ----
        im (str):
            Path to image file.
        max_height (int):
            Maximum allowed height in pixels.

    """
    # Use a context manager so the source file handle is closed before the
    # same path is overwritten below.
    with Image.open(im) as image:
        width, height = image.size
        if height <= max_height:
            return

        scale = max_height / height
        # Pin the height to the limit (avoids float truncation giving
        # max_height - 1) and never let a very tall, thin image collapse to a
        # zero width, which would be rejected by `resize`.
        new_size = (max(1, int(width * scale)), max_height)
        resized = image.resize(new_size)

    resized.save(im)
51 |
52 |
# Plot names left out of the generated README section. A plot is skipped when
# any of these strings occurs in its name.
EXCLUDE_PLOTS = [
    "default_plot",  # Just the template used to start a new plot off.
    "sns_violin_plot_custom",  # Got bored of seeing this one.
    # This one was annoying as well - it's an example of building a histogram
    # from scratch with patches.
    "histogram_with_two_variables",
]
62 |
63 |
def docstring_from_py_module(*, mod_path: str | Path) -> str:
    """
    Return the module-level docstring of the python file at `mod_path`.

    Docstrings in plot.py contain context about the plot, these are then used
    in the README.

    Args:
    ----
        mod_path (str | Path):
            Path to the python module to read.

    Returns:
    -------
        The module docstring, cleaned up by `ast.get_docstring`.

    Raises:
    ------
        ValueError: If the module has no docstring, or an empty one.

    """
    # Read with an explicit encoding so the result does not depend on the
    # platform default.
    source = Path(mod_path).read_text(encoding="utf8")
    docstr = ast.get_docstring(ast.parse(source, filename=str(mod_path)))

    # `ast.get_docstring` gives None when there is no docstring and "" when it
    # is empty - both mean there is nothing to show in the README.
    if not docstr:
        msg = f"No docstring found for : {mod_path}"
        raise ValueError(msg)

    return docstr
85 |
86 |
def main() -> int:
    """
    Generate readme with plots and docstring extracts.

    For every image under `./images/<year>/` the docstring of the matching
    `./plotting_examples/<year>/<name>/plot.py` is collected. Everything in
    README.md after the automation marker is then replaced with a list of
    links, followed by one section per plot (docstring and image).

    Returns
    -------
        0, used as the process exit status.

    """
    years = [
        # This should get the years up to 2099... If I'm still using matplotlib
        # at that point I'll consider that a success.... or maybe a failure,
        # not sure.
        Path(x.name).stem
        for x in sorted(Path("./plotting_examples").glob("*"))
        if "y20" in str(x)
    ]

    readme_data = {}

    for year in years:
        images = sorted(Path(f"./images/{year}").glob("*"))

        # For each image want to build up a dictionary of the image path within
        # the repo, and the docstring from the respective python module. Then
        # in the README the python docstring will be added alongside the image.
        for img in images:
            dir_from_img_path = Path(img.name).stem

            code_path = (
                Path("./plotting_examples") / str(year) / dir_from_img_path / "plot.py"
            )

            # Skip macOS folder metadata that can end up in the images dir.
            if "DS_Store" in str(code_path):
                continue

            # Not sure why this _wouldn't_ exist
            if not img.exists():
                raise ValueError

            docstr = docstring_from_py_module(mod_path=code_path)

            readme_data[dir_from_img_path] = {
                "img_path": img,
                "doc_str": docstr,
            }

    # Might as well sort the generated plots, dropping the excluded ones.
    readme_data = {
        x: readme_data[x]
        for x in sorted(readme_data)
        if not any(exclude in x for exclude in EXCLUDE_PLOTS)
    }

    # Create values to append to readme.
    readme_update = "\n\n# Plots\n\n"

    # Create some bullet points with the plot names
    for title in readme_data:
        readme_update += f"* [`{title}`](https://github.com/geo7/plotting_examples?tab=readme-ov-file#{title})\n"

    readme_update += "\n"

    for title, data in readme_data.items():
        # The year is recovered from the image path (images/<year>/<name>.png).
        plot_year = re.findall(r".*(y\d{4}).*", str(data["img_path"]))[0]
        readme_update += "\n\n"
        url_path = f"{plot_year}/{title}"
        readme_update += f"## [`{title}`]({CODE.format(url_path)})\n\n"
        readme_update += str(data["doc_str"])
        readme_update += "\n\n"
        # Markdown image link, this is what actually displays the plot.
        md_img_format = f"![{title}]({data['img_path']})"
        readme_update += md_img_format

    # Update README

    # This is used to signal where automated content starts.
    rm_split = "[comment]: # (Automate plots beneath this.)"
    readme_path = Path("README.md")
    rm_txt = readme_path.read_text(encoding="utf8")

    # Keep the hand written part, regenerate everything after the marker.
    rm_txt = rm_txt.split(rm_split)[0]
    rm_txt = rm_txt + rm_split + readme_update
    # Ensure new line at eof
    rm_txt += "\n"

    readme_path.write_text(rm_txt, encoding="utf8")

    return 0
173 |
174 |
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    raise SystemExit(main())
177 |
--------------------------------------------------------------------------------
/images/y2022/bar_plot_w_custom_cmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/bar_plot_w_custom_cmap.png
--------------------------------------------------------------------------------
/images/y2022/binary_outcome_variable.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/binary_outcome_variable.png
--------------------------------------------------------------------------------
/images/y2022/box_plot_w_scatter_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/box_plot_w_scatter_distributions.png
--------------------------------------------------------------------------------
/images/y2022/default_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/default_plot.png
--------------------------------------------------------------------------------
/images/y2022/histogram_with_two_variables.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/histogram_with_two_variables.png
--------------------------------------------------------------------------------
/images/y2022/line_plot_fill_between.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/line_plot_fill_between.png
--------------------------------------------------------------------------------
/images/y2022/meaningless_points.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/meaningless_points.png
--------------------------------------------------------------------------------
/images/y2022/opinium_barchart.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/opinium_barchart.png
--------------------------------------------------------------------------------
/images/y2022/pandas_stacked_bars_with_values.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/pandas_stacked_bars_with_values.png
--------------------------------------------------------------------------------
/images/y2022/pos_neg_split_hbar.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/pos_neg_split_hbar.png
--------------------------------------------------------------------------------
/images/y2022/scatter_distributions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_distributions.png
--------------------------------------------------------------------------------
/images/y2022/scatter_matrix_w_kde_on_diag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_matrix_w_kde_on_diag.png
--------------------------------------------------------------------------------
/images/y2022/scatter_w_outlined_text_insert.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/scatter_w_outlined_text_insert.png
--------------------------------------------------------------------------------
/images/y2022/sns_violin_plot_custom.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/sns_violin_plot_custom.png
--------------------------------------------------------------------------------
/images/y2022/split_x_axis_custom_legend.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/split_x_axis_custom_legend.png
--------------------------------------------------------------------------------
/images/y2022/stacked_bar_with_single_bars_layout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/stacked_bar_with_single_bars_layout.png
--------------------------------------------------------------------------------
/images/y2022/uk_hexmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2022/uk_hexmap.png
--------------------------------------------------------------------------------
/images/y2024/cat_weight.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/images/y2024/cat_weight.png
--------------------------------------------------------------------------------
/mypy.ini:
--------------------------------------------------------------------------------
1 | [mypy]
2 |
3 | [mypy-numpy.*]
4 | ignore_missing_imports = True
5 |
6 | [mypy-pandas.*]
7 | ignore_missing_imports = True
8 |
9 | [mypy-geopandas.*]
10 | ignore_missing_imports = True
11 |
12 | [mypy-matplotlib.*]
13 | ignore_missing_imports = True
14 |
15 | [mypy-seaborn.*]
16 | ignore_missing_imports = True
17 |
18 | [mypy-PIL.*]
19 | ignore_missing_imports = True
20 |
21 | [mypy-yaml.*]
22 | ignore_missing_imports = True
23 |
--------------------------------------------------------------------------------
/plotting_examples/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Plotting examples of various kinds.
3 |
4 | Some of these might be developed into little projects if they're interesting
5 | enough; the main idea is that they're examples which can be leveraged for other
6 | work though.
7 | """
8 |
--------------------------------------------------------------------------------
/plotting_examples/dvc_entry.py:
--------------------------------------------------------------------------------
1 | """Create an entry in the dvc.yaml file for the particular plot."""
2 |
3 | from __future__ import annotations
4 |
5 | import pathlib
6 |
7 | import yaml
8 |
9 | from plotting_examples.extract_year_name import extract_year_name_from_plot_py
10 |
11 |
def add_to_dvc(*, path: pathlib.Path) -> None:
    """
    Add a stage for the plot at `path` to dvc.yaml, unless it already exists.

    The stage is named `<year>_<name>`, runs the plot module with poetry,
    depends on the plot's `plot.py` and outputs `images/<year>/<name>.png`
    (not cached by dvc). dvc.yaml is read from the current working directory.

    Args:
    ----
        path (pathlib.Path):
            Path to the plot.py file of the plot.

    """
    year, name = extract_year_name_from_plot_py(file=str(path))

    dvc_file = pathlib.Path("dvc.yaml")
    # An empty dvc.yaml parses to None, and an empty `stages:` key has a None
    # value - treat both as "no stages yet" rather than crashing.
    dvc = yaml.safe_load(dvc_file.read_text(encoding="utf8")) or {}
    if dvc.get("stages") is None:
        dvc["stages"] = {}

    stage_name = f"{year}_{name}"
    if stage_name in dvc["stages"]:
        # Already registered, so there is nothing to write.
        return

    # Project not yet added to dvc.yaml
    dvc["stages"][stage_name] = {
        "wdir": ".",
        "cmd": f"poetry run python -m plotting_examples.{year}.{name}.plot",
        "deps": [f"plotting_examples/{year}/{name}/plot.py"],
        "outs": [{f"images/{year}/{name}.png": {"cache": False}}],
    }

    # Write with the same encoding the file was read with.
    dvc_file.write_text(yaml.dump(dvc), encoding="utf8")
30 |
--------------------------------------------------------------------------------
/plotting_examples/extract_year_name.py:
--------------------------------------------------------------------------------
1 | """Get year, name from path to plotting file."""
2 |
3 | from __future__ import annotations
4 |
5 | from pathlib import Path
6 |
7 |
def extract_year_name_from_plot_py(*, file: str) -> tuple[str, str]:
    """
    Get the year directory and plot name from the path of a plot.py file.

    Given a path such as
    `/home/.../plotting_examples/plotting_examples/y2022/default_plot/plot.py`
    this returns `("y2022", "default_plot")`, i.e. the names of the two
    directories directly above the file. Only the last three path components
    are used, so relative paths and differently named checkouts work as well.

    Args:
    ----
        file (str):
            Path to a plot's python file, typically `__file__`.

    Returns:
    -------
        Tuple of (year directory name, plot directory name).

    Raises:
    ------
        ValueError: If `file` is not a .py file, or does not have two parent
            directories to take the year and name from.

    """
    pth = Path(file)
    if pth.suffix != ".py":
        msg = "Expect this to be run on .py files."
        raise ValueError(msg)

    # <year>/<name>/plot.py - the plot directory and the year directory above it.
    name = pth.parent.name
    year = pth.parent.parent.name
    if not year or not name:
        msg = f"Expect a path like <year>/<name>/plot.py, got: {file}"
        raise ValueError(msg)
    return year, name
30 |
--------------------------------------------------------------------------------
/plotting_examples/save_plot_output.py:
--------------------------------------------------------------------------------
1 | """Util for saving output from plots."""
2 |
3 | from __future__ import annotations
4 |
5 | from pathlib import Path
6 | from typing import TYPE_CHECKING
7 |
8 | from plotting_examples.extract_year_name import extract_year_name_from_plot_py
9 |
10 | if TYPE_CHECKING:
11 | import matplotlib as mpl
12 |
13 |
def save_plot(
    *,
    fig: mpl.figure.Figure,
    file: str,
    dpi: int = 150,
) -> None:
    """
    Save `fig` as `./images/<year>/<name>.png`.

    The year and name are taken from `file` using
    `extract_year_name_from_plot_py`, and the images directory is relative to
    the current working directory.

    Args:
    ----
        fig (mpl.figure.Figure):
            Figure to save.
        file (str):
            Path to the plot.py file that created the figure, typically
            `__file__`.
        dpi (int):
            Resolution passed through to `fig.savefig`.

    """
    year, name = extract_year_name_from_plot_py(file=file)

    year_dir = Path("./images") / year
    # Create ./images as well if it is missing, and don't fail if the year
    # directory already exists (or appears between a check and the mkdir).
    year_dir.mkdir(parents=True, exist_ok=True)

    png_pth = year_dir / (name + ".png")
    fig.savefig(png_pth, dpi=dpi)
30 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/__init__.py:
--------------------------------------------------------------------------------
1 | """Plots from 2022."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/bar_plot_w_custom_cmap/__init__.py:
--------------------------------------------------------------------------------
1 | """Bar plot with custom color map."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/bar_plot_w_custom_cmap/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Bar plot with custom cmap.
4 |
5 | Based on this tweet: https://twitter.com/ryanburge/status/1505602885215834112 - wanted
6 | to create something with a similar effect using mpl.
7 |
8 | Example of:
9 |
10 | - Different font types (using monospace font)
11 | - using different colours for bars depending on their values (custom cmap).
12 | - padding around the axis using rc parameters
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | import pathlib
18 |
19 | import matplotlib as mpl
20 | import matplotlib.pyplot as plt
21 | import matplotlib.ticker as plticker
22 | import numpy as np
23 | import pandas as pd
24 |
25 | from plotting_examples import dvc_entry, save_plot_output
26 | from plotting_examples.y2022 import metadata
27 |
# Seeded generator so the sample data is the same on every run.
np_rnd = np.random.Generator(np.random.MT19937(0))


def generate_data() -> pd.DataFrame:
    """
    Create sample data.

    Returns a frame with 1,000 rows and two string columns, `race` (drawn
    uniformly) and `church_attendance` (drawn with the weights below), sorted
    by `race`.
    """
    sample_size = 1_000
    races = ["White", "Black", "Hispanic", "Asian", "All Others"]
    # Attendance level -> probability of drawing it.
    attendance_weights = {
        "Never": 0.1,
        "Seldom": 0.1,
        "Yearly": 0.1,
        "Monthly": 0.15,
        "Weekly": 0.25,
        "Weekly+": 0.3,
    }

    # The order of these two draws matters, they share the seeded generator.
    race_column = np_rnd.choice(races, size=sample_size)
    attendance_column = np_rnd.choice(
        list(attendance_weights.keys()),
        size=sample_size,
        p=list(attendance_weights.values()),
    )

    frame = pd.DataFrame(
        {
            "race": race_column,
            "church_attendance": attendance_column,
        },
    )
    return frame.sort_values("race")
54 |
55 |
def main() -> mpl.figure.Figure:
    """
    Build the figure, one bar chart of church attendance per race.

    The bars of each chart are coloured by their height using a custom colour
    map, labelled with their value and given an error-bar style marker at the
    top. Any axes of the 2x3 grid without a group are blanked out.

    Returns
    -------
        The created figure.

    Raises
    ------
        ValueError: If there are more groups than axes in the grid.

    """
    data = generate_data()

    ordering = [
        "Never",
        "Seldom",
        "Yearly",
        "Monthly",
        "Weekly",
        "Weekly+",
    ]
    spines = ["top", "right", "left", "bottom"]
    marker_colour = "#404040"

    # this locator puts ticks at regular intervals
    loc = plticker.MultipleLocator(base=20.0)

    # Same for every group, so only create it once.
    color_map = mpl.colormaps["cool"].resampled(100)

    def fmt(x: float, _pos: int) -> str:
        # FuncFormatter calls this with two arguments, the tick value (x) and
        # the tick position (pos) - only the value is needed here.
        return f"{int(x)}"

    with plt.rc_context(
        {
            "xtick.major.pad": 20,
            "font.family": "monospace",
        },
    ):
        fig, axis = plt.subplots(
            figsize=(30, 12),
            ncols=3,
            nrows=2,
            sharey=True,
            constrained_layout=False,
        )
        fig.tight_layout(h_pad=10, w_pad=10)

        axes = list(axis.flatten())

        # Style plots.
        for ax in axes:
            ax.grid(alpha=0.2, zorder=0)
            for x in spines:
                ax.spines[x].set_visible(False)
            ax.tick_params(axis="both", which="both", length=0, labelsize=18)

        fig.suptitle(
            "The Relationship Between Church Attendance and a Republican Vote by Race",
            fontsize=30,
            y=1.1,
            x=0.0,
            horizontalalignment="left",
        )
        # needs mpl version >= 3.4
        fig.supylabel(
            "Vote for Trump in 2020",
            fontsize=25,
            x=-0.02,
        )

        groups = list(data.groupby("race"))
        if len(groups) > len(axes):
            msg = f"Have {len(groups)} groups but only {len(axes)} axes."
            raise ValueError(msg)

        for ax, (g, dfg) in zip(axes, groups):
            ax.yaxis.set_major_locator(loc)
            group_bar_values_unordered = (
                dfg["church_attendance"].value_counts().to_dict()
            )
            # A category that never occurs in this group gets a zero height
            # bar rather than raising a KeyError.
            group_bar_values = {
                x: group_bar_values_unordered.get(x, 0) for x in ordering
            }

            barplot = ax.bar(
                x=list(group_bar_values.keys()),
                height=list(group_bar_values.values()),
                zorder=3,
            )
            ax.set_title(g, fontsize=25, y=1.0)
            ax.set_ylim(bottom=0, top=90)
            ax.set_yticks([], minor=True)
            ax.yaxis.set_major_formatter(plticker.FuncFormatter(fmt))

            for bar in barplot:
                height = bar.get_height()
                centre = bar.get_x() + 0.5 * (bar.get_width())

                # Set the bar color by bar height.
                bar.set_color(color_map(height))
                ax.text(
                    x=centre,
                    y=bar.get_y() + 2.5,
                    s=f"{height}%",
                    fontsize=20,
                    ha="center",
                )
                # Error-bar style marker: a vertical line through the top of
                # the bar with a short horizontal cap at each end.
                ax.vlines(
                    x=centre,
                    ymin=height - 5,
                    ymax=height + 5,
                    linewidth=4,
                    zorder=5,
                    color=marker_colour,
                )
                for cap_y in (height - 5, height + 5):
                    ax.hlines(
                        y=cap_y,
                        xmin=centre - 0.1,
                        xmax=centre + 0.1,
                        zorder=5,
                        linewidth=4,
                        color=marker_colour,
                    )

            ax.tick_params(axis="y", colors="grey")
            ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        # Any axes left over have no data - get rid of all plot params on them
        # so they're just blank.
        for ax in axes[len(groups) :]:
            ax.grid(alpha=0)
            ax.set_xticks([])
            for x in spines:
                ax.spines[x].set_visible(False)
            ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
191 |
192 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then build
    # the figure and pass it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
197 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/binary_outcome_variable/__init__.py:
--------------------------------------------------------------------------------
1 | """Binary outcome plot."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/binary_outcome_variable/data.csv:
--------------------------------------------------------------------------------
1 | ,x,y
2 | 0,1.2747902159889748,1.0
3 | 1,0.10622657239729572,1.0
4 | 2,1.3243067956082084,0.0
5 | 3,2.264920571643043,1.0
6 | 4,1.525740164619661,1.0
7 | 5,1.0213314975629986,0.0
8 | 6,-2.0868721399805117,0.0
9 | 7,3.087253476354297,1.0
10 | 8,1.6930945436063571,0.0
11 | 9,1.177730197367664,0.0
12 | 10,-0.2951726334842677,1.0
13 | 11,1.3939955116958194,0.0
14 | 12,2.710887234482275,1.0
15 | 13,2.288566627212669,1.0
16 | 14,1.0063834913479486,1.0
17 | 15,0.40506607468755984,0.0
18 | 16,1.8317978988489252,1.0
19 | 17,-0.09961820872700582,0.0
20 | 18,0.8546664554219493,0.0
21 | 19,2.59869944125502,1.0
22 | 20,3.6930255818250184,1.0
23 | 21,3.2351943407464074,1.0
24 | 22,2.7795466504470148,1.0
25 | 23,-1.5141989332951926,1.0
26 | 24,-1.3342798893336982,1.0
27 | 25,1.3894518047828301,0.0
28 | 26,4.124574072040192,1.0
29 | 27,1.5275761412076085,1.0
30 | 28,3.0920328512031645,1.0
31 | 29,-0.6103591162672523,0.0
32 | 30,1.5711361222591333,1.0
33 | 31,0.3365630598553707,0.0
34 | 32,1.7740841144550268,1.0
35 | 33,-0.4240492991525555,0.0
36 | 34,1.9765103446881005,1.0
37 | 35,0.6875731648825243,0.0
38 | 36,0.48075160343621204,1.0
39 | 37,1.7917034287025366,1.0
40 | 38,0.29649576518401316,0.0
41 | 39,-0.20568700623329317,0.0
42 | 40,2.2529655667205586,1.0
43 | 41,-0.6138512476336486,0.0
44 | 42,1.6662716460969933,0.0
45 | 43,5.607955686672451,1.0
46 | 44,-1.7178950568217535,0.0
47 | 45,-1.5541952161418995,0.0
48 | 46,0.2149985878325868,0.0
49 | 47,2.2745722013953555,1.0
50 | 48,3.6688248016156075,1.0
51 | 49,2.888755806801737,1.0
52 | 50,3.892428495587999,1.0
53 | 51,1.8273090056796961,0.0
54 | 52,2.1998739159436442,1.0
55 | 53,2.1568554817879733,1.0
56 | 54,3.348012018831957,1.0
57 | 55,2.4655510026032945,1.0
58 | 56,-0.5665483983904225,0.0
59 | 57,-0.23387727182304746,0.0
60 | 58,2.7533905386624946,1.0
61 | 59,2.454163346936323,1.0
62 | 60,1.9106009445188148,1.0
63 | 61,-0.562556123742248,0.0
64 | 62,2.4476866391258056,1.0
65 | 63,-0.4973108111772468,0.0
66 | 64,0.7416802995622478,0.0
67 | 65,3.1769629339911583,1.0
68 | 66,-0.29643406638083747,0.0
69 | 67,-0.019125121483606716,0.0
70 | 68,0.543765521085082,0.0
71 | 69,3.2595168518283213,1.0
72 | 70,1.5714071630266757,1.0
73 | 71,-0.17876891111483648,0.0
74 | 72,-0.26221130572986856,0.0
75 | 73,-1.3534058345926328,0.0
76 | 74,0.9989509035674422,1.0
77 | 75,-2.2716152798398235,0.0
78 | 76,-0.8269491136353684,0.0
79 | 77,-0.7879429469570461,1.0
80 | 78,0.467851762636354,0.0
81 | 79,-1.4089215315339054,0.0
82 | 80,0.8116493521291177,0.0
83 | 81,-0.1801822831261996,0.0
84 | 82,2.6892138693830745,1.0
85 | 83,1.2602001853572777,1.0
86 | 84,3.7686379048258347,1.0
87 | 85,1.2021253923467357,1.0
88 | 86,0.03204825943738432,1.0
89 | 87,-1.5613869496540094,0.0
90 | 88,3.5978005775339157,1.0
91 | 89,0.2904121858658958,1.0
92 | 90,0.18389993640789415,1.0
93 | 91,-1.844690371525563,0.0
94 | 92,0.6033888587207837,0.0
95 | 93,4.471774864677858,1.0
96 | 94,1.9050254474289685,1.0
97 | 95,-0.41139165393189336,1.0
98 | 96,1.7644022575097518,1.0
99 | 97,0.685763582724977,0.0
100 | 98,-0.905286178158965,0.0
101 | 99,3.775046959012724,1.0
102 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/binary_outcome_variable/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Plot dichotomous variable.
4 |
5 | Simple dots with median lines - might be nice to add a kde to this as well.
6 |
7 | The y-axis is redundant here as there are only two options (`0.6` doesn't make any
8 | sense).
9 | """
10 |
11 | from __future__ import annotations
12 |
13 | import pathlib
14 |
15 | import matplotlib as mpl
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 | import pandas as pd
19 |
20 | from plotting_examples import dvc_entry, save_plot_output
21 | from plotting_examples.y2022 import metadata
22 |
# Module-level random generator built on an MT19937 bit generator seeded with 1, so
# the sample data drawn from it in make_data is the same on every run.
np_rnd = np.random.Generator(np.random.MT19937(1))
24 |
25 |
def make_data() -> pd.DataFrame:
    """
    Build a reproducible two-column sample frame.

    Column ``y`` is a random 0/1 outcome; column ``x`` is standard normal noise plus
    a normal(2, 1) shift that only applies to rows where ``y`` is 1.
    """
    sample_size = 100
    # Draw order matters for reproducibility: outcome, then noise, then shift.
    outcome = np_rnd.choice([0, 1], sample_size)
    noise = np_rnd.normal(0, 1, sample_size)
    shift = np_rnd.normal(2, 1, sample_size)
    predictor = noise + shift * outcome
    return pd.DataFrame(
        np.column_stack([predictor, outcome]),
        columns=["x", "y"],
    )
32 |
33 |
def binary_outcome_plot(
    data: pd.DataFrame,
    x_var: str = "x",
    y: str = "y",
    fig: mpl.figure.Figure | None = None,
) -> mpl.figure.Figure:
    """
    Create plot of continuous var by binary outcome.

    Each outcome group is drawn as a row of points at its outcome value, with a
    larger marker at the group median, a vertical line from the median towards
    ``y = 0.5`` and a text label giving the median.

    This is just pulled straight from a notebook so is pretty loose. Could improve the
    typing of this function, as well as its name, and the use of mpl objects within
    it.

    Parameters
    ----------
    data
        Frame holding the continuous and the binary column.
    x_var
        Name of the continuous column.
    y
        Name of the binary outcome column; its values must be 0 or 1.
    fig
        Optional existing figure to draw into (its current axes are used). When
        omitted a new ``20 x 3`` figure is created.

    Returns
    -------
    The figure that was drawn on.

    Raises
    ------
    KeyError
        If the outcome column holds a value other than 0 or 1 (no colour defined).
    """
    # Previously ``fig`` was accepted but unconditionally overwritten; honour it now.
    if fig is None:
        fig, ax = plt.subplots(figsize=(20, 3))
    else:
        ax = fig.gca()

    colors = {
        0: metadata.color.PINK_COLOUR,
        1: metadata.color.DEEPER_GREEN,
    }
    # Group on the bare column name so the key is a scalar on every pandas version
    # (a one-element list yields tuple keys only on pandas >= 2).
    for g, dfg in data.groupby(y):
        ax.scatter(
            x=dfg[x_var],
            y=dfg[y],
            color=colors[g],
        )

        med = dfg[x_var].median()
        ax.scatter(
            x=med,
            y=g,
            s=90,
            color=colors[g],
        )
        # Line from the group's row towards the middle of the axis.
        ax.vlines(
            x=med,
            ymin=min(g, 0.5),
            ymax=max(g, 0.5),
            color=colors[g],
        )

        # abs() places the label just inside the axis for both groups:
        # y=0.15 for group 0 and y=0.85 for group 1.
        ax.text(
            x=med + 0.5,
            y=abs(g - 0.15),
            s=f"Median {g} : {round(med, 2)}",
            fontsize=15,
        )

    ax.set_title(
        f"{x_var} x {y}",
        fontsize=20,
    )
    ax.grid(alpha=0.2)

    fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig
94 |
95 |
def main() -> mpl.figure.Figure:
    """Build the binary outcome figure from freshly generated sample data."""
    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        sample = make_data()
        figure = binary_outcome_plot(data=sample)
        figure.set_tight_layout(True)  # type: ignore[attr-defined]

    return figure
108 |
109 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then build
    # the figure and pass it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
114 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/box_plot_w_scatter_distributions/__init__.py:
--------------------------------------------------------------------------------
1 | """Box plot with scatter dist."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/box_plot_w_scatter_distributions/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/box_plot_w_scatter_distributions/data.parquet
--------------------------------------------------------------------------------
/plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code,too-many-locals
2 | """
3 | Box plot with distributions.
4 |
5 | Thought I'd create a box plot with scatter plots of the distributions adjacent to the
6 | boxes, it was based off something else but I can't remember what. Box plots are created
7 | from scratch using hlines etc, for no particular reason.
8 |
9 | Data was from tidy tuesday.
10 | """
11 |
12 | from __future__ import annotations
13 |
14 | import pathlib
15 | from typing import TypeVar
16 |
17 | import attr
18 | import matplotlib as mpl
19 | import matplotlib.pyplot as plt
20 | import numpy as np
21 | import pandas as pd
22 |
23 | from plotting_examples import dvc_entry, save_plot_output
24 | from plotting_examples.y2022 import metadata
25 |
# Seeded generator (MT19937, seed 1); make_single_box draws the horizontal jitter of
# the scatter points from it, so the jitter is reproducible.
np_rnd = np.random.Generator(np.random.MT19937(1))

# NOTE(review): T is not referenced anywhere else in this module - confirm it can go.
T = TypeVar("T")

# NOTE(review): not referenced elsewhere in this module; presumably the tidy tuesday
# week the data came from - confirm.
WEEK = "week42"

# NOTE(review): this module reads a local data.parquet rather than this URL;
# presumably the parquet file was derived from this CSV - confirm.
DATA_URL = (
    "https://raw.githubusercontent.com/rfordatascience/tidytuesday/"
    "master/data/2021/2021-10-19/pumpkins.csv"
)

# NOTE(review): the plotting code in this module uses metadata.color.BACKGROUND_COLOUR,
# not this constant - confirm whether it is still needed.
BACKGROUND_COLOUR = "#f2f2f2"
# https://mycolor.space/?hex=%23FF69B4&sub=1
PINK_COLOUR = "#ff69b4"
# Font size shared by the axis title and the y-axis label in example().
LABEL_FONTSIZE = 12
41 |
42 |
def clean_comma(df: pd.DataFrame, *, column: str) -> pd.DataFrame:
    """
    Convert comma-formatted numbers in ``column`` (e.g. ``"1,234.5"``) to floats.

    Only the cells whose text contains a comma are touched: the commas are removed
    and the remainder is converted to ``float``. All other cells are left exactly as
    they were, so the caller is still expected to cast the whole column afterwards.

    Parameters
    ----------
    df
        Input frame; it is copied, never modified in place.
    column
        Name of the column to clean.

    Returns
    -------
    A copy of ``df`` with the comma-containing cells of ``column`` replaced by floats.

    Raises
    ------
    ValueError
        If a comma-containing cell is not a number once its commas are removed.
    """
    df = df.copy()
    as_text = df[column].astype(str)
    row_mask = as_text.str.contains(",", regex=False)
    # Nothing to clean. Returning here also keeps already-numeric columns working:
    # the ``.str`` accessor refuses numeric data even for an empty selection.
    if not row_mask.any():
        return df

    # Make sure the column can hold a mix of floats and the untouched values.
    df[column] = df[column].astype(object)
    # Assign positionally (numpy array) so duplicate index labels cannot break
    # the alignment of the cleaned values.
    df.loc[row_mask, column] = (
        as_text[row_mask].str.replace(",", "", regex=False).astype(float).to_numpy()
    )
    return df
51 |
52 |
def drop_rows_by_match_on_column(
    df: pd.DataFrame,
    *,
    column: str,
    regexp: str,
) -> pd.DataFrame:
    """
    Return the rows of ``df`` whose ``column`` value does not match ``regexp``.

    Values are compared as strings, and a row is dropped when the pattern is found
    anywhere in the text. The input frame is left untouched.
    """
    frame = df.copy()
    column_text = frame[column].astype(str)
    is_match = column_text.str.contains(regexp, regex=True)
    return frame.loc[~is_match]
62 |
63 |
def top_n_groups(
    df: pd.DataFrame,
    *,
    column: str,
    n: int,
    rename: str = "Other",
) -> pd.DataFrame:
    """
    Keep the ``n`` most frequent values of ``column`` and relabel everything else.

    Frequencies include missing values. Rows whose value is not among the ``n`` most
    common are overwritten with ``rename``. Works on a copy of ``df``.
    """
    frame = df.copy()
    frequencies = frame[column].value_counts(dropna=False)
    most_common = frequencies.head(n).index
    outside_top = ~frame[column].isin(most_common)
    frame.loc[outside_top, column] = rename
    return frame
76 |
77 |
def clean(
    *,
    df: pd.DataFrame,
) -> pd.DataFrame:
    """
    Apply the initial cleaning steps to the raw frame.

    Rows whose ``country`` matches ``.*Entries.*`` are dropped, commas are removed
    from ``weight_lbs`` and ``est_weight``, and those two columns plus ``ott`` and
    ``pct_chart`` are cast to float.
    """
    cleaned = drop_rows_by_match_on_column(
        df.copy(),
        column="country",
        regexp=".*Entries.*",
    )
    # Same two-step treatment for both comma-formatted columns: clean, then cast.
    for comma_column in ("weight_lbs", "est_weight"):
        cleaned = clean_comma(cleaned, column=comma_column)
        cleaned = cleaned.assign(
            **{comma_column: cleaned[comma_column].astype(float)},
        )
    return cleaned.assign(
        ott=cleaned["ott"].astype(float),
        pct_chart=cleaned["pct_chart"].astype(float),
    )
97 |
98 |
@attr.frozen(kw_only=True)
class PlotData:
    # pylint: disable=too-few-public-methods

    """
    Data for use in both box and scatter plotting.

    Frozen, keyword-only container; plot_data_for_weight_by_country builds one
    instance per country group.
    """

    # Summary table for the group; plot_data_for_weight_by_country stores the
    # output of ``DataFrame.describe()`` here.
    box: pd.DataFrame
    # Raw values of the group; plot_data_for_weight_by_country stores the group's
    # ``weight_lbs`` column here as a list.
    scatter: list[float]
107 |
108 |
def plot_data_for_weight_by_country(df: pd.DataFrame) -> dict[str, PlotData]:
    """
    Generate per-country plot data for the weight column.

    The nine most frequent countries are kept, the rest are pooled as ``"Other"``,
    and every row is duplicated into an extra ``"All Countries"`` group.

    Parameters
    ----------
    df
        Cleaned frame with at least ``country`` and ``weight_lbs`` columns.

    Returns
    -------
    Mapping of group name to its ``PlotData`` (``describe()`` table and the raw
    weights). Keys are ordered by ascending median weight of the group.
    """
    # top_n_groups already works on a copy, so the caller's frame is untouched.
    by_country = top_n_groups(df=df, column="country", n=9, rename="Other")[
        ["country", "weight_lbs"]
    ]
    combined = pd.concat(
        [
            by_country,
            by_country.assign(country="All Countries"),
        ],
        axis=0,
    )
    # we want to order the countries by the median of the weights for each group.
    sorting = list(
        combined.groupby("country")["weight_lbs"].median().sort_values().index,
    )
    combined = combined.iloc[
        pd.Categorical(combined["country"], sorting).argsort()
    ]

    # sort=False keeps the median ordering established above; a sorting groupby
    # would silently put the groups back in alphabetical order.
    return {
        country: PlotData(
            box=dfg.describe(),
            scatter=dfg["weight_lbs"].to_list(),
        )
        for country, dfg in combined.groupby("country", sort=False)
    }
134 |
135 |
136 | # --------------------------------------------------------------------------------------
137 |
138 | # PLOTTING METHODS
139 |
140 |
def top_bottom_whisker_y_values(*, values: list[float]) -> tuple[float, float]:
    """
    Get top/bottom for boxplot whiskers.

    Uses the usual 1.5 * IQR rule: the upper whisker ends at the largest value
    that is not above ``Q3 + 1.5 * IQR`` and the lower whisker at the smallest value
    that is not below ``Q1 - 1.5 * IQR``.

    Parameters
    ----------
    values
        Observations of a single group.

    Returns
    -------
    ``(whisker_bottom, whisker_top)``.
    """
    series = pd.Series(values)
    quant_1 = series.quantile(0.25)
    quant_3 = series.quantile(0.75)
    iqr = quant_3 - quant_1
    top_range = quant_3 + 1.5 * iqr
    # The lower fence hangs off the FIRST quartile (it used to be computed from the
    # third, which put the fence too high and flagged ordinary values as outliers).
    bottom_range = quant_1 - 1.5 * iqr
    # Inclusive comparisons: a value sitting exactly on a fence still belongs to the
    # whisker, and constant data (IQR == 0) yields that constant instead of NaN.
    box_plot_top = series[series.le(top_range)].max()
    box_plot_bottom = series[series.ge(bottom_range)].min()
    return box_plot_bottom, box_plot_top
154 |
155 |
def boxp_hline(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    x_center: float,
    y_value: float,
    box_width: float,
    linewidth: float,
    box_colour: str,
) -> None:
    """Draw one horizontal edge of the box, centred on ``x_center`` at ``y_value``."""
    half_width = box_width * 0.5
    edge_kwargs = {
        "y": y_value,
        "xmin": x_center - half_width,
        "xmax": x_center + half_width,
        "linewidth": linewidth,
        "color": box_colour,
        "zorder": 3,
        "capstyle": "round",
    }
    ax.hlines(**edge_kwargs)
175 |
176 |
def boxp_vline(
    ax: plt.Axes,  # type: ignore[name-defined]
    x: float,
    ymin: float,
    ymax: float,
    color: str,
    linewidth: float,
) -> None:
    """Draw one vertical side of the box at ``x``, spanning ``ymin`` to ``ymax``."""
    side_kwargs = {
        "x": x,
        "ymin": ymin,
        "ymax": ymax,
        "color": color,
        "linewidth": linewidth,
        "zorder": 3,
        "capstyle": "round",
    }
    ax.vlines(**side_kwargs)
195 |
196 |
def whisker_tops(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    whisker_top: float,
    whisker_bottom: float,
    xmin: float,
    xmax: float,
    color: str,
) -> None:
    """Draw the horizontal caps at the end of the upper and the lower whisker."""
    # Upper cap first, then lower cap - both span the same x range.
    for cap_y in (whisker_top, whisker_bottom):
        ax.hlines(
            y=cap_y,
            xmin=xmin,
            xmax=xmax,
            color=color,
            zorder=1,
        )
221 |
222 |
def make_single_box(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    values: list[float],
    x_center: float,
    scatter_color: str,
    linewidth: float = 5,
    box_width: float = 0.14,
    box_colour: str = "#000000",
    whisker_color: str = "#000000",
    median_colour: str = "#000000",
    outlier_colour: str = "#000000",
) -> None:
    """
    Draw a hand-built boxplot for ``values`` on ``ax``, centred on ``x_center``.

    The box, median line, whiskers and outlier markers are assembled from
    hlines/vlines/scatter calls. A jittered scatter of all values is drawn slightly
    to the right of the box (centred on ``x_center + 0.2``).
    """
    observations = pd.Series(values)
    stats = observations.describe().to_dict()
    lower_q, median, upper_q = stats["25%"], stats["50%"], stats["75%"]
    half_width = box_width * 0.5

    # The box itself: bottom and top edges, then right and left sides. Built from
    # lines rather than a rectangle purely out of curiosity at the time.
    for edge_y in (lower_q, upper_q):
        boxp_hline(
            ax=ax,
            x_center=x_center,
            y_value=edge_y,
            box_width=box_width,
            linewidth=linewidth,
            box_colour=box_colour,
        )
    for side_x in (x_center + half_width, x_center - half_width):
        boxp_vline(
            ax=ax,
            x=side_x,
            ymin=lower_q,
            ymax=upper_q,
            color=box_colour,
            linewidth=linewidth,
        )

    # Median line across the box.
    ax.hlines(
        y=median,
        xmin=x_center - half_width,
        xmax=x_center + half_width,
        color=median_colour,
        zorder=1,
        linewidth=linewidth,
    )

    # Vertical whiskers: from the top of the box upwards, then from the bottom of
    # the box downwards.
    whisker_bottom, whisker_top = top_bottom_whisker_y_values(values=values)
    for box_edge, whisker_end in (
        (upper_q, whisker_top),
        (lower_q, whisker_bottom),
    ):
        ax.vlines(
            x=x_center,
            ymin=box_edge,
            ymax=whisker_end,
            color=whisker_color,
            capstyle="round",
        )

    # Outliers: everything beyond either whisker end, stacked on the centre line.
    beyond_whiskers = observations.lt(whisker_bottom) | observations.gt(whisker_top)
    outliers = observations[beyond_whiskers]
    ax.scatter(
        x=[x_center] * len(outliers),
        y=list(outliers),
        color=outlier_colour,
        s=5,
        alpha=0.8,
        edgecolors=None,
    )

    # All values as a faint cloud beside the box; x positions are normal jitter.
    jittered_x = np_rnd.normal(
        loc=x_center + 0.2,
        scale=0.03,
        size=len(values),
    )
    ax.scatter(
        x=jittered_x,
        y=values,
        alpha=0.1,
        s=10,
        zorder=-1,
        color=scatter_color,
        edgecolors=None,
    )
343 |
344 |
def example(*, df: pd.DataFrame) -> mpl.figure.Figure:
    """
    Generate example plot.

    Draws one hand-built box (plus jittered scatter) per country group, left to
    right in the order of ``country_metadata``.

    Parameters
    ----------
    df
        Cleaned frame, as accepted by ``plot_data_for_weight_by_country``.

    Returns
    -------
    The finished figure.

    Raises
    ------
    KeyError
        If one of the groups listed in ``country_metadata`` is missing from the data.
    """
    country_data = plot_data_for_weight_by_country(df=df)
    fig, ax = plt.subplots(figsize=(20, 8))

    # Plot order and per-group overrides. Groups without a "scatter_color" entry
    # fall back to PINK_COLOUR.
    country_metadata: dict[str, dict[str, str]] = {
        "France": {},
        "Japan": {},
        "Canada": {},
        "Germany": {},
        "United Kingdom": {},
        "Italy": {},
        "United States": {},
        "Austria": {},
        "Belgium": {},
        "Other": {"scatter_color": "#919191"},
        "All Countries": {"scatter_color": "#919191"},
    }
    xpos_start = 1.0
    xpos_inc = 0.5

    # Record every box position so the ticks are guaranteed to line up with the
    # boxes (no reliance on a float np.arange producing the right number of ticks).
    tick_positions: list[float] = []
    for position, (country, overrides) in enumerate(country_metadata.items()):
        xpos = xpos_start + position * xpos_inc
        make_single_box(
            ax=ax,
            values=country_data[country].scatter,
            x_center=xpos,
            linewidth=1.5,
            # Take the colour from the metadata instead of a second hard-coded
            # list of group names.
            scatter_color=overrides.get("scatter_color", PINK_COLOUR),
            outlier_colour="#000000",
        )
        tick_positions.append(xpos)

    # ----------------------------------------------------------------------------------
    # format tick labels

    ax.set_xticks(tick_positions)
    ax.set_xticklabels(list(country_metadata))

    # ----------------------------------------------------------------------------------
    # plot formatting / spines / background.

    ax.tick_params(axis="both", which="both", length=0)
    for side in ("top", "right", "left", "bottom"):
        ax.spines[side].set_visible(False)

    ax.grid(alpha=0.15, axis="y", zorder=0)

    fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    # ----------------------------------------------------------------------------------
    # titles and axis labels
    ax.set_title(
        "This is something about pumpkin competitions or something like that.",
        color="#919191",
        fontsize=LABEL_FONTSIZE,
    )
    fig.suptitle(
        "Data Visualization of Competitive Pumpkin Sport 2013-2021",
        fontsize=20,
    )
    ax.set_ylabel("Weight lbs", fontsize=LABEL_FONTSIZE)
    ax.yaxis.set_label_coords(-0.05, 0.5)
    return fig
413 |
414 |
def main() -> mpl.figure.Figure:
    """Load the parquet file stored next to this module, clean it and plot it."""
    data_path = pathlib.Path(__file__).parent / "data.parquet"
    cleaned = clean(df=pd.read_parquet(data_path))

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        return example(df=cleaned)
427 |
428 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then build
    # the figure and pass it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
433 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/default_plot/__init__.py:
--------------------------------------------------------------------------------
1 | """Default plot."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/default_plot/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """Default for plotting example - just to base others off."""
3 |
4 | from __future__ import annotations
5 |
6 | import pathlib
7 |
8 | import matplotlib as mpl
9 | import matplotlib.pyplot as plt
10 | import pandas as pd
11 |
12 | from plotting_examples import dvc_entry, save_plot_output
13 | from plotting_examples.y2022 import metadata
14 |
15 |
def get_sample_data() -> pd.DataFrame:
    """Return the tiny fixed ``x``/``y`` frame used by the default plot."""
    points = [(1, 1), (2, 2), (3, 2), (4, 3), (5, 8)]
    return pd.DataFrame(data=points, columns=["x", "y"])
24 |
25 |
def main() -> mpl.figure.Figure:
    """Scatter the sample data on a square figure using the shared background colour."""
    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig, ax = plt.subplots(figsize=(10, 10))
        sample = get_sample_data()

        ax.scatter(x=sample["x"], y=sample["y"])
        ax.set_title("Default plotting.")

        # Same background on the figure and on the axes.
        background = metadata.color.BACKGROUND_COLOUR
        fig.patch.set_facecolor(background)
        ax.set_facecolor(background)
    return fig
45 |
46 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then build
    # the figure and pass it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
51 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/histogram_with_two_variables/__init__.py:
--------------------------------------------------------------------------------
1 | """Histogram with overlap."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/histogram_with_two_variables/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Histogram created from scratch using matplotlib.
4 |
5 | There are custom bars created for each bin, instead of using ax.bar, I think it was
6 | originally based on something but I can't find the original / reference now so am just
7 | left with this.
8 |
9 | The result is pretty rubbish :)
10 | """
11 |
12 | from __future__ import annotations
13 |
14 | import pathlib
15 |
16 | import matplotlib as mpl
17 | import matplotlib.pyplot as plt
18 | import numpy as np
19 | import pandas as pd
20 | from matplotlib import patches, ticker
21 |
22 | from plotting_examples import dvc_entry, save_plot_output
23 | from plotting_examples.y2022 import metadata
24 |
25 |
def gen_data() -> tuple[pd.DataFrame, dict[str, str]]:
    """
    Generate sample data for plotting.

    Two samples of 1,000 normal draws (male: mean 4, female: mean 6, both with
    scale 2) are clipped to ``[0, 10]``, binned onto a 1-10 scale and converted to
    the percentage of respondents per bin.

    Return data as:

    >>> male female row_min row_max color pain_scale
    >>> 0 6.8 0.8 0.8 6.8 #9A7AA0 1
    >>> 1 10.7 1.0 1.0 10.7 #9A7AA0 2
    >>> 2 14.8 4.3 4.3 14.8 #9A7AA0 3
    >>> 3 18.9 10.5 10.5 18.9 #9A7AA0 4
    >>> 4 19.3 14.0 14.0 19.3 #9A7AA0 5
    >>> 5 16.9 19.9 16.9 19.9 #B4EDD2 6
    >>> 6 6.8 16.6 6.8 16.6 #B4EDD2 7
    >>> 7 3.9 16.2 3.9 16.2 #B4EDD2 8
    >>> 8 1.3 9.3 1.3 9.3 #B4EDD2 9
    >>> 9 0.6 7.4 0.6 7.4 #B4EDD2 10

    Returns
    -------
    ``(plot_data, colour_map)`` where ``colour_map`` maps ``"male"``/``"female"`` to
    the colour used for that group.
    """
    rng = np.random.default_rng(1)
    n = 1_000
    groups = ["male", "female"]
    # Draw order (male, then female) is part of the reproducible output.
    df = pd.DataFrame(
        {
            "male": np.digitize(
                np.clip(rng.normal(loc=4, scale=2, size=n), 0, 10),
                range(10),
            ),
            "female": np.digitize(
                np.clip(rng.normal(loc=6, scale=2, size=n), 0, 10),
                range(10),
            ),
        },
    )
    # https://coolors.co/b4edd2-a0cfd3-8d94ba-9a7aa0-87677b
    colour_map = {
        "male": metadata.color.PINK_COLOUR,
        "female": metadata.color.LIGHT_GREEN,
    }

    # Percentage of respondents per scale value, one column per group.
    percentages = df.apply(lambda x: x.value_counts(normalize=True).mul(100))
    # Derive every helper column from the two group columns only, so the result
    # does not depend on the helper columns' position in the frame. Note the
    # vectorised min/max skip missing values, should a bin be empty for one group.
    by_group = percentages[groups]
    plot_data = percentages.assign(
        row_min=by_group.min(axis=1),
        row_max=by_group.max(axis=1),
        # colour of whichever group is larger in the bin
        color=by_group.idxmax(axis=1).map(colour_map),
        pain_scale=percentages.index,
    ).reset_index(drop=True)
    return plot_data, colour_map
78 |
79 |
def main() -> mpl.figure.Figure:
    """
    Create plot.

    For every scale value the part shared by both groups is drawn as a grey bar
    and the difference between the groups is stacked on top, coloured by the larger
    group.

    Returns
    -------
    The finished figure.
    """
    plot_data, colour_map = gen_data()

    # Resolve the style sheet relative to this file (two levels up, in the package
    # root) so the script works from any working directory.
    style_path = pathlib.Path(__file__).resolve().parents[2] / "rc.mplstyle"
    plt.style.use(str(style_path))

    def add_bar(
        ax: plt.Axes,  # type: ignore[name-defined]
        x: int,
        y1: float,
        y2: float,
        facecolor: str,
        alpha: float,
        outline: bool,
    ) -> None:
        """
        Add a bar to the given ax object.

        The bar is one unit wide, centred on ``x``; it starts at ``y1`` and is ``y2``
        high. With ``outline`` a line is drawn along the top edge of the bar.
        """
        width = 1
        rect = patches.Rectangle(
            xy=(x - 0.5 * width, y1),
            width=width,
            height=y2,
            linewidth=1,
            edgecolor="none",
            facecolor=facecolor,
            alpha=alpha,
        )
        ax.add_patch(rect)
        if outline:
            # y2 is a height, so the top edge sits at y1 + y2.
            ax.hlines(
                y=y1 + y2,
                xmin=x - 0.5 * width,
                xmax=x + 0.5 * width,
            )

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        fig, ax = plt.subplots(figsize=(15, 5))

        # ensure that axis area covers data (patches do not autoscale the view).
        ax.set_xlim(left=0, right=11)
        ax.set_ylim(
            bottom=0,
            top=plot_data["row_max"].max() + 5,
        )

        for row in plot_data.itertuples():
            # plot the diffs
            add_bar(
                ax=ax,
                x=row.pain_scale,
                y1=row.row_min,
                y2=(row.row_max - row.row_min),
                facecolor=row.color,
                alpha=0.8,
                outline=False,
            )
            # plot beneath the diffs
            add_bar(
                ax=ax,
                x=row.pain_scale,
                y1=0,
                y2=row.row_min,
                facecolor=metadata.color.GREY,
                alpha=0.2,
                outline=False,
            )

        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)

        label_fontsize = 15
        ax.set_ylabel("Percentage of respondents", fontsize=label_fontsize)
        ax.set_xlabel(
            "Some scale (1 least, 10 greatest)",
            fontsize=label_fontsize,
        )
        ax.set_title(
            "Reporting of something for male, female respondents",
            fontsize=20,
        )

        # The bars are bare patches, so the legend entries are built by hand.
        legend_elements = [
            patches.Patch(
                facecolor=colour_map["male"],
                edgecolor="none",
                label="male",
            ),
            patches.Patch(
                facecolor=colour_map["female"],
                edgecolor="none",
                label="female",
            ),
        ]
        ax.legend(
            handles=legend_elements,
            frameon=False,
            fontsize=15,
        )

        ax.yaxis.set_major_formatter(ticker.FormatStrFormatter("%d%%"))
        ax.xaxis.set_major_locator(ticker.MultipleLocator(1))

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
190 |
191 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then build
    # the figure and pass it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
196 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/line_plot_fill_between/__init__.py:
--------------------------------------------------------------------------------
1 | """Line plot with fill."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/line_plot_fill_between/data.csv:
--------------------------------------------------------------------------------
1 | year,month,day,amount
2 | 2022,1,10,70
3 | 2022,1,11,-15
4 | 2022,1,11,30
5 | 2022,1,11,-40
6 | 2022,1,11,30
7 | 2022,1,13,-35
8 | 2022,1,14,-20
9 | 2022,1,14,15
10 | 2022,1,17,-10
11 | 2022,1,17,-5
12 | 2022,1,18,-15
13 | 2022,1,18,-25
14 | 2022,1,18,15
15 | 2022,1,19,-10
16 | 2022,1,19,60
17 | 2022,1,20,-20
18 | 2022,1,20,-30
19 | 2022,1,21,-15
20 | 2022,1,21,30
21 | 2022,1,25,-10
22 | 2022,1,26,-10
23 | 2022,1,26,10
24 | 2022,1,27,25
25 | 2022,1,27,80
26 | 2022,1,28,-15
27 | 2022,1,28,-15
28 | 2022,1,28,-20
29 | 2022,1,31,-10
30 | 2022,2,1,-15
31 | 2022,2,3,-10
32 | 2022,2,4,10
33 | 2022,2,7,-10
34 | 2022,2,7,-50
35 | 2022,2,7,20
36 | 2022,2,8,-5
37 | 2022,2,8,-45
38 | 2022,2,8,45
39 | 2022,2,9,-30
40 | 2022,2,10,50
41 | 2022,2,10,-40
42 | 2022,2,10,30
43 | 2022,2,11,-5
44 | 2022,2,11,-10
45 | 2022,2,14,10
46 | 2022,2,14,20
47 | 2022,2,15,-25
48 | 2022,2,15,90
49 | 2022,2,25,-15
50 | 2022,3,1,-15
51 | 2022,3,1,-70
52 | 2022,3,1,30
53 | 2022,3,3,-5
54 | 2022,3,4,45
55 | 2022,3,4,-65
56 | 2022,3,8,10
57 | 2022,3,8,15
58 | 2022,3,9,10
59 | 2022,3,10,30
60 | 2022,3,10,20
61 | 2022,3,11,10
62 | 2022,3,14,30
63 | 2022,3,15,-30
64 | 2022,3,15,-30
65 | 2022,3,16,-15
66 | 2022,3,16,15
67 | 2022,3,16,50
68 | 2022,3,17,-30
69 | 2022,3,17,-40
70 | 2022,3,17,30
71 | 2022,3,18,60
72 | 2022,3,18,60
73 | 2022,3,18,-50
74 | 2022,3,21,-15
75 | 2022,3,22,30
76 | 2022,3,22,-10
77 | 2022,3,23,-40
78 | 2022,3,24,160
79 | 2022,3,25,15
80 | 2022,3,25,120
81 | 2022,4,12,-15
82 | 2022,4,13,50
83 | 2022,4,14,-20
84 | 2022,4,14,300
85 | 2022,4,15,400
86 | 2022,4,16,480
87 | 2022,4,19,100
88 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/line_plot_fill_between/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Visualise time tracking, how much over/under time.
4 |
5 | Mainly serves as an example of plotting with dates, and filling above / below
6 | particular values on a plot.
7 |
8 | Example of:
9 |
10 | - plotting with dates
11 | - different fonts
12 | - filling between lines
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | import pathlib
18 |
19 | import matplotlib as mpl
20 | import matplotlib.dates as mdates
21 | import matplotlib.pyplot as plt
22 | import pandas as pd
23 |
24 | from plotting_examples import dvc_entry, save_plot_output
25 | from plotting_examples.y2022 import metadata
26 |
# NOTE(review): the fill_between calls in this module take their pink from
# metadata.color.PINK_COLOUR rather than this constant - confirm it is still needed.
PINK_COLOUR = "#ff69b4"
28 |
29 |
def main() -> mpl.figure.Figure:
    """
    Plot a cumulative sum over time, shaded by sign, with a gap for a break.

    Reads ``data.csv`` from the directory containing this module. Once the column
    names have been lower-cased and had spaces replaced with underscores, the file
    must provide ``amount``, ``day``, ``month`` and ``year`` columns.

    Returns
    -------
    mpl.figure.Figure
        The figure holding the single line / fill-between axes.
    """
    # Resolve the data file relative to this module rather than the current working
    # directory, so the script behaves the same wherever it is launched from.
    data_path = pathlib.Path(__file__).parent / "data.csv"

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        df = (
            pd.read_csv(data_path)
            .rename(columns=lambda x: x.lower().replace(" ", "_").strip())
            .assign(y=lambda df: df["amount"])
        )

        # Create date column from separate columns in sheet data.
        df["date"] = pd.to_datetime(
            df["day"].astype(str)
            + "/"
            + df["month"].astype(str)
            + "/"
            + df["year"].astype(str),
            format="%d/%m/%Y",
        )

        # Interested in the cumulative sum either way.
        df["y_cumsum"] = df["y"].cumsum()

        # For creating the plot title.
        date_min = df["date"].min().date().strftime("%d/%m/%Y")
        date_max = df["date"].max().date().strftime("%d/%m/%Y")

        # Highlight break: rows on either side of the gap are drawn separately so
        # that no line / fill is drawn across the break itself.
        up_to_break = df["month"].le(3) & df["day"].le(28)
        past_break = df["month"].ge(4) & df["day"].ge(11)

        # Only one figure is created; a second, unused figure would otherwise stay
        # registered with pyplot for the life of the process.
        fig, ax = plt.subplots(figsize=(25, 15))

        # Line before and after the break.
        for period in (up_to_break, past_break):
            ax.plot(
                df.loc[period, "date"],
                df.loc[period, "y_cumsum"],
                color="black",
                linewidth=2,
            )

        # Put black points on values which were over 60.
        over_sixty = df["y"].gt(60)
        ax.scatter(
            x=df.loc[over_sixty, "date"],
            y=df.loc[over_sixty, "y_cumsum"],
            s=100,
            color="black",
            zorder=3,
        )

        ax.grid(alpha=0.15)

        # labels
        ax.set_ylabel(
            "Units over/under",
            fontsize=15,
        )
        ax.set_title(
            f"Information about something useful, from {date_min} to {date_max}",
            fontsize=25,
        )

        # Text
        ax.text(
            x=df["date"].to_list()[2],
            y=1500,
            s=(
                "Shows information about something for some time which was interesting."
                " \nBlack points indicate something of particular note."
            ),
            fontsize=25,
        )

        # Color beneath plot based on whether it's over or under 0, separately for
        # the period before the break and the period after it.
        for period in (up_to_break, past_break):
            dates = df.loc[period, "date"]
            totals = df.loc[period, "y_cumsum"]
            ax.fill_between(
                dates,
                0,
                totals,
                alpha=0.5,
                color=metadata.color.PINK_COLOUR,
                where=totals >= 0,
            )
            ax.fill_between(
                dates,
                0,
                totals,
                alpha=0.5,
                color=metadata.color.GREY,
                where=totals <= 0,
            )

        # One tick per day, formatted to just show the day/month.
        ax.xaxis.set_major_locator(mdates.DayLocator(interval=1))  # type: ignore[no-untyped-call]
        ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m"))  # type: ignore[no-untyped-call]

        for label in ax.get_xticklabels():
            label.set_rotation(80)
            label.set_ha("center")

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
    return fig
175 |
176 |
if __name__ == "__main__":
    # Hand this script's path to the project's DVC helper, then render the figure
    # and pass it to the shared save helper before exiting.
    this_script = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=this_script)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
181 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/meaningless_points/__init__.py:
--------------------------------------------------------------------------------
1 | """Random viz."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/meaningless_points/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Some random points.
4 |
5 | No real meaning to this - was messing about with some bokeh style bits (the effect, not
6 | the python library), so dumping here. Not sure I'm mad on the output - it's also slow
7 | as hell.
8 | """
9 |
10 | from __future__ import annotations
11 |
12 | import itertools
13 | import pathlib
14 |
15 | import matplotlib as mpl
16 | import matplotlib.pyplot as plt
17 | import numpy as np
18 |
19 | from plotting_examples import dvc_entry, save_plot_output
20 | from plotting_examples.y2022 import metadata
21 |
22 | np_rnd = np.random.Generator(np.random.MT19937())
23 |
24 |
def main() -> mpl.figure.Figure:
    """
    Draw clusters of soft, glowing points on a black background.

    Each point is built from a stack of concentric scatter markers whose size and
    opacity vary, which gives the blurred "bokeh" look.

    Returns
    -------
    mpl.figure.Figure
        The figure containing the single, tick-less axes.
    """
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.set_facecolor("black")

    def make_point(
        *,
        x: float,
        y: float,
        con_min: int = 10,
        con_max: int = 10_000,
        num_cont: int = 20,
        alpha_mult: float = 0.2,
        color: str = "black",
    ) -> None:
        """Stack ``num_cont`` markers at (x, y), from largest to smallest."""
        # Marker areas run from con_max down to con_min ...
        marker_sizes = np.linspace(con_min, con_max, num=num_cont)[::-1]
        # ... while opacity runs the other way, so the biggest marker is faintest.
        opacities = (marker_sizes / con_max)[::-1] * alpha_mult
        for marker_size, opacity in zip(marker_sizes, opacities):
            ax.scatter(
                x=x,
                y=y,
                color=color,
                s=marker_size,
                alpha=opacity,
            )

    palette = itertools.cycle(
        [
            metadata.color.PINK_COLOUR,
            metadata.color.LIGHT_GREEN,
            metadata.color.BLUE,
            metadata.color.DEEPER_GREEN,
        ],
    )

    # One row per batch of points: (size, alpha_mult, con_max, num_cont).
    batches = [
        (2, 0.3, 800, 50),
        (2, 0.35, 200, 5),
        (5, 0.05, 500, 9),
        (4, 0.15, 500, 9),
        (5, 0.1, 2_000, 50),
        (3, 0.1, 3_000, 50),
        (2, 0.1, 6_000, 50),
        (2, 0.09, 5_000, 50),
        (5, 0.008, 15_000, 150),
        (3, 0.08, 2_000, 20),
    ]
    # Fixed seed so the layout is the same on every run.
    rng = np.random.default_rng(2)

    for n_points, batch_alpha, batch_con_max, batch_num_cont in batches:
        x_coords = rng.random(size=n_points)
        y_coords = x_coords + rng.random(size=n_points)
        for x_coord, y_coord in zip(x_coords, y_coords):
            make_point(
                x=x_coord,
                y=y_coord,
                color=next(palette),
                alpha_mult=batch_alpha,
                con_max=batch_con_max,
                num_cont=batch_num_cont,
            )

    # Strip major and minor ticks from both axes.
    for set_ticks in (ax.set_xticks, ax.set_yticks):
        set_ticks([])
        set_ticks([], minor=True)

    fig.tight_layout()

    fig.patch.set_facecolor("black")

    return fig
99 |
100 |
if __name__ == "__main__":
    # Hand this script's path to the project's DVC helper, then render the figure
    # and pass it to the shared save helper before exiting.
    this_script = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=this_script)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
105 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/metadata.py:
--------------------------------------------------------------------------------
1 | """
2 | Metadata for plotting.
3 |
4 | I probably could / should use an rc params file for some of this stuff instead of
5 | calling from here.
6 | """
7 |
8 | from __future__ import annotations
9 |
10 | from dataclasses import dataclass
11 |
12 |
@dataclass
class Colors:
    """
    Colour palette shared by the plots.

    Every entry is an annotated dataclass field with a default, so the palette
    shows up in ``repr``/``==`` and individual colours can be overridden when
    constructing an instance (``Colors(BLUE="#000000")``). Class-level access such
    as ``Colors.PINK_COLOUR`` keeps working because the defaults are stored on the
    class.

    https://mycolor.space/?hex=%23FF69B4&sub=1
    """

    PINK_COLOUR: str = "#ff69b4"
    BACKGROUND_COLOUR: str = "#f2f2f2"
    GREY: str = "#919191"
    BLUE: str = "#007FCB"
    LIGHT_GREEN: str = "#B4EDD2"
    DEEPER_GREEN: str = "#51B9BE"
    BROWNY_RED: str = "#554149"
    PURPLEY: str = "#8F6E9B"
    TAN: str = "#DDD7C6"


# Shared default palette instance used by the plotting modules.
color = Colors()
34 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/__init__.py:
--------------------------------------------------------------------------------
1 | """Styled bar chart."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/opinium.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/opinium_barchart/opinium.png
--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/opinium_barchart_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/opinium_barchart/opinium_barchart_example.png
--------------------------------------------------------------------------------
/plotting_examples/y2022/opinium_barchart/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Bar chart style copied from Opinium.
4 |
5 | Saw this on twitter (i think) and thought I'd recreate it in mpl.
6 | """
7 |
8 | from __future__ import annotations
9 |
10 | import pathlib
11 |
12 | import matplotlib as mpl
13 | import matplotlib.image as mpimg
14 | import matplotlib.pyplot as plt
15 |
16 | from plotting_examples import dvc_entry, save_plot_output
17 | from plotting_examples.y2022 import metadata
18 |
19 |
def main() -> mpl.figure.Figure:
    """
    Draw an Opinium-style diverging bar chart of net trust scores.

    Bars are drawn as thick horizontal lines starting just either side of zero;
    positive and negative values get different colours, every other row gets a
    faint grey band, and a title block, source footnote and logo are added.

    Returns
    -------
    mpl.figure.Figure
        The finished figure.
    """
    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        data = {
            "Trump": -63,
            "Johnson": -11,
            "O'Neill": 3,
            "Foster": 9,
            "Khan": 16,
            "Starmer": 18,
            "Sturgeon": 34,
            "Drakeford": 34,
        }

        fig, ax = plt.subplots(figsize=(15, 7))

        # trying to setup as many variables as possible here - though there are still
        # some magic values
        max_val = max(data.values())

        line_width = 20
        start_offset = line_width * 0.08
        percentage_label_shift = 3
        positive_bar_color = metadata.color.DEEPER_GREEN
        negative_bar_color = metadata.color.PINK_COLOUR
        font_size = 12
        source_fontsize = 8
        footnote_location = (0, -0.3)
        # Distance from zero at which the name labels are anchored.
        x_loc = 20

        # Johnson here is the value which is used as it's not the most negative, but is
        # negative. Really, this is just what kinda looked ok, with different data
        # there would likely have to be pretty different approaches to all of this i
        # think
        grey_bar_left_x = data["Johnson"]

        # One row per entry, however many entries ``data`` holds; rows are spaced two
        # units apart on the y axis.
        for bar_i, (name, x_val) in enumerate(data.items()):
            y_val = bar_i * 2
            if x_val > 0:
                # Positive bar: the name sits left of zero, the value right of the bar.
                direction = -1
                left_adjust = 9
                bar_color = positive_bar_color
                sign = "+"
                sign_align = "left"
            else:
                # Negative (or zero) bar: the name sits right of zero, the value left.
                direction = 1
                left_adjust = -15
                bar_color = negative_bar_color
                sign = ""
                sign_align = "right"

            ax.plot(
                [start_offset * -direction, x_val],
                [y_val, y_val],
                linewidth=line_width,
                c=bar_color,
            )
            ax.text(
                x=(x_loc * direction) + left_adjust,
                y=y_val,
                s=name,
                horizontalalignment="left",
                verticalalignment="center",
                fontsize=font_size,
            )
            ax.text(
                x=x_val + -direction * percentage_label_shift,
                y=y_val,
                s=f"{sign}{x_val}",
                verticalalignment="center",
                horizontalalignment=sign_align,
                fontsize=font_size,
            )

            # shading every other bar a bit
            if bar_i % 2 == 1:
                ax.plot(
                    [grey_bar_left_x, max_val + 20],
                    [y_val, y_val],
                    linewidth=line_width,
                    c="#a0a0a0",
                    alpha=0.07,
                    zorder=0,
                )

        # Strip the frame, tick labels and tick marks.
        for spine_name in ax.spines:
            ax.spines[spine_name].set_visible(False)
        ax.xaxis.set_ticklabels([])
        ax.yaxis.set_ticklabels([])
        ax.tick_params(axis="both", length=0)

        title_y = 1.2
        title_x = 0.45

        # Title
        ax.text(
            x=title_x,
            y=title_y,
            s="Level of Trust in information \nprovided on Coronavirus",
            transform=ax.transAxes,
            fontsize=20,
            horizontalalignment="left",
        )

        # subtitle
        ax.text(
            x=title_x,
            y=title_y - 0.11,
            s=(
                "Net Level of Trust in providing of information by party leaders\non"
                " Coronavirus"
            ),
            transform=ax.transAxes,
            c="#717171",
        )

        # Coloured rectangle to the left of the title block.
        rect = mpl.patches.Rectangle(
            (title_x - 0.015, title_y - 0.11),
            width=0.01,
            height=0.25,
            color=positive_bar_color,
            transform=ax.transAxes,
            clip_on=False,
        )
        ax.add_patch(rect)

        # source of data
        ax.text(
            x=footnote_location[0],
            y=footnote_location[1],
            s=(
                "https://www.opinium.com/wp-content/uploads/2020/06/"
                "VI-26-06-2020-Observer-Data-Tables.xlsx"
            ),
            transform=ax.transAxes,
            fontsize=source_fontsize,
        )

        # add company logo to plot
        image = mpimg.imread(
            pathlib.Path(__file__).parent / "opinium.png",
            format="png",
        )
        img_y = ax.bbox.ymin

        # transform=None: x / y here are given in display (pixel) coordinates.
        ax.text(
            x=ax.bbox.xmax + 400,
            y=img_y + 20,
            s="* Sample size: 2001\n25-26th June\nOpinium.co.uk",
            transform=None,
            verticalalignment="top",
        )

        fig.figimage(
            image,
            ax.bbox.xmax + 659,
            0,
            origin="upper",
        )
        ax.axvline(0, linewidth=0.1, alpha=0.9, color="#212121")

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
194 |
195 |
if __name__ == "__main__":
    # Hand this script's path to the project's DVC helper, then render the figure
    # and pass it to the shared save helper before exiting.
    this_script = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=this_script)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
200 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/pandas_stacked_bars_with_values/__init__.py:
--------------------------------------------------------------------------------
1 | """Create stacked bar plot with pandas."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/pandas_stacked_bars_with_values/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Horizontal stacked bars, based off of pandas.
4 |
5 | Could do these from scratch - pandas makes things a bit more straightforward though.
6 |
7 | Example of:
8 |
9 | - fixed formatting - setting categorical ticks at particular positions.
10 | """
11 |
12 | from __future__ import annotations
13 |
14 | import io
15 | import pathlib
16 |
17 | import matplotlib as mpl
18 | import matplotlib.pyplot as plt
19 | import pandas as pd
20 | from matplotlib.lines import Line2D
21 |
22 | from plotting_examples import dvc_entry, save_plot_output
23 | from plotting_examples.y2022 import metadata
24 |
25 |
def sample_data() -> tuple[pd.DataFrame, dict[int, dict[str, str]]]:
    """
    Build the example frame and the per-row styling metadata.

    Returns
    -------
    tuple
        ``(df_plot, plot_metadata)`` where ``df_plot`` has columns
        "Column A".."Column D" and five rows, and ``plot_metadata`` maps each row
        index to ``{"colour": ..., "label": ...}``.
    """
    csv = """\
Column A,Column B,Column C,Column D
22.9,21.4,26.6,27.1
40.0,28.9,38.1,40.9
20.9,22.0,18.7,15.3
10.5,18.9,8.5,8.4
5.7,8.8,8.1,8.3
"""
    df_plot: pd.DataFrame = pd.read_csv(io.StringIO(csv))

    # (label, colour) for row index 0, 1, 2, ... of the frame, in order.
    row_styles = (
        ("Something", metadata.color.TAN),
        ("Another", metadata.color.DEEPER_GREEN),
        ("This Thing", metadata.color.PINK_COLOUR),
        ("Thai Food", metadata.color.BLUE),
        ("Finally", metadata.color.PURPLEY),
    )

    # Resulting shape:
    # >>> {
    # >>>     0: {"colour": <hex>, "label": "Something"},
    # >>>     1: {"colour": <hex>, "label": "Another"},
    # >>>     ...
    # >>> }
    plot_metadata = {
        row_index: {"colour": colour, "label": label}
        for row_index, (label, colour) in enumerate(row_styles)
    }

    return df_plot, plot_metadata
69 |
70 |
def main() -> mpl.figure.Figure:
    """
    Draw horizontal stacked bars from the sample frame, annotated with values.

    Returns
    -------
    mpl.figure.Figure
        The figure containing the stacked bar chart.
    """
    frame, plot_metadata = sample_data()

    # Reverse columns as want to plot A as first bar.
    frame = frame.loc[:, frame.columns[::-1]]

    # If you want to rename the axis y-labels it's easiest to just rename them in the
    # dataframe columns.

    segment_colours = [entry["colour"] for entry in plot_metadata.values()]

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        fig, ax = plt.subplots(
            figsize=(15, 5),
            ncols=1,
            nrows=1,
            sharey=True,
            constrained_layout=False,
        )

        frame.T.plot.barh(
            stacked=True,
            ax=ax,
            color=segment_colours,
        )

        # Custom legend entries: one thick coloured line per stacked segment.
        legend_handles = []
        for entry in plot_metadata.values():
            legend_handles.append(
                Line2D(
                    [0],
                    [0],
                    color=entry["colour"],
                    label=entry["label"],
                    markersize=12,
                    linewidth=7,
                ),
            )

        ax.legend(
            handles=legend_handles,
            frameon=False,
            ncol=1,
            bbox_to_anchor=(1.01, 0.7),
            fontsize=12,
        )

        ax.set_title("This Is A Title", fontsize=20, y=1.05)
        ax.set_xlabel("%", fontsize=15)
        ax.grid(linewidth=0.2)
        ax.set_axisbelow(True)

        # Walk the axis patches alongside the flattened data values and write each
        # value in the middle of its patch; values below the threshold show "-".
        cell_values = frame.to_numpy().flatten()

        min_bar_size = 3
        for patch_index, patch in enumerate(ax.patches):
            left, bottom = patch.get_xy()
            centre = (
                left + patch.get_width() * 0.5,
                bottom + patch.get_height() * 0.5,
            )
            value = cell_values[patch_index]
            text = f"{value}" if value >= min_bar_size else "-"
            ax.annotate(
                text,
                centre,
                ha="center",
                va="center",
                fontsize=12,
            )

        for spine_name in ax.spines:
            ax.spines[spine_name].set_visible(False)

        ax.set_xlim(0, 100)
        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(base=5.0))

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig
154 |
155 |
if __name__ == "__main__":
    # Hand this script's path to the project's DVC helper, then render the figure
    # and pass it to the shared save helper before exiting.
    this_script = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=this_script)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
161 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/pos_neg_split_hbar/__init__.py:
--------------------------------------------------------------------------------
1 | """Create pos/neg hbar."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/pos_neg_split_hbar/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Create split horizontal bar chart.
4 |
5 | Split by dichotomous variable, with bar classifications.
6 |
7 | Can be a bit messy - not sure I'm much of a fan - but wanted to re-create anyway.
8 | """
9 |
10 | from __future__ import annotations
11 |
12 | import io
13 | import pathlib
14 |
15 | import matplotlib as mpl
16 | import matplotlib.pyplot as plt
17 | import matplotlib.ticker as plt_ticker
18 | import pandas as pd
19 |
20 | from plotting_examples import dvc_entry, save_plot_output
21 | from plotting_examples.y2022 import metadata
22 |
23 | # This the dichotomy - could be anything though, eg good/bad, old/young or whatever.
24 | LEVEL_0 = "good"
25 | LEVEL_1 = "bad"
26 |
27 |
def sample_data() -> tuple[pd.DataFrame, dict[int, str], dict[str, str]]:
    """
    Build the example data for the split bar chart.

    Dogs are taken from here : https://dogtime.com/dog-breeds/profiles

    Returns
    -------
    tuple
        ``(df, index_to_meaning_map, colour_map)``: the parsed frame, a mapping from
        row index to the row's "meaning" label, and a mapping from the
        high/med/low classification to a colour.
    """
    header = f"{LEVEL_0},{LEVEL_1},nr,{LEVEL_0}_colour,{LEVEL_1}_colour,meaning"
    rows = [
        "47.303474,51.18364658,1.51287942,med,med,Akita",
        "34.10226721,44.76493548,21.13279731,med,med,Basset Hound",
        "12.08045446,69.67354868,18.24599686,low,med,Cavapoo",
        "60.91476132,22.71988935,16.36534934,med,low,Doberdor",
        "19.43282773,56.88924657,23.67792571,low,med,Greyhound",
        "54.05072885,29.96153606,15.98773508,med,low,Irish Terrier",
        "53.096035,35.37625972,11.52770528,med,med,Poodle",
        "78.23942162,17.26331569,4.497262699,high,low,Sloughi",
        "51.68818968,38.14985888,10.16195143,med,med,Whippet",
        "38.14462181,39.1176673,22.73771089,med,med,Xoloitzcuintli",
    ]
    # Every line, including the last, is newline-terminated.
    raw_csv = "".join(f"{line}\n" for line in [header, *rows])
    df = pd.read_csv(io.StringIO(raw_csv))

    index_to_meaning_map: dict[int, str] = df["meaning"].to_dict()

    # high/med/low represent some pretend classifications for this example.
    colour_map = {
        "high": metadata.color.PINK_COLOUR,
        "med": metadata.color.TAN,
        "low": metadata.color.LIGHT_GREEN,
    }
    return df, index_to_meaning_map, colour_map
59 |
60 |
def plot_bar_percentages(df: pd.DataFrame, ax: plt.Axes) -> plt.Axes:  # type: ignore[name-defined]
    """
    Write each bar's width as a percentage just beyond the end of the bar.

    Patches whose position in ``ax.patches`` is at most ``df.index.max()`` are
    treated as the left-hand bars: their width is negated for display and the
    label is nudged to the left. All later patches are labelled to the right.

    Parameters
    ----------
    df
        Frame the bars were drawn from; only ``df.index.max()`` is used.
    ax
        Axes whose ``patches`` are annotated.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    nudge = 8
    for position, bar in enumerate(ax.patches):
        bar_width = bar.get_width()
        left, bottom = bar.get_xy()
        bar_end = left + bar_width
        y_middle = bottom + bar.get_height() * 0.5
        # Bar widths are in percentage points; convert to a fraction for "%" format.
        fraction = round(bar_width * 0.01, 2)

        if position <= df.index.max():
            # Printing to the left
            text = f"{-fraction:.0%}"
            anchor = (bar_end - nudge, y_middle)
        else:
            # Printing to the right
            text = f"{fraction:.0%}"
            anchor = (bar_end + nudge, y_middle)

        ax.annotate(
            text,
            anchor,
            ha="center",
            va="center",
        )
    return ax
91 |
92 |
def main() -> mpl.figure.Figure:
    """
    Draw the split horizontal bar chart.

    ``LEVEL_0`` values are drawn as bars extending left of zero and ``LEVEL_1``
    values as bars extending right; each bar is coloured by its classification
    and labelled with its percentage.

    Returns
    -------
    mpl.figure.Figure
        The figure containing the chart.
    """
    df, index_to_meaning_map, colour_map = sample_data()

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        # Create plot.
        fig, ax = plt.subplots(figsize=(15, 6))

        ax.set_axisbelow(True)

        # Negated widths make the LEVEL_0 bars grow leftwards from zero.
        ax.barh(
            df.index,
            width=-df[LEVEL_0],
            height=0.8,
            color=df[f"{LEVEL_0}_colour"].map(colour_map),
            edgecolor="black",
        )
        ax.barh(
            df.index,
            width=df[LEVEL_1],
            height=0.8,
            color=df[f"{LEVEL_1}_colour"].map(colour_map),
            edgecolor="black",
        )

        ax = plot_bar_percentages(df=df, ax=ax)

        # remove spines for top/right
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        # Set axis limits
        ax.set_ylim(bottom=-1, top=df.index.max() + 1)
        ax.set_xlim(left=-109, right=109)

        # Reformat tick frequency for x,y axis
        # x
        ax.xaxis.set_major_locator(plt_ticker.MultipleLocator(base=10))
        # y
        ax.yaxis.set_major_locator(plt_ticker.MultipleLocator(base=1))

        # Functions for reformatting plot tick values. FuncFormatter calls these with
        # (tick value, tick position).
        def x_fmt(x: float, _pos: int) -> str:
            # Both sides of the axis are shown as positive percentages.
            fmt = f"{int(x)} %"
            return fmt.replace("-", "")

        def y_fmt(y: float, _pos: int) -> str:
            # Look the label up by the tick's value (the bar's row index) rather than
            # by its position in the tick list, which shifts with the axis limits.
            return index_to_meaning_map.get(int(round(y)), "")

        ax.xaxis.set_major_formatter(plt_ticker.FuncFormatter(x_fmt))
        ax.yaxis.set_major_formatter(plt_ticker.FuncFormatter(y_fmt))

        # Headings for the two sides of the chart, just above the axes.
        heading_height = 1.1
        ax.text(
            0.48,
            heading_height,
            s=LEVEL_0,
            transform=ax.transAxes,
            ha="right",
            fontsize=20,
        )
        ax.text(
            0.52,
            heading_height,
            s=LEVEL_1,
            transform=ax.transAxes,
            ha="left",
            fontsize=20,
        )

        for tick in ax.get_xticklabels():
            tick.set_rotation(45)

        ax.grid(linewidth=0.2, which="major", axis="y")

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig
181 |
182 |
if __name__ == "__main__":
    # Hand this script's path to the project's DVC helper, then render the figure
    # and pass it to the shared save helper before exiting.
    this_script = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=this_script)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
187 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_distributions/__init__.py:
--------------------------------------------------------------------------------
1 | """Create scatter distributions plot."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_distributions/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Distributions of multiple variables.
4 |
5 | For a set of variables, each with an accompanying continuous variable on the same scale,
6 | plot the distributions of the continuous variable. Might be useful to have a kde
7 | overlaid here.
8 |
9 | Example of:
10 |
11 | - fixed formatting
12 | - setting categorical ticks at particular positions.
13 |
14 | """
15 |
16 | from __future__ import annotations
17 |
18 | import itertools
19 | import pathlib
20 | import re
21 | import textwrap
22 | from typing import TYPE_CHECKING, Any, cast
23 |
24 | import matplotlib as mpl
25 | import matplotlib.pyplot as plt
26 | import numpy as np
27 | import numpy.typing as npt
28 | import pandas as pd
29 |
30 | from plotting_examples import dvc_entry, save_plot_output
31 | from plotting_examples.y2022 import metadata
32 |
33 | np_rnd = np.random.Generator(np.random.MT19937(2))
34 |
35 |
36 | if TYPE_CHECKING:
37 | from collections.abc import Mapping
38 |
39 |
def sample_data(n_categories: int = 12) -> tuple[pd.DataFrame, dict[int, str]]:
    """
    Generate sample data.

    Parameters
    ----------
    n_categories
        Number of categories to generate, numbered from 1.

    Returns
    -------
    tuple
        ``(data, labels)`` where ``data`` has a "cat" column (category number) and
        a "cont" column (non-negative samples from a two-mode mixture), and
        ``labels`` maps each category to a random phrase.
    """
    # random stuff from postgres website.
    document = (
        "\n"
        "PostgreSQL is an object-relational database management system (ORDBMS) based "
        "on POSTGRES, Version 4.2, developed at the University of California at "
        "Berkeley Computer Science Department. POSTGRES pioneered many concepts that "
        "only became available in some commercial database systems much later.\n"
        "\n"
        "PostgreSQL is an open-source descendant of this original Berkeley code. It "
        "supports a large part of the SQL standard and offers many modern features:\n"
        "\n"
        "complex queries\n"
        "foreign keys\n"
        "triggers\n"
        "updatable views\n"
        "transactional integrity\n"
        "multiversion concurrency control\n"
        "Also, PostgreSQL can be extended by the user in many ways, for example by "
        "adding new\n"
    )
    flattened = re.sub(r"\n|\(|\)", " ", document, flags=re.M)
    words = [token for token in flattened.split(" ") if token]

    def rand_string() -> str:
        """Random string to represent labelling."""
        n_words = np_rnd.integers(3, 15, size=1)
        chosen = np_rnd.choice(words, size=n_words)
        return " ".join(chosen).capitalize()

    def _np_array_to_int(arr: npt.ArrayLike | int) -> int:
        """
        Convert single element ndarray to int.

        Mainly doing these mode_i checks as I'm updating some code
        following packages moving on.
        """
        if isinstance(arr, np.ndarray):
            assert len(arr) == 1
            arr = arr[0]
        else:
            assert isinstance(arr, (int, np.int64))
        return cast(int, arr)

    def rand_cont() -> npt.NDArray[np.float64]:
        # Generates a random bimodal distribution so that it looks roughly similar to
        # what we might see from timing data or whatever.
        loc_min, loc_max = 2, 7
        first_loc = np_rnd.integers(loc_min, loc_max, size=1)[0]
        first_size = np_rnd.integers(10, 250, size=1)[0]
        first_mode = np_rnd.normal(
            loc=first_loc,
            scale=2,
            size=first_size,
        )
        # NOTE(review): the threshold loc_max / (loc_max + loc_min) is below 1 while
        # first_loc is at least loc_min, so this always picks -1 - confirm intent.
        direction = -1 if first_loc > loc_max / (loc_max + loc_min) else 1

        first_loc = _np_array_to_int(arr=first_loc)
        second_loc = _np_array_to_int(
            arr=int(first_loc + direction * first_loc * 0.5),
        )
        first_size = _np_array_to_int(arr=first_size)
        # Second mode is drawn with 40% of the first mode's sample count.
        second_size = int(first_size * 0.4)
        second_mode = np_rnd.normal(loc=second_loc, scale=2, size=second_size)

        # Clip at zero so there are no negative values.
        return cast(
            npt.NDArray[np.float64],
            np.clip(np.concatenate([first_mode, second_mode]), a_min=0, a_max=np.inf),
        )

    data_dict: dict[str, list[float]] = {"cat": [], "cont": []}

    for category in range(1, n_categories + 1):
        samples = rand_cont()
        data_dict["cont"].extend(list(samples))
        data_dict["cat"].extend(list(np.repeat(category, len(samples))))

    data = pd.DataFrame(data_dict)
    labels = {category: rand_string() for category in data["cat"].unique()}

    return data, labels
124 |
125 |
def categorical_scatters(
    *,
    ax: plt.Axes,  # type: ignore[name-defined]
    data: pd.DataFrame,
    cont_var: str,
    cat_var: str,
    labels: Mapping[Any, str],
    # Used if there are particular colours for particular categories, if they're all
    # meant to be the same color then just pass in with the same value for each category
    # - they should all still be represented though.
    color_map: Mapping[Any, str] | None = None,
) -> plt.Axes:  # type: ignore[name-defined]
    """
    Scatter one jittered horizontal strip of points per category.

    Categories are laid out at y = 1, 2, ... in sorted group order, and the y axis
    ticks are placed at those same positions and labelled from ``labels``.

    Parameters
    ----------
    ax
        Axes to draw on.
    data
        Frame containing ``cont_var`` and ``cat_var`` columns.
    cont_var
        Name of the continuous column plotted on the x axis.
    cat_var
        Name of the categorical column; one strip is drawn per distinct value.
    labels
        Tick label text for every category value.
    color_map
        Optional colour per category value; when falsy every strip uses the
        default colour.

    Returns
    -------
    The same ``ax`` that was passed in.
    """
    # Can use this to get alternating colours, i did then went off it.
    colors = itertools.cycle(
        [metadata.color.PINK_COLOUR, metadata.color.PINK_COLOUR],
    )

    tick_positions = []
    tick_labels = []

    # Grouping by the bare column name yields scalar group keys on every pandas
    # version, so no tuple unpacking is needed.
    for y_val, (g, dfg) in enumerate(data.groupby(cat_var), 1):
        color = next(colors)
        color = color_map[g] if color_map else color

        # Rows equal to 88888 are left out - presumably a missing-value code; confirm
        # against the data source.
        x_values = dfg.loc[dfg[cont_var].ne(88888), cont_var]
        # Jitter is sized from the filtered values so x and y always have the same
        # length, even when some rows were dropped above.
        y_values = y_val + np_rnd.normal(
            loc=0,
            scale=0.05,
            size=len(x_values),
        )
        ax.scatter(
            x=x_values,
            y=y_values,
            color=color,
            alpha=0.3,
        )

        # Tick goes where the strip was drawn (y_val), not at the category's value,
        # so categories that are not exactly 1..n are still labelled correctly.
        tick_positions.append(y_val)
        tick_labels.append("\n".join(textwrap.wrap(labels[g], width=30)))

    ax.grid(alpha=0.1)

    ax.yaxis.set_major_locator(mpl.ticker.FixedLocator(tick_positions))
    ax.yaxis.set_major_formatter(mpl.ticker.FixedFormatter(tick_labels))

    return ax
178 |
179 |
def main() -> mpl.figure.Figure:
    """
    Draw the categorical scatter strips for the generated sample data.

    Returns
    -------
    mpl.figure.Figure
        The figure containing the plot.
    """
    data, labels = sample_data()

    # Every category is pink by default ...
    color_map = dict.fromkeys(labels, metadata.color.PINK_COLOUR)
    # ... maybe we want to highlight a particular value or whatever idk.
    color_map[3] = metadata.color.DEEPER_GREEN

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig, ax = plt.subplots(
            figsize=(20, 20),
            ncols=1,
            nrows=1,
            sharey=True,
            constrained_layout=False,
        )
        ax = categorical_scatters(
            data=data,
            cont_var="cont",
            cat_var="cat",
            labels=labels,
            ax=ax,
            color_map=color_map,
        )
        ax.set_title(
            "Scatter plot with categorical labels",
            fontsize=20,
        )

        # axis styling: hide the whole frame.
        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig
228 |
229 |
if __name__ == "__main__":
    # Hand this script's path to the project's DVC helper, then render the figure
    # and pass it to the shared save helper before exiting.
    this_script = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=this_script)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
234 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_matrix_w_kde_on_diag/__init__.py:
--------------------------------------------------------------------------------
1 | """Scatter matrix with sns kde on diagonal."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_matrix_w_kde_on_diag/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Scatter matrix with kde instead of histogram on the diagonal.
4 |
5 | Could probably adapt pd.scatter_matrix instead of doing it from scratch. Though with
6 | this approach the non-diagonal plots could be whatever instead of a scatter plot I
7 | guess...
8 |
9 | Would be good to make the upper diagonals differ from the lower diagonals a bit... maybe
10 | some sort of table from pd.cut on the others or whatever.
11 |
12 | I'd probably just use subplot_mosaic as well now - that's grown on me a lot since this.
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | import itertools
18 | import pathlib
19 |
20 | import matplotlib as mpl
21 | import matplotlib.pyplot as plt
22 | import numpy as np
23 | import seaborn as sns
24 |
25 | from plotting_examples import dvc_entry, save_plot_output
26 | from plotting_examples.y2022 import metadata
27 |
# Module-level random generator built on MT19937 with a fixed seed (1977), so the
# sample data drawn in main() is the same on every run.
np_rnd = np.random.Generator(np.random.MT19937(1977))
29 |
30 |
def main() -> mpl.figure.Figure:
    """
    Build a scatter matrix whose diagonal panels show a KDE.

    Four variables of fifty chi-square samples each are drawn from the module
    level generator. Off-diagonal panels are scatter plots of one variable
    against another, diagonal panels carry the variable name and a filled
    seaborn KDE.

    Returns the populated figure.
    """
    names = ["mpg", "disp", "drat", "wt"]
    numvars, numdata = 4, 50

    # One row of samples per variable.
    data = 10 * np_rnd.chisquare(df=4, size=(numvars, numdata))

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig, axes = plt.subplots(
            nrows=numvars,
            ncols=numvars,
            figsize=(15, 15),
            constrained_layout=True,
        )

        # Start with every axis hidden, the ones we want are re-enabled below.
        for panel in axes.flat:
            panel.xaxis.set_visible(False)
            panel.yaxis.set_visible(False)

        # Scatter each pair of variables, once above and once below the diagonal.
        upper_rows, upper_cols = np.triu_indices_from(axes, k=1)
        for i, j in zip(upper_rows, upper_cols):
            for row, col in ((i, j), (j, i)):
                panel = axes[row, col]
                panel.scatter(
                    data[row],
                    data[col],
                    color=metadata.color.PINK_COLOUR,
                )
                panel.set_facecolor(metadata.color.BACKGROUND_COLOUR)
                panel.grid(linestyle=":", alpha=0.2)

        # Variable name in the middle of each diagonal panel.
        for idx, label in enumerate(names):
            axes[idx, idx].annotate(
                label,
                (0.5, 0.5),
                xycoords="axes fraction",
                ha="center",
                va="center",
                fontsize=15,
                fontweight="bold",
            )

        # Show (rotated) x tick labels on every off-diagonal panel.
        rotate = 45
        for row, col in itertools.product(range(numvars), repeat=2):
            if row == col:
                continue
            axes[row, col].xaxis.set_visible(True)
            for tick_label in axes[row, col].get_xticklabels():
                tick_label.set_rotation(rotate)

        # Densities on the diagonal.
        for idx in range(numvars):
            diagonal = axes[idx, idx]
            sns.kdeplot(
                x=data[idx],
                ax=diagonal,
                alpha=0.1,
                fill=True,
                color=metadata.color.PINK_COLOUR,
            )
            diagonal.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        # y axis only on the first column, skipping the top-left (diagonal) panel.
        for row in range(1, numvars):
            axes[row, 0].yaxis.set_visible(True)

        _ = fig.suptitle("Example Scatterplots", fontsize=20)
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
109 |
110 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then
    # build the figure and hand it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
115 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_w_outlined_text_insert/__init__.py:
--------------------------------------------------------------------------------
1 | """Scatter plot with outlined text."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_w_outlined_text_insert/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/scatter_w_outlined_text_insert/data.parquet
--------------------------------------------------------------------------------
/plotting_examples/y2022/scatter_w_outlined_text_insert/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Scatter plot with text inserted to scatter points.
4 |
5 | Data was taken from a tidy tuesday.
6 |
7 | Example of:
8 |
9 | - Outlining text elements in a plot.
10 | """
11 |
12 | from __future__ import annotations
13 |
14 | import pathlib
15 | from typing import TypeVar
16 |
17 | import matplotlib as mpl
18 | import matplotlib.patheffects as pe
19 | import matplotlib.pyplot as plt
20 | import matplotlib.ticker as mtick
21 | import pandas as pd
22 | from matplotlib.dates import DateFormatter, YearLocator
23 |
24 | from plotting_examples import dvc_entry, save_plot_output
25 | from plotting_examples.y2022 import metadata
26 |
# Generic type variable. NOTE(review): nothing in this module appears to
# reference it - confirm whether it can be dropped.
T = TypeVar("T")
28 |
29 |
def get_plotting_data() -> pd.DataFrame:
    """
    Load data.parquet (next to this file) and summarise distributors per year.

    For every year the result holds one row per distributor with its count, its
    percentage share (rounded to one decimal) and a colour: the first row after
    sorting by count (descending) gets the highlight colour, all others the
    grey one. The year column is converted to datetime before returning.

    Raises ValueError if any row ends up without a colour.
    """
    top_colour = "#ff2309"
    other_colour = "#d0d0d0"

    raw = pd.read_parquet(pathlib.Path(__file__).parent / "data.parquet")

    per_year = []
    for group_key, year_rows in raw.groupby(["year"]):
        # Grouping by a list gives a tuple key, the year is its only element.
        year = group_key[0]
        distributors = year_rows["distributor"]

        counts = distributors.value_counts().reset_index().assign(year=year)
        shares = (
            distributors.value_counts(normalize=True)
            .reset_index()
            .rename(columns={"proportion": "percentage"})
            .assign(
                percentage=lambda frame: frame["percentage"].mul(100).round(1),
                year=year,
            )
        )

        combined = pd.merge(counts, shares, on=["distributor", "year"])
        combined = combined.sort_values("count", ascending=False)

        # Highlight the first row only, everything after it is greyed out.
        combined["colour"] = [top_colour] + [other_colour] * (len(combined) - 1)

        if combined["colour"].isna().any():
            raise ValueError

        per_year.append(combined)

    plotting_data = pd.concat(per_year)
    plotting_data["year"] = pd.to_datetime(plotting_data["year"], format="%Y")

    return plotting_data
66 |
67 |
def main() -> mpl.figure.Figure:
    """
    Scatter the yearly distributor percentages and label the highlighted ones.

    Rows carrying the grey colour are drawn as small, faint points. Every other
    row is drawn as a large opaque point with the distributor name on top, the
    text being outlined with a stroke path effect.

    Returns the populated figure.
    """
    plotting_data = get_plotting_data()

    # want to know how many there were each year.
    year_counts = (
        plotting_data.groupby("year").size().rename("year_counts").reset_index()
    )
    plotting_data = pd.merge(plotting_data, year_counts, on="year")

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig, ax = plt.subplots(figsize=(40, 15))

        other_colour = "#d0d0d0"

        for _, distributor_rows in plotting_data.groupby("distributor"):
            for _, row in distributor_rows.iterrows():
                highlighted = row["colour"] != other_colour
                ax.scatter(
                    x=row["year"],
                    y=row["percentage"],
                    alpha=1 if highlighted else 0.2,
                    s=800 if highlighted else 300,
                    color=metadata.color.PINK_COLOUR,
                    zorder=2 if highlighted else 1,
                )
                if not highlighted:
                    continue
                # Distributor name, outlined so it stays readable on the point.
                outline = pe.withStroke(
                    linewidth=4,
                    foreground=metadata.color.PINK_COLOUR,
                )
                ax.text(
                    x=row["year"],
                    y=row["percentage"],
                    s=row["distributor"],
                    horizontalalignment="center",
                    verticalalignment="center",
                    color="black",
                    size=14,
                    path_effects=[outline],
                )

        ax.yaxis.set_major_formatter(mtick.PercentFormatter())
        ax.set_title("Top film distributor, 1957 - 2021", fontsize=35, y=1.05)

        # Same label size on both axes (x ticks first, then y ticks).
        for axis_obj in (ax.xaxis, ax.yaxis):
            for tick in axis_obj.get_major_ticks():
                tick.label1.set_fontsize(15)

        ax.tick_params(axis="both", which="both", length=0)

        for side in ("top", "right", "left", "bottom"):
            ax.spines[side].set_visible(False)

        ax.grid(alpha=0.15, axis="y", zorder=0)

        # One major x tick every five years, labelled with the year only.
        years = YearLocator(5)  # type: ignore[no-untyped-call]
        years_fmt = DateFormatter("%Y")  # type: ignore[no-untyped-call]
        ax.xaxis.set_major_locator(years)
        ax.xaxis.set_major_formatter(years_fmt)

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
153 |
154 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then
    # build the figure and hand it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
159 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/sns_violin_plot_custom/__init__.py:
--------------------------------------------------------------------------------
1 | """Customise sns violin plot."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/sns_violin_plot_custom/data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/sns_violin_plot_custom/data.parquet
--------------------------------------------------------------------------------
/plotting_examples/y2022/sns_violin_plot_custom/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Edit SNS violin plot.
4 |
5 | Simple example of adjusting the output of a sns plot - I don't typically use sns, but
6 | ofc the objects can be accessed/iterated/edited over as with any other mpl axis.
7 |
8 | What's here doesn't look good - just an example of changing defaults.
9 | """
10 |
11 | from __future__ import annotations
12 |
13 | import pathlib
14 |
15 | import matplotlib as mpl
16 | import matplotlib.pyplot as plt
17 | import pandas as pd
18 | import seaborn as sns
19 |
20 | from plotting_examples import dvc_entry, save_plot_output
21 | from plotting_examples.y2022 import metadata
22 |
23 |
def main() -> mpl.figure.Figure:
    """
    Draw a seaborn violin plot and then edit some of the artists it created.

    Data is read from data.parquet next to this file. After plotting, every
    second line of the axis (starting at index 1) is widened and a few of the
    axis children get a thicker line.

    Returns the populated figure.
    """
    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig, axis = plt.subplots(
            figsize=(10, 5),
            constrained_layout=False,
        )
        frame = pd.read_parquet(pathlib.Path(__file__).parent / "data.parquet")

        vio = sns.violinplot(
            data=frame,
            x="species",
            y="flipper_length_mm",
            density_norm="count",
            inner="box",
            linewidth=4,
            ax=axis,
            color=metadata.color.PINK_COLOUR,
        )
        vio.grid(alpha=0.2)

        # What size to increase/decrease the central boxplot section to.
        new_width = 30

        # Resize the boxplot lines; which list elements to touch was found by
        # guess and check.
        for box_line in vio.lines[1::2]:
            box_line.set_linewidth(new_width)

        # Adjust the median point markers within the boxplot.
        for artist in vio.get_children()[1:6:2]:
            artist.set_linewidth(5)

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        vio.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
65 |
66 |
if __name__ == "__main__":
    # Script entry point: pass this file's path to dvc_entry.add_to_dvc, then
    # build the figure and hand it to save_plot_output.save_plot before exiting.
    script_path = pathlib.Path(__file__)
    dvc_entry.add_to_dvc(path=script_path)
    figure = main()
    save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
71 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/split_x_axis_custom_legend/__init__.py:
--------------------------------------------------------------------------------
1 | """Create plot with custom legend."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/split_x_axis_custom_legend/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Example of creating multiple x-axis in order to plot year / months.
4 |
5 | The fig size needs to be pretty large in order to squeeze all the month names etc in
6 | here. Generated data looks a mess on these plots.
7 |
8 | Example of:
9 |
10 | - Custom legend
11 | - generating random date data
12 | - multiple x-axis to display years / months
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | import pathlib
18 |
19 | import matplotlib as mpl
20 | import matplotlib.dates as mdates
21 | import matplotlib.pyplot as plt
22 | import numpy as np
23 | import pandas as pd
24 | from matplotlib.lines import Line2D
25 |
26 | from plotting_examples import dvc_entry, save_plot_output
27 | from plotting_examples.y2022 import metadata
28 |
# Module-level random generator built on MT19937 with a fixed seed (0), so the
# generated sample data is the same on every run.
np_rnd = np.random.Generator(np.random.MT19937(seed=0))
30 |
31 |
def random_dates(
    start: pd.Timestamp,
    end: pd.Timestamp,
    n_days: int,
    unit: str = "D",
    rng: np.random.Generator | None = None,
) -> pd.DatetimeIndex:
    """
    Generate random timestamps between ``start`` and ``end``.

    >>> start = pd.to_datetime('2015-01-01')
    >>> end = pd.to_datetime('2018-01-01')

    Found on a SO post, can't remember where now though.

    Parameters
    ----------
    start, end
        Bounds of the range. The number of whole days between them (inclusive)
        is used as the upper limit of the random offset added to ``start``.
    n_days
        How many timestamps to generate (despite the name this is the sample
        size, not a number of days).
    unit
        Unit passed to ``pd.to_timedelta`` for the random offsets. The offset
        magnitude is always derived from a day count, so anything other than
        ``"D"`` changes the width of the sampled range.
    rng
        Generator to draw from. Defaults to the module level ``np_rnd``.

    Returns
    -------
    pd.DatetimeIndex
        ``n_days`` timestamps, in the order they were drawn (not sorted).

    Raises
    ------
    ValueError
        If ``end`` is earlier than ``start``.
    """
    if end < start:
        msg = f"end ({end}) must not be earlier than start ({start})"
        raise ValueError(msg)
    generator = np_rnd if rng is None else rng
    # +1 so that times during the final day can be produced as well.
    ndays = (end - start).days + 1
    offsets = pd.to_timedelta(generator.random(n_days) * ndays, unit=unit)
    return offsets + start
48 |
49 |
def main() -> mpl.figure.Figure:
    """
    Plot monthly streams per song for five countries, with a custom legend.

    Random sample data is generated, summed per location / song / month and
    drawn on a 2x3 grid: five panels hold one country each, the remaining panel
    (index 4) only carries the legend. Each country panel labels months on the
    minor x ticks and years on a secondary x axis below.

    Returns the populated figure.
    """
    n = 10_000
    # generate sample data (the order of the draws determines the values)
    df = pd.DataFrame(
        {
            "location": np_rnd.choice(
                ["UK", "US", "FR", "JP", "DE"],
                size=n,
            ),
            "song": np_rnd.choice(
                [
                    "one two three",
                    "four five six",
                    "seven eight nine",
                    "ten eleven twelve",
                    "thirteen",
                    "fourteen",
                    "fifteen sixteen",
                ],
                size=n,
            ),
            "streams": np_rnd.integers(1_000, 10_000, size=n),
            "date": random_dates(
                start=pd.to_datetime("2020-01-01"),
                end=pd.to_datetime("2022-03-01"),
                n_days=n,
            ),
        },
    )

    # Color mapping for song names to use in plotting
    song_colors = {
        "fifteen sixteen": metadata.color.TAN,
        "four five six": metadata.color.PURPLEY,
        "fourteen": "black",
        "one two three": metadata.color.PINK_COLOUR,
        "seven eight nine": metadata.color.DEEPER_GREEN,
        "ten eleven twelve": metadata.color.BLUE,
        "thirteen": metadata.color.BROWNY_RED,
    }

    # aggregate for plotting
    df = (
        df.groupby(["location", "song", pd.Grouper(key="date", freq="ME")])["streams"]
        .sum()
        .reset_index()
        .assign(
            # Use callables so both columns are derived from the *aggregated*
            # frame. Referring to the outer ``df`` here would index-align the
            # raw, un-aggregated dates onto these rows.
            date_name=lambda agg: (
                agg["date"].dt.month_name() + " " + agg["date"].dt.year.astype(str)
            ),
            color=lambda agg: agg["song"].map(song_colors),
        )
    )

    def format_axis(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Format axis."""
        ax.grid(alpha=0.2)

    def stream_plot(df: pd.DataFrame, country: str, ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Draw one line per song for a single country and style the axis."""
        for _, song_data in df.groupby("song"):
            ax.plot(
                song_data["date"],
                song_data["streams"],
                # every row of a song shares the same colour
                color=song_data["color"].to_list().pop(),
                alpha=0.7,
                linewidth=3,
            )
        format_axis(ax=ax)
        ax.set_title(
            country,
            fontsize=20,
        )

        for label in ax.get_xticklabels():
            label.set_rotation(45)
            label.set_ha("right")

        ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        # want to format 1000 -> 1,000
        ax.get_yaxis().set_major_formatter(
            mpl.ticker.FuncFormatter(lambda x, _: format(int(x), ",")),
        )
        # reduce some noise
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        # Months go on the minor ticks of the main axis, major ticks are removed.
        fmt_month = mdates.MonthLocator(interval=1)  # type: ignore[no-untyped-call]
        fmt_year = mdates.YearLocator()  # type: ignore[no-untyped-call]
        ax.xaxis.set_minor_locator(fmt_month)
        ax.xaxis.set_minor_formatter(mdates.DateFormatter("%b"))  # type: ignore[no-untyped-call]
        ax.xaxis.set_ticks([])

        ax.tick_params(axis="x", which="minor", labelsize=8)

        # Years are shown on a secondary axis placed below the month labels.
        sec_xaxis = ax.secondary_xaxis(-0.1)
        sec_xaxis.xaxis.set_major_locator(fmt_year)
        sec_xaxis.xaxis.set_major_formatter(mdates.DateFormatter("%Y"))  # type: ignore[no-untyped-call]
        sec_xaxis.spines["bottom"].set_visible(False)
        sec_xaxis.tick_params(length=0, labelsize=12)

    color_dict = df.drop_duplicates("song").set_index("song")["color"].to_dict()

    fig, axis = plt.subplots(
        ncols=3,
        nrows=2,
        figsize=(35, 20),
    )
    plt.subplots_adjust(hspace=0.5)

    axis = axis.flatten()

    plt.suptitle(
        "Streaming across different countries for different songs",
        fontsize=25,
    )

    # Which flattened panel each country is drawn on; panel 4 is kept free for
    # the legend.
    legend_panel = 4
    country_panels = {"DE": 0, "FR": 1, "JP": 2, "UK": 3, "US": 5}
    for country, panel_idx in country_panels.items():
        stream_plot(
            df=df.loc[df["location"].eq(country)],
            country=country,
            ax=axis[panel_idx],
        )

    # Plot legend
    ax = axis[legend_panel]
    custom_lines = [Line2D([0], [0], color=x, lw=6) for x in color_dict.values()]
    ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    ax.legend(
        custom_lines,
        list(color_dict.keys()),
        loc="center",
        fontsize=16,
        frameon=False,
        borderpad=2,
    )

    for spine in ax.spines:
        ax.spines[spine].set_visible(False)

    ax.get_xaxis().set_ticks([])
    ax.get_yaxis().set_ticks([])

    fig.supylabel(
        "Something about the y-axis",
        x=0.09,
        fontsize=20,
    )

    fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
    return fig
233 |
234 |
if __name__ == "__main__":
    # Script entry point. The rc overrides wrap both figure creation and saving.
    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        script_path = pathlib.Path(__file__)
        dvc_entry.add_to_dvc(path=script_path)
        figure = main()
        save_plot_output.save_plot(fig=figure, file=__file__)
    raise SystemExit
245 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/stacked_bar_with_single_bars_layout/__init__.py:
--------------------------------------------------------------------------------
1 | """Stacked bar chart."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/stacked_bar_with_single_bars_layout/data/lab.png
--------------------------------------------------------------------------------
/plotting_examples/y2022/stacked_bar_with_single_bars_layout/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Layout containing two bar plots and a bivariate plot between them.
4 |
5 | In this case it's a silly example of some data containing the social grade of
6 | Labradors, as well as the education group. The main plot is a stacked bar containing
7 | the breakdown of education group for each social grade.
8 |
9 | Don't think I'm too keen on the code for this plot - though it's not always so clear
10 | (to me) how to make "nice" code with a lot of matplotlib stuff.
11 |
12 | Obviously, the data is made up.
13 | """
14 |
15 | from __future__ import annotations
16 |
17 | import pathlib
18 |
19 | import matplotlib as mpl
20 | import matplotlib.pyplot as plt
21 | import numpy as np
22 | import pandas as pd
23 | from PIL import Image
24 |
25 | from plotting_examples import dvc_entry, save_plot_output
26 | from plotting_examples.y2022 import metadata
27 |
# Fontsize for the main title and subtitle
FONTSIZE_TITLE = 30
FONTSIZE_SUBTITLE = 20
# Fontsize for the numbers displayed on bars.
FONTSIZE_PLT_TXT = 10

# What colour to outline the edges of bars with - if None then there's no outline
# created.
BAR_EDGECOLOR: str | None = None
# What level of rounding to apply to percentages displayed on bars.
ROUNDING_PCTS = 1

# Dependent var
VAR_DEPENDENT = "dependent_var"
# Independent var
VAR_INDEPENDENT = "independent_var"
# Text for the main title - the subtitle is generated from the metadata atm.
TEXT_TITLE = "Labradors\neducation ~ social grade"
# Image to display in the top left. Resolved relative to this file (rather than
# the current working directory) so the script works wherever it is run from.
# Kept as a str so existing users of the constant see the same type.
IMAGE_PATH = str(pathlib.Path(__file__).parent / "data" / "lab.png")
50 |
# Bar colours. Used for the stacked bars of the main plot and for the bars of the
# bottom marginal plot.
COLORS = [
    metadata.color.PINK_COLOUR,
    metadata.color.DEEPER_GREEN,
    metadata.color.BROWNY_RED,
]
# Colour of the subtitle and of the informational text.
COLOR_SUBTITLE_TEXT = "#808080"

# subplot_mosaic layout definition. Each inner list is one row of the grid, a
# name repeated across cells makes that axis span those cells.
LAYOUT = [
    ["top_left_corner", "title", "title", "title", "top_right_corner"],
    ["main", "main", "main", "main", "side"],
    ["main", "main", "main", "main", "side"],
    ["bottom", "bottom", "bottom", "bottom", "bottom_right_corner"],
]

# Colors which are used when the bar colour is dark/light respectively - so that the
# text is readable (not dark font on dark bars etc). The names refer to the bar:
# COLOR_FONT_LIGHT is the font colour for light bars, COLOR_FONT_DARK for dark bars.
COLOR_FONT_LIGHT = "#000000"
COLOR_FONT_DARK = "#ffffff"
70 |
71 |
def get_sample_data() -> (
    tuple[
        pd.DataFrame,
        dict[str, dict[float, str]],
        dict[str, str],
    ]
):
    """
    Build the made-up survey data used by the plot.

    The return value mimics what you'd get from an SPSS sav file: ``df`` holds
    the responses (plus a constant ``weight`` column), ``vvl`` maps the codes of
    each column to value labels and ``cnl`` maps column names to column labels.
    """
    rng = np.random.default_rng(1)

    # (independent level, number of rows, probabilities of dependent levels 1/2/3)
    # The generator is consumed in this order, so the order must not change.
    level_specs = [
        (5, 330, (0.87, 0.1, 0.03)),
        (4, 410, (0.44, 0.54, 0.02)),
        (3, 510, (0.26, 0.61, 0.13)),
        (2, 800, (0.105, 0.565, 0.33)),
        (1, 950, (0.08, 0.33, 0.59)),
    ]
    # One frame per independent level, each with its own distribution of the
    # dependent variable.
    frames = [
        pd.DataFrame(
            {
                VAR_DEPENDENT: rng.choice([1, 2, 3], size=size, p=probs),
                VAR_INDEPENDENT: level,
            },
        )
        for level, size, probs in level_specs
    ]
    df = pd.concat(frames).assign(weight=1).reset_index(drop=True)

    vvl = {
        VAR_INDEPENDENT: {
            1.0: "Upper management",
            2.0: "Lower Management",
            3.0: "Intermediate",
            4.0: "Routine",
            5.0: "Never worked",
        },
        VAR_DEPENDENT: {
            1.0: "Low",
            2.0: "Medium",
            3.0: "High",
        },
    }
    cnl = {
        VAR_INDEPENDENT: "Social Grade",
        VAR_DEPENDENT: "Education Level",
    }

    return df, vvl, cnl
167 |
168 |
def patch_color_light(patch: mpl.patches.Rectangle) -> bool:
    """
    Tell whether the face colour of ``patch`` counts as light.

    The face colour is converted to hex, split into its red / green / blue
    components (0-255) and combined into a weighted brightness value. Returns
    True when that value is above the threshold, False otherwise.
    """
    # TODO: Put this into a global helper module.
    hex_digits = mpl.colors.to_hex(patch.get_facecolor())[1:]
    red, green, blue = (int(hex_digits[pos : pos + 2], 16) for pos in (0, 2, 4))
    # https://stackoverflow.com/questions/3942878/how-to-decide-
    # font-color-in-white-or-black-depending-on-background-color
    threshold = 100
    brightness = red * 0.299 + green * 0.587 + blue * 0.114
    return brightness > threshold
185 |
186 |
class PlotSections:
    """
    Holds plotting sections.

    Just using this for namespacing really! Which was triggered by pylint complaining,
    which probably isn't a good reason... Might usually just put this in a module but
    wanted all the code in plot.py

    Considered adding the df, vvl, cnl to the class in an __init__ or whatever but left
    it as-is.

    Every member is a static method that draws one section of the mosaic layout
    onto the axis it is given. ``df`` is the response data, ``vvl`` maps column
    name -> {code: value label} and ``cnl`` maps column name -> column label.
    """

    # rename to bivariate.
    @staticmethod
    def main(
        ax: plt.Axes,  # type: ignore[name-defined]
        df: pd.DataFrame,
        vvl: dict[str, dict[float, str]],
        # cnl: dict[str, str],
    ) -> None:
        # pylint: disable=too-many-locals
        """
        Plot the stacked bars.

        One horizontal bar per independent level, split by the column
        percentages of the dependent variable. Segments of at least
        ``min_bar_size`` percent are annotated with "<pct> (<count>)", smaller
        ones with "-".
        """
        independent_labels = list(vvl[VAR_INDEPENDENT].values())
        df_plot = (
            pd.crosstab(
                df[VAR_DEPENDENT],
                df[VAR_INDEPENDENT].replace(vvl[VAR_INDEPENDENT]),
                normalize="columns",
            )
            .mul(100)
            .round(1)
            .loc[:, independent_labels]
        )
        # NOTE(review): these columns are ordered by code while df_plot follows
        # the vvl order; the two only agree when vvl lists its codes in ascending
        # order (which the sample metadata does).
        df_plot_counts = pd.crosstab(
            df[VAR_DEPENDENT],
            df[VAR_INDEPENDENT],
        )
        df_plot.T.plot.barh(
            stacked=True,
            ax=ax,
            color=COLORS,
            edgecolor=BAR_EDGECOLOR,
        )

        ax.grid(linestyle=":", alpha=0.3)

        # The legend _should_ be self explanatory from the context of the plot.
        ax.get_legend().remove()

        # Not interested in seeing the col name on the y axis for the main plot
        ax.set_ylabel("")

        # Flattened row by row (dependent level first), which is the order the
        # annotation loop below pairs with ax.patches.
        data_matrix = df_plot.to_numpy().flatten()
        data_matrix_counts = df_plot_counts.to_numpy().flatten()
        min_bar_size = 3
        for i, patch in enumerate(ax.patches):
            width = patch.get_width()
            height = patch.get_height()
            x, y = patch.get_xy()

            # Segments that are too small to hold text just get a dash.
            if data_matrix[i] >= min_bar_size:
                ann = f"{data_matrix[i]} ({data_matrix_counts[i]})"
            else:
                ann = "-"

            text_col = COLOR_FONT_LIGHT if patch_color_light(patch) else COLOR_FONT_DARK

            ax.annotate(
                ann,
                (x + width * 0.5, y + height * 0.5),
                ha="center",
                va="center",
                fontsize=FONTSIZE_PLT_TXT,
                zorder=12,
                color=text_col,
            )

        ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(base=5))
        ax.set_xlabel("%", fontsize=10)
        ax.set_xlim(0, 100)

        ax.spines.top.set_visible(False)
        ax.spines.right.set_visible(False)

    @staticmethod
    def side_marginal(
        ax: plt.Axes,  # type: ignore[name-defined]
        df: pd.DataFrame,
        vvl: dict[str, dict[float, str]],
        cnl: dict[str, str],
    ) -> None:
        """
        Bar plot of the independent var.

        Bars are drawn in the order the value labels appear in ``vvl`` - the
        same order the main plot uses for its rows - rather than by frequency,
        so the two plots stay aligned whatever the counts are. Labels without
        any rows get a count of 0.
        """
        counts = (
            df[VAR_INDEPENDENT]
            .replace(vvl[VAR_INDEPENDENT])
            .value_counts()
            .reindex(list(vvl[VAR_INDEPENDENT].values()), fill_value=0)
        )

        ax.barh(
            counts.index,
            counts,
            color=metadata.color.TAN,
            edgecolor=BAR_EDGECOLOR,
            height=0.5,
        )
        ax.grid(alpha=0.2, linestyle=":")
        ax.set_title(cnl[VAR_INDEPENDENT], loc="left")

        total = counts.sum()

        for count, patch in zip(counts, ax.patches):
            count_pct = round((count / total) * 100, ROUNDING_PCTS)
            width = patch.get_width()
            height = patch.get_height()
            x, y = patch.get_xy()
            txt_color = COLOR_FONT_LIGHT if patch_color_light(patch) else COLOR_FONT_DARK
            ax.text(
                s=f"{count_pct}%\n({count})",
                x=x + width * 0.5,
                y=y + height * 0.5,
                va="center",
                ha="center",
                color=txt_color,
                fontsize=FONTSIZE_PLT_TXT,
            )

        ax.set_xticks([])
        ax.set_yticks([])

        ax.spines.right.set_visible(False)
        ax.spines.top.set_visible(False)
        ax.spines.bottom.set_visible(False)
        ax.spines.left.set_visible(False)

    @staticmethod
    def bottom_marginal(
        ax: plt.Axes,  # type: ignore[name-defined]
        df: pd.DataFrame,
        vvl: dict[str, dict[float, str]],
        cnl: dict[str, str],
    ) -> None:
        """
        Bar plot of the dependent variable.

        Counts are sorted by code and paired positionally with the value labels
        of ``vvl``; each bar is annotated with its percentage and count.
        """
        counts = df[VAR_DEPENDENT].value_counts().sort_index()
        ax.bar(
            x=list(vvl[VAR_DEPENDENT].values()),
            height=counts,
            color=COLORS,
            edgecolor=BAR_EDGECOLOR,
        )
        ax.set_title(cnl[VAR_DEPENDENT])
        ax.set_yticks([])

        total = counts.sum()
        for count, patch in zip(counts, ax.patches):
            count_pct = round((count / total) * 100, 2)
            width = patch.get_width()
            height = patch.get_height()
            x, y = patch.get_xy()
            txt_color = COLOR_FONT_LIGHT if patch_color_light(patch) else COLOR_FONT_DARK
            ax.text(
                s=f"{count_pct}\n({count})",
                x=x + width * 0.5,
                y=y + height * 0.5,
                va="center",
                ha="center",
                color=txt_color,
                fontsize=FONTSIZE_PLT_TXT,
            )

        ax.spines.top.set_visible(False)
        ax.spines.right.set_visible(False)
        ax.spines.left.set_visible(False)

    @staticmethod
    def title(ax: plt.Axes, cnl: dict[str, str]) -> None:  # type: ignore[name-defined]
        """Overall title, with a subtitle built from the column labels."""
        ax.text(
            s="Labradors",
            x=0.1,
            y=0.5,
            fontsize=FONTSIZE_TITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
        )

        # Just using this to nudge the text placement around...
        diff = 0.3
        ax.text(
            # Assuming that the metadata is reasonably nice for this.
            s=f"{cnl[VAR_DEPENDENT]} ~ {cnl[VAR_INDEPENDENT]}",
            x=0.1,
            y=0.5 - diff,
            fontsize=FONTSIZE_SUBTITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
            color=COLOR_SUBTITLE_TEXT,
        )

        ax.axis("off")

    @staticmethod
    def top_left_corner(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Plot logo (the image at IMAGE_PATH)."""
        # Use a context manager so the image file is closed once it has been
        # handed to the axis.
        with Image.open(IMAGE_PATH) as club_icon:
            ax.imshow(club_icon)
        ax.axis("off")

    @staticmethod
    def top_right_corner(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """Just leaving this empty for now."""
        ax.axis("off")

    @staticmethod
    def bottom_right_corner(ax: plt.Axes, df: pd.DataFrame) -> None:  # type: ignore[name-defined]
        """Some random information like data source etc, plus the sample size."""
        ax.text(
            s=(
                #
                "2022 Labrador educational \ndata and social grades"
                "\n"
                "\n"
                f"Sample size : {df.shape[0]}"
                "\n"
                "\n"
                "source: somedogdata.com"
            ),
            x=0,
            y=0.5,
            fontsize=FONTSIZE_PLT_TXT,
            va="center",
            ha="left",
            color=COLOR_SUBTITLE_TEXT,
        )
        ax.axis("off")

    @staticmethod
    def footnote(ax: plt.Axes) -> None:  # type: ignore[name-defined]
        """
        Plot footnote.

        Didn't bother using this in the end.
        """
        ax.text(
            s=(
                #
                "Some text about the data, Labradors, whatever."
            ),
            x=0,
            y=1,
            fontsize=10,
            style="italic",
            va="top",
            ha="left",
            color=COLOR_SUBTITLE_TEXT,
        )
        ax.set_xticks([])
        ax.set_yticks([])
446 |
def main() -> mpl.figure.Figure:
    """
    Assemble the full layout.

    Generates the sample data, creates the mosaic of axes described by LAYOUT
    and lets PlotSections draw each section, then applies the shared background
    colour to the figure and to every axis.

    Returns the populated figure.
    """
    df, vvl, cnl = get_sample_data()

    sections = PlotSections()

    rc_overrides = {
        "xtick.major.pad": 10,
        "font.family": "monospace",
    }
    with plt.rc_context(rc_overrides):
        fig = plt.figure(figsize=(15, 10))
        ax_dict = fig.subplot_mosaic(LAYOUT)  # type: ignore[arg-type]

        sections.title(ax=ax_dict["title"], cnl=cnl)
        sections.bottom_marginal(
            ax=ax_dict["bottom"],
            df=df,
            cnl=cnl,
            vvl=vvl,
        )
        sections.main(
            ax=ax_dict["main"],
            df=df,
            vvl=vvl,
        )
        sections.side_marginal(ax=ax_dict["side"], df=df, vvl=vvl, cnl=cnl)
        sections.top_left_corner(ax=ax_dict["top_left_corner"])
        sections.top_right_corner(ax=ax_dict["top_right_corner"])
        sections.bottom_right_corner(
            ax=ax_dict["bottom_right_corner"],
            df=df,
        )

        fig.tight_layout()

        # Set background colours.
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        for mosaic_ax in ax_dict.values():
            mosaic_ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
492 |
493 |
494 | if __name__ == "__main__":
495 | dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
496 | save_plot_output.save_plot(fig=main(), file=__file__)
497 | raise SystemExit
498 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/__init__.py:
--------------------------------------------------------------------------------
1 | """Create hexmap."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.cpg:
--------------------------------------------------------------------------------
1 | UTF-8
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.dbf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.dbf
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.prj:
--------------------------------------------------------------------------------
1 | PROJCS["WGS_1984_Web_Mercator_Auxiliary_Sphere",GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Mercator_Auxiliary_Sphere"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],PARAMETER["Standard_Parallel_1",0.0],PARAMETER["Auxiliary_Sphere_Type",0.0],UNIT["Meter",1.0]]
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbn:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbn
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.sbx
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp.xml:
--------------------------------------------------------------------------------
1 |
2 | 20161025170019001.0TRUECalculateField Hex_All_Data Region_Name "Northern Ireland" VB #CalculateField Hex_All_Data Region_ID 12 VB #CalculateField Hex_All_Data DESCRIPTIO "Westminster Constituency" VB #file://\\AYL-LT-02839\Users\bflanagan\OneDrive - ESRI (UK) Ltd\Carto\BlogData\Cartograms.gdbLocal Area Network
3 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shx
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/data/gb_hex_cartogram/GB_Hex_Cartogram_Const.zip
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/plot.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=duplicate-code
2 | """
3 | Hex map for the UK constituencies.
4 |
5 | Some meaningless generated data - small multiples with hex maps can be useful sometimes
6 | though. Could be good to add in the geographically accurate version as well.
7 | """
8 |
9 | from __future__ import annotations
10 |
11 | import pathlib
12 |
13 | import geopandas
14 | import matplotlib as mpl
15 | import matplotlib.pyplot as plt
16 | import numpy as np
17 | from matplotlib.colors import ListedColormap
18 |
19 | from plotting_examples import dvc_entry, save_plot_output
20 | from plotting_examples.y2022 import metadata
21 |
22 | random_choice = np.random.Generator(np.random.MT19937(1)).choice
23 |
24 |
def main() -> mpl.figure.Figure:
    """
    Draw three hex-cartogram panels with randomly generated party winners.

    For each scenario the "Party" column of the constituency GeoDataFrame is
    re-sampled with that scenario's probabilities and plotted on its own axes, all
    panels sharing one colour map. A custom legend is attached to the last panel.
    """
    election_data = (
        pathlib.Path(__file__).parent
        / "data/gb_hex_cartogram/GB_Hex_Cartogram_Const.shp"
    )
    gdf = geopandas.read_file(election_data)

    # Set up color maps by party
    partycolors = {
        "A": metadata.color.DEEPER_GREEN,
        "B": metadata.color.PINK_COLOUR,
        "C": metadata.color.TAN,
    }
    parties = list(partycolors)

    # The dataframe seems to assign items to categories based on the selected
    # column sort order, so the colour map is built with the same sorting.
    cmap = ListedColormap([partycolors[k] for k in sorted(partycolors)])

    # (panel title, probability of each party in `parties`). The panels are drawn
    # in this order from a single seeded generator, so reordering changes the data.
    scenarios = [
        ("Current", [0.4, 0.3, 0.3]),
        ("Scenario A", [0.3, 0.6, 0.1]),
        ("Scenario B", [0.1, 0.8, 0.1]),
    ]

    font_size = 15
    edgecolor = "black"
    edge_width = 0.5

    with plt.rc_context(
        {
            "xtick.major.pad": 10,
            "font.family": "monospace",
        },
    ):
        fig, axes = plt.subplots(
            nrows=1,
            ncols=len(scenarios),
            figsize=(15, 5),
        )

        for ax, (title, probabilities) in zip(axes, scenarios, strict=True):
            gdf["Party"] = list(
                random_choice(
                    parties,
                    size=len(gdf),
                    replace=True,
                    p=probabilities,
                ),
            )
            gdf.plot(
                ax=ax,
                column="Party",
                cmap=cmap,
                edgecolor=edgecolor,
                linewidth=edge_width,
            )
            _ = ax.axis("off")
            _ = ax.set_title(title, fontsize=font_size, loc="left")

        # Create legend on the last panel.
        legend_ax = axes[-1]
        custom_lines = [
            mpl.lines.Line2D([0], [0], color=x, lw=6) for x in partycolors.values()
        ]
        legend_ax.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        legend_ax.legend(
            custom_lines,
            list(partycolors.keys()),
            loc=(0.7, 0.7),
            fontsize=12,
            frameon=False,
            borderpad=2,
        )

        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)
        fig.set_tight_layout(True)  # type: ignore[attr-defined]

    return fig
138 |
139 |
140 | if __name__ == "__main__":
141 | dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
142 | save_plot_output.save_plot(fig=main(), file=__file__)
143 | raise SystemExit
144 |
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/uk_hex_example.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2022/uk_hexmap/uk_hex_example.png
--------------------------------------------------------------------------------
/plotting_examples/y2022/uk_hexmap/uk_hex_example.py:
--------------------------------------------------------------------------------
1 | """
2 | Hex plotting example.
3 |
4 | Move hex example from notebook into here.
5 | """
6 |
--------------------------------------------------------------------------------
/plotting_examples/y2024/__init__.py:
--------------------------------------------------------------------------------
1 | """Plots from 2024."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/__init__.py:
--------------------------------------------------------------------------------
1 | """Plot for cats weight loss."""
2 |
--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/data/cat_looking_to_side.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2024/cat_weight/data/cat_looking_to_side.jpeg
--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/data/weight_data.parquet:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/geo7/plotting_examples/78d56656f38ce5ea3d7a1f1d103eb8be7e639680/plotting_examples/y2024/cat_weight/data/weight_data.parquet
--------------------------------------------------------------------------------
/plotting_examples/y2024/cat_weight/plot.py:
--------------------------------------------------------------------------------
1 | """
2 | Timeseries of the cats diet.
3 |
4 | Cat was getting a little chunky towards the end of 2023 so had a resolution made for
5 | them to lose a bit of weight. Data collection is just a daily weigh, the average of
6 | this is taken (as there are sometimes multiple entries in a day) and then plotted along
7 | with a ten day rolling average. Most days were covered, where there are missing days
8 | they're imputed using the average of the days either side, eg `(a, nan, b) -> (a,
9 | (a+b)/2, b)` though this is just a plot...
10 | """
11 |
12 | from __future__ import annotations
13 |
14 | import datetime as dt
15 | import pathlib
16 | from pathlib import Path
17 |
18 | import matplotlib as mpl
19 | import matplotlib.dates as mdates
20 | import matplotlib.pyplot as plt
21 | import numpy as np
22 | import pandas as pd
23 | from PIL import Image
24 |
25 | from plotting_examples import dvc_entry, save_plot_output
26 | from plotting_examples.y2024 import metadata
27 |
28 | np_rnd = np.random.Generator(np.random.MT19937(0))
29 |
30 | LAYOUT = [
31 | ["title", "title", "title", "title", "top_right_corner", "top_right_corner"],
32 | ["main", "main", "main", "main", "side", "side"],
33 | ["main", "main", "main", "main", "side", "side"],
34 | ["main", "main", "main", "main", "side", "side"],
35 | ["main", "main", "main", "main", "side", "side"],
36 | ]
37 | FONTSIZE_TITLE = 25
38 | FONTSIZE_SUBTITLE = 15
39 | COLOR_SUBTITLE_TEXT = "#808080"
40 |
41 |
def get_xlsx_from_downloads() -> Path:
    """
    Get xlsx file from Downloads.

    Pretty janky approach but it's near enough whilst doing this - workflow is just to
    download the xlsx file containing the gform responses from gsheets then this will
    pick it up and write it into this project as a parquet file. Wasn't worth using
    the API for it.

    Returns the path of the parquet file next to this module. If no matching xlsx
    file is in ~/Downloads nothing is written and the path is returned as-is (the
    file may or may not already exist).

    Raises ValueError if more than one matching xlsx file is found.
    """
    output_name = Path(__file__).parent / "data" / "weight_data.parquet"
    xlsx_files = sorted((pathlib.Path.home() / "Downloads").glob("*espon*xlsx"))

    if not xlsx_files:
        # Most likely this is re-running and has already been moved so just use
        # whatever's already in data/.
        return output_name

    if len(xlsx_files) > 1:
        msg = "Expected a single file: "
        raise ValueError(msg, xlsx_files)

    # Context manager so the workbook handle is closed once the sheet is read.
    with pd.ExcelFile(xlsx_files[0]) as workbook:
        raw_responses = workbook.parse("Form responses 1")

    # Pull out required columns and create date column for grouping on.
    df_response = (
        raw_responses.rename(columns=lambda col: col.lower())
        .assign(
            timestamp=lambda x: pd.to_datetime(
                x["timestamp"],
                format="%d/%m/%Y %H:%M:%S",
            ),
            # Some of the days have multiple weigh-ins so want datestamp (the
            # timestamp truncated to midnight) to group by on in those days.
            datestamp=lambda x: x["timestamp"].dt.normalize(),
            # Weight of the cat is the difference between the two weigh-ins.
            cat_weight=lambda x: x["with_mish"].sub(x["without_mish"]),
        )
        .loc[:, ["timestamp", "datestamp", "cat_weight"]]
    )

    df_response.to_parquet(output_name)
    return output_name
86 |
87 |
def load_data(*, data_path: Path) -> pd.DataFrame:
    """
    Read the weigh-in parquet file and reduce it to one row per day.

    Returns a frame with columns `datestamp` and `cat_daily_avg`, where
    `cat_daily_avg` is the mean of that day's `cat_weight` readings.
    """
    weigh_ins = pd.read_parquet(data_path)
    # Sometimes there are multiple readings in a day, so average within each date.
    daily_mean = weigh_ins.groupby("datestamp")["cat_weight"].mean()
    return daily_mean.rename("cat_daily_avg").reset_index()
100 |
101 |
def _fill_missing_days(df: pd.DataFrame) -> pd.DataFrame:
    """
    Give every calendar day a row, imputing weights for days without a weigh-in.

    Expects `datestamp` and `cat_daily_avg` columns. Adds `imputed` (True where the
    day had no reading) and `r10` (ten day rolling mean of the filled weights).
    """
    all_days = pd.DataFrame(
        {"datestamp": pd.date_range(df["datestamp"].min(), df["datestamp"].max())}
    )
    # Right join so that days with no weigh-in appear with a missing weight.
    df = pd.merge(df, all_days, on="datestamp", how="right").assign(
        imputed=lambda x: x["cat_daily_avg"].isna()
    )
    # Handle missing data - only expecting there to be a day of missing data at
    # most! A gap takes the average of the readings either side of it; days with a
    # reading are unchanged as forward and backward fill agree there.
    observed = df["cat_daily_avg"]
    df["cat_daily_avg"] = observed.ffill().add(observed.bfill()).div(2)
    return df.assign(r10=lambda x: x["cat_daily_avg"].rolling(10).mean())


def main() -> mpl.figure.Figure:
    """
    Main.

    I did consider adding some figure shapes along the lines of:

    >>> for _ in range(1, 30):
    >>>     factor = 20
    >>>     radius = np_rnd.random() / factor
    >>>     alpha = ((1 / factor) - radius) + 0.1
    >>>     circ = patches.Circle(
    >>>         (np_rnd.random(), np_rnd.random()),
    >>>         radius=radius,
    >>>         zorder=1,
    >>>         color=color.PINK_COLOUR,
    >>>         alpha=alpha,
    >>>     )
    >>>     circ.set_transform(fig.transFigure)
    >>>     fig.patches.append(circ)

    But left it.
    """
    df = _fill_missing_days(load_data(data_path=get_xlsx_from_downloads()))

    color = metadata.color

    # Create some columns for styling the scatter points - imputed days get a size
    # of zero so that only actual weigh-ins are visible.
    df["scatter_color"] = color.GREY
    df["scatter_size"] = 10
    df.loc[df["imputed"], "scatter_size"] = 0

    with plt.rc_context(
        {
            "xtick.major.pad": 5,
            "font.family": "monospace",
        },
    ):
        fig = plt.figure(figsize=(28, 10))
        ax_dict = fig.subplot_mosaic(LAYOUT)  # type: ignore[arg-type]
        main_ax = ax_dict["main"]

        # Plot rolling average
        main_ax.plot(
            df["datestamp"],
            df["r10"],
            color=color.PINK_COLOUR,
            lw=3,
            zorder=10,
        )

        # Want to ensure that no daily lines are drawn where data has been imputed -
        # each imputed day starts a new group, so the line is broken at that day.
        for _, data in df.assign(groups=df["imputed"].cumsum()).groupby("groups"):
            main_ax.plot(
                data["datestamp"].loc[~data["imputed"]],
                data["cat_daily_avg"].loc[~data["imputed"]],
                color=color.GREY,
                lw=1,
                zorder=5,
            )

        main_ax.scatter(
            df["datestamp"],
            df["cat_daily_avg"],
            color=df["scatter_color"],
            s=df["scatter_size"],
            zorder=5,
        )

        main_ax.set_ylabel("Weight kg")

        # Remove spines for top/right
        main_ax.spines["top"].set_visible(False)
        main_ax.spines["right"].set_visible(False)

        # Weekly ticks, with x-axis dates as day/month/year.
        main_ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))  # type: ignore[no-untyped-call]
        main_ax.xaxis.set_major_formatter(mdates.DateFormatter("%d/%m/%y"))  # type: ignore[no-untyped-call]

        for label in main_ax.get_xticklabels():
            label.set_rotation(80)
            label.set_ha("center")

        # `df` has a default 0..n-1 index, so idxmax/idxmin labels are also
        # positions. Offsets used for the text placement are clamped to the frame
        # so that an extreme near either end cannot raise or wrap around.
        last_pos = len(df) - 1

        heaviest_idx = df["cat_daily_avg"].idxmax()
        _ = main_ax.annotate(
            f"{df['cat_daily_avg'].loc[heaviest_idx].round(2)} kg",
            # where the arrow should end up
            xy=(
                df["datestamp"].iloc[heaviest_idx],
                df["cat_daily_avg"].iloc[heaviest_idx],
            ),
            # where the text should be
            xytext=(
                df["datestamp"].iloc[min(heaviest_idx + 5, last_pos)],
                df["cat_daily_avg"].iloc[min(heaviest_idx + 1, last_pos)] + 0.25,
            ),
            ha="center",
            va="bottom",
            arrowprops={
                "arrowstyle": "->",
                "connectionstyle": "arc3,rad=0.2",
                "color": color.PINK_COLOUR,
            },
        )

        lightest_idx = df["cat_daily_avg"].idxmin()
        _ = main_ax.annotate(
            f"{df['cat_daily_avg'].loc[lightest_idx].round(2)} kg",
            # where the arrow should end up
            xy=(
                df["datestamp"].iloc[lightest_idx],
                df["cat_daily_avg"].iloc[lightest_idx],
            ),
            # where the text should be
            xytext=(
                df["datestamp"].iloc[max(lightest_idx - 5, 0)],
                df["cat_daily_avg"].iloc[lightest_idx],
            ),
            ha="center",
            va="bottom",
            arrowprops={
                "arrowstyle": "->",
                "connectionstyle": "arc3,rad=0.35",
                "color": color.PINK_COLOUR,
            },
        )

        main_ax.grid(linewidth=0.2, which="major", axis="y")

        # Put cat picture in the side panel. The image is handed to imshow inside
        # the `with` so the file handle is released afterwards.
        img_path = Path(__file__).parent / "data" / "cat_looking_to_side.jpeg"
        with Image.open(img_path) as cat_img:
            ax_dict["side"].imshow(cat_img, zorder=10)

        # Remove axis from every panel other than the main plot.
        for section, panel in ax_dict.items():
            if section != "main":
                panel.axis("off")

        # Add Title
        first_day = df["datestamp"].min()
        last_day = df["datestamp"].max()
        data_from = dt.datetime(
            first_day.year,
            first_day.month,
            first_day.day,
            tzinfo=dt.UTC,
        ).strftime("%Y-%m-%d")
        data_to = dt.datetime(
            last_day.year,
            last_day.month,
            last_day.day,
            tzinfo=dt.UTC,
        ).strftime("%Y-%m-%d")

        # Vertical gap between the title and the date-range subtitle.
        diff = 0.3
        title_x = 0.0
        ax_dict["title"].text(
            s="Cat Weight",
            x=title_x,
            y=0.5,
            fontsize=FONTSIZE_TITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
        )

        ax_dict["title"].text(
            s=f"{data_from} -> {data_to}",
            x=title_x,
            y=0.5 - diff,
            fontsize=FONTSIZE_SUBTITLE,
            horizontalalignment="left",
            verticalalignment="bottom",
            color=COLOR_SUBTITLE_TEXT,
        )

        for panel in ax_dict.values():
            panel.set_facecolor(metadata.color.BACKGROUND_COLOUR)

        fig.set_tight_layout(True)  # type: ignore[attr-defined]
        fig.patch.set_facecolor(metadata.color.BACKGROUND_COLOUR)

    return fig
327 |
328 |
329 | if __name__ == "__main__":
330 | dvc_entry.add_to_dvc(path=pathlib.Path(__file__))
331 | save_plot_output.save_plot(fig=main(), file=__file__)
332 | raise SystemExit
333 |
--------------------------------------------------------------------------------
/plotting_examples/y2024/metadata.py:
--------------------------------------------------------------------------------
1 | """
2 | Metadata for plotting.
3 |
4 | I probably could / should use an rc params file for some of this stuff instead of
5 | calling from here.
6 | """
7 |
8 | from __future__ import annotations
9 |
10 | from dataclasses import dataclass
11 |
12 |
13 | @dataclass
14 | class Colors:
15 |
16 | """
17 | Colors.
18 |
19 | https://mycolor.space/?hex=%23FF69B4&sub=1
20 | """
21 |
22 | PINK_COLOUR = "#ff69b4"
23 | BACKGROUND_COLOUR = "#f2f2f2"
24 | GREY = "#919191"
25 | BLUE = "#007FCB"
26 | LIGHT_GREEN = "#B4EDD2"
27 | DEEPER_GREEN = "#51B9BE"
28 | BROWNY_RED = "#554149"
29 | PURPLEY = "#8F6E9B"
30 | TAN = "#DDD7C6"
31 | BLACK = "#000000"
32 |
33 |
34 | color = Colors()
35 |
36 | dir_year = "y2024"
37 | year = 2024
38 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [tool.poetry]
2 | name = "plotting-examples"
3 | version = "0.1.0"
4 | description = "Example plots"
5 | authors = ["George Lenton "]
6 | readme = "README.md"
7 | packages = [{ include = "plotting_examples" }]
8 |
9 | [tool.poetry.dependencies]
10 | python = "^3.12"
11 | dvc = "^3.0.0"
12 | fastparquet = "^2024.2.0"
13 | matplotlib = "^3.7.1"
14 | pyarrow = "^15.0.0"
15 | seaborn = "^0.13.0"
16 | geopandas = "^0.14.3"
17 | pandas = "^2.0.1"
18 | openpyxl = "^3.1.2"
19 |
20 | [tool.poetry.group.dev.dependencies]
21 | pre-commit = "^3.3.1"
22 | ruff = "^0.3.2"
23 | mypy = "^1.3.0"
24 | jupyterlab = "^4.1.0"
25 | pdbpp = "^0.10.3"
26 |
27 |
28 | [build-system]
29 | requires = ["poetry-core"]
30 | build-backend = "poetry.core.masonry.api"
31 |
32 |
33 | [tool.ruff]
34 | line-length = 88
35 |
36 | [tool.ruff.lint]
37 | select = ["ALL"]
38 | ignore = [
39 | "ANN101", # Type annotation for 'self'
40 | "COM812", # trailing comma - conflicted
41 | "ISC001",
42 | "D211", # `one-blank-line-before-class`.
43 | "D212", # warning: `multi-line-summary-first-line`
44 | "D401", # First line of docstring should be in imperative mood: "Main."
45 | "FIX002", # Line contains TODO, consider resolving the issue
46 | "PD015", # Use `.merge` method instead of `pd.merge` function. They have equivalent functionality.
47 | "PD901", # Avoid using the generic variable name `df` for DataFrames
48 | "TD002", # Missing author in TODO; try: `# TODO(): ...` or `# TODO @: ...`
49 | "TD003", # Missing issue link on the line following this TODO
50 | ]
51 | fixable = ["ALL"]
52 | unfixable = []
53 | # Allow unused variables when underscore-prefixed.
54 | dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
55 |
56 |
57 | [tool.ruff.lint.per-file-ignores]
58 | "__init__.py" = ["F401"]
59 | "plotting_examples/y2022/scatter_distributions/plot.py" = [
60 | "PLR0913", # Too many arguments in function definition (6 > 5)
61 | "S101", # Use of `assert` detected
62 | ]
63 |
64 | "plotting_examples/y2022/opinium_barchart/plot.py" = [
65 | "PLR0915", # Too many statements (54 > 50)
66 | ]
67 |
68 | "plotting_examples/y2022/meaningless_points/plot.py" = [
69 | "PLR0913", # Too many arguments in function definition (7 > 5)
70 | ]
71 |
72 |
73 | "plotting_examples/y2022/histogram_with_two_variables/plot.py" = [
74 | "PLR0913", # Too many arguments in function definition (7 > 5)
75 | "FBT001", # Boolean-typed positional argument in function definition
76 | ]
77 |
78 | "plotting_examples/y2022/box_plot_w_scatter_distributions/plot.py" = [
79 | "PLR0913", # Too many arguments in function definition (10 > 5)
80 | ]
81 |
82 |
83 | [tool.ruff.lint.flake8-type-checking]
84 | # Don't want to have things move to TYPE_CHECKING if needed by pydantic.
85 | runtime-evaluated-base-classes = ["pydantic.BaseModel"]
86 |
87 | [tool.ruff.lint.isort]
88 | section-order = [
89 | "future",
90 | "standard-library",
91 | "third-party",
92 | "first-party",
93 | "local-folder",
94 | ]
95 | case-sensitive = true
96 | combine-as-imports = true
97 |
98 |
99 | # warning: The top-level linter settings are deprecated in favour of their counterparts in the `lint` section. Please update the following options in `pyproject.toml`:
100 | # - 'flake8-type-checking' -> 'lint.flake8-type-checking'
101 | # - 'isort' -> 'lint.isort'
102 | # - 'per-file-ignores' -> 'lint.per-file-ignores'
103 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | disable-noqa = True
3 | max-line-length = 100
4 | extend-ignore =
5 | E203, # whitespace before : is not PEP8 compliant (& conflicts with black)
6 |
7 | DAR003, # Incorrect indentation: ~<
8 | DAR102, # Excess parameter(s) in Docstring: + words_freq[
9 | DAR201, # Missing "Returns" in Docstring: - return
10 | DAR202, # Excess "Returns" in Docstring: + return
11 |
12 | per-file-ignores =
13 | **/__init__.py:
14 | # Missing docstring in public package
15 | D104,
16 |
17 | [flake8_nb]
18 | disable-noqa = True
19 | max-line-length = 100
20 | extend-ignore =
21 | E203, # whitespace before : is not PEP8 compliant (& conflicts with black)
22 |
23 | D100, # Missing docstring in public module
24 | E402, # module level import not at top of file
25 |
26 | D103, # Missing docstring in public function
27 | D104, # Missing docstring in public package
28 | D400, # First line should end with a period
29 | D403, # First word of the first line should be properly capitalized
30 | DAR003, # Incorrect indentation: ~<
31 | DAR102, # Excess parameter(s) in Docstring: + words_freq[
32 | DAR201, # Missing "Returns" in Docstring: - return
33 | DAR202, # Excess "Returns" in Docstring: + return
34 | E231, # missing whitespace after ','
35 | F401, # '...' imported but unused
36 | F811, # redefinition of unused '..' from line 7
37 | F821, # undefined name '...'
38 |
39 | [darglint]
40 | strictness=long
41 |
--------------------------------------------------------------------------------
/work.sh:
--------------------------------------------------------------------------------
# simple script to run a few things when working on stuff.
poetry run dvc repro dvc.yaml
poetry run mypy --strict .
poetry run pre-commit run --all-files
poetry run python -m generate_readme

# Commit dvc.lock on its own, but only when it actually differs from HEAD.
# `git diff --name-only` exits 0 whether or not there are changes, so it cannot
# gate the commit; `--quiet` exits non-zero when there is a difference.
if ! git diff --quiet HEAD -- dvc.lock; then
    git add dvc.lock && git commit -m 'update dvc.lock' -- dvc.lock
fi

# Automatically commit regenerated image files: modified .png files under an
# images path. Passing the paths to `git commit` keeps anything else that happens
# to be staged out of this commit.
changed_images=$(git diff --name-only --diff-filter=dM HEAD | grep -E '.*images.*\.png$')
if [ -n "$changed_images" ]; then
    printf '%s\n' "$changed_images" | xargs -r git add
    printf '%s\n' "$changed_images" | xargs -r git commit -m 'updated generated image' --
fi
14 |
--------------------------------------------------------------------------------