├── .gitignore
├── LICENSE
├── example
├── .png
├── example.ipynb
├── example_barplot_hue.png
├── example_bucketed.png
├── example_custom_text_annot.png
├── example_hue_layout.png
├── example_hue_pairs.png
├── example_non-hue_inside.png
├── example_non-hue_outside.png
├── example_tuning_y_offsets.png
├── example_tuning_y_offsets.svg
├── example_tuning_y_offsets_w_arguments.png
└── example_tuning_y_offsets_w_arguments.svg
├── readme.md
├── requirements.txt
├── setup.py
├── statannot
├── StatResult.py
├── __init__.py
├── _version.py
├── statannot.py
└── utils.py
└── tests
└── test_statannot.py
/.gitignore:
--------------------------------------------------------------------------------
1 | .ipynb_checkpoints
2 | __pycache__
3 |
4 | statannot.egg-info/dependency_links.txt
5 | statannot.egg-info/PKG-INFO
6 | statannot.egg-info/requires.txt
7 | statannot.egg-info/SOURCES.txt
8 | statannot.egg-info/top_level.txt
9 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Marc Weber
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/example/.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/.png
--------------------------------------------------------------------------------
/example/example_barplot_hue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_barplot_hue.png
--------------------------------------------------------------------------------
/example/example_bucketed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_bucketed.png
--------------------------------------------------------------------------------
/example/example_custom_text_annot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_custom_text_annot.png
--------------------------------------------------------------------------------
/example/example_hue_layout.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_hue_layout.png
--------------------------------------------------------------------------------
/example/example_hue_pairs.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_hue_pairs.png
--------------------------------------------------------------------------------
/example/example_non-hue_inside.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_non-hue_inside.png
--------------------------------------------------------------------------------
/example/example_non-hue_outside.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_non-hue_outside.png
--------------------------------------------------------------------------------
/example/example_tuning_y_offsets.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_tuning_y_offsets.png
--------------------------------------------------------------------------------
/example/example_tuning_y_offsets.svg:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
1434 |
--------------------------------------------------------------------------------
/example/example_tuning_y_offsets_w_arguments.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/webermarcolivier/statannot/8de40596e6669f4c81e791426505c1e5c8c5f1fe/example/example_tuning_y_offsets_w_arguments.png
--------------------------------------------------------------------------------
/example/example_tuning_y_offsets_w_arguments.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
2507 |
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Disclaimer
2 |
3 | **This repository is not maintained anymore**. I recommend using the forked package [trevismd/statannotations](https://github.com/trevismd/statannotations), which has a cleaner API and further extends the functionalities of `statannot`.
4 |
5 | ## What is it
6 |
7 | Python package to optionally compute statistical tests and add statistical annotations to an existing boxplot/barplot generated by seaborn.
8 |
9 | ## Features
10 |
11 | - Single function to add statistical annotations on an existing boxplot/barplot generated by seaborn.
12 | - Integrated statistical tests (binding to `scipy.stats` methods):
13 | - Mann-Whitney
14 | - t-test (independent and paired)
15 | - Welch's t-test
16 | - Levene test
17 | - Wilcoxon test
18 | - Kruskal-Wallis test
19 | - Smart layout of multiple annotations with correct y offsets.
20 | - Annotations can be located inside or outside the plot.
21 | - Format of the statistical test annotation can be customized: star annotation, simplified p-value, or explicit p-value.
22 | - Optionally, custom p-values can be given as input. In this case, no statistical test is performed.
23 |
24 | ## Installation
25 |
26 | The latest stable release can be installed from PyPI:
27 |
28 | ```python
29 | pip install statannot
30 | ```
31 | You may instead want to use the development version from Github:
32 |
33 | ```python
34 | pip install git+https://github.com/webermarcolivier/statannot.git
35 | ```
36 |
37 | ## Documentation
38 |
39 | See example jupyter notebook `example/example.ipynb`.
40 |
41 | ## Usage
42 |
43 | Here is a minimal example:
44 |
45 | ```python
46 | import seaborn as sns
47 | from statannot import add_stat_annotation
48 |
49 | df = sns.load_dataset("tips")
50 | x = "day"
51 | y = "total_bill"
52 | order = ['Sun', 'Thur', 'Fri', 'Sat']
53 | ax = sns.boxplot(data=df, x=x, y=y, order=order)
54 | test_results = add_stat_annotation(ax, data=df, x=x, y=y, order=order,
55 | box_pairs=[("Thur", "Fri"), ("Thur", "Sat"), ("Fri", "Sun")],
56 | test='Mann-Whitney', text_format='star',
57 | loc='outside', verbose=2)
58 | test_results
59 | ```
60 |
61 | More examples are available in the jupyter notebook `example/example.ipynb`.
62 |
63 |
64 | ## Examples
65 |
66 | 
67 |
68 | 
69 |
70 | ## Requirements
71 |
72 | + Python >= 3.5
73 | + numpy >= 1.12.1
74 | + seaborn >= 0.8.1
75 | + matplotlib >= 2.2.2
76 | + pandas >= 0.23.0
77 | + scipy >= 1.1.0
78 |
79 | ## Cite
80 |
81 | A Digital Object Identifier (DOI) was created for the repository for easier referencing in the academic literature.
82 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | seaborn>=0.9.0
3 | matplotlib
4 | pandas
5 | scipy>=1.1.0
6 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
"""Packaging script for the ``statannot`` distribution."""
import re

# ``distutils`` was removed from the standard library in Python 3.12;
# setuptools provides a drop-in ``setup`` and is already required here.
from setuptools import find_packages, setup

VERSIONFILE = "statannot/_version.py"

# The readme is used verbatim as the long description shown on PyPI.
with open("readme.md", "r", encoding="utf-8") as f:
    long_description = f.read()

# Read the version string without importing the package, so that building
# does not require the package's runtime dependencies to be installed.
with open(VERSIONFILE, "rt") as f:
    verstrline = f.read()
match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", verstrline, re.M)
if match:
    version = match.group(1)
else:
    raise RuntimeError("Unable to find version string in %s." % (VERSIONFILE,))

setup(
    name="statannot",
    version=version,
    author="Marc Weber",
    author_email="webermarcolivier@gmail.com",
    description="add statistical annotations on an existing boxplot/barplot generated by seaborn.",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/webermarcolivier/statannot",
    packages=find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
    ],
    # install_requires=open("requirements.txt").readlines(),
    python_requires='>=3.5',
)
34 |
--------------------------------------------------------------------------------
/statannot/StatResult.py:
--------------------------------------------------------------------------------
class StatResult:
    """Outcome of a single statistical test between two data series.

    Attributes set by the constructor:
      - test_str: long, human-readable name of the test.
      - test_short_name: abbreviated test name.
      - stat_str: label of the test statistic, or None.
      - stat: value of the test statistic, or None.
      - pval: p-value of the test.
    """

    def __init__(self, test_str, test_short_name, stat_str, stat, pval):
        self.test_str, self.test_short_name = test_str, test_short_name
        self.stat_str, self.stat = stat_str, stat
        self.pval = pval

    @property
    def formatted_output(self):
        """One-line summary: test name, p-value and, if known, the statistic."""
        has_statistic = not (self.stat_str is None and self.stat is None)
        if has_statistic:
            template = '{}, P_val={:.3e} {}={:.3e}'
            fields = (self.test_str, self.pval, self.stat_str, self.stat)
        else:
            # Only the p-value is available: no statistic is reported.
            template = '{}, P_val:{:.3e}'
            fields = (self.test_str, self.pval)
        return template.format(*fields)

    def __str__(self):
        return self.formatted_output
21 |
22 |
23 |
--------------------------------------------------------------------------------
/statannot/__init__.py:
--------------------------------------------------------------------------------
1 | __all__ = ['statannot']
2 |
3 | from .statannot import add_stat_annotation
4 | from .statannot import stat_test
5 | from ._version import __version__
6 |
--------------------------------------------------------------------------------
/statannot/_version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.3"
2 |
--------------------------------------------------------------------------------
/statannot/statannot.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | import matplotlib.pyplot as plt
4 | from matplotlib import lines
5 | import matplotlib.transforms as mtransforms
6 | from matplotlib.font_manager import FontProperties
7 | import numpy as np
8 | import pandas as pd
9 | import seaborn as sns
10 | from seaborn.utils import remove_na
11 |
12 | from .utils import raise_expected_got, assert_is_in
13 | from .StatResult import StatResult
14 |
15 | from scipy import stats
16 |
17 | DEFAULT = object()
18 |
19 |
def stat_test(
    box_data1,
    box_data2,
    test,
    comparisons_correction=None,
    num_comparisons=1,
    **stats_params
):
    """Get formatted result of two sample statistical test.

    Arguments
    ---------
    box_data1, box_data2
        The two samples to compare; passed directly to the scipy function.
    test: str
        Statistical test to run. Must be one of:
        - `Levene`
        - `Mann-Whitney`
        - `Mann-Whitney-gt`
        - `Mann-Whitney-ls`
        - `t-test_ind`
        - `t-test_welch`
        - `t-test_paired`
        - `Wilcoxon`
        - `Kruskal`
        Any other value yields a result with a NaN p-value and no test name.
    comparisons_correction: str or None, default None
        Method to use for multiple comparisons correction. Currently only the
        Bonferroni correction is implemented.
    num_comparisons: int, default 1
        Number of comparisons to use for multiple comparisons correction.
    stats_params
        Additional keyword arguments to pass to scipy stats functions.

    Returns
    -------
    StatResult object with formatted result of test.

    """
    # Check arguments.
    assert_is_in(
        comparisons_correction,
        ['bonferroni', None],
        label='argument `comparisons_correction`',
    )

    # The three Mann-Whitney variants differ only in the `alternative`
    # passed to scipy and in the wording of the long test name.
    mann_whitney_variants = {
        'Mann-Whitney': ('two-sided', 'two-sided'),
        'Mann-Whitney-gt': ('greater', 'greater'),
        'Mann-Whitney-ls': ('less', 'smaller'),
    }

    # Switch to run scipy.stats hypothesis test.
    if test == 'Levene':
        stat, pval = stats.levene(box_data1, box_data2, **stats_params)
        result = StatResult(
            'Levene test of variance', 'levene', 'stat', stat, pval
        )
    elif test in mann_whitney_variants:
        alternative, wording = mann_whitney_variants[test]
        u_stat, pval = stats.mannwhitneyu(
            box_data1, box_data2, alternative=alternative, **stats_params
        )
        result = StatResult(
            'Mann-Whitney-Wilcoxon test ' + wording,
            'M.W.W.',
            'U_stat',
            u_stat,
            pval,
        )
    elif test == 't-test_ind':
        stat, pval = stats.ttest_ind(a=box_data1, b=box_data2, **stats_params)
        result = StatResult(
            't-test independent samples', 't-test_ind', 'stat', stat, pval
        )
    elif test == 't-test_welch':
        stat, pval = stats.ttest_ind(
            a=box_data1, b=box_data2, equal_var=False, **stats_params
        )
        result = StatResult(
            'Welch\'s t-test independent samples',
            't-test_welch',
            'stat',
            stat,
            pval,
        )
    elif test == 't-test_paired':
        stat, pval = stats.ttest_rel(a=box_data1, b=box_data2, **stats_params)
        result = StatResult(
            't-test paired samples', 't-test_rel', 'stat', stat, pval
        )
    elif test == 'Wilcoxon':
        # Work on a copy and pop `zero_method`: passing it both explicitly
        # and through `**stats_params` would raise a duplicate-keyword
        # TypeError whenever the caller supplies it.
        wilcoxon_params = dict(stats_params)
        zero_method_default = "pratt" if len(box_data1) <= 20 else "wilcox"
        zero_method = wilcoxon_params.pop('zero_method', zero_method_default)
        print("Using zero_method ", zero_method)
        stat, pval = stats.wilcoxon(
            box_data1, box_data2, zero_method=zero_method, **wilcoxon_params
        )
        result = StatResult(
            'Wilcoxon test (paired samples)', 'Wilcoxon', 'stat', stat, pval
        )
    elif test == 'Kruskal':
        stat, pval = stats.kruskal(box_data1, box_data2, **stats_params)
        result = StatResult(
            'Kruskal-Wallis paired samples', 'Kruskal', 'stat', stat, pval
        )
    else:
        # Unknown test: no test is run and the p-value is undefined.
        result = StatResult(None, '', None, None, np.nan)

    # Optionally, run multiple comparisons correction.
    if comparisons_correction == 'bonferroni':
        result.pval = bonferroni(result.pval, num_comparisons)
        # `test_str` is None for an unknown test; concatenating would raise.
        if result.test_str is not None:
            result.test_str = result.test_str + ' with Bonferroni correction'
    elif comparisons_correction is None:
        pass
    else:
        # This should never be reached because `comparisons_correction` must
        # be a valid correction method or None.
        raise RuntimeError('Unexpectedly reached end of switch.')

    return result
155 |
156 |
def bonferroni(p_values, num_comparisons='auto'):
    """Apply Bonferroni correction for multiple comparisons.

    The Bonferroni correction is defined as
        p_corrected = min(num_comparisons * p, 1.0).

    The input is never modified; a new value/array is returned.

    Arguments
    ---------
    p_values: scalar or list-like
        One or more p_values to correct.
    num_comparisons: int or `auto`
        Number of comparisons. Use `auto` to infer the number of comparisons
        from the length of the `p_values` list.

    Returns
    -------
    Scalar or numpy array of corrected p-values. A scalar is returned
    whenever exactly one p-value was given (scalar or length-1 sequence).

    """
    # Input checks.
    if np.ndim(p_values) > 1:
        raise_expected_got(
            'Scalar or list-like', 'argument `p_values`', p_values
        )
    if num_comparisons != 'auto':
        # `np.ceil` raises a TypeError for non-numeric input; a non-integral
        # number fails the equality. An explicit check (rather than `assert`)
        # keeps the validation active under `python -O`.
        try:
            is_int_like = bool(np.ceil(num_comparisons) == num_comparisons)
        except TypeError:
            is_int_like = False
        if not is_int_like:
            raise_expected_got(
                'Int or `auto`', 'argument `num_comparisons`', num_comparisons
            )

    # Coerce p_values to numpy array.
    p_values_array = np.atleast_1d(p_values)

    if num_comparisons == 'auto':
        # Infer number of comparisons
        num_comparisons = len(p_values_array)
    elif len(p_values_array) > 1 and num_comparisons != len(p_values_array):
        # Warn if multiple p_values have been passed and num_comparisons is
        # set manually.
        warnings.warn(
            'Manually-specified `num_comparisons={}` differs from number of '
            'p_values to correct ({}).'.format(
                num_comparisons, len(p_values_array)
            )
        )

    # Apply correction by multiplying p_values and thresholding at p=1.0.
    # The multiplication is done out of place: `np.atleast_1d` returns the
    # caller's own array when given an ndarray, so `*=` would silently
    # overwrite the caller's data.
    corrected = np.minimum(p_values_array * num_comparisons, 1)

    if len(corrected) == 1:
        # Return a scalar when a single p-value was given.
        return corrected[0]
    return corrected
218 |
219 |
220 |
def pval_annotation_text(x, pvalue_thresholds):
    """Map p-value(s) to their annotation label (e.g. stars).

    Each threshold `[value, label]` labels the half-open range
    `(next_smaller_value, value]`; the smallest threshold labels every
    p-value less than or equal to it. P-values above the largest threshold
    get an empty string.

    :param x: a single p-value, or a 1-d array/list of p-values
    :param pvalue_thresholds: list of `[threshold, label]` pairs, any order
    :return: the label string for a scalar `x`, otherwise a pandas Series
        with one label per p-value
    """
    # `np.array` is a function, not a type, so a `type(x) is np.array` test
    # can never detect arrays; decide scalar-vs-sequence by dimensionality.
    single_value = np.ndim(x) == 0
    x1 = np.atleast_1d(x)

    # Sort the thresholds from largest to smallest.
    thresholds = sorted(pvalue_thresholds, key=lambda t: t[0], reverse=True)

    x_annot = pd.Series([""] * len(x1))
    for i, threshold in enumerate(thresholds):
        # Inclusive upper bound, also for the smallest threshold, so that a
        # p-value exactly equal to it is labelled.
        condition = x1 <= threshold[0]
        if i < len(thresholds) - 1:
            # Exclusive lower bound: the next smaller threshold.
            condition = condition & (thresholds[i + 1][0] < x1)
        x_annot[condition] = threshold[1]

    return x_annot.iloc[0] if single_value else x_annot
240 |
241 |
def simple_text(pval, pvalue_format, pvalue_thresholds, test_short_name=None):
    """
    Generates simple text for test name and pvalue
    :param pval: pvalue
    :param pvalue_format: format string for pvalue
    :param pvalue_thresholds: `[threshold, label]` pairs; the label of the
        smallest threshold that is >= `pval` is displayed as "p ≤ label"
    :param test_short_name: Short name of test to show
    :return: simple annotation: "[<test_short_name> ]p ≤ <label>" when a
        threshold applies, otherwise "[<test_short_name> ]p = <formatted pval>"
    """
    # Sort thresholds from smallest to largest so the tightest bound wins.
    thresholds = sorted(pvalue_thresholds, key=lambda x: x[0])

    # Test name if passed
    text = test_short_name + " " if test_short_name else ""

    for threshold in thresholds:
        # Inclusive comparison, matching the "≤" shown in the annotation.
        if pval <= threshold[0]:
            pval_text = "p ≤ {}".format(threshold[1])
            break
    else:
        # No threshold applies: show the explicit p-value.
        pval_text = "p = {}".format(pvalue_format).format(pval)

    return text + pval_text
265 |
266 |
267 | def add_stat_annotation(ax, plot='boxplot',
268 | data=None, x=None, y=None, hue=None, units=None, order=None,
269 | hue_order=None, box_pairs=None, width=0.8,
270 | perform_stat_test=True,
271 | pvalues=None, test_short_name=None,
272 | test=None, text_format='star', pvalue_format_string=DEFAULT,
273 | text_annot_custom=None,
274 | loc='inside', show_test_name=True,
275 | pvalue_thresholds=DEFAULT, stats_params=dict(),
276 | comparisons_correction='bonferroni',
277 | use_fixed_offset=False, line_offset_to_box=None,
278 | line_offset=None, line_height=0.02, text_offset=1,
279 | color='0.2', linewidth=1.5,
280 | fontsize='medium', verbose=1):
281 | """
282 | Optionally computes statistical test between pairs of data series, and add statistical annotation on top
283 | of the boxes/bars. The same exact arguments `data`, `x`, `y`, `hue`, `order`, `width`,
284 | `hue_order` (and `units`) as in the seaborn boxplot/barplot function must be passed to this function.
285 |
286 | This function works in one of the two following modes:
287 | a) `perform_stat_test` is True: statistical test as given by argument `test` is performed.
288 | b) `perform_stat_test` is False: no statistical test is performed, list of custom p-values `pvalues` are
289 | used for each pair of boxes. The `test_short_name` argument is then used as the name of the
290 | custom statistical test.
291 |
292 | :param plot: type of the plot, one of 'boxplot' or 'barplot'.
293 | :param line_height: in axes fraction coordinates
294 | :param text_offset: in points
295 | :param box_pairs: can be of either form: For non-grouped boxplot: `[(cat1, cat2), (cat3, cat4)]`. For boxplot grouped by hue: `[((cat1, hue1), (cat2, hue2)), ((cat3, hue3), (cat4, hue4))]`
296 | :param pvalue_format_string: defaults to `"{.3e}"`
297 | :param pvalue_thresholds: list of lists, or tuples. Default is: For "star" text_format: `[[1e-4, "****"], [1e-3, "***"], [1e-2, "**"], [0.05, "*"], [1, "ns"]]`. For "simple" text_format : `[[1e-5, "1e-5"], [1e-4, "1e-4"], [1e-3, "0.001"], [1e-2, "0.01"]]`
298 | :param pvalues: list or array of p-values for each box pair comparison.
299 | :param comparisons_correction: Method for multiple comparisons correction. `bonferroni` or None.
300 | """
301 |
302 | def find_x_position_box(box_plotter, boxName):
303 | """
304 | boxName can be either a name "cat" or a tuple ("cat", "hue")
305 | """
306 | if box_plotter.plot_hues is None:
307 | cat = boxName
308 | hue_offset = 0
309 | else:
310 | cat = boxName[0]
311 | hue = boxName[1]
312 | hue_offset = box_plotter.hue_offsets[
313 | box_plotter.hue_names.index(hue)]
314 |
315 | group_pos = box_plotter.group_names.index(cat)
316 | box_pos = group_pos + hue_offset
317 | return box_pos
318 |
319 | def get_box_data(box_plotter, boxName):
320 | """
321 | boxName can be either a name "cat" or a tuple ("cat", "hue")
322 |
323 | Here we really have to duplicate seaborn code, because there is not
324 | direct access to the box_data in the BoxPlotter class.
325 | """
326 | cat = box_plotter.plot_hues is None and boxName or boxName[0]
327 |
328 | index = box_plotter.group_names.index(cat)
329 | group_data = box_plotter.plot_data[index]
330 |
331 | if box_plotter.plot_hues is None:
332 | # Draw a single box or a set of boxes
333 | # with a single level of grouping
334 | box_data = remove_na(group_data)
335 | else:
336 | hue_level = boxName[1]
337 | hue_mask = box_plotter.plot_hues[index] == hue_level
338 | box_data = remove_na(group_data[hue_mask])
339 |
340 | return box_data
341 |
342 | # Set default values if necessary
343 | if pvalue_format_string is DEFAULT:
344 | pvalue_format_string = '{:.3e}'
345 | simple_format_string = '{:.2f}'
346 | else:
347 | simple_format_string = pvalue_format_string
348 |
349 | if pvalue_thresholds is DEFAULT:
350 | if text_format == "star":
351 | pvalue_thresholds = [[1e-4, "****"], [1e-3, "***"],
352 | [1e-2, "**"], [0.05, "*"], [1, "ns"]]
353 | else:
354 | pvalue_thresholds = [[1e-5, "1e-5"], [1e-4, "1e-4"],
355 | [1e-3, "0.001"], [1e-2, "0.01"]]
356 |
357 | fig = plt.gcf()
358 |
359 | # Validate arguments
360 | if perform_stat_test:
361 | if test is None:
362 | raise ValueError("If `perform_stat_test` is True, `test` must be specified.")
363 | if pvalues is not None or test_short_name is not None:
364 | raise ValueError("If `perform_stat_test` is True, custom `pvalues` "
365 | "or `test_short_name` must be `None`.")
366 | valid_list = ['t-test_ind', 't-test_welch', 't-test_paired',
367 | 'Mann-Whitney', 'Mann-Whitney-gt', 'Mann-Whitney-ls',
368 | 'Levene', 'Wilcoxon', 'Kruskal']
369 | if test not in valid_list:
370 | raise ValueError("test value should be one of the following: {}."
371 | .format(', '.join(valid_list)))
372 | else:
373 | if pvalues is None:
374 | raise ValueError("If `perform_stat_test` is False, custom `pvalues` must be specified.")
375 | if test is not None:
376 | raise ValueError("If `perform_stat_test` is False, `test` must be None.")
377 | if len(pvalues) != len(box_pairs):
378 | raise ValueError("`pvalues` should be of the same length as `box_pairs`.")
379 |
380 | if text_annot_custom is not None and len(text_annot_custom) != len(box_pairs):
381 | raise ValueError("`text_annot_custom` should be of same length as `box_pairs`.")
382 |
383 | assert_is_in(
384 | loc, ['inside', 'outside'], label='argument `loc`'
385 | )
386 | assert_is_in(
387 | text_format,
388 | ['full', 'simple', 'star'],
389 | label='argument `text_format`'
390 | )
391 | assert_is_in(
392 | comparisons_correction,
393 | ['bonferroni', None],
394 | label='argument `comparisons_correction`'
395 | )
396 |
397 | if verbose >= 1 and text_format == 'star':
398 | print("p-value annotation legend:")
399 | pvalue_thresholds = pd.DataFrame(pvalue_thresholds).sort_values(by=0, ascending=False).values
400 | for i in range(0, len(pvalue_thresholds)):
401 | if i < len(pvalue_thresholds)-1:
402 | print('{}: {:.2e} < p <= {:.2e}'.format(pvalue_thresholds[i][1],
403 | pvalue_thresholds[i+1][0],
404 | pvalue_thresholds[i][0]))
405 | else:
406 | print('{}: p <= {:.2e}'.format(pvalue_thresholds[i][1], pvalue_thresholds[i][0]))
407 | print()
408 |
409 | ylim = ax.get_ylim()
410 | yrange = ylim[1] - ylim[0]
411 |
412 | if line_offset is None:
413 | if loc == 'inside':
414 | line_offset = 0.05
415 | if line_offset_to_box is None:
416 | line_offset_to_box = 0.06
417 | # 'outside', see valid_list
418 | else:
419 | line_offset = 0.03
420 | if line_offset_to_box is None:
421 | line_offset_to_box = line_offset
422 | else:
423 | if loc == 'inside':
424 | if line_offset_to_box is None:
425 | line_offset_to_box = 0.06
426 | elif loc == 'outside':
427 | line_offset_to_box = line_offset
428 | y_offset = line_offset*yrange
429 | y_offset_to_box = line_offset_to_box*yrange
430 |
431 | if plot == 'boxplot':
432 | # Create the same plotter object as seaborn's boxplot
433 | box_plotter = sns.categorical._BoxPlotter(
434 | x, y, hue, data, order, hue_order, orient=None, width=width, color=None,
435 | palette=None, saturation=.75, dodge=True, fliersize=5, linewidth=None)
436 | elif plot == 'barplot':
437 | # Create the same plotter object as seaborn's barplot
438 | box_plotter = sns.categorical._BarPlotter(
439 | x, y, hue, data, order, hue_order,
440 | estimator=np.mean, ci=95, n_boot=1000, units=None,
441 | orient=None, color=None, palette=None, saturation=.75,
442 | errcolor=".26", errwidth=None, capsize=None, dodge=True)
443 |
444 | # Build the list of box data structures with the x and ymax positions
445 | group_names = box_plotter.group_names
446 | hue_names = box_plotter.hue_names
447 | if box_plotter.plot_hues is None:
448 | box_names = group_names
449 | labels = box_names
450 | else:
451 | box_names = [(group_name, hue_name) for group_name in group_names for hue_name in hue_names]
452 | labels = ['{}_{}'.format(group_name, hue_name) for (group_name, hue_name) in box_names]
453 |
454 | box_structs = [{'box':box_names[i],
455 | 'label':labels[i],
456 | 'x':find_x_position_box(box_plotter, box_names[i]),
457 | 'box_data':get_box_data(box_plotter, box_names[i]),
458 | 'ymax':np.amax(get_box_data(box_plotter, box_names[i])) if
459 | len(get_box_data(box_plotter, box_names[i])) > 0 else np.nan}
460 | for i in range(len(box_names))]
461 | # Sort the box data structures by position along the x axis
462 | box_structs = sorted(box_structs, key=lambda x: x['x'])
463 | # Add the index position in the list of boxes along the x axis
464 | box_structs = [dict(box_struct, xi=i) for i, box_struct in enumerate(box_structs)]
465 | # Same data structure list with access key by box name
466 | box_structs_dic = {box_struct['box']:box_struct for box_struct in box_structs}
467 |
468 | # Build the list of box data structure pairs
469 | box_struct_pairs = []
470 | for i_box_pair, (box1, box2) in enumerate(box_pairs):
471 | valid = box1 in box_names and box2 in box_names
472 | if not valid:
473 | raise ValueError("box_pairs contains an invalid box pair.")
474 | pass
475 | # i_box_pair will keep track of the original order of the box pairs.
476 | box_struct1 = dict(box_structs_dic[box1], i_box_pair=i_box_pair)
477 | box_struct2 = dict(box_structs_dic[box2], i_box_pair=i_box_pair)
478 | if box_struct1['x'] <= box_struct2['x']:
479 | pair = (box_struct1, box_struct2)
480 | else:
481 | pair = (box_struct2, box_struct1)
482 | box_struct_pairs.append(pair)
483 |
484 | # Draw first the annotations with the shortest between-boxes distance, in order to reduce
485 | # overlapping between annotations.
486 | box_struct_pairs = sorted(box_struct_pairs, key=lambda x: abs(x[1]['x'] - x[0]['x']))
487 |
488 | # Build array that contains the x and y_max position of the highest annotation or box data at
489 | # a given x position, and also keeps track of the number of stacked annotations.
490 | # This array will be updated when a new annotation is drawn.
491 | y_stack_arr = np.array([[box_struct['x'] for box_struct in box_structs],
492 | [box_struct['ymax'] for box_struct in box_structs],
493 | [0 for i in range(len(box_structs))]])
494 | if loc == 'outside':
495 | y_stack_arr[1, :] = ylim[1]
496 | ann_list = []
497 | test_result_list = []
498 | ymaxs = []
499 | y_stack = []
500 |
501 | for box_struct1, box_struct2 in box_struct_pairs:
502 |
503 | box1 = box_struct1['box']
504 | box2 = box_struct2['box']
505 | label1 = box_struct1['label']
506 | label2 = box_struct2['label']
507 | box_data1 = box_struct1['box_data']
508 | box_data2 = box_struct2['box_data']
509 | x1 = box_struct1['x']
510 | x2 = box_struct2['x']
511 | xi1 = box_struct1['xi']
512 | xi2 = box_struct2['xi']
513 | ymax1 = box_struct1['ymax']
514 | ymax2 = box_struct2['ymax']
515 | i_box_pair = box_struct1['i_box_pair']
516 |
517 | # Find y maximum for all the y_stacks *in between* the box1 and the box2
518 | i_ymax_in_range_x1_x2 = xi1 + np.nanargmax(y_stack_arr[1, np.where((x1 <= y_stack_arr[0, :]) &
519 | (y_stack_arr[0, :] <= x2))])
520 | ymax_in_range_x1_x2 = y_stack_arr[1, i_ymax_in_range_x1_x2]
521 |
522 | if perform_stat_test:
523 | result = stat_test(
524 | box_data1,
525 | box_data2,
526 | test,
527 | comparisons_correction,
528 | len(box_struct_pairs),
529 | **stats_params
530 | )
531 | else:
532 | test_short_name = test_short_name if test_short_name is not None else ''
533 | result = StatResult(
534 | 'Custom statistical test',
535 | test_short_name,
536 | None,
537 | None,
538 | pvalues[i_box_pair]
539 | )
540 |
541 | result.box1 = box1
542 | result.box2 = box2
543 | test_result_list.append(result)
544 |
545 | if verbose >= 1:
546 | print("{} v.s. {}: {}".format(label1, label2, result.formatted_output))
547 |
548 | if text_annot_custom is not None:
549 | text = text_annot_custom[i_box_pair]
550 | else:
551 | if text_format == 'full':
552 | text = "{} p = {}".format('{}', pvalue_format_string).format(result.test_short_name, result.pval)
553 | elif text_format is None:
554 | text = None
555 | elif text_format is 'star':
556 | text = pval_annotation_text(result.pval, pvalue_thresholds)
557 | elif text_format is 'simple':
558 | test_short_name = show_test_name and test_short_name or ""
559 | text = simple_text(result.pval, simple_format_string, pvalue_thresholds, test_short_name)
560 |
561 | yref = ymax_in_range_x1_x2
562 | yref2 = yref
563 |
564 | # Choose the best offset depending on whether there is an annotation below
565 | # at the x position in the range [x1, x2] where the stack is the highest
566 | if y_stack_arr[2, i_ymax_in_range_x1_x2] == 0:
567 | # there is only a box below
568 | offset = y_offset_to_box
569 | else:
570 | # there is an annotation below
571 | offset = y_offset
572 | y = yref2 + offset
573 | h = line_height*yrange
574 | line_x, line_y = [x1, x1, x2, x2], [y, y + h, y + h, y]
575 | if loc == 'inside':
576 | ax.plot(line_x, line_y, lw=linewidth, c=color)
577 | elif loc == 'outside':
578 | line = lines.Line2D(line_x, line_y, lw=linewidth, c=color, transform=ax.transData)
579 | line.set_clip_on(False)
580 | ax.add_line(line)
581 |
582 | # why should we change here the ylim if at the very end we set it to the correct range????
583 | # ax.set_ylim((ylim[0], 1.1*(y + h)))
584 |
585 | if text is not None:
586 | ann = ax.annotate(
587 | text, xy=(np.mean([x1, x2]), y + h),
588 | xytext=(0, text_offset), textcoords='offset points',
589 | xycoords='data', ha='center', va='bottom',
590 | fontsize=fontsize, clip_on=False, annotation_clip=False)
591 | ann_list.append(ann)
592 |
593 | plt.draw()
594 | y_top_annot = None
595 | got_mpl_error = False
596 | if not use_fixed_offset:
597 | try:
598 | bbox = ann.get_window_extent()
599 | bbox_data = bbox.transformed(ax.transData.inverted())
600 | y_top_annot = bbox_data.ymax
601 | except RuntimeError:
602 | got_mpl_error = True
603 |
604 | if use_fixed_offset or got_mpl_error:
605 | if verbose >= 1:
606 | print("Warning: cannot get the text bounding box. Falling back to a fixed"
607 | " y offset. Layout may be not optimal.")
608 | # We will apply a fixed offset in points,
609 | # based on the font size of the annotation.
610 | fontsize_points = FontProperties(size='medium').get_size_in_points()
611 | offset_trans = mtransforms.offset_copy(
612 | ax.transData, fig=fig, x=0,
613 | y=1.0*fontsize_points + text_offset, units='points')
614 | y_top_display = offset_trans.transform((0, y + h))
615 | y_top_annot = ax.transData.inverted().transform(y_top_display)[1]
616 | else:
617 | y_top_annot = y + h
618 |
619 | y_stack.append(y_top_annot) # remark: y_stack is not really necessary if we have the stack_array
620 | ymaxs.append(max(y_stack))
621 | # Fill the highest y position of the annotation into the y_stack array
622 | # for all positions in the range x1 to x2
623 | y_stack_arr[1, (x1 <= y_stack_arr[0, :]) & (y_stack_arr[0, :] <= x2)] = y_top_annot
624 | # Increment the counter of annotations in the y_stack array
625 | y_stack_arr[2, xi1:xi2 + 1] = y_stack_arr[2, xi1:xi2 + 1] + 1
626 |
627 | y_stack_max = max(ymaxs)
628 | if loc == 'inside':
629 | ax.set_ylim((ylim[0], max(1.03*y_stack_max, ylim[1])))
630 | elif loc == 'outside':
631 | ax.set_ylim((ylim[0], ylim[1]))
632 |
633 | return ax, test_result_list
634 |
--------------------------------------------------------------------------------
/statannot/utils.py:
--------------------------------------------------------------------------------
def raise_expected_got(expected, for_, got, error_type=ValueError):
    """Raise ``error_type`` with a uniformly worded "expected/got" message.

    The message names the offending item when ``for_`` is given:
        Expected `expected` for `for_`; got `got` instead.
    and omits it when ``for_`` is ``None``:
        Expected `expected`; got `got` instead.

    This function never returns; it always raises.

    """
    if for_ is None:
        message = 'Expected {}; got {} instead.'.format(expected, got)
    else:
        message = 'Expected {} for {}; got {} instead.'.format(
            expected, for_, got
        )
    raise error_type(message)
18 |
19 |
def assert_is_in(x, valid_values, error_type=ValueError, label=None):
    """Raise an error if x is not in valid_values.

    Parameters
    ----------
    x
        The value to check.
    valid_values
        Container tested with ``x not in valid_values``.
    error_type
        Exception class to raise on failure (default ``ValueError``).
    label
        Optional name of the checked item; when not ``None`` it is included
        in the error message.

    Raises
    ------
    error_type
        If ``x`` is not a member of ``valid_values``.
    """
    if x not in valid_values:
        # Forward error_type: it was previously accepted but ignored, so the
        # helper always fell back to its own default of ValueError.
        raise_expected_got(
            'one of {}'.format(valid_values), label, x, error_type=error_type
        )
24 |
--------------------------------------------------------------------------------
/tests/test_statannot.py:
--------------------------------------------------------------------------------
1 | import unittest
2 | import warnings
3 |
4 | import numpy.testing as npt
5 |
6 | from statannot import statannot
7 |
class TestBonferroni(unittest.TestCase):
    """Unit tests for ``statannot.bonferroni``."""

    @staticmethod
    def _bonferroni_ignoring_warnings(p_values, num_comparisons):
        """Call ``statannot.bonferroni`` with all warnings silenced.

        NOTE(review): presumably the function warns when the explicit number
        of comparisons differs from the number of p-values -- confirm.
        """
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            return statannot.bonferroni(p_values, num_comparisons)

    def test_returns_scalar_with_scalar_input(self):
        result = statannot.bonferroni(0.5)
        # A scalar result has no length, so `len` must raise a TypeError.
        self.assertRaisesRegex(TypeError, 'has no len', len, result)

    def test_returns_correct_values_with_auto_num_comparisons(self):
        p_values = [0.1, 0.05, 0.5]
        # Expected values are the inputs times 3 (their count), capped at 1.0.
        npt.assert_allclose(statannot.bonferroni(p_values), [0.3, 0.15, 1.0])

    def test_returns_correct_values_with_manual_num_comparisons_int(self):
        p_values = [0.1, 0.05, 0.5]
        adjusted = self._bonferroni_ignoring_warnings(p_values, 5)
        npt.assert_allclose(adjusted, [0.5, 0.25, 1.0])

    def test_returns_correct_values_with_manual_num_comparisons_float(self):
        p_values = [0.1, 0.05, 0.5]
        adjusted = self._bonferroni_ignoring_warnings(p_values, 5.0)
        npt.assert_allclose(adjusted, [0.5, 0.25, 1.0])
37 |
38 |
--------------------------------------------------------------------------------