├── tests
│   ├── __init__.py
│   └── test_disarray.py
├── requirements.demo.txt
├── disarray
│   ├── version.py
│   ├── metrics.py
│   └── __init__.py
├── requirements.txt
├── .gitattributes
├── setup.cfg
├── demo
│   └── disarray_demo.gif
├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── pythonpublish.yml
│       └── python-unittest.yml
├── CONTRIBUTING.md
├── codecov.yml
├── .gitignore
├── LICENSE
├── setup.py
└── README.md
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.demo.txt:
--------------------------------------------------------------------------------
1 | scikit-learn
--------------------------------------------------------------------------------
/disarray/version.py:
--------------------------------------------------------------------------------
1 | __version__ = "0.2.0"
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | pandas>=1.0
2 | numpy>=1.16.5
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | *.py linguist-language=python
2 | *.ipynb linguist-documentation
3 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [bdist_wheel]
2 | universal=1
3 |
4 | [metadata]
5 | description-file=README.md
--------------------------------------------------------------------------------
/demo/disarray_demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/arvkevi/disarray/HEAD/demo/disarray_demo.gif
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 |   - package-ecosystem: pip
4 |     directory: "/"
5 |     schedule:
6 |       interval: weekly
7 |       time: "10:00"
8 |     open-pull-requests-limit: 10
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to disarray
2 | Contributions to disarray are welcome. Some ideas for contributing:
3 |
4 | 1. Add `macro` and/or `weighted` averages (a sketch follows this file).
5 | 2. Add additional metrics.
6 | 3. Improve documentation.
7 | 4. Report bugs.
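A minimal sketch of the first contribution idea, computing a macro average from the existing per-class Series API; `macro_precision` is illustrative only and not part of the library:

```python
import pandas as pd
import disarray  # noqa: F401 -- importing registers the `da` accessor

df = pd.DataFrame([[13, 2], [0, 10]], dtype=int)

# A macro average is the unweighted mean of the per-class scores,
# unlike the micro average, which pools the raw counts before dividing.
macro_precision = df.da.precision.mean()
print(macro_precision, df.da.micro_precision)
```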
--------------------------------------------------------------------------------
/disarray/metrics.py:
--------------------------------------------------------------------------------
1 | __all_metrics__ = [
2 |     "accuracy",
3 |     "f1",
4 |     "false_discovery_rate",
5 |     "false_negative_rate",
6 |     "false_positive_rate",
7 |     "negative_predictive_value",
8 |     "positive_predictive_value",
9 |     "precision",
10 |     "recall",
11 |     "sensitivity",
12 |     "specificity",
13 |     "true_negative_rate",
14 |     "true_positive_rate",
15 | ]
16 |
--------------------------------------------------------------------------------
/codecov.yml:
--------------------------------------------------------------------------------
1 | codecov:
2 |   require_ci_to_pass: yes
3 |
4 | coverage:
5 |   precision: 2
6 |   round: down
7 |   range: "70...100"
8 |
9 | parsers:
10 |   gcov:
11 |     branch_detection:
12 |       conditional: yes
13 |       loop: yes
14 |       method: no
15 |       macro: no
16 |
17 | comment:
18 |   layout: "reach,diff,flags,tree"
19 |   behavior: default
20 |   require_changes: no
21 |
22 | ignore:
23 |   - "tests"
24 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.py[cod]
2 | .ipynb_checkpoints
3 | */.ipynb_checkpoints/*
4 |
5 | # C extensions
6 | *.so
7 |
8 | # pycharm
9 | .idea/
10 | .idea
11 |
12 | # Packages
13 | *.egg
14 | *.egg-info
15 | build
16 | eggs
17 | parts
18 | bin
19 | var
20 | sdist
21 | develop-eggs
22 | .installed.cfg
23 | lib
24 | lib64
25 | dist/
26 |
27 | #pyenv
28 | .python-version
29 |
30 | # Installer logs
31 | pip-log.txt
32 |
33 | # Unit test / coverage reports
34 | .coverage
35 | .tox
36 | nosetests.xml
37 |
38 | # Complexity
39 | output/*.html
40 | output/*/index.html
41 |
42 | # Sphinx
43 | docs/_build
44 |
45 | # Cookiecutter
46 | output/
--------------------------------------------------------------------------------
/.github/workflows/pythonpublish.yml:
--------------------------------------------------------------------------------
1 | name: Upload Python Package
2 |
3 | on:
4 |   release:
5 |     types: [created]
6 |
7 | jobs:
8 |   deploy:
9 |     runs-on: ubuntu-latest
10 |     steps:
11 |       - uses: actions/checkout@v1
12 |       - name: Set up Python
13 |         uses: actions/setup-python@v1
14 |         with:
15 |           python-version: '3.x'
16 |       - name: Install dependencies
17 |         run: |
18 |           python -m pip install --upgrade pip
19 |           pip install setuptools wheel twine
20 |       - name: Build and publish
21 |         env:
22 |           TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
23 |           TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
24 |         run: |
25 |           python setup.py sdist bdist_wheel
26 |           twine upload dist/*
--------------------------------------------------------------------------------
/.github/workflows/python-unittest.yml:
--------------------------------------------------------------------------------
1 | name: Test
2 | on: [push]
3 | jobs:
4 |   build:
5 |     runs-on: ${{ matrix.os }}
6 |     strategy:
7 |       matrix:
8 |         os: [ubuntu-latest, macos-latest, windows-latest]
9 |         python-version: [3.6, 3.7, 3.8]
10 |     steps:
11 |       - uses: actions/checkout@v2
12 |       - name: Set up Python
13 |         uses: actions/setup-python@v2
14 |         with:
15 |           python-version: ${{ matrix.python-version }}
16 |       - name: Install dependencies
17 |         run: |
18 |           python -m pip install --upgrade pip
19 |           pip install -r requirements.txt
20 |           pip install .
21 |       - name: unittest
22 |         run: |
23 |           python -m unittest tests/test_disarray.py
24 |       - name: coverage
25 |         run: |
26 |           pip install pytest-cov
27 |           pytest --cov-report=xml --cov=disarray tests/test_disarray.py
28 |       - name: Upload coverage to Codecov
29 |         uses: codecov/codecov-action@v1
30 |         with:
31 |           fail_ci_if_error: true
32 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Kevin Arvai
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | from codecs import open as copen
3 | from os import path
4 |
5 |
6 | here = path.abspath(path.dirname(__file__))
7 |
8 | # Get the long description from the README file
9 | with copen(path.join(here, 'README.md'), encoding='utf-8') as f:
10 |     long_description = f.read()
11 |
12 | # get the dependencies and installs
13 | with copen(path.join(here, 'requirements.txt'), encoding='utf-8') as f:
14 |     all_reqs = f.read().split('\n')
15 |
16 | install_requires = [x.strip() for x in all_reqs if 'git+' not in x]
17 | dependency_links = [x.strip().replace('git+', '')
18 |                     for x in all_reqs if x.startswith('git+')]
19 |
20 | version = {}
21 | with open("disarray/version.py") as fp:
22 |     exec(fp.read(), version)
23 |
24 | setup(
25 |     name='disarray',
26 |     version=version['__version__'],
27 |     description='Calculate confusion matrix metrics from your pandas DataFrame',
28 |     long_description=long_description,
29 |     long_description_content_type='text/markdown',
30 |     url='https://github.com/arvkevi/disarray',
31 |     download_url='https://github.com/arvkevi/disarray/tarball/' + version['__version__'],
32 |     license='MIT',
33 |     classifiers=[
34 |         'Development Status :: 3 - Alpha',
35 |         'Intended Audience :: Science/Research',
36 |         'Topic :: Scientific/Engineering :: Information Analysis',
37 |         'Programming Language :: Python :: 3',
38 |     ],
39 |     keywords='machine learning, supervised learning',
40 |     packages=find_packages(exclude=['docs', 'tests*']),
41 |     include_package_data=True,
42 |     author='Kevin Arvai',
43 |     install_requires=install_requires,
44 |     dependency_links=dependency_links,
45 |     author_email='arvkevi@gmail.com'
46 | )
47 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # disarray
2 | [![Downloads](https://pepy.tech/badge/disarray)](https://pepy.tech/project/disarray)
3 | [![Downloads](https://pepy.tech/badge/disarray/month)](https://pepy.tech/project/disarray/month)
4 | [![Build Status](https://travis-ci.com/arvkevi/disarray.svg?branch=master)](https://travis-ci.com/arvkevi/disarray)
5 | [![codecov](https://codecov.io/gh/arvkevi/disarray/branch/master/graph/badge.svg)](https://codecov.io/gh/arvkevi/disarray)
6 |
7 | `disarray` calculates metrics derived from a confusion matrix and makes them directly accessible from a pandas DataFrame.
8 |
9 | ![disarray demo](demo/disarray_demo.gif)
10 |
11 | If you are already using [`pandas`](https://pandas.pydata.org/), `disarray` is easy to use: simply import `disarray`:
12 | ```python
13 | import pandas as pd
14 |
15 | # dtype=int is important for Windows users
16 | df = pd.DataFrame([[18, 1], [0, 1]], dtype=int)
17 |
18 | import disarray
19 |
20 | df.da.sensitivity
21 | 0    0.947368
22 | 1    1.000000
23 | dtype: float64
24 | ```
25 |
26 | ## Table of contents
27 | - [Installation](#installation)
28 | - [Usage](#usage)
29 |   * [binary classification](#binary-classification)
30 |   * [class counts](#class-counts)
31 |   * [export metrics](#export-metrics)
32 |   * [multi-class classification](#multi-class-classification)
33 |   * [supported metrics](#supported-metrics)
34 | - [Why disarray?](#why-disarray)
35 | - [Contributing](#contributing)
36 |
37 | ## Installation
38 | **Install using pip**
39 | ```bash
40 | $ pip install disarray
41 | ```
42 |
43 | **Clone from GitHub**
44 | ```bash
45 | $ git clone https://github.com/arvkevi/disarray.git
46 | $ python setup.py install
47 | ```
48 |
49 | ## Usage
50 | The `disarray` package is intended to be used like a `pandas` attribute or method. `disarray` is registered as
51 | a `pandas` extension under `da`. For a DataFrame named `df`, access the library using `df.da.`.
52 |
53 |
54 | ### Binary Classification
55 | To understand the input and usage for `disarray`, build an example confusion matrix for a **binary classification**
56 | problem from scratch with `scikit-learn`.
57 | (You can install the packages you need to run the demo with: `pip install -r requirements.demo.txt`)
58 |
59 | ```python
60 | from sklearn import svm, datasets
61 | from sklearn.model_selection import train_test_split
62 | from sklearn.metrics import confusion_matrix
63 | # Generate a random binary classification dataset
64 | X, y = datasets.make_classification(n_classes=2, random_state=42)
65 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
66 | # fit and predict an SVM
67 | classifier = svm.SVC(kernel='linear', C=0.01)
68 | y_pred = classifier.fit(X_train, y_train).predict(X_test)
69 |
70 | cm = confusion_matrix(y_test, y_pred)
71 | print(cm)
72 | [[13  2]
73 |  [ 0 10]]
74 | ```
75 |
76 | Using `disarray` is as easy as importing it and instantiating a DataFrame object from a **square** array of **non-negative** integers.
77 |
78 | ```python
79 | import disarray
80 | import pandas as pd
81 |
82 | # dtype=int is important for Windows users
83 | df = pd.DataFrame(cm, dtype=int)
84 | # access metrics for each class by index
85 | print(df.da.precision[1])
86 | 0.8333333333333334
87 | ```
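To tie that number back to the matrix: column 1 of `cm` holds everything predicted as class 1, with 10 true positives and 2 false positives, so precision is 10 / 12 ≈ 0.833. A quick cross-check against the raw counts (an illustrative sketch, not part of the package):

```python
# Verify df.da.precision[1] from the raw confusion matrix entries.
tp = df.iloc[1, 1]  # actual 1, predicted 1 -> true positives
fp = df.iloc[0, 1]  # actual 0, predicted 1 -> false positives
assert abs(df.da.precision[1] - tp / (tp + fp)) < 1e-12  # 10 / 12
```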
88 |
89 | ### Class Counts
90 | `disarray` stores per-class counts of true positives, false positives, false negatives, and true negatives. Each of these is stored under a capitalized abbreviation: `TP`, `FP`, `FN`, and `TN`.
91 |
92 | ```python
93 | df.da.TP
94 | ```
95 | ```python
96 | 0    13
97 | 1    10
98 | dtype: int64
99 | ```
100 |
101 | ### Export Metrics
102 | Use `df.da.export_metrics()` to store and/or visualize many common performance metrics in a new `pandas` DataFrame
103 | object. Use the `metrics_to_include=` argument to pass a list of metrics defined in `disarray/metrics.py` (the default is
104 | to use `__all_metrics__`).
105 |
106 | ```python
107 | df.da.export_metrics(metrics_to_include=['precision', 'recall', 'f1'])
108 | ```
109 | |           | 0        | 1        | micro-average |
110 | |-----------|----------|----------|---------------|
111 | | precision | 1.0      | 0.833333 | 0.92          |
112 | | recall    | 0.866667 | 1.0      | 0.92          |
113 | | f1        | 0.928571 | 0.909091 | 0.92          |
114 |
115 |
116 |
117 | ### Multi-Class Classification
118 | `disarray` also works with multi-class confusion matrices. Try it out on the iris dataset. Notice that the
119 | DataFrame is instantiated with an `index` and `columns` here, though neither is required.
120 |
121 | ```python
122 | # load the iris dataset
123 | iris = datasets.load_iris()
124 | X = iris.data
125 | y = iris.target
126 | class_names = iris.target_names
127 | # split the training and testing data
128 | X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
129 | # train and fit an SVM
130 | classifier = svm.SVC(kernel='linear', C=0.01)
131 | y_pred = classifier.fit(X_train, y_train).predict(X_test)
132 | cm = confusion_matrix(y_test, y_pred)
133 |
134 | # Instantiate the confusion matrix DataFrame with index and columns
135 | # dtype=int is important for Windows users
136 | df = pd.DataFrame(cm, index=class_names, columns=class_names, dtype=int)
137 | print(df)
138 | ```
139 | |            | setosa | versicolor | virginica |
140 | |------------|--------|------------|-----------|
141 | | setosa     | 13     | 0          | 0         |
142 | | versicolor | 0      | 10         | 6         |
143 | | virginica  | 0      | 0          | 9         |
144 |
145 | `disarray` can provide per-class metrics:
146 |
147 | ```python
148 | df.da.sensitivity
149 | ```
150 | ```python
151 | setosa        1.000
152 | versicolor    0.625
153 | virginica     1.000
154 | dtype: float64
155 | ```
156 | As with any pandas Series, a single class can be accessed with bracket indexing.
157 |
158 | ```python
159 | df.da.sensitivity['setosa']
160 | ```
161 | ```python
162 | 1.0
163 | ```
164 | Currently, a [micro-average](https://datascience.stackexchange.com/a/24051/16855) is supported for both binary and
165 | multi-class confusion matrices, although it is only informative in the multi-class case.
166 | ```python
167 | df.da.micro_sensitivity
168 | ```
169 | ```python
170 | 0.8421052631578947
171 | ```
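The micro-average pools the per-class counts before dividing, so every class contributes in proportion to its size. The same figure can be recovered from the stored counts (an illustrative sketch):

```python
# Micro-averaged sensitivity: sum TP and FN across classes, then divide once.
micro = df.da.TP.sum() / (df.da.TP.sum() + df.da.FN.sum())
print(micro)  # 0.8421052631578947, identical to df.da.micro_sensitivity
```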
172 | Finally, a DataFrame can be exported with selected metrics.
173 | ```python
174 | df.da.export_metrics(metrics_to_include=['sensitivity', 'specificity', 'f1'])
175 | ```
176 |
177 | |             | setosa | versicolor | virginica | micro-average |
178 | |-------------|--------|------------|-----------|---------------|
179 | | sensitivity | 1.0    | 0.625      | 1.0       | 0.842105      |
180 | | specificity | 1.0    | 1.0        | 0.793103  | 0.921053      |
181 | | f1          | 1.0    | 0.769231   | 0.75      | 0.842105      |
182 |
183 | ### Supported Metrics
184 | ```python
185 | 'accuracy',
186 | 'f1',
187 | 'false_discovery_rate',
188 | 'false_negative_rate',
189 | 'false_positive_rate',
190 | 'negative_predictive_value',
191 | 'positive_predictive_value',
192 | 'precision',
193 | 'recall',
194 | 'sensitivity',
195 | 'specificity',
196 | 'true_negative_rate',
197 | 'true_positive_rate',
198 | ```
199 | Micro-averages of each of these are also available, accessible via `df.da.micro_recall`, for example.
200 |
201 | ## Why disarray?
202 |
203 | Working with a [confusion matrix](https://en.wikipedia.org/wiki/Confusion_matrix) is common in data science projects. It is useful to have performance metrics available directly from [pandas](https://pandas.pydata.org/) DataFrames.
204 |
205 | Since `pandas` version `0.23.0`, users can easily
206 | [register custom accessors](https://pandas.pydata.org/pandas-docs/stable/development/extending.html#extending-pandas),
207 | which is how `disarray` is implemented.
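As a rough illustration of that mechanism, here is a minimal accessor of the same shape; the `cells` name and `count` property are invented for this example:

```python
import pandas as pd

@pd.api.extensions.register_dataframe_accessor("cells")
class CellsAccessor:
    """Toy accessor: exposes the number of cells in a DataFrame."""

    def __init__(self, pandas_obj):
        self._obj = pandas_obj

    @property
    def count(self):
        return self._obj.size

pd.DataFrame([[1, 2], [3, 4]]).cells.count  # 4
```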
208 |
209 | ## Contributing
210 |
211 | Contributions are welcome; please refer to [CONTRIBUTING](https://github.com/arvkevi/disarray/blob/master/CONTRIBUTING.md)
212 | to learn more about how to contribute.
213 |
--------------------------------------------------------------------------------
/disarray/__init__.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | from .metrics import __all_metrics__
5 | from .version import __version__
6 |
7 |
8 | @pd.api.extensions.register_dataframe_accessor("da")
9 | class PandasConfusionMatrix:
10 |     def __init__(self, pandas_obj):
11 |         self._validate(pandas_obj)
12 |         self._obj = pandas_obj
13 |         self.TP, self.FP, self.FN, self.TN = self._calculate_metrics(pandas_obj)
14 |
15 |     @staticmethod
16 |     def _validate(obj):
17 |         # verify the input DataFrame is square
18 |         x, y = obj.shape
19 |         if x != y:
20 |             raise AttributeError(
21 |                 "The input DataFrame must be an n x n square DataFrame"
22 |             )
23 |         if not all([dt == int for dt in obj.dtypes]):
24 |             raise AttributeError(
25 |                 "The input DataFrame must contain only integers, but non-ints were detected."
26 |             )
27 |         if not (obj >= 0).all(axis=None):
28 |             raise AttributeError(
29 |                 "The input DataFrame must contain only non-negative integers, but negative "
30 |                 "ints were detected."
31 |             )
32 |
33 |     @staticmethod
34 |     def _calculate_metrics(obj):
35 |         """
36 |         Count the positive and negative instances for the actual and predicted class.
37 |         Code very slightly modified from this StackOverflow answer by user lucidv01d,
38 |         https://stackoverflow.com/users/576134/lucidv01d
39 |         https://stackoverflow.com/a/43331484/4541548
40 |
41 |         :param obj: A pandas DataFrame
42 |         :return: Counts for true positive, false positive, false negative, and true negative
43 |         """
44 |         FP = obj.sum(axis=0) - np.diag(obj)
45 |         FN = obj.sum(axis=1) - np.diag(obj)
46 |         TP = pd.Series(np.diag(obj), index=obj.index)
47 |         TN = obj.values.sum() - (FP + FN + TP)
48 |         return TP, FP, FN, TN
49 |
50 |     @property
51 |     def accuracy(self):
52 |         """Accuracy is defined as (true positive + true negative) / (true positive + false positive + false negative +
53 |         true negative)"""
54 |         return (self.TP + self.TN) / (self.TP + self.FP + self.FN + self.TN)
55 |
56 |     @property
57 |     def f1(self):
58 |         """F1 is the harmonic mean of precision and sensitivity"""
59 |         return 2 * (
60 |             (self.precision * self.sensitivity) / (self.precision + self.sensitivity)
61 |         )
62 |
63 |     @property
64 |     def false_discovery_rate(self):
65 |         """False discovery rate is defined as false positive / (true positive + false positive)"""
66 |         return self.FP / (self.TP + self.FP)
67 |
68 |     @property
69 |     def false_negative_rate(self):
70 |         """False negative rate is defined as false negative / (false negative + true positive)"""
71 |         return self.FN / (self.FN + self.TP)
72 |
73 |     @property
74 |     def false_positive_rate(self):
75 |         """False positive rate is defined as false positive / (false positive + true negative)"""
76 |         return self.FP / (self.FP + self.TN)
77 |
78 |     @property
79 |     def negative_predictive_value(self):
80 |         """Negative predictive value is defined as true negative / (true negative + false negative)"""
81 |         return self.TN / (self.TN + self.FN)
82 |
83 |     @property
84 |     def positive_predictive_value(self):
85 |         """Positive predictive value is defined as true positive / (true positive + false positive)"""
86 |         return self.precision
87 |
88 |     @property
89 |     def precision(self):
90 |         """Precision is defined as true positive / (true positive + false positive)"""
91 |         return self.TP / (self.TP + self.FP)
92 |
93 |     @property
94 |     def recall(self):
95 |         """Recall is defined as true positive / (true positive + false negative)"""
96 |         return self.sensitivity
97 |
98 |     @property
99 |     def sensitivity(self):
100 |         """Sensitivity is defined as true positive / (true positive + false negative)"""
101 |         return self.TP / (self.TP + self.FN)
102 |
103 |     @property
104 |     def specificity(self):
105 |         """Specificity is defined as true negative / (true negative + false positive)"""
106 |         return self.TN / (self.TN + self.FP)
107 |
108 |     @property
109 |     def true_negative_rate(self):
110 |         """True negative rate is defined as true negative / (true negative + false positive)"""
111 |         return self.specificity
112 |
113 |     @property
114 |     def true_positive_rate(self):
115 |         """True positive rate is defined as true positive / (true positive + false negative)"""
116 |         return self.sensitivity
117 |
118 |     @property
119 |     def micro_accuracy(self):
120 |         """Accuracy is defined as (true positive + true negative) / (true positive + false positive + false negative +
121 |         true negative)"""
122 |         return (self.TP.sum() + self.TN.sum()) / (
123 |             self.TP.sum() + self.FP.sum() + self.FN.sum() + self.TN.sum()
124 |         )
125 |
126 |     @property
127 |     def micro_f1(self):
128 |         """F1 is the harmonic mean of precision and sensitivity"""
129 |         return 2 * (
130 |             (self.micro_precision * self.micro_sensitivity)
131 |             / (self.micro_precision + self.micro_sensitivity)
132 |         )
133 |
134 |     @property
135 |     def micro_false_discovery_rate(self):
136 |         """False discovery rate is defined as false positive / (true positive + false positive)"""
137 |         return self.FP.sum() / (self.TP.sum() + self.FP.sum())
138 |
139 |     @property
140 |     def micro_false_negative_rate(self):
141 |         """False negative rate is defined as false negative / (false negative + true positive)"""
142 |         return self.FN.sum() / (self.FN.sum() + self.TP.sum())
143 |
144 |     @property
145 |     def micro_false_positive_rate(self):
146 |         """False positive rate is defined as false positive / (false positive + true negative)"""
147 |         return self.FP.sum() / (self.FP.sum() + self.TN.sum())
148 |
149 |     @property
150 |     def micro_negative_predictive_value(self):
151 |         """Negative predictive value is defined as true negative / (true negative + false negative)"""
152 |         return self.TN.sum() / (self.TN.sum() + self.FN.sum())
153 |
154 |     @property
155 |     def micro_positive_predictive_value(self):
156 |         """Positive predictive value is defined as true positive / (true positive + false positive)"""
157 |         return self.TP.sum() / (self.TP.sum() + self.FP.sum())
158 |
159 |     @property
160 |     def micro_precision(self):
161 |         """Precision is defined as true positive / (true positive + false positive)"""
162 |         return self.TP.sum() / (self.TP.sum() + self.FP.sum())
163 |
164 |     @property
165 |     def micro_recall(self):
166 |         """Recall is defined as true positive / (true positive + false negative)"""
167 |         return self.TP.sum() / (self.TP.sum() + self.FN.sum())
168 |
169 |     @property
170 |     def micro_sensitivity(self):
171 |         """Sensitivity is defined as true positive / (true positive + false negative)"""
172 |         return self.TP.sum() / (self.TP.sum() + self.FN.sum())
173 |
174 |     @property
175 |     def micro_specificity(self):
176 |         """Specificity is defined as true negative / (true negative + false positive)"""
177 |         return self.TN.sum() / (self.TN.sum() + self.FP.sum())
178 |
179 |     @property
180 |     def micro_true_negative_rate(self):
181 |         """True negative rate is defined as true negative / (true negative + false positive)"""
182 |         return self.TN.sum() / (self.TN.sum() + self.FP.sum())
183 |
184 |     @property
185 |     def micro_true_positive_rate(self):
186 |         """True positive rate is defined as true positive / (true positive + false negative)"""
187 |         return self.TP.sum() / (self.TP.sum() + self.FN.sum())
188 |
189 |     def export_metrics(self, metrics_to_include=None):
190 |         """Returns a DataFrame of all metrics defined in metrics.py
191 |         :param metrics_to_include: list of metrics to include in the summary output (must be defined in metrics.py)
192 |         :return: pandas DataFrame
193 |         """
194 |         if metrics_to_include is None:
195 |             metrics_to_include = __all_metrics__
196 |         return pd.DataFrame(
197 |             {metric: getattr(self, metric) for metric in metrics_to_include}
198 |         ).T.join(
199 |             pd.DataFrame.from_dict(
200 |                 {
201 |                     metric: getattr(self, "micro_{}".format(metric))
202 |                     for metric in metrics_to_include
203 |                 },
204 |                 orient="index",
205 |                 columns=["micro-average"],
206 |             )
207 |         )
208 |
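Because `_validate` runs as soon as the accessor is touched, malformed input fails fast. A small demonstration of the checks above (an illustrative sketch, assuming `disarray` is installed):

```python
import pandas as pd
import disarray  # noqa: F401 -- registers the `da` accessor

try:
    # Two rows, three columns: not a square confusion matrix.
    pd.DataFrame([[1, 2, 3], [4, 5, 6]], dtype=int).da
except AttributeError as err:
    print(err)  # The input DataFrame must be an n x n square DataFrame
```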
--------------------------------------------------------------------------------
/tests/test_disarray.py:
--------------------------------------------------------------------------------
1 | import disarray
2 | import pandas as pd
3 | import unittest
4 |
5 | from disarray.metrics import __all_metrics__
6 |
7 |
8 | class TestDisarray(unittest.TestCase):
9 |     @classmethod
10 |     def setUpClass(cls):
11 |         cls.df_binary = pd.DataFrame([[50, 10], [10, 30]], dtype=int)
12 |         cls.classes = ["setosa", "versicolor", "virginica"]
13 |         cls.df_multi = pd.DataFrame(
14 |             [[13, 0, 0], [0, 10, 6], [0, 0, 9]],
15 |             index=cls.classes,
16 |             columns=cls.classes,
17 |             dtype=int,
18 |         )
19 |
20 |     def test_all_metrics(self):
21 |         with self.assertRaises(AttributeError):
22 |             getattr(self.df_binary.da, "unused-metric")
23 |
24 |         detected_metrics = []
25 |         for metric in __all_metrics__:
26 |             if isinstance(getattr(self.df_binary.da, metric), pd.Series):
27 |                 detected_metrics.append(metric)
28 |         self.assertCountEqual(__all_metrics__, detected_metrics)
29 |
30 |     def test_accuracy(self):
31 |         self.assertAlmostEqual(self.df_binary.da.accuracy.loc[1], 0.80, 2)
32 |         self.assertAlmostEqual(self.df_binary.da.micro_accuracy, 0.80, 2)
33 |         self.assertAlmostEqual(self.df_multi.da.accuracy.loc["setosa"], 1.0, 2)
34 |         self.assertAlmostEqual(self.df_multi.da.micro_accuracy, 0.89, 2)
35 |
36 |     def test_f1(self):
37 |         self.assertAlmostEqual(self.df_binary.da.f1.loc[1], 0.75, 2)
38 |         self.assertAlmostEqual(self.df_binary.da.micro_f1, 0.80, 2)
39 |         self.assertAlmostEqual(self.df_multi.da.f1.loc["setosa"], 1.0, 2)
40 |         self.assertAlmostEqual(self.df_multi.da.micro_f1, 0.84, 2)
41 |
42 |     def test_false_discovery_rate(self):
43 |         self.assertAlmostEqual(
44 |             self.df_binary.da.false_discovery_rate.loc[0], 0.17, 2
45 |         )
46 |         self.assertAlmostEqual(
47 |             self.df_binary.da.micro_false_discovery_rate, 0.20, 2
48 |         )
49 |         self.assertAlmostEqual(
50 |             self.df_multi.da.false_discovery_rate.loc["setosa"], 0.0, 2
51 |         )
52 |         self.assertAlmostEqual(
53 |             self.df_multi.da.micro_false_discovery_rate, 0.16, 2
54 |         )
55 |         self.assertAlmostEqual(
56 |             self.df_multi.da.micro_false_discovery_rate,
57 |             1 - self.df_multi.da.micro_precision,
58 |             2,
59 |         )
60 |
61 |     def test_false_negative_rate(self):
62 |         self.assertAlmostEqual(
63 |             self.df_binary.da.false_negative_rate.loc[0], 0.166, 2
64 |         )
65 |         self.assertAlmostEqual(
66 |             self.df_binary.da.micro_false_negative_rate, 0.20, 2
67 |         )
68 |         self.assertAlmostEqual(
69 |             self.df_multi.da.false_negative_rate.loc["setosa"], 0.0, 2
70 |         )
71 |         self.assertAlmostEqual(
72 |             self.df_multi.da.micro_false_negative_rate, 0.16, 2
73 |         )
74 |         self.assertAlmostEqual(
75 |             self.df_multi.da.micro_false_negative_rate,
76 |             1 - self.df_multi.da.micro_true_positive_rate,
77 |             2,
78 |         )
79 |
80 |     def test_false_positive_rate(self):
81 |         self.assertAlmostEqual(
82 |             self.df_binary.da.false_positive_rate.loc[0], 0.25, 2
83 |         )
84 |         self.assertAlmostEqual(
85 |             self.df_binary.da.micro_false_positive_rate, 0.20, 2
86 |         )
87 |         self.assertAlmostEqual(
88 |             self.df_multi.da.false_positive_rate.loc["setosa"], 0.0, 2
89 |         )
90 |         self.assertAlmostEqual(
91 |             self.df_multi.da.micro_false_positive_rate, 0.08, 2
92 |         )
93 |         self.assertAlmostEqual(
94 |             self.df_multi.da.micro_false_positive_rate,
95 |             1 - self.df_multi.da.micro_true_negative_rate,
96 |             2,
97 |         )
98 |
99 |     def test_negative_predictive_value(self):
100 |         self.assertAlmostEqual(
101 |             self.df_binary.da.negative_predictive_value.loc[0], 0.75, 2
102 |         )
103 |         self.assertAlmostEqual(
104 |             self.df_binary.da.micro_negative_predictive_value, 0.80, 2
105 |         )
106 |         self.assertAlmostEqual(
107 |             self.df_multi.da.negative_predictive_value.loc["setosa"], 1.0, 2
108 |         )
109 |         self.assertAlmostEqual(
110 |             self.df_multi.da.micro_negative_predictive_value, 0.92, 2
111 |         )
112 |
113 |     def test_positive_predictive_value(self):
114 |         self.assertAlmostEqual(
115 |             self.df_binary.da.positive_predictive_value.loc[0], 0.83, 2
116 |         )
117 |         self.assertAlmostEqual(
118 |             self.df_binary.da.micro_positive_predictive_value, 0.80, 2
119 |         )
120 |         self.assertAlmostEqual(
121 |             self.df_multi.da.positive_predictive_value.loc["setosa"], 1.0, 2
122 |         )
123 |         self.assertAlmostEqual(
124 |             self.df_multi.da.micro_positive_predictive_value, 0.84, 2
125 |         )
126 |         self.assertAlmostEqual(
127 |             self.df_multi.da.micro_positive_predictive_value,
128 |             1 - self.df_multi.da.micro_false_discovery_rate,
129 |             2,
130 |         )
131 |
132 |     def test_precision(self):
133 |         self.assertAlmostEqual(self.df_binary.da.precision.loc[1], 0.75, 2)
134 |         self.assertAlmostEqual(self.df_binary.da.micro_precision, 0.80, 2)
135 |         self.assertAlmostEqual(self.df_multi.da.precision.loc["setosa"], 1.0, 2)
136 |         self.assertAlmostEqual(self.df_multi.da.micro_precision, 0.84, 2)
137 |         self.assertAlmostEqual(
138 |             self.df_multi.da.micro_precision,
139 |             1 - self.df_multi.da.micro_false_discovery_rate,
140 |             2,
141 |         )
142 |
143 |     def test_recall(self):
144 |         self.assertAlmostEqual(self.df_binary.da.recall.loc[1], 0.75, 2)
145 |         self.assertAlmostEqual(self.df_binary.da.micro_recall, 0.80, 2)
146 |         self.assertAlmostEqual(self.df_multi.da.recall.loc["setosa"], 1.0, 2)
147 |         self.assertAlmostEqual(self.df_multi.da.micro_recall, 0.84, 2)
148 |         self.assertAlmostEqual(
149 |             self.df_multi.da.micro_recall,
150 |             1 - self.df_multi.da.micro_false_negative_rate,
151 |             2,
152 |         )
153 |
154 |     def test_specificity(self):
155 |         self.assertAlmostEqual(self.df_binary.da.specificity.loc[0], 0.75, 2)
156 |         self.assertAlmostEqual(self.df_binary.da.micro_specificity, 0.80, 2)
157 |         self.assertAlmostEqual(
158 |             self.df_multi.da.specificity.loc["setosa"], 1.0, 2
159 |         )
160 |         self.assertAlmostEqual(self.df_multi.da.micro_specificity, 0.92, 2)
161 |         self.assertAlmostEqual(
162 |             self.df_multi.da.micro_specificity,
163 |             1 - self.df_multi.da.micro_false_positive_rate,
164 |             2,
165 |         )
166 |
167 |     def test_true_negative_rate(self):
168 |         self.assertAlmostEqual(
169 |             self.df_binary.da.true_negative_rate.loc[0], 0.75, 2
170 |         )
171 |         self.assertAlmostEqual(
172 |             self.df_binary.da.micro_true_negative_rate, 0.80, 2
173 |         )
174 |         self.assertAlmostEqual(
175 |             self.df_multi.da.true_negative_rate.loc["setosa"], 1.0, 2
176 |         )
177 |         self.assertAlmostEqual(
178 |             self.df_multi.da.micro_true_negative_rate, 0.92, 2
179 |         )
180 |         self.assertAlmostEqual(
181 |             self.df_multi.da.micro_true_negative_rate,
182 |             1 - self.df_multi.da.micro_false_positive_rate,
183 |             2,
184 |         )
185 |
186 |     def test_true_positive_rate(self):
187 |         self.assertAlmostEqual(
188 |             self.df_binary.da.true_positive_rate.loc[0], 0.83, 2
189 |         )
190 |         self.assertAlmostEqual(
191 |             self.df_binary.da.micro_true_positive_rate, 0.80, 2
192 |         )
193 |         self.assertAlmostEqual(
194 |             self.df_multi.da.true_positive_rate.loc["setosa"], 1.0, 2
195 |         )
196 |         self.assertAlmostEqual(
197 |             self.df_multi.da.micro_true_positive_rate, 0.84, 2
198 |         )
199 |         self.assertAlmostEqual(
200 |             self.df_multi.da.micro_true_positive_rate,
201 |             1 - self.df_multi.da.micro_false_negative_rate,
202 |             2,
203 |         )
204 |
205 |     def test_export_metrics(self):
206 |         cm = self.df_binary.da.export_metrics(metrics_to_include=None)
207 |         self.assertListEqual(cm.index.tolist(), __all_metrics__)
208 |
--------------------------------------------------------------------------------