├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt ├── setup.py ├── somperf ├── __init__.py ├── __version__.py ├── metrics │ ├── __init__.py │ ├── external.py │ └── internal.py └── utils │ ├── __init__.py │ ├── neighborhood.py │ └── topology.py └── tests ├── SOMperf-Tests.ipynb └── rc ├── som-classes-csi-1.svg ├── som-classes-csi-2.svg ├── som-classes-csi-3.svg ├── som-classes-superclusters.svg └── som-classes.svg /.gitattributes: -------------------------------------------------------------------------------- 1 | *.ipynb linguist-vendored 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Core latex/pdflatex auxiliary files: 2 | *.aux 3 | *.lof 4 | *.log 5 | *.lot 6 | *.fls 7 | *.out 8 | *.toc 9 | *.fmt 10 | *.fot 11 | *.cb 12 | *.cb2 13 | .*.lb 14 | 15 | ## Intermediate documents: 16 | *.dvi 17 | *.xdv 18 | *-converted-to.* 19 | # these rules might exclude image files for figures etc. 20 | # *.ps 21 | # *.eps 22 | # *.pdf 23 | 24 | ## Generated if empty string is given at "Please type another file name for output:" 25 | .pdf 26 | 27 | ## Bibliography auxiliary files (bibtex/biblatex/biber): 28 | *.bbl 29 | *.bcf 30 | *.blg 31 | *-blx.aux 32 | *-blx.bib 33 | *.run.xml 34 | 35 | ## Build tool auxiliary files: 36 | *.fdb_latexmk 37 | *.synctex 38 | *.synctex(busy) 39 | *.synctex.gz 40 | *.synctex.gz(busy) 41 | *.pdfsync 42 | 43 | ## Auxiliary and intermediate files from other packages: 44 | # algorithms 45 | *.alg 46 | *.loa 47 | 48 | # achemso 49 | acs-*.bib 50 | 51 | # amsthm 52 | *.thm 53 | 54 | # beamer 55 | *.nav 56 | *.pre 57 | *.snm 58 | *.vrb 59 | 60 | # changes 61 | *.soc 62 | 63 | # cprotect 64 | *.cpt 65 | 66 | # elsarticle (documentclass of Elsevier journals) 67 | *.spl 68 | 69 | # endnotes 70 | *.ent 71 | 72 | # fixme 73 | *.lox 74 | 75 | # feynmf/feynmp 76 | *.mf 77 | *.mp 78 | *.t[1-9] 79 | *.t[1-9][0-9] 80 | *.tfm 81 | 82 | #(r)(e)ledmac/(r)(e)ledpar 83 | *.end 84 | *.?end 85 | *.[1-9] 86 | *.[1-9][0-9] 87 | *.[1-9][0-9][0-9] 88 | *.[1-9]R 89 | *.[1-9][0-9]R 90 | *.[1-9][0-9][0-9]R 91 | *.eledsec[1-9] 92 | *.eledsec[1-9]R 93 | *.eledsec[1-9][0-9] 94 | *.eledsec[1-9][0-9]R 95 | *.eledsec[1-9][0-9][0-9] 96 | *.eledsec[1-9][0-9][0-9]R 97 | 98 | # glossaries 99 | *.acn 100 | *.acr 101 | *.glg 102 | *.glo 103 | *.gls 104 | *.glsdefs 105 | 106 | # gnuplottex 107 | *-gnuplottex-* 108 | 109 | # gregoriotex 110 | *.gaux 111 | *.gtex 112 | 113 | # htlatex 114 | *.4ct 115 | *.4tc 116 | *.idv 117 | *.lg 118 | *.trc 119 | *.xref 120 | 121 | # hyperref 122 | *.brf 123 | 124 | # knitr 125 | *-concordance.tex 126 | # TODO Comment the next line if you want to keep your tikz graphics files 127 | *.tikz 128 | *-tikzDictionary 129 | 130 | # listings 131 | *.lol 132 | 133 | # makeidx 134 | *.idx 135 | *.ilg 136 | *.ind 137 | *.ist 138 | 139 | # minitoc 140 | *.maf 141 | *.mlf 142 | *.mlt 143 | *.mtc[0-9]* 144 | *.slf[0-9]* 145 | *.slt[0-9]* 146 | *.stc[0-9]* 147 | 148 | # minted 149 | _minted* 150 | *.pyg 151 | 152 | # morewrites 153 | *.mw 154 | 155 | # nomencl 156 | *.nlg 157 | *.nlo 158 | *.nls 159 | 160 | # pax 161 | *.pax 162 | 163 | # pdfpcnotes 164 | *.pdfpc 165 | 166 | # sagetex 167 | *.sagetex.sage 168 | *.sagetex.py 169 | *.sagetex.scmd 170 | 171 | # scrwfile 172 | *.wrt 173 | 174 | # sympy 175 | *.sout 176 | *.sympy 177 | sympy-plots-for-*.tex/ 178 | 179 | # pdfcomment 180 | *.upa 181 | *.upb 182 | 183 | # pythontex 184 | 
*.pytxcode 185 | pythontex-files-*/ 186 | 187 | # thmtools 188 | *.loe 189 | 190 | # TikZ & PGF 191 | *.dpth 192 | *.md5 193 | *.auxlock 194 | 195 | # todonotes 196 | *.tdo 197 | 198 | # easy-todo 199 | *.lod 200 | 201 | # xmpincl 202 | *.xmpi 203 | 204 | # xindy 205 | *.xdy 206 | 207 | # xypic precompiled matrices 208 | *.xyc 209 | 210 | # endfloat 211 | *.ttt 212 | *.fff 213 | 214 | # Latexian 215 | TSWLatexianTemp* 216 | 217 | ## Editors: 218 | # WinEdt 219 | *.bak 220 | *.sav 221 | 222 | # Texpad 223 | .texpadtmp 224 | 225 | # Kile 226 | *.backup 227 | 228 | # KBibTeX 229 | *~[0-9]* 230 | 231 | # auto folder when using emacs and auctex 232 | ./auto/* 233 | *.el 234 | 235 | # expex forward references with \gathertags 236 | *-tags.tex 237 | 238 | # standalone packages 239 | *.sta 240 | 241 | # generated if using elsarticle.cls 242 | *.spl 243 | 244 | # python 245 | **/.ipynb_checkpoints/ 246 | **/__pycache__/ 247 | 248 | # project files 249 | **/target/ 250 | **/.idea/ 251 | 252 | # images 253 | *.png 254 | *.jpg 255 | *.gif 256 | 257 | # neural network weights 258 | *.h5 259 | 260 | # Mendeley collection data 261 | collection.Data 262 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Florent Forest 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SOMperf: SOM performance metrics and quality indices 2 | 3 | **This package is in its early phase of development. SOM performance metrics have all been tested pretty well, but they may still contain bugs. Please report them in an issue if you find some.** 4 | 5 | If you found this library useful in your work, please cite following preprint: 6 | 7 | > Forest, Florent, Mustapha Lebbah, Hanane Azzag, and Jérôme Lacaille (2020). A Survey and Implementation of Performance Metrics for Self-Organized Maps. arXiv, November 11, 2020. https://doi.org/10.48550/arXiv.2011.05847. 
8 | 9 | ## Installation 10 | 11 | This module was written for Python 3 and depends on the following libraries: 12 | 13 | * numpy 14 | * pandas 15 | * scipy 16 | * scikit-learn 17 | 18 | SOMperf can be installed easily using the setup script: 19 | 20 | ```shell 21 | python3 setup.py install 22 | ``` 23 | 24 | It may become available on PyPI in the future. 25 | 26 | ## Getting started 27 | 28 | SOMperf contains two modules: `metrics`, containing all internal and external quality indices, and `utils`, containing utility functions for SOMs (distance and neighborhood functions). 29 | 30 | Metric functions usually take several of the following arguments: 31 | 32 | * `som`: a self-organizing map model with _K_ prototypes/code vectors in dimension _D_, given as a _K x D_ numpy array 33 | * `x`: data matrix with _N_ samples in dimension _D_, given as a _N x D_ numpy array 34 | * `d`: a pre-computed pairwise (non-squared) euclidean distance matrix between samples and prototypes, given as a _N x K_ numpy array 35 | * `dist_fun`: a function computing the distance between two units on the map, such that `dist_fun(k, l) == 1` iff `k` and `l` are neighbors. Distance functions for the usual grid topologies are available in `somperf.utils.topology`. 36 | * `neighborhood_fun`: the neighborhood kernel function used in the SOM distortion loss. The usual neighborhood functions are available in `somperf.utils.neighborhood`. 37 | 38 | Neighborhood preservation and trustworthiness also take an additional `k` argument for the number of neighbors to consider. 39 | 40 | Here is a quick example using minisom to compute metrics on an 8-color dataset and a 10-by-10 map: 41 | 42 | ```python 43 | import numpy as np 44 | from minisom import MiniSom 45 | 46 | from somperf.metrics import * 47 | from somperf.utils.topology import rectangular_topology_dist 48 | 49 | # 8 colors 50 | X = np.array([[1.0, 0.0, 0.0], 51 | [0.0, 1.0, 0.0], 52 | [0.0, 0.0, 1.0], 53 | [1.0, 1.0, 1.0], 54 | [0.5, 0.5, 0.5], 55 | [1.0, 1.0, 0.0], 56 | [0.0, 1.0, 1.0], 57 | [1.0, 0.0, 1.0]]) 58 | 59 | # define and train 10-by-10 map 60 | map_size = (10, 10) 61 | som = MiniSom(map_size[0], map_size[1], X.shape[-1], sigma=1.0, learning_rate=1.0, random_seed=42) 62 | som.random_weights_init(X) 63 | som.train_random(X, 10000) 64 | 65 | # get weights as a (100, 3) array 66 | weights = som.get_weights().reshape(map_size[0]*map_size[1], -1) 67 | 68 | # compute a few metrics 69 | print('Topographic product = ', topographic_product(rectangular_topology_dist(map_size), weights)) 70 | print('Neighborhood preservation = ', neighborhood_preservation(1, weights, X)) 71 | print('Trustworthiness = ', trustworthiness(1, weights, X)) 72 | ``` 73 | 74 | Here are the results: 75 | 76 | ```python 77 | 0.3002313673993011 # TP > 0 is no surprise, because a (10, 10) map is too large for our 8-color dataset 78 | 0.9375 # original neighbors are not always assigned to neighboring prototypes 79 | 1.0 # perfect trustworthiness means that any neighboring prototypes correspond to original neighboring samples 80 | ``` 81 | 82 | Label-based metrics, also called external indices, instead take as inputs the cluster labels `y_pred` and the ground-truth class labels `y_true`; the only exception is the class scatter index, which also depends on the map topology (`dist_fun`).
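
For example, here is a minimal sketch of how the external indices can be called. The labels below are made up purely for illustration; in practice, `y_pred` would be the cluster assignments produced by your SOM (typically the best-matching unit index of each sample):

```python
import numpy as np

from somperf.metrics import class_scatter_index, clustering_accuracy, purity
from somperf.utils.topology import rectangular_topology_dist

# hypothetical ground-truth classes and cluster assignments for 8 samples on a 2-by-2 map
y_true = np.array([0, 0, 1, 1, 2, 2, 3, 3])
y_pred = np.array([0, 0, 1, 1, 2, 3, 3, 3])  # one sample of class 2 falls into unit 3

map_size = (2, 2)
print('Purity = ', purity(y_true, y_pred))                 # share of samples in their unit's majority class
print('Accuracy = ', clustering_accuracy(y_true, y_pred))  # requires as many clusters as classes
print('CSI = ', class_scatter_index(rectangular_topology_dist(map_size), y_true, y_pred))
```

Other external indices (ARI, NMI, homogeneity, ...) map directly to their scikit-learn counterparts, as listed below.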
83 | 84 | ## List of metrics 85 | 86 | ### Internal 87 | 88 | * [x] Combined error [5] 89 | * [x] Distortion (SOM loss function) [4,8,11] 90 | * [x] Kruskal-Shepard error [3,7] 91 | * [x] Neighborhood preservation [10] 92 | * [x] Quantization error 93 | * [x] Topographic error 94 | * [x] Topographic product [1,2] 95 | * [x] Trustworthiness [10] 96 | * [x] Silhouette :arrow_right: `sklearn.metrics.silhouette_score` 97 | * [x] Davies-Bouldin :arrow_right: `sklearn.metrics.davies_bouldin_score` 98 | * [x] Topographic function [12] 99 | * [x] C Measure [13] 100 | 101 | ### External (label-based) 102 | 103 | * [x] Adjusted Rand index (ARI) :arrow_right: `sklearn.metrics.adjusted_rand_score` 104 | * [x] Class scatter index [3] 105 | * [x] Completeness :arrow_right: `sklearn.metrics.completeness_score` 106 | * [x] Entropy [3] 107 | * [x] Homogeneity :arrow_right: `sklearn.metrics.homogeneity_score` 108 | * [x] Normalized Minor class occurrence [3] (= 1 - purity) 109 | * [x] Mutual information :arrow_right: `sklearn.metrics.mutual_info_score` 110 | * [x] Normalized mutual information (NMI) :arrow_right: `sklearn.metrics.normalized_mutual_info_score` 111 | * [x] Purity 112 | * [x] Unsupervised clustering accuracy 113 | 114 | ## List of SOM utilities 115 | 116 | ### Map distance functions 117 | 118 | * [x] Rectangular topology 119 | * [x] Square topology 120 | * [ ] Hexagonal topology 121 | * [ ] Cylindrical topology 122 | * [ ] Toroidal topology 123 | 124 | ### Neighborhood functions 125 | 126 | * [x] Gaussian neighborhood 127 | * [x] Constant window neighborhood 128 | * [ ] Triangle neighborhood 129 | * [ ] Inverse neighborhood 130 | * [ ] Squared inverse neighborhood 131 | * [ ] Mexican hat neighborhood 132 | * [ ] Clipped versions (0 if d < eps) 133 | 134 | ## Tests 135 | 136 | All metrics have been tested to check results against manually computed values, expected behavior and/or results from research papers. Tests and visualizations are available as a jupyter notebook in the `tests/` directory. 137 | 138 | ## SOM libraries 139 | 140 | Here is a small selection of SOM algorithm implementations: 141 | 142 | * [SOM toolbox](https://github.com/ilarinieminen/SOM-Toolbox) ![](https://img.shields.io/github/stars/ilarinieminen/SOM-Toolbox.svg?style=social) (Matlab) 143 | * [minisom](https://github.com/JustGlowing/minisom) ![](https://img.shields.io/github/stars/JustGlowing/minisom.svg?style=social) (Python) 144 | * [SOMPY](https://github.com/sevamoo/SOMPY) ![](https://img.shields.io/github/stars/sevamoo/SOMPY.svg?style=social) (Python) 145 | * [tensorflow-som](https://github.com/cgorman/tensorflow-som) ![](https://img.shields.io/github/stars/cgorman/tensorflow-som.svg?style=social) (Python/TensorFlow) 146 | * [DESOM](https://github.com/FlorentF9/DESOM) ![](https://img.shields.io/github/stars/FlorentF9/DESOM.svg?style=social) (Python/Keras) 147 | * SOMbrero ([CRAN](https://cran.r-project.org/web/packages/SOMbrero/index.html)/[Github](https://github.com/tuxette/SOMbrero)) ![](https://img.shields.io/github/stars/tuxette/SOMbrero.svg?style=social) (R) 148 | * [sparkml-som](https://github.com/FlorentF9/sparkml-som) ![](https://img.shields.io/github/stars/FlorentF9/sparkml-som.svg?style=social) (Scala/Spark ML) 149 | 150 | ## References 151 | 152 | > [1] Bauer, H.-U., & Pawelzik, K. R. (1992). Quantifying the Neighborhood Preservation of Self-Organizing Feature Maps. IEEE Transactions on Neural Networks, 3(4), 570–579. 
https://doi.org/10.1109/72.143371 153 | 154 | > [2] Bauer, H.-U., Pawelzik, K., & Geisel, T. (1992). A Topographic Product for the Optimization of Self-Organizing Feature Maps. Advances in Neural Information Processing Systems, 4, 1141–1147. 155 | 156 | > [3] Elend, L., & Kramer, O. (2019). Self-Organizing Maps with Convolutional Layers. In WSOM 2019: Advances in Self-Organizing Maps, Learning Vector Quantization, Clustering and Data Visualization (Vol. 976, pp. 23–32). Springer International Publishing. https://doi.org/10.1007/978-3-030-19642-4 157 | 158 | > [4] Erwin, E., Obermayer, K., & Schulten, K. (1992). Self-Organizing Maps: Ordering, convergence properties and energy functions. Biological Cybernetics, 67(1), 47–55. 159 | 160 | > [5] Kaski, S., & Lagus, K. (1996). Comparing Self-Organizing Maps. In Proceedings of the International Conference on Artificial Neural Networks (ICANN). 161 | 162 | > [6] Kohonen, T. (1990). The Self-Organizing Map. Proceedings of the IEEE, 78(9), 1464–1480. https://doi.org/10.1109/5.58325 163 | 164 | > [7] Kruskal, J. B. (1964). Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis. Psychometrika, 29(1), 1–27. 165 | 166 | > [8] Lampinen, J., & Oja, E. (1992). Clustering Properties of Hierarchical Self-Organizing Maps. Journal of Mathematical Imaging and Vision, 2(2–3), 261–272. 167 | 168 | > [9] Pölzlbauer, G. (2004). Survey and comparison of quality measures for self-organizing maps. Proceedings of the Fifth Workshop on Data Analysis (WDA'04), 67–82. 169 | 170 | > [10] Venna, J., & Kaski, S. (2001). Neighborhood preservation in nonlinear projection methods: An experimental study. Lecture Notes in Computer Science, 2130. https://doi.org/10.1007/3-540-44668-0 171 | 172 | > [11] Vesanto, J., Sulkava, M., & Hollmén, J. (2003). On the Decomposition of the Self-Organizing Map Distortion Measure. Proceedings of the Workshop on Self-Organizing Maps (WSOM'03), 11–16. 173 | 174 | > [12] Villmann, T., Der, R., & Martinetz, T. (1994). A new quantitative measure of topology preservation in Kohonen's feature maps. Proceedings of the IEEE International Conference on Neural Networks (ICNN'94), Orlando, FL, USA, 645–648. 175 | 176 | > [13] Goodhill, G. J., & Sejnowski, T. J. (1996). Quantifying neighbourhood preservation in topographic mappings. Proceedings of the 3rd Joint Symposium on Neural Computation, La Jolla, CA, 61–82.
177 | 178 | ## Future work 179 | 180 | * Implement per-node metrics 181 | * Other SOM analysis and visualization modules 182 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | pandas 3 | scipy 4 | scikit-learn 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open('requirements.txt') as fp: 4 | install_reqs = [r.rstrip() for r in fp.readlines() 5 | if not r.startswith('#') and not r.startswith('git+')] 6 | 7 | with open('somperf/__version__.py') as fh: 8 | version = fh.readlines()[-1].split()[-1].strip('\'\'') 9 | 10 | setup( 11 | name='SOMperf', 12 | version=version, 13 | description='Self Organizing Maps performance metrics and quality indices', 14 | author='Florent Forest', 15 | author_email='florent.forest9@gmail.com', 16 | packages=find_packages(), 17 | install_requires=install_reqs, 18 | url='https://github.com/FlorentF9/SOMperf' 19 | ) 20 | -------------------------------------------------------------------------------- /somperf/__init__.py: -------------------------------------------------------------------------------- 1 | from somperf.__version__ import __version__ -------------------------------------------------------------------------------- /somperf/__version__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Version 3 | """ 4 | 5 | __version__ = '0.2b0' 6 | -------------------------------------------------------------------------------- /somperf/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The `somperf.metrics` module contains performance metrics functions for self-organizing maps. 
3 | """ 4 | 5 | from .internal import c_measure 6 | from .internal import combined_error 7 | from .internal import distortion 8 | from .internal import kruskal_shepard_error 9 | from .internal import neighborhood_preservation 10 | from .internal import neighborhood_preservation_trustworthiness 11 | from .internal import quantization_error 12 | from .internal import topographic_error 13 | from .internal import topographic_function 14 | from .internal import topographic_product 15 | from .internal import trustworthiness 16 | 17 | from .external import class_scatter_index 18 | from .external import clustering_accuracy 19 | from .external import entropy 20 | from .external import normalized_minor_class_occurrence 21 | from .external import purity 22 | 23 | __all__ = [ 24 | 'c_measure', 25 | 'combined_error', 26 | 'distortion', 27 | 'kruskal_shepard_error', 28 | 'neighborhood_preservation', 29 | 'neighborhood_preservation_trustworthiness', 30 | 'quantization_error', 31 | 'topographic_error', 32 | 'topographic_function', 33 | 'topographic_product', 34 | 'trustworthiness', 35 | 'class_scatter_index', 36 | 'clustering_accuracy', 37 | 'entropy', 38 | 'normalized_minor_class_occurrence', 39 | 'purity' 40 | ] 41 | -------------------------------------------------------------------------------- /somperf/metrics/external.py: -------------------------------------------------------------------------------- 1 | """ 2 | External indices 3 | """ 4 | 5 | import numpy as np 6 | from sklearn.metrics.cluster._supervised import check_clusterings 7 | from scipy.optimize import linear_sum_assignment as linear_assignment 8 | from sklearn.metrics import accuracy_score 9 | from scipy.sparse import csr_matrix 10 | from scipy.sparse.csgraph import connected_components 11 | 12 | 13 | def _contingency_matrix(y_true, y_pred): 14 | w = np.zeros((y_true.max() + 1, y_pred.max() + 1), dtype=np.int64) 15 | for c, k in zip(y_true, y_pred): 16 | w[c, k] += 1 # w[c, k] = number of c-labeled samples in map cell k 17 | return w 18 | 19 | 20 | def class_scatter_index(dist_fun, y_true, y_pred): 21 | """Class scatter index (CSI). 22 | 23 | Parameters 24 | ---------- 25 | dist_fun : function (k : int, l : int) => int 26 | distance function between units k and l on the map. 27 | y_true : array, shape = [n] 28 | true labels. 29 | y_pred : array, shape = [n] 30 | predicted cluster ids. 31 | 32 | Returns 33 | ------- 34 | csi : float (lower is better) 35 | 36 | References 37 | ---------- 38 | Elend, L., & Kramer, O. (2019). Self-Organizing Maps with Convolutional Layers. 39 | """ 40 | y_true = y_true.astype(np.int64) 41 | y_pred = y_pred.astype(np.int64) 42 | check_clusterings(y_true, y_pred) 43 | n_classes = y_true.max() + 1 44 | n_units = y_pred.max() + 1 45 | w = _contingency_matrix(y_true, y_pred) 46 | groups = np.zeros(n_classes, dtype=np.int64) 47 | for c in range(n_classes): 48 | connectivity = csr_matrix([[1 if dist_fun(k, l) == 1 else 0 49 | for l in range(n_units) if w[c, l] > 0] 50 | for k in range(n_units) if w[c, k] > 0]) 51 | groups[c] = connected_components(csgraph=connectivity, directed=False, return_labels=False) 52 | return np.mean(groups) 53 | 54 | 55 | def clustering_accuracy(y_true, y_pred): 56 | """Unsupervised clustering accuracy. 57 | 58 | Can only be used if the number of target classes in y_true is equal to the number of clusters in y_pred. 59 | 60 | Parameters 61 | ---------- 62 | y_true : array, shape = [n] 63 | true labels. 64 | y_pred : array, shape = [n] 65 | predicted cluster ids. 
66 | 67 | Returns 68 | ------- 69 | accuracy : float in [0,1] (higher is better) 70 | """ 71 | y_true = y_true.astype(np.int64) 72 | y_pred = y_pred.astype(np.int64) 73 | check_clusterings(y_true, y_pred) 74 | w = _contingency_matrix(y_true, y_pred).T 75 | row_ind, col_ind = linear_assignment(w.max() - w) 76 | return np.sum([w[i, j] for i, j in zip(row_ind, col_ind)]) / y_true.size 77 | 78 | 79 | def entropy(y_true, y_pred): 80 | """SOM class distribution entropy measure. 81 | 82 | Parameters 83 | ---------- 84 | y_true : array, shape = [n] 85 | true labels. 86 | y_pred : array, shape = [n] 87 | predicted cluster ids. 88 | 89 | Returns 90 | ------- 91 | entropy : float (lower is better) 92 | 93 | References 94 | ---------- 95 | Elend, L., & Kramer, O. (2019). Self-Organizing Maps with Convolutional Layers. 96 | """ 97 | y_true = y_true.astype(np.int64) 98 | y_pred = y_pred.astype(np.int64) 99 | check_clusterings(y_true, y_pred) 100 | w = _contingency_matrix(y_true, y_pred) 101 | freqs = np.divide(w.max(axis=0) + 1e-12, w.sum(axis=0) + 1e-12) # relative frequencies of majority class 102 | return np.sum(-np.log(freqs)) 103 | 104 | 105 | def normalized_minor_class_occurrence(y_true, y_pred): 106 | """Normalized minor class occurrence (NMCO). 107 | 108 | Ratio of samples that do not belong to the majority ground-truth label in their cluster. Is equivalent 109 | to 1 - purity. 110 | 111 | Parameters 112 | ---------- 113 | y_true : array, shape = [n] 114 | true labels. 115 | y_pred : array, shape = [n] 116 | predicted cluster ids. 117 | 118 | Returns 119 | ------- 120 | nmco : float in [0,1] (lower is better) 121 | 122 | References 123 | ---------- 124 | Elend, L., & Kramer, O. (2019). Self-Organizing Maps with Convolutional Layers. 125 | """ 126 | return 1.0 - purity(y_true, y_pred) 127 | 128 | 129 | def purity(y_true, y_pred): 130 | """Clustering purity. 131 | 132 | Parameters 133 | ---------- 134 | y_true : array, shape = [n] 135 | true labels. 136 | y_pred : array, shape = [n] 137 | predicted cluster ids. 138 | 139 | Returns 140 | ------- 141 | purity : float in [0,1] (higher is better) 142 | """ 143 | y_true = y_true.astype(np.int64) 144 | y_pred = y_pred.astype(np.int64) 145 | check_clusterings(y_true, y_pred) 146 | w = _contingency_matrix(y_true, y_pred) 147 | label_mapping = w.argmax(axis=0) 148 | y_pred_voted = np.array([label_mapping[y] for y in y_pred]) 149 | return accuracy_score(y_true, y_pred_voted) 150 | -------------------------------------------------------------------------------- /somperf/metrics/internal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Internal indices 3 | """ 4 | 5 | import numpy as np 6 | from sklearn.metrics.pairwise import euclidean_distances 7 | from scipy.sparse import csr_matrix 8 | from scipy.sparse.csgraph import shortest_path 9 | import pandas as pd 10 | 11 | 12 | def c_measure(dist_fun, x, som=None, d=None): 13 | """C measure. 14 | 15 | Measures distance preservation between input space and output space. Euclidean distance is used in input space. 16 | In output space, distance is usually Manhattan distance between the best matching units on the maps (this distance 17 | is provided by the dist_fun argument). 18 | 19 | Parameters 20 | ---------- 21 | dist_fun : function (k : int, l : int) => int 22 | distance function between units k and l on the map. 23 | x : array, shape = [n_samples, dim] 24 | input samples. 25 | som : array, shape = [n_units, dim] 26 | (optional) SOM code vectors. 
27 | d : array, shape = [n_samples, n_units] 28 | (optional) euclidean distances between input samples and code vectors. 29 | 30 | Returns 31 | ------- 32 | c : float 33 | C measure (higher is better) 34 | 35 | References 36 | ---------- 37 | Goodhill, G. J., & Sejnowski, T. J. (1996). Quantifying neighbourhood preservation in topographic mappings. 38 | """ 39 | n = x.shape[0] 40 | if d is None: 41 | if som is None: 42 | raise ValueError('If distance matrix d is not given, som cannot be None!') 43 | else: 44 | d = euclidean_distances(x, som) 45 | d_data = euclidean_distances(x) 46 | bmus = np.argmin(d, axis=1) 47 | d_som = np.array([[dist_fun(k, l) 48 | for l in bmus] 49 | for k in bmus], dtype=np.float64) 50 | return np.sum(d_data * d_som) / 2.0 # should be normalized by n(n-1) ? 51 | 52 | 53 | def combined_error(dist_fun, som, x=None, d=None): 54 | """Combined error. 55 | 56 | Parameters 57 | ---------- 58 | dist_fun : function (k : int, l : int) => int 59 | distance function between units k and l on the map. 60 | som : array, shape = [n_units, dim] 61 | SOM code vectors. 62 | x : array, shape = [n_samples, dim] 63 | (optional) input samples. 64 | d : array, shape = [n_samples, n_units] 65 | (optional) euclidean distances between input samples and code vectors. 66 | 67 | Returns 68 | ------- 69 | ce : float 70 | combined error (lower is better) 71 | 72 | References 73 | ---------- 74 | Kaski, S., & Lagus, K. (1996). Comparing Self-Organizing Maps. 75 | """ 76 | if d is None: 77 | if x is None: 78 | raise ValueError('If distance matrix d is not given, x cannot be None!') 79 | else: 80 | d = euclidean_distances(x, som) 81 | # pairwise euclidean distances between neighboring SOM prototypes 82 | # distances between non-neighboring units are set to inf to force the path to follow neighboring units 83 | d_som = csr_matrix([[np.sqrt(np.sum(np.square(som[k] - som[l]))) if dist_fun(k, l) == 1 else np.inf 84 | for l in range(som.shape[0])] 85 | for k in range(som.shape[0])]) 86 | tbmus = np.argsort(d, axis=1)[:, :2] # two best matching units 87 | ces = np.zeros(d.shape[0]) 88 | for i in range(d.shape[0]): 89 | ces[i] = d[i, tbmus[i, 0]] 90 | if dist_fun(tbmus[i, 0], tbmus[i, 1]) == 1: # if BMUs are neighbors 91 | ces[i] += d_som[tbmus[i, 0], tbmus[i, 1]] 92 | else: 93 | ces[i] += shortest_path(csgraph=d_som, 94 | method='auto', 95 | directed=False, 96 | return_predecessors=False, 97 | indices=tbmus[i, 0])[tbmus[i, 1]] 98 | return np.mean(ces) 99 | 100 | 101 | def distortion(dist_fun, neighborhood_fun, som=None, x=None, d=None): 102 | """Distortion (SOM loss function). 103 | 104 | Computes distortion, which is the loss function minimized by the SOM learning algorithm. 105 | It consists in a sum of squared euclidean distances between samples and SOM prototypes, weighted 106 | by a neighborhood function that depends on the distances to the best-matching unit on the map. 107 | 108 | Parameters 109 | ---------- 110 | dist_fun : function (k : int, l : int) => int 111 | distance function between units k and l on the map. 112 | neighborhood_fun : function (d : int) => float in [0,1] 113 | neighborhood function, equal to 1 when d = 0 and decreasing with d. 114 | som : array, shape = [n_units, dim] 115 | (optional) SOM code vectors. 116 | x : array, shape = [n_samples, dim] 117 | (optional) input samples. 118 | d : array, shape = [n_samples, n_units] 119 | (optional) euclidean distances between input samples and code vectors. 
120 | 121 | Returns 122 | ------- 123 | distortion : float 124 | distortion error (lower is better) 125 | """ 126 | if d is None: 127 | if som is None or x is None: 128 | raise ValueError('If distance matrix d is not given, som and x cannot be None!') 129 | else: 130 | d = euclidean_distances(x, som) 131 | bmus = np.argmin(d, axis=1) 132 | weights = np.array([[neighborhood_fun(dist_fun(bmu, k)) 133 | for k in range(d.shape[1])] 134 | for bmu in bmus]) 135 | distortions = np.sum(weights * np.square(d), axis=1) 136 | return np.mean(distortions) 137 | 138 | 139 | def kruskal_shepard_error(dist_fun, x, som=None, d=None): 140 | """Kruskal-Shepard error. 141 | 142 | Measures distance preservation between input space and output space. Euclidean distance is used in input space. 143 | In output space, distance is usually Manhattan distance between the best matching units on the maps (this distance 144 | is provided by the dist_fun argument). 145 | 146 | Parameters 147 | ---------- 148 | dist_fun : function (k : int, l : int) => int 149 | distance function between units k and l on the map. 150 | x : array, shape = [n_samples, dim] 151 | input samples. 152 | som : array, shape = [n_units, dim] 153 | (optional) SOM code vectors. 154 | d : array, shape = [n_samples, n_units] 155 | (optional) euclidean distances between input samples and code vectors. 156 | 157 | Returns 158 | ------- 159 | kse : float 160 | Kruskal-Shepard error (lower is better) 161 | 162 | References 163 | ---------- 164 | Kruskal, J.B. (1964). Multidimensional scaling by optimizing goodness of fit to a nonmetric hypothesis. 165 | Elend, L., & Kramer, O. (2019). Self-Organizing Maps with Convolutional Layers. 166 | """ 167 | n = x.shape[0] 168 | if d is None: 169 | if som is None: 170 | raise ValueError('If distance matrix d is not given, som cannot be None!') 171 | else: 172 | d = euclidean_distances(x, som) 173 | d_data = euclidean_distances(x) 174 | d_data /= d_data.max() 175 | bmus = np.argmin(d, axis=1) 176 | d_som = np.array([[dist_fun(k, l) 177 | for l in bmus] 178 | for k in bmus], dtype=np.float64) 179 | d_som /= d_som.max() 180 | return np.sum(np.square(d_data - d_som)) / (n**2 - n) 181 | 182 | 183 | def neighborhood_preservation(k, som, x, d=None): 184 | """Neighborhood preservation of SOM map. 185 | 186 | Parameters 187 | ---------- 188 | k : int 189 | number of neighbors. Must be < n // 2 where n is the data size. 190 | som : array, shape = [n_units, dim] 191 | SOM code vectors. 192 | x : array, shape = [n_samples, dim] 193 | input samples. 194 | d : array, shape = [n_samples, n_units] 195 | (optional) euclidean distances between input samples and code vectors. 196 | 197 | Returns 198 | ------- 199 | np : float in [0, 1] 200 | neighborhood preservation measure (higher is better) 201 | 202 | References 203 | ---------- 204 | Venna, J., & Kaski, S. (2001). Neighborhood preservation in nonlinear projection methods: An experimental study. 205 | """ 206 | n = x.shape[0] # data size 207 | assert k < (n / 2), 'Number of neighbors k must be < N/2 (where N is the number of data samples).' 
208 | if d is None: 209 | d = euclidean_distances(x, som) 210 | d_data = euclidean_distances(x) + np.diag(np.inf * np.ones(n)) 211 | projections = som[np.argmin(d, axis=1)] 212 | d_projections = euclidean_distances(projections) + np.diag(np.inf * np.ones(n)) 213 | original_ranks = pd.DataFrame(d_data).rank(method='min', axis=1) 214 | projected_ranks = pd.DataFrame(d_projections).rank(method='min', axis=1) 215 | weights = (projected_ranks <= k).sum(axis=1) / (original_ranks <= k).sum(axis=1) # weight k-NN ties 216 | nps = np.zeros(n) 217 | for i in range(n): 218 | for j in range(n): 219 | if (i != j) and (original_ranks.iloc[i, j] <= k) and (projected_ranks.iloc[i, j] > k): 220 | nps[i] += (projected_ranks.iloc[i, j] - k) * weights[i] 221 | return 1.0 - 2.0 / (n * k * (2*n - 3*k - 1)) * np.sum(nps) 222 | 223 | 224 | def neighborhood_preservation_trustworthiness(k, som, x, d=None): 225 | """Neighborhood preservation and trustworthiness of SOM map. 226 | 227 | Parameters 228 | ---------- 229 | k : int 230 | number of neighbors. Must be < n // 2 where n is the data size. 231 | som : array, shape = [n_units, dim] 232 | SOM code vectors. 233 | x : array, shape = [n_samples, dim] 234 | input samples. 235 | d : array, shape = [n_samples, n_units] 236 | (optional) euclidean distances between input samples and code vectors. 237 | 238 | Returns 239 | ------- 240 | npr, tr : float tuple in [0, 1] 241 | neighborhood preservation and trustworthiness measures (higher is better) 242 | 243 | References 244 | ---------- 245 | Venna, J., & Kaski, S. (2001). Neighborhood preservation in nonlinear projection methods: An experimental study. 246 | """ 247 | n = x.shape[0] # data size 248 | assert k < (n / 2), 'Number of neighbors k must be < N/2 (where N is the number of data samples).' 249 | if d is None: 250 | d = euclidean_distances(x, som) 251 | d_data = euclidean_distances(x) + np.diag(np.inf * np.ones(n)) 252 | projections = som[np.argmin(d, axis=1)] 253 | d_projections = euclidean_distances(projections) + np.diag(np.inf * np.ones(n)) 254 | original_ranks = pd.DataFrame(d_data).rank(method='min', axis=1) 255 | projected_ranks = pd.DataFrame(d_projections).rank(method='min', axis=1) 256 | weights = (projected_ranks <= k).sum(axis=1) / (original_ranks <= k).sum(axis=1) # weight k-NN ties 257 | nps = np.zeros(n) 258 | trs = np.zeros(n) 259 | for i in range(n): 260 | for j in range(n): 261 | if (i != j) and (original_ranks.iloc[i, j] <= k) and (projected_ranks.iloc[i, j] > k): 262 | nps[i] += (projected_ranks.iloc[i, j] - k) * weights[i] 263 | elif (i != j) and (original_ranks.iloc[i, j] > k) and (projected_ranks.iloc[i, j] <= k): 264 | trs[i] += (original_ranks.iloc[i, j] - k) / weights[i] 265 | npr = 1.0 - 2.0 / (n * k * (2*n - 3*k - 1)) * np.sum(nps) 266 | tr = 1.0 - 2.0 / (n * k * (2*n - 3*k - 1)) * np.sum(trs) 267 | return npr, tr 268 | 269 | 270 | def quantization_error(som=None, x=None, d=None): 271 | """Quantization error. 272 | 273 | Computes mean quantization error with euclidean distance. 274 | 275 | Parameters 276 | ---------- 277 | som : array, shape = [n_units, dim] 278 | (optional) SOM code vectors. 279 | x : array, shape = [n_samples, dim] 280 | (optional) input samples. 281 | d : array, shape = [n_samples, n_units] 282 | (optional) euclidean distances between input samples and code vectors. 
283 | 284 | Returns 285 | ------- 286 | qe : float 287 | quantization error (lower is better) 288 | """ 289 | if d is None: 290 | if som is None or x is None: 291 | raise ValueError('If distance matrix d is not given, som and x cannot be None!') 292 | else: 293 | d = euclidean_distances(x, som) 294 | qes = np.min(d, axis=1) 295 | return np.mean(qes) 296 | 297 | 298 | def topographic_error(dist_fun, som=None, x=None, d=None): 299 | """SOM topographic error. 300 | 301 | Topographic error is the ratio of data points for which the two best matching units are not neighbors on the map. 302 | 303 | Parameters 304 | ---------- 305 | dist_fun : function (k : int, l : int) => int 306 | distance function between units k and l on the map. 307 | som : array, shape = [n_units, dim] 308 | (optional) SOM code vectors. 309 | x : array, shape = [n_samples, dim] 310 | (optional) input samples. 311 | d : array, shape = [n_samples, n_units] 312 | (optional) euclidean distances between input samples and code vectors. 313 | 314 | Returns 315 | ------- 316 | te : float in [0, 1] 317 | topographic error (lower is better) 318 | """ 319 | if d is None: 320 | if som is None or x is None: 321 | raise ValueError('If distance matrix d is not given, som and x cannot be None!') 322 | else: 323 | d = euclidean_distances(x, som) 324 | tbmus = np.argsort(d, axis=1)[:, :2] # two best matching units 325 | tes = np.array([dist_fun(tbmu[0], tbmu[1]) > 1 for tbmu in tbmus]) 326 | return np.mean(tes) 327 | 328 | 329 | def topographic_function(ks, dist_fun, max_dist, som=None, x=None, d=None, som_dim=2): 330 | """Normalized topographic function. 331 | 332 | Parameters 333 | ---------- 334 | ks: array 335 | topographic function parameters. Must be normalized distances, i.e. k=d/max_dist where d is a distance 336 | on the map and max_dist is the maximum distance between two units on the map. 337 | dist_fun : function (k : int, l : int) => int 338 | distance function between units k and l on the map. 339 | max_dist : int 340 | maximum distance on the map. 341 | som : array, shape = [n_units, dim] 342 | (optional) SOM code vectors. 343 | x : array, shape = [n_samples, dim] 344 | (optional) input samples. 345 | d : array, shape = [n_samples, n_units] 346 | (optional) euclidean distances between input samples and code vectors. 347 | som_dim : int (default=2) 348 | number of dimensions of the SOM grid 349 | 350 | Returns 351 | ------- 352 | tf : array 353 | topographic function taken at values ks 354 | 355 | References 356 | ---------- 357 | Villmann, T., Der, R., & Martinetz, T. (1994). A New Quantitative Measure of Topology Preservation in Kohonen’s Feature Maps. 358 | """ 359 | if d is None: 360 | if som is None or x is None: 361 | raise ValueError('If distance matrix d is not given, som and x cannot be None!') 362 | else: 363 | d = euclidean_distances(x, som) 364 | tbmus = np.argsort(d, axis=1)[:, :2] # two best matching units 365 | n_units = d.shape[1] 366 | C = np.zeros((n_units, n_units), dtype='int') # connectivity matrix 367 | for tbmu in tbmus: 368 | C[tbmu[0], tbmu[1]] = 1 369 | C[tbmu[1], tbmu[0]] = 1 370 | tf = np.zeros(len(ks)) 371 | for c in range(n_units): 372 | for cc in range(n_units): 373 | for i, k in enumerate(ks): 374 | if dist_fun(c, cc)/max_dist > k and C[c, cc] == 1: 375 | tf[i] += 1 376 | return tf / (n_units * (n_units - 3**som_dim)) 377 | 378 | 379 | def topographic_product(dist_fun, som): 380 | """Topographic product. 
381 | 382 | Parameters 383 | ---------- 384 | dist_fun : function (k : int, l : int) => int 385 | distance function between units k and l on the map. 386 | som : array, shape = [n_units, dim] 387 | SOM code vectors. 388 | 389 | Returns 390 | ------- 391 | tp : float 392 | topographic product (tp < 0 when the map is too small, tp > 0 if it is too large) 393 | 394 | References 395 | ---------- 396 | Bauer, H. U., & Pawelzik, K. R. (1992). Quantifying the Neighborhood Preservation of Self-Organizing Feature Maps. 397 | """ 398 | n_units = som.shape[0] 399 | original_d = euclidean_distances(som) + 1e-16 400 | original_knn = np.argsort(original_d, axis=1) 401 | map_d = np.array([[dist_fun(j, k) 402 | for k in range(n_units)] 403 | for j in range(n_units)]) + 1e-16 404 | map_knn = np.argsort(map_d, axis=1) 405 | # compute Q1 (n_units x n_units-1 matrix) 406 | q1 = np.array([[np.divide(original_d[j, map_knn[j, k]], original_d[j, original_knn[j, k]]) 407 | for k in range(1, n_units)] 408 | for j in range(n_units)]) 409 | # compute Q2 (n_units x n_units-1 matrix) 410 | q2 = np.array([[np.divide(map_d[j, map_knn[j, k]], map_d[j, original_knn[j, k]]) 411 | for k in range(1, n_units)] 412 | for j in range(n_units)]) 413 | # compute P3 (n_units x n_units-1 matrix) 414 | p3 = np.array([[np.prod([(q1[j, l] * q2[j, l])**(1/(2*k)) for l in range(k)]) 415 | for k in range(1, n_units)] 416 | for j in range(n_units)]) 417 | # combine final result (float) 418 | return np.sum(np.log(p3)) / (n_units * (n_units - 1)) 419 | 420 | 421 | def trustworthiness(k, som, x, d=None): 422 | """Trustworthiness of SOM map. 423 | 424 | Parameters 425 | ---------- 426 | k : int 427 | number of neighbors. Must be < n // 2 where n is the data size. 428 | som : array, shape = [n_units, dim] 429 | SOM code vectors. 430 | x : array, shape = [n_samples, dim] 431 | input samples. 432 | d : array, shape = [n_samples, n_units] 433 | (optional) euclidean distances between input samples and code vectors. 434 | 435 | Returns 436 | ------- 437 | tr : float in [0, 1] 438 | trustworthiness measure (higher is better) 439 | 440 | References 441 | ---------- 442 | Venna, J., & Kaski, S. (2001). Neighborhood preservation in nonlinear projection methods: An experimental study. 443 | """ 444 | n = x.shape[0] # data size 445 | assert k < (n / 2), 'Number of neighbors k must be < N/2 (where N is the number of data samples).' 446 | if d is None: 447 | d = euclidean_distances(x, som) 448 | d_data = euclidean_distances(x) + np.diag(np.inf * np.ones(n)) 449 | projections = som[np.argmin(d, axis=1)] 450 | d_projections = euclidean_distances(projections) + np.diag(np.inf * np.ones(n)) 451 | original_ranks = pd.DataFrame(d_data).rank(method='min', axis=1) 452 | projected_ranks = pd.DataFrame(d_projections).rank(method='min', axis=1) 453 | weights = (original_ranks <= k).sum(axis=1) / (projected_ranks <= k).sum(axis=1) # weight k-NN ties 454 | trs = np.zeros(n) 455 | for i in range(n): 456 | for j in range(n): 457 | if (i != j) and (original_ranks.iloc[i, j] > k) and (projected_ranks.iloc[i, j] <= k): 458 | trs[i] += (original_ranks.iloc[i, j] - k) * weights[i] 459 | return 1.0 - 2.0 / (n * k * (2*n - 3*k - 1)) * np.sum(trs) 460 | -------------------------------------------------------------------------------- /somperf/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | The `somperf.utils` module contains utility functions. 
3 | """ 4 | 5 | from .topology import rectangular_topology_dist 6 | from .topology import square_topology_dist 7 | 8 | from .neighborhood import gaussian_neighborhood 9 | from .neighborhood import window_neighborhood 10 | 11 | __all__ = [ 12 | 'rectangular_topology_dist', 13 | 'square_topology_dist', 14 | 'gaussian_neighborhood', 15 | 'window_neighborhood' 16 | ] 17 | -------------------------------------------------------------------------------- /somperf/utils/neighborhood.py: -------------------------------------------------------------------------------- 1 | """ 2 | Neighborhood functions 3 | """ 4 | 5 | import numpy as np 6 | 7 | 8 | def gaussian_neighborhood(radius=1.0): 9 | """Gaussian neighborhood kernel function. 10 | 11 | Parameters 12 | ---------- 13 | radius : float (default = 1.0) 14 | standard deviation of the gaussian kernel. 15 | 16 | Returns 17 | ------- 18 | neighborhood_fun : (d : int) => float in [0,1] 19 | neighborhood function. 20 | """ 21 | def neighborhood_fun(d): 22 | return np.exp(- (d**2) / (radius**2)) 23 | return neighborhood_fun 24 | 25 | 26 | def window_neighborhood(radius=1.0): 27 | """Window neighborhood kernel function. 28 | 29 | Parameters 30 | ---------- 31 | radius : float (default = 1.0) 32 | radius of the window. 33 | 34 | Returns 35 | ------- 36 | neighborhood_fun : (d : int) => float in [0,1] 37 | neighborhood function. 38 | """ 39 | def neighborhood_fun(d): 40 | return 1.0 if d <= radius else 0.0 41 | return neighborhood_fun 42 | -------------------------------------------------------------------------------- /somperf/utils/topology.py: -------------------------------------------------------------------------------- 1 | """ 2 | Topology functions 3 | """ 4 | 5 | 6 | def rectangular_topology_dist(map_size): 7 | """Rectangular topology distance function. 8 | 9 | Returns the distance function between two units on a rectangular map (Manhattan distance). 10 | 11 | Parameters 12 | ---------- 13 | map_size : tuple (height, width) 14 | SOM height and width. 15 | 16 | Returns 17 | ------- 18 | dist_fun : (k : int, l : int) => int 19 | distance function between units k and l on the map. 20 | """ 21 | def dist_fun(k, l): 22 | return abs(k // map_size[1] - l // map_size[1]) + abs(k % map_size[1] - l % map_size[1]) 23 | return dist_fun 24 | 25 | 26 | def square_topology_dist(map_size): 27 | """Square topology distance function. 28 | 29 | Returns the distance function between two units on a square map (Manhattan distance). 30 | 31 | Parameters 32 | ---------- 33 | map_size : int 34 | SOM height or width. 35 | 36 | Returns 37 | ------- 38 | dist_fun : function (k : int, l : int) => int 39 | distance function between units k and l on the map. 40 | """ 41 | def dist_fun(k, l): 42 | return abs(k // map_size - l // map_size) + abs(k % map_size - l % map_size) 43 | return dist_fun 44 | -------------------------------------------------------------------------------- /tests/rc/som-classes-csi-1.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
2 groups
-------------------------------------------------------------------------------- /tests/rc/som-classes-csi-2.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
3 groups
-------------------------------------------------------------------------------- /tests/rc/som-classes-csi-3.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 |
1 group
-------------------------------------------------------------------------------- /tests/rc/som-classes-superclusters.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /tests/rc/som-classes.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | --------------------------------------------------------------------------------