├── .coveragerc
├── .github
│   └── workflows
│       └── pythonapp.yml
├── .gitignore
├── .vscode
│   └── settings.json
├── LICENSE
├── README.md
├── docs
│   ├── index.html
│   ├── search.js
│   ├── spectralcluster.html
│   └── spectralcluster
│       ├── autotune.html
│       ├── configs.html
│       ├── constraint.html
│       ├── custom_distance_kmeans.html
│       ├── fallback_clusterer.html
│       ├── laplacian.html
│       ├── multi_stage_clusterer.html
│       ├── naive_clusterer.html
│       ├── refinement.html
│       ├── spectral_clusterer.html
│       └── utils.html
├── publish.sh
├── requirements.txt
├── resources
│   ├── multi-stage-clustering.png
│   ├── refinement.png
│   ├── social_preview_image.png
│   ├── turn-to-diarize.png
│   ├── youtube_screenshot_icassp2018.jpg
│   └── youtube_screenshot_icassp2022.png
├── run_pdoc.sh
├── run_tests.sh
├── setup.py
├── spectralcluster
│   ├── __init__.py
│   ├── autotune.py
│   ├── configs.py
│   ├── constraint.py
│   ├── custom_distance_kmeans.py
│   ├── fallback_clusterer.py
│   ├── laplacian.py
│   ├── multi_stage_clusterer.py
│   ├── naive_clusterer.py
│   ├── refinement.py
│   ├── spectral_clusterer.py
│   └── utils.py
└── tests
    ├── autotune_test.py
    ├── configs_test.py
    ├── constraint_test.py
    ├── custom_distance_kmeans_test.py
    ├── fallback_clusterer_test.py
    ├── laplacian_test.py
    ├── multi_stage_clusterer_test.py
    ├── naive_clusterer_test.py
    ├── refinement_test.py
    ├── spectral_clusterer_test.py
    └── utils_test.py

/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | source=spectralcluster
3 | 
--------------------------------------------------------------------------------
/.github/workflows/pythonapp.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a single version of Python
2 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
3 | 
4 | name: Python application
5 | 
6 | on:
7 |   push:
8 |     branches: [ master ]
9 |   pull_request:
10 |     branches: [ master ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v2
19 |     - name: Set up Python 3.8
20 |       uses: actions/setup-python@v1
21 |       with:
22 |         python-version: 3.8
23 |     - name: Install dependencies
24 |       run: |
25 |         python -m pip install --upgrade pip
26 |         pip install -r requirements.txt
27 |     - name: Lint with flake8
28 |       run: |
29 |         pip install flake8
30 |         flake8 --indent-size 2 --max-line-length 80 .
31 |     - name: pytype
32 |       run: |
33 |         pip install pytype
34 |         pytype .
35 |     - name: Run tests
36 |       run: |
37 |         pip install codecov
38 |         bash run_tests.sh
39 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | build/*
4 | dist/*
5 | spectralcluster.egg-info/*
6 | .coverage
7 | .DS_Store
8 | .pytype
9 | coverage.xml
10 | .venv
11 | 
--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |   "editor.tabSize": 2,
3 |   "editor.insertSpaces": true,
4 |   "editor.rulers": [
5 |     80
6 |   ],
7 |   "files.trimFinalNewlines": true,
8 |   "files.trimTrailingWhitespace": true,
9 |   "editor.formatOnSave": false,
10 |   "terminal.integrated.fontSize": 13,
11 |   "python.formatting.provider": "none",
12 |   "python.formatting.autopep8Args": [
13 |     "--indent-size=2",
14 |     "--max-line-length=80"
15 |   ],
16 |   "[python]": {
17 |     "editor.defaultFormatter": "ms-python.autopep8"
18 |   },
19 | }
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2019 Quan Wang 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Spectral Clustering
2 | [![Python application](https://github.com/wq2012/SpectralCluster/workflows/Python%20application/badge.svg)](https://github.com/wq2012/SpectralCluster/actions)
3 | [![PyPI Version](https://img.shields.io/pypi/v/spectralcluster.svg)](https://pypi.python.org/pypi/spectralcluster)
4 | [![Python Versions](https://img.shields.io/pypi/pyversions/spectralcluster.svg)](https://pypi.org/project/spectralcluster)
5 | [![Downloads](https://static.pepy.tech/badge/spectralcluster)](https://www.pepy.tech/projects/spectralcluster)
6 | [![codecov](https://codecov.io/gh/wq2012/SpectralCluster/branch/master/graph/badge.svg)](https://codecov.io/gh/wq2012/SpectralCluster)
7 | [![Documentation](https://img.shields.io/badge/api-documentation-blue.svg)](https://wq2012.github.io/SpectralCluster)
8 | 
9 | ## Overview
10 | 
11 | This is a Python re-implementation of the spectral clustering algorithms
12 | presented in these papers:
13 | 
14 | | Algorithm | Paper |
15 | | ------------------------------- | ----------- |
16 | | Refined Laplacian matrix | [Speaker Diarization with LSTM](https://google.github.io/speaker-id/publications/LstmDiarization/) |
17 | | Constrained spectral clustering | [Turn-to-Diarize: Online Speaker Diarization Constrained by Transformer Transducer Speaker Turn Detection](https://arxiv.org/abs/2109.11641) |
18 | | Multi-stage clustering | [Highly Efficient Real-Time Streaming and Fully On-Device Speaker Diarization with Multi-Stage Clustering](https://arxiv.org/abs/2210.13690) |
19 | 
20 | ![refinement](https://raw.githubusercontent.com/wq2012/SpectralCluster/master/resources/refinement.png)
21 | 
22 | ## Notice
23 | 
24 | We recently added new functionalities to this library to include
25 | algorithms in a [new paper](https://arxiv.org/abs/2109.11641). We updated the APIs as well.
26 | 
27 | If you depend on our old API, please use an **older version** of this library:
28 | ```bash
29 | pip3 install spectralcluster==0.1.0
30 | ```
31 | 
32 | ## Disclaimer
33 | 
34 | **This is not a Google product.**
35 | 
36 | **This is not the original C++ implementation used by the papers.**
37 | 
38 | Please consider this repo as a "demonstration" of the algorithms,
39 | instead of a "reproduction" of what we use at Google. Some features
40 | might be missing or incomplete.
41 | 
42 | ## Installation
43 | 
44 | Install the [package](https://pypi.org/project/spectralcluster/) by:
45 | 
46 | ```bash
47 | pip3 install spectralcluster
48 | ```
49 | 
50 | or
51 | 
52 | ```bash
53 | python3 -m pip install spectralcluster
54 | ```
55 | 
56 | ## Tutorial
57 | 
58 | Simply use the `predict()` method of class `SpectralClusterer` to perform
59 | spectral clustering. The example below should be closest to the original C++
60 | implementation used by our
61 | [ICASSP 2018 paper](https://google.github.io/speaker-id/publications/LstmDiarization/).
62 | 
63 | ```python
64 | from spectralcluster import configs
65 | 
66 | labels = configs.icassp2018_clusterer.predict(X)
67 | ```
68 | 
69 | The input `X` is a numpy array of shape `(n_samples, n_features)`,
70 | and the returned `labels` is a numpy array of shape `(n_samples,)`.
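
For example, here is a minimal end-to-end sketch with random toy data standing in for real speaker embeddings (the values are meaningless; only the array shapes matter):

```python
import numpy as np

from spectralcluster import configs

# Toy input: 10 samples of 4-dimensional "embeddings". Real inputs
# would be speaker embeddings such as d-vectors.
X = np.random.rand(10, 4)

labels = configs.icassp2018_clusterer.predict(X)
print(labels.shape)  # (10,)
```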
71 | 
72 | You can also create your own clusterer like this:
73 | 
74 | ```python
75 | from spectralcluster import SpectralClusterer
76 | 
77 | clusterer = SpectralClusterer(
78 |     min_clusters=2,
79 |     max_clusters=7,
80 |     autotune=None,
81 |     laplacian_type=None,
82 |     refinement_options=None,
83 |     custom_dist="cosine")
84 | 
85 | labels = clusterer.predict(X)
86 | ```
87 | 
88 | For the complete list of parameters of `SpectralClusterer`, see
89 | `spectralcluster/spectral_clusterer.py`.
90 | 
91 | [![youtube_screenshot_icassp2018](https://raw.githubusercontent.com/wq2012/SpectralCluster/master/resources/youtube_screenshot_icassp2018.jpg)](https://youtu.be/pjxGPZQeeO4)
92 | [![youtube_screenshot_icassp2022](https://raw.githubusercontent.com/wq2012/SpectralCluster/master/resources/youtube_screenshot_icassp2022.png)](https://youtu.be/U79Aw1ky7ag)
93 | 
94 | ## Advanced features
95 | 
96 | ### Refinement operations
97 | 
98 | In our [ICASSP 2018 paper](https://google.github.io/speaker-id/publications/LstmDiarization/), we apply a sequence of refinement operations on the affinity matrix, which is critical to the speaker diarization performance.
99 | 
100 | You can specify your refinement operations like this:
101 | 
102 | ```python
103 | from spectralcluster import RefinementOptions
104 | from spectralcluster import ThresholdType
105 | from spectralcluster import ICASSP2018_REFINEMENT_SEQUENCE
106 | 
107 | refinement_options = RefinementOptions(
108 |     gaussian_blur_sigma=1,
109 |     p_percentile=0.95,
110 |     thresholding_soft_multiplier=0.01,
111 |     thresholding_type=ThresholdType.RowMax,
112 |     refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE)
113 | ```
114 | 
115 | Then you can pass the `refinement_options` as an argument when initializing your
116 | `SpectralClusterer` object.
117 | 
118 | For the complete list of `RefinementOptions`, see
119 | `spectralcluster/refinement.py`.
120 | 
121 | ### Laplacian matrix
122 | 
123 | In our [ICASSP 2018 paper](https://google.github.io/speaker-id/publications/LstmDiarization/),
124 | we apply a refinement operation `CropDiagonal` on the affinity matrix, which replaces each diagonal element of the affinity matrix by the max non-diagonal value of the row. After this operation, the matrix has similar properties to a standard Laplacian matrix, and it is also less sensitive (thus more robust) to the Gaussian blur operation than a standard Laplacian matrix.
125 | 
126 | In the new version of this library, we now support different types of Laplacian matrices, including:
127 | 
128 | * None Laplacian (affinity matrix): `W`
129 | * Unnormalized Laplacian: `L = D - W`
130 | * Graph cut Laplacian: `L' = D^{-1/2} * L * D^{-1/2}`
131 | * Random walk Laplacian: `L' = D^{-1} * L`
132 | 
133 | You can specify the Laplacian matrix type with the `laplacian_type` argument of the `SpectralClusterer` class.
134 | 
135 | Note: Refinement operations are applied to the affinity matrix **before** computing the Laplacian matrix.
136 | 
137 | ### Distance for K-Means
138 | 
139 | In our [ICASSP 2018 paper](https://google.github.io/speaker-id/publications/LstmDiarization/),
140 | the K-Means is based on the cosine distance.
141 | 
142 | You can set `custom_dist="cosine"` when initializing your `SpectralClusterer` object.
143 | 
144 | You can also use other distances supported by [scipy.spatial.distance](https://docs.scipy.org/doc/scipy/reference/spatial.distance.html), such as `"euclidean"` or `"mahalanobis"`.
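
For instance, here is a sketch combining the Laplacian type and K-Means distance options described above (the parameter values are illustrative, not tuned recommendations):

```python
from spectralcluster import LaplacianType
from spectralcluster import SpectralClusterer

# A clusterer using the graph cut normalized Laplacian,
# with Euclidean distance for the K-Means step.
clusterer = SpectralClusterer(
    min_clusters=2,
    max_clusters=7,
    laplacian_type=LaplacianType.GraphCut,
    custom_dist="euclidean")

labels = clusterer.predict(X)
```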
145 | 
146 | ### Affinity matrix
147 | 
148 | In our [ICASSP 2018 paper](https://google.github.io/speaker-id/publications/LstmDiarization/),
149 | the affinity between two embeddings is defined as `(cos(x,y)+1)/2`.
150 | 
151 | You can also use other affinity functions by setting `affinity_function` when initializing your `SpectralClusterer` object.
152 | 
153 | ### Auto-tune
154 | 
155 | We also support auto-tuning the `p_percentile` parameter of the `RowWiseThreshold` refinement operation, which was originally proposed in [this paper](https://arxiv.org/abs/2003.02405).
156 | 
157 | You can enable this by passing in an `AutoTune` object to the `autotune` argument when initializing your `SpectralClusterer` object.
158 | 
159 | Example:
160 | 
161 | ```python
162 | from spectralcluster import AutoTune, AutoTuneProxy
163 | 
164 | autotune = AutoTune(
165 |     p_percentile_min=0.60,
166 |     p_percentile_max=0.95,
167 |     init_search_step=0.01,
168 |     search_level=3,
169 |     proxy=AutoTuneProxy.PercentileSqrtOverNME)
170 | ```
171 | 
172 | For the complete list of parameters of `AutoTune`, see
173 | `spectralcluster/autotune.py`.
174 | 
175 | ### Fallback clusterer
176 | 
177 | Spectral clustering exploits the global structure of the data. But there are
178 | cases where spectral clustering does not work as well as some other simpler
179 | clustering methods, such as when the number of embeddings is too small.
180 | 
181 | When initializing the `SpectralClusterer` object, you can pass in a `FallbackOptions` object to the `fallback_options` argument, to use a fallback clusterer under certain conditions.
182 | 
183 | Also, spectral clustering and eigen-gap may not work well at making single-vs-multi cluster decisions. When `min_clusters=1`, we can also specify `FallbackOptions.single_cluster_condition` and `FallbackOptions.single_cluster_affinity_threshold` to help determine single cluster cases by thresholding the affinity matrix.
184 | 
185 | For the complete list of parameters of `FallbackOptions`, see `spectralcluster/fallback_clusterer.py`.
186 | 
187 | ### Speed up the clustering
188 | 
189 | Spectral clustering can become slow when the number of input embeddings is large. This is due to the high cost of steps such as computing the Laplacian matrix and its eigen-decomposition. One trick to speed up spectral clustering when the input size is large is to use hierarchical clustering as a pre-clustering step.
190 | 
191 | To use this feature, you can specify the `max_spectral_size` argument when constructing the `SpectralClusterer` object. For example, if you set `max_spectral_size=200`, then the Laplacian matrix can be at most `200 * 200`.
192 | 
193 | But please note that setting `max_spectral_size` may degrade the final clustering quality, so please use this feature wisely.
194 | 
195 | ### Constrained spectral clustering
196 | 
197 | ![turn-to-diarize-diagram](https://raw.githubusercontent.com/wq2012/SpectralCluster/master/resources/turn-to-diarize.png)
198 | 
199 | In the [Turn-to-Diarize paper](https://arxiv.org/abs/2109.11641),
200 | the spectral clustering is constrained by speaker turns.
201 | We implemented two constrained spectral clustering methods:
202 | 
203 | * Affinity integration.
204 | * Constraint propagation (see papers [[1](https://link.springer.com/chapter/10.1007/978-3-642-15567-3_1)] and [[2](https://arxiv.org/abs/1109.4684)]).
205 | 
206 | If you pass in a `ConstraintOptions` object when initializing your `SpectralClusterer` object, you can call the `predict` function with a `constraint_matrix`.
207 | 
208 | Example usage:
209 | 
210 | ```python
211 | from spectralcluster import constraint, LaplacianType, SpectralClusterer
212 | 
213 | ConstraintName = constraint.ConstraintName
214 | 
215 | constraint_options = constraint.ConstraintOptions(
216 |     constraint_name=ConstraintName.ConstraintPropagation,
217 |     apply_before_refinement=True,
218 |     constraint_propagation_alpha=0.6)
219 | 
220 | # refinement_options is defined in the "Refinement operations" example above.
221 | clusterer = SpectralClusterer(
222 |     max_clusters=2,
223 |     refinement_options=refinement_options,
224 |     constraint_options=constraint_options,
225 |     laplacian_type=LaplacianType.GraphCut,
226 |     row_wise_renorm=True)
227 | 
228 | labels = clusterer.predict(matrix, constraint_matrix)
229 | ```
230 | 
231 | The constraint matrix can be constructed from a `speaker_turn_scores` list:
232 | 
233 | ```python
234 | from spectralcluster import constraint
235 | 
236 | constraint_matrix = constraint.ConstraintMatrix(
237 |     speaker_turn_scores, threshold=1).compute_diagonals()
238 | ```
239 | 
240 | ### Multi-stage clustering
241 | 
242 | ![multi-stage-clustering-diagram](https://raw.githubusercontent.com/wq2012/SpectralCluster/master/resources/multi-stage-clustering.png)
243 | 
244 | 
245 | In the [multi-stage clustering paper](https://arxiv.org/abs/2210.13690),
246 | we introduced a highly efficient **streaming** clustering approach. This is
247 | implemented as the `MultiStageClusterer` class in
248 | `spectralcluster/multi_stage_clusterer.py`.
249 | 
250 | > Note: We did NOT implement speaker turn detection in this open source library.
251 | We only implemented fallback, main, pre-clusterer and dynamic compression here.
252 | 
253 | The `MultiStageClusterer` class has a method named `streaming_predict`.
254 | In streaming clustering, every time we feed a **single** new embedding to the
255 | `streaming_predict` function, it will return the sequence of cluster labels
256 | for **all** inputs, including corrections for the predictions on previous
embeddings.
257 | 
258 | Example usage:
259 | 
260 | ```python
261 | from spectralcluster import Deflicker
262 | from spectralcluster import MultiStageClusterer
263 | from spectralcluster import SpectralClusterer
264 | 
265 | main_clusterer = SpectralClusterer()
266 | 
267 | multi_stage = MultiStageClusterer(
268 |     main_clusterer=main_clusterer,
269 |     fallback_threshold=0.5,
270 |     L=50,
271 |     U1=200,
272 |     U2=400,
273 |     deflicker=Deflicker.Hungarian)
274 | 
275 | for embedding in embeddings:
276 |   labels = multi_stage.streaming_predict(embedding)
277 | ```
278 | 
279 | ## Citations
280 | 
281 | Our papers are cited as:
282 | 
283 | ```
284 | @inproceedings{wang2018speaker,
285 |   title={{Speaker Diarization with LSTM}},
286 |   author={Wang, Quan and Downey, Carlton and Wan, Li and Mansfield, Philip Andrew and Moreno, Ignacio Lopez},
287 |   booktitle={2018 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
288 |   pages={5239--5243},
289 |   year={2018},
290 |   organization={IEEE}
291 | }
292 | 
293 | @inproceedings{xia2022turn,
294 |   title={{Turn-to-Diarize: Online Speaker Diarization Constrained by Transformer Transducer Speaker Turn Detection}},
295 |   author={Wei Xia and Han Lu and Quan Wang and Anshuman Tripathi and Yiling Huang and Ignacio Lopez Moreno and Hasim Sak},
296 |   booktitle={2022 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
297 |   pages={8077--8081},
298 |   year={2022},
299 |   organization={IEEE}
300 | }
301 | 
302 | @article{wang2022highly,
303 |   title={Highly Efficient Real-Time Streaming and Fully On-Device Speaker Diarization with Multi-Stage Clustering},
304 |   author={Quan Wang and Yiling Huang and Han Lu and Guanlong Zhao and Ignacio Lopez Moreno},
305 |   journal={arXiv:2210.13690},
306 |   year={2022}
307 | }
308 | ```
309 | 
310 | ## Star History
311 | 
312 | [![Star History Chart](https://api.star-history.com/svg?repos=wq2012/SpectralCluster&type=Date)](https://star-history.com/#wq2012/SpectralCluster&Date)
313 | 
314 | ## Misc
315 | 
316 | We also have fully supervised speaker diarization systems, powered by
317 | [uis-rnn](https://github.com/google/uis-rnn).
318 | Check this [Google AI Blog](https://ai.googleblog.com/2018/11/accurate-online-speaker-diarization.html).
319 | 
320 | Also check out our recent work on [DiarizationLM](https://arxiv.org/abs/2401.03506).
321 | 
322 | To learn more about speaker diarization, you can check out:
323 | * A curated list of resources:
324 |   [awesome-diarization](https://github.com/wq2012/awesome-diarization)
325 | * An online course on Udemy: [A Tutorial on Speaker Diarization](https://www.udemy.com/course/diarization/?referralCode=21D7CC0AEABB7FE3680F)
326 | 
--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
(pdoc-generated HTML redirect page; markup omitted.)
--------------------------------------------------------------------------------
/docs/spectralcluster/laplacian.html:
--------------------------------------------------------------------------------
(pdoc-generated API documentation page for spectralcluster.laplacian. The HTML markup, navigation, and duplicated per-symbol source listings are omitted; the documented module source recovered from this page is:)
1 | """Laplacian matrix."""
2 | 
3 | import enum
4 | import numpy as np
5 | 
6 | EPS = 1e-10
7 | 
8 | 
9 | class LaplacianType(enum.Enum):
10 |   """Different types of Laplacian matrix."""
11 |   # The affinity matrix, not a Laplacian: W
12 |   Affinity = enum.auto()
13 | 
14 |   # The unnormalized Laplacian: L = D - W
15 |   Unnormalized = enum.auto()
16 | 
17 |   # The random walk view normalized Laplacian: D^{-1} * L
18 |   RandomWalk = enum.auto()
19 | 
20 |   # The graph cut view normalized Laplacian: D^{-1/2} * L * D^{-1/2}
21 |   GraphCut = enum.auto()
22 | 
23 | 
24 | def compute_laplacian(affinity: np.ndarray,
25 |                       laplacian_type: LaplacianType = LaplacianType.GraphCut,
26 |                       eps: float = EPS) -> np.ndarray:
27 |   """Compute the Laplacian matrix.
28 | 
29 |   Args:
30 |     affinity: the affinity matrix of input data
31 |     laplacian_type: a LaplacianType
32 |     eps: a small value for numerical stability
33 | 
34 |   Returns:
35 |     the Laplacian matrix
36 | 
37 |   Raises:
38 |     TypeError: if laplacian_type is not a LaplacianType
39 |     ValueError: if laplacian_type is not supported
40 |   """
41 |   degree = np.diag(np.sum(affinity, axis=1))
42 |   laplacian = degree - affinity
43 |   if not isinstance(laplacian_type, LaplacianType):
44 |     raise TypeError("laplacian_type must be a LaplacianType")
45 |   elif laplacian_type == LaplacianType.Affinity:
46 |     return affinity
47 |   elif laplacian_type == LaplacianType.Unnormalized:
48 |     return laplacian
49 |   elif laplacian_type == LaplacianType.RandomWalk:
50 |     # Random walk normalized version
51 |     degree_norm = np.diag(1 / (np.diag(degree) + eps))
52 |     laplacian_norm = degree_norm.dot(laplacian)
53 |     return laplacian_norm
54 |   elif laplacian_type == LaplacianType.GraphCut:
55 |     # Graph cut normalized version
56 |     degree_norm = np.diag(1 / (np.sqrt(np.diag(degree)) + eps))
57 |     laplacian_norm = degree_norm.dot(laplacian).dot(degree_norm)
58 |     return laplacian_norm
59 |   else:
60 |     raise ValueError("Unsupported laplacian_type.")
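
A small usage sketch of the module above, added for illustration (the toy affinity matrix is made up):

```python
import numpy as np

from spectralcluster.laplacian import LaplacianType, compute_laplacian

# A toy symmetric affinity matrix for 3 samples.
affinity = np.array([[1.0, 0.8, 0.1],
                     [0.8, 1.0, 0.2],
                     [0.1, 0.2, 1.0]])

laplacian = compute_laplacian(affinity, LaplacianType.Unnormalized)
# Every row of the unnormalized Laplacian L = D - W sums to zero.
assert np.allclose(laplacian.sum(axis=1), 0.0)
```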
--------------------------------------------------------------------------------
/publish.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -o errexit
3 | 
4 | # This script requires these tools:
5 | # pip3 install --user --upgrade setuptools wheel
6 | # pip3 install --user --upgrade twine
7 | 
8 | # Get project path.
9 | PROJECT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
10 | 
11 | pushd ${PROJECT_PATH}
12 | 
13 | # clean up
14 | rm -rf build
15 | rm -rf dist
16 | rm -rf spectralcluster.egg-info
17 | 
18 | # build and upload
19 | python3 setup.py sdist bdist_wheel
20 | python3 -m twine upload dist/* --verbose
21 | 
22 | popd
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | scikit-learn
4 | 
--------------------------------------------------------------------------------
/resources/multi-stage-clustering.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wq2012/SpectralCluster/f00d5d7ee6aa3e61b18922597010595c32ca0dea/resources/multi-stage-clustering.png
--------------------------------------------------------------------------------
/resources/refinement.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wq2012/SpectralCluster/f00d5d7ee6aa3e61b18922597010595c32ca0dea/resources/refinement.png
--------------------------------------------------------------------------------
/resources/social_preview_image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wq2012/SpectralCluster/f00d5d7ee6aa3e61b18922597010595c32ca0dea/resources/social_preview_image.png
--------------------------------------------------------------------------------
/resources/turn-to-diarize.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wq2012/SpectralCluster/f00d5d7ee6aa3e61b18922597010595c32ca0dea/resources/turn-to-diarize.png
--------------------------------------------------------------------------------
/resources/youtube_screenshot_icassp2018.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wq2012/SpectralCluster/f00d5d7ee6aa3e61b18922597010595c32ca0dea/resources/youtube_screenshot_icassp2018.jpg
--------------------------------------------------------------------------------
/resources/youtube_screenshot_icassp2022.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wq2012/SpectralCluster/f00d5d7ee6aa3e61b18922597010595c32ca0dea/resources/youtube_screenshot_icassp2022.png
--------------------------------------------------------------------------------
/run_pdoc.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -o errexit
3 | 
4 | # Get project path.
5 | PROJECT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 6 | 7 | pushd ${PROJECT_PATH} 8 | 9 | rm -rf docs 10 | 11 | # This script requires pdoc: 12 | # pip3 install pdoc 13 | pdoc spectralcluster -o docs 14 | 15 | popd 16 | -------------------------------------------------------------------------------- /run_tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit 3 | 4 | # Get project path. 5 | PROJECT_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 6 | 7 | # Add project modules to PYTHONPATH. 8 | if [[ "${PYTHONPATH}" != *"${PROJECT_PATH}"* ]]; then 9 | export PYTHONPATH="${PYTHONPATH}:${PROJECT_PATH}" 10 | fi 11 | 12 | pushd ${PROJECT_PATH} 13 | 14 | rm -f .coverage 15 | 16 | # Run tests. 17 | for TEST_FILE in $(find tests -name "*_test.py"); do 18 | echo "Running tests in ${TEST_FILE}" 19 | python3 -m coverage run -a ${TEST_FILE} 20 | done 21 | echo "All tests passed!" 22 | 23 | popd 24 | 25 | python3 -m codecov 26 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """Setup script for the package.""" 2 | 3 | import setuptools 4 | 5 | VERSION = "0.2.22" 6 | 7 | with open("README.md", "r") as file_object: 8 | LONG_DESCRIPTION = file_object.read() 9 | 10 | with open("requirements.txt") as file_object: 11 | INSTALL_REQUIRES = file_object.read().splitlines() 12 | 13 | setuptools.setup( 14 | name="spectralcluster", 15 | version=VERSION, 16 | author="Quan Wang", 17 | author_email="quanw@google.com", 18 | description="Spectral Clustering", 19 | long_description=LONG_DESCRIPTION, 20 | long_description_content_type="text/markdown", 21 | url="https://github.com/wq2012/SpectralCluster", 22 | packages=setuptools.find_packages(), 23 | classifiers=[ 24 | "Programming Language :: Python :: 3", 25 | "License :: OSI Approved :: Apache Software License", 26 | "Operating System :: OS Independent", 27 | ], 28 | install_requires=INSTALL_REQUIRES, 29 | ) 30 | -------------------------------------------------------------------------------- /spectralcluster/__init__.py: -------------------------------------------------------------------------------- 1 | """__init__ file.""" 2 | 3 | from . import autotune 4 | from . import configs 5 | from . import constraint 6 | from . import fallback_clusterer 7 | from . import laplacian 8 | from . import multi_stage_clusterer 9 | from . import naive_clusterer 10 | from . import refinement 11 | from . import spectral_clusterer 12 | from . 
import utils 13 | 14 | AutoTune = autotune.AutoTune 15 | AutoTuneProxy = autotune.AutoTuneProxy 16 | 17 | ConstraintOptions = constraint.ConstraintOptions 18 | ConstraintName = constraint.ConstraintName 19 | ConstraintMatrix = constraint.ConstraintMatrix 20 | IntegrationType = constraint.IntegrationType 21 | 22 | FallbackOptions = fallback_clusterer.FallbackOptions 23 | SingleClusterCondition = fallback_clusterer.SingleClusterCondition 24 | FallbackClustererType = fallback_clusterer.FallbackClustererType 25 | 26 | LaplacianType = laplacian.LaplacianType 27 | 28 | Deflicker = multi_stage_clusterer.Deflicker 29 | MultiStageClusterer = multi_stage_clusterer.MultiStageClusterer 30 | 31 | NaiveClusterer = naive_clusterer.NaiveClusterer 32 | 33 | RefinementName = refinement.RefinementName 34 | RefinementOptions = refinement.RefinementOptions 35 | ThresholdType = refinement.ThresholdType 36 | SymmetrizeType = refinement.SymmetrizeType 37 | 38 | SpectralClusterer = spectral_clusterer.SpectralClusterer 39 | 40 | EigenGapType = utils.EigenGapType 41 | 42 | ICASSP2018_REFINEMENT_SEQUENCE = configs.ICASSP2018_REFINEMENT_SEQUENCE 43 | TURNTODIARIZE_REFINEMENT_SEQUENCE = configs.TURNTODIARIZE_REFINEMENT_SEQUENCE 44 | -------------------------------------------------------------------------------- /spectralcluster/autotune.py: -------------------------------------------------------------------------------- 1 | """Auto-tuning hyper-parameters.""" 2 | 3 | import enum 4 | import numpy as np 5 | import typing 6 | 7 | MIN_SEARCH_STEP = 1e-04 8 | 9 | 10 | class AutoTuneProxy(enum.Enum): 11 | """What proxy to use as the auto-tuning target.""" 12 | 13 | # The original proxy used in: 14 | # Park, Tae Jin, et al. "Auto-tuning spectral clustering for speaker 15 | # diarization using normalized maximum eigengap." IEEE Signal Processing 16 | # Letter 2019. 17 | PercentileOverNME = enum.auto() 18 | 19 | # The modified proxy used in: 20 | # Xia, Wei, et al. "Turn-to-diarize: Online speaker diarization constrained 21 | # by transformer transducer speaker turn detection." ICASSP 2022. 22 | # https://arxiv.org/abs/2109.11641 23 | PercentileSqrtOverNME = enum.auto() 24 | 25 | 26 | class AutoTune: 27 | """AutoTune Class. 28 | 29 | This auto-tuning method is implemented based on this paper: 30 | Park, Tae Jin, et al. "Auto-tuning spectral clustering for speaker 31 | diarization using normalized maximum eigengap." IEEE Signal Processing Letter 32 | 2019. 33 | """ 34 | 35 | def __init__(self, 36 | p_percentile_min: float = 0.60, 37 | p_percentile_max: float = 0.95, 38 | init_search_step: float = 0.01, 39 | search_level: int = 1, 40 | proxy: AutoTuneProxy = AutoTuneProxy.PercentileSqrtOverNME): 41 | """Initialization of the autotune arguments. 
42 | 43 | Args: 44 | p_percentile_min: minimum value of p_percentile 45 | p_percentile_max: maximum value of p_percentile 46 | init_search_step: initial search step size for auto-tuning 47 | search_level: hierarchical search level for auto-tuning 48 | proxy: which proxy to minimize for auto-tuning 49 | """ 50 | self.p_percentile_min = p_percentile_min 51 | self.p_percentile_max = p_percentile_max 52 | self.search_step = init_search_step 53 | self.search_level = search_level 54 | if not isinstance(proxy, AutoTuneProxy): 55 | raise TypeError("proxy must be an instance of AutoTuneProxy") 56 | self.proxy = proxy 57 | 58 | def get_percentile_range(self) -> typing.Sequence[float]: 59 | """Get the current percentile search range.""" 60 | num_steps = int( 61 | np.ceil( 62 | (self.p_percentile_max - self.p_percentile_min) / self.search_step)) 63 | return list( 64 | np.linspace(self.p_percentile_min, self.p_percentile_max, num_steps)) 65 | 66 | def update_percentile_range(self, 67 | p_percentile_min: float, 68 | p_percentile_max: float, 69 | search_step: float) -> typing.Sequence[float]: 70 | """Update the percentile search range.""" 71 | self.p_percentile_min = p_percentile_min 72 | self.p_percentile_max = p_percentile_max 73 | self.search_step = search_step 74 | return self.get_percentile_range() 75 | 76 | def tune(self, p_percentile_to_ratio: typing.Callable) -> ( 77 | typing.Tuple[np.ndarray, int, float]): 78 | """Tune the hyper-parameter p_percentile. 79 | 80 | Use a proxy ratio of DER to tune the hyper-parameter p_percentile. It also 81 | performs some side work to do affinity refinement, eigen decomposition, and 82 | estimate the number of clusters. 83 | 84 | Args: 85 | p_percentile_to_ratio: a callable to compute the `ratio` given a 86 | `p_percentile` value 87 | 88 | Returns: 89 | eigenvectors: sorted eigenvectors. numpy array of shape 90 | (n_samples, n_samples) 91 | n_clusters: number of clusters as an integer 92 | best_p_percentile: p_percentile value that minimizes the ratio 93 | """ 94 | p_percentile_range = self.get_percentile_range() 95 | searched = dict() 96 | for _ in range(self.search_level): 97 | min_ratio = np.inf 98 | for index, p_percentile in enumerate(p_percentile_range): 99 | if p_percentile in searched: 100 | continue 101 | # ratio is a proxy value of DER. We minimize this ratio 102 | # to find a good p_percentile 103 | ratio, eigenvectors_p, n_clusters_p = p_percentile_to_ratio( 104 | p_percentile) 105 | searched[p_percentile] = ratio 106 | if ratio < min_ratio: 107 | min_ratio = ratio 108 | eigenvectors = eigenvectors_p 109 | n_clusters = n_clusters_p 110 | best_p_percentile = p_percentile 111 | best_p_percentile_index = index 112 | # If the search range is not valid or search step is too small, we stop 113 | if not p_percentile_range or len( 114 | p_percentile_range) == 1 or self.search_step < MIN_SEARCH_STEP: 115 | break 116 | # Update the search range of p_percentile. 117 | # We search again from `start_index` position to `end_index` position 118 | # which is `local_search_dist` away from the found 119 | # `best_p_percentile_index` position. 
`search_step` is reduced to half of 120 | # the original size 121 | local_search_dist = max(2, len(p_percentile_range) // 8) 122 | start_index = max(0, best_p_percentile_index - local_search_dist) 123 | end_index = min( 124 | len(p_percentile_range) - 1, 125 | best_p_percentile_index + local_search_dist) 126 | p_percentile_min = p_percentile_range[start_index] 127 | p_percentile_max = p_percentile_range[end_index] 128 | self.search_step = self.search_step / 2 129 | p_percentile_range = self.update_percentile_range(p_percentile_min, 130 | p_percentile_max, 131 | self.search_step) 132 | return eigenvectors, n_clusters, best_p_percentile 133 | -------------------------------------------------------------------------------- /spectralcluster/configs.py: -------------------------------------------------------------------------------- 1 | """Example configurations.""" 2 | 3 | from spectralcluster import autotune 4 | from spectralcluster import constraint 5 | from spectralcluster import laplacian 6 | from spectralcluster import refinement 7 | from spectralcluster import spectral_clusterer 8 | 9 | AutoTune = autotune.AutoTune 10 | ConstraintName = constraint.ConstraintName 11 | ConstraintOptions = constraint.ConstraintOptions 12 | RefinementName = refinement.RefinementName 13 | RefinementOptions = refinement.RefinementOptions 14 | ThresholdType = refinement.ThresholdType 15 | SymmetrizeType = refinement.SymmetrizeType 16 | LaplacianType = laplacian.LaplacianType 17 | SpectralClusterer = spectral_clusterer.SpectralClusterer 18 | 19 | # Configurations that are closest to the ICASSP2018 paper 20 | # "Speaker Diarization with LSTM". 21 | ICASSP2018_REFINEMENT_SEQUENCE = [ 22 | RefinementName.CropDiagonal, 23 | RefinementName.GaussianBlur, 24 | RefinementName.RowWiseThreshold, 25 | RefinementName.Symmetrize, 26 | RefinementName.Diffuse, 27 | RefinementName.RowWiseNormalize, 28 | ] 29 | 30 | icassp2018_refinement_options = RefinementOptions( 31 | gaussian_blur_sigma=1, 32 | p_percentile=0.95, 33 | thresholding_soft_multiplier=0.01, 34 | thresholding_type=ThresholdType.RowMax, 35 | refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) 36 | 37 | icassp2018_clusterer = SpectralClusterer( 38 | min_clusters=2, 39 | max_clusters=7, 40 | autotune=None, 41 | laplacian_type=None, 42 | refinement_options=icassp2018_refinement_options, 43 | custom_dist="cosine") 44 | 45 | # Configurations of Turn-to-Diarize system using the 46 | # Turn + Constraint Propagation + AutoTune method described in the paper 47 | # "Turn-to-Diarize: Online Speaker Diarization Constrained by 48 | # Transformer Transducer Speaker Turn Detection". 
49 | TURNTODIARIZE_REFINEMENT_SEQUENCE = [ 50 | RefinementName.RowWiseThreshold, RefinementName.Symmetrize 51 | ] 52 | 53 | turntodiarize_refinement_options = RefinementOptions( 54 | thresholding_soft_multiplier=0.01, 55 | thresholding_type=ThresholdType.Percentile, 56 | thresholding_with_binarization=True, 57 | thresholding_preserve_diagonal=True, 58 | symmetrize_type=SymmetrizeType.Average, 59 | refinement_sequence=TURNTODIARIZE_REFINEMENT_SEQUENCE) 60 | 61 | turntodiarize_constraint_options = ConstraintOptions( 62 | constraint_name=ConstraintName.ConstraintPropagation, 63 | apply_before_refinement=True, 64 | constraint_propagation_alpha=0.4) 65 | 66 | turntodiarize_auto_tune = AutoTune( 67 | p_percentile_min=0.40, 68 | p_percentile_max=0.95, 69 | init_search_step=0.05, 70 | search_level=1) 71 | 72 | turntodiarize_clusterer = SpectralClusterer( 73 | min_clusters=2, 74 | max_clusters=7, 75 | refinement_options=turntodiarize_refinement_options, 76 | constraint_options=turntodiarize_constraint_options, 77 | autotune=turntodiarize_auto_tune, 78 | laplacian_type=LaplacianType.GraphCut, 79 | row_wise_renorm=True, 80 | custom_dist="cosine") 81 | -------------------------------------------------------------------------------- /spectralcluster/constraint.py: -------------------------------------------------------------------------------- 1 | """Constraint information.""" 2 | import abc 3 | from dataclasses import dataclass 4 | import enum 5 | import numpy as np 6 | import typing 7 | 8 | EPS = 1e-10 9 | 10 | 11 | class ConstraintName(enum.Enum): 12 | """The names of constrained operations.""" 13 | # The Affinity Integration method 14 | AffinityIntegration = enum.auto() 15 | 16 | # The Constraint Propagation method 17 | ConstraintPropagation = enum.auto() 18 | 19 | 20 | class IntegrationType(enum.Enum): 21 | """The integration types for the Affinity Integration method.""" 22 | Max = enum.auto() 23 | Average = enum.auto() 24 | 25 | 26 | @dataclass 27 | class ConstraintOptions: 28 | """Constraint options for constrained clustering methods.""" 29 | 30 | # Name of the constrained clustering method. 31 | constraint_name: ConstraintName 32 | 33 | # If True, this operation is applied before the affinity refinement. 34 | # It is suggested to set as True for the ConstraintPropagation method 35 | # and False for the AffinityIntegration method. 36 | apply_before_refinement: bool 37 | 38 | # Integration type for the Affinity Integration method. 39 | integration_type: typing.Optional[IntegrationType] = None 40 | 41 | # alpha value of the constraint propagation method. 42 | constraint_propagation_alpha: float = 0.6 43 | 44 | def __post_init__(self): 45 | if self.constraint_name == ConstraintName.AffinityIntegration: 46 | self.constraint_operator = AffinityIntegration(self.integration_type) 47 | elif self.constraint_name == ConstraintName.ConstraintPropagation: 48 | self.constraint_operator = ConstraintPropagation( 49 | self.constraint_propagation_alpha) 50 | 51 | 52 | class ConstraintOperation(metaclass=abc.ABCMeta): 53 | """Constraint operation class.""" 54 | 55 | def check_input(self, affinity: np.ndarray, constraint_matrix: np.ndarray): 56 | """Check the input to the adjust_affinity method. 57 | 58 | Args: 59 | affinity: the input affinity matrix. 60 | constraint_matrix: numpy array of shape (n_samples, n_samples). The 61 | constraint matrix with prior information 62 | 63 | Raises: 64 | ValueError: if affinity or constraint matrix has wrong shape, etc. 
65 | """ 66 | if len(affinity.shape) != 2: 67 | raise ValueError("affinity must be 2-dimensional") 68 | if affinity.shape[0] != affinity.shape[1]: 69 | raise ValueError("affinity must be a square matrix") 70 | if len(constraint_matrix.shape) != 2: 71 | raise ValueError("constraint matrix must be 2-dimensional") 72 | if constraint_matrix.shape[0] != constraint_matrix.shape[1]: 73 | raise ValueError("constraint matrix must be a square matrix") 74 | if affinity.shape != constraint_matrix.shape: 75 | raise ValueError( 76 | "affinity and constraint matrix must have the same shape") 77 | 78 | @abc.abstractmethod 79 | def adjust_affinity(self, 80 | affinity: np.ndarray, 81 | constraint_matrix: np.ndarray): 82 | """An abstract method to perform the constraint operation. 83 | 84 | Args: 85 | affinity: the affinity matrix, of size (n_samples, n_samples) 86 | constraint_matrix: numpy array of shape (n_samples, n_samples). The 87 | constraint matrix with prior information 88 | 89 | Returns: 90 | a matrix of the same size as affinity 91 | """ 92 | pass 93 | 94 | 95 | class AffinityIntegration(ConstraintOperation): 96 | """The Affinity Integration method. 97 | 98 | Basic operations to integrate the affinity matrix with given pairwise 99 | constraints in the constraint matrix. Current integration types include `Max` 100 | and `Average`. 101 | """ 102 | 103 | def __init__(self, integration_type: IntegrationType = IntegrationType.Max): 104 | self.integration_type = integration_type 105 | 106 | def adjust_affinity(self, 107 | affinity: np.ndarray, 108 | constraint_matrix: np.ndarray) -> np.ndarray: 109 | """Adjust the affinity matrix with constraints.""" 110 | self.check_input(affinity, constraint_matrix) 111 | if self.integration_type == IntegrationType.Max: 112 | return np.maximum(affinity, constraint_matrix) 113 | elif self.integration_type == IntegrationType.Average: 114 | return 0.5 * (affinity + constraint_matrix) 115 | else: 116 | raise ValueError("Unsupported integration type: {}".format( 117 | self.integration_type)) 118 | 119 | 120 | class ConstraintPropagation(ConstraintOperation): 121 | """The Constraint Propagation method. 122 | 123 | The pairwise constraints are firstly propagated throughout the whole graph by 124 | two independent horizontal and vertical propagations. The final propagated 125 | constraint matrix is applied to adjust the affinity matrix. 126 | 127 | Reference: 128 | [1] Lu, Zhiwu, and IP, Horace HS. "Constrained spectral clustering via 129 | exhaustive and efficient constraint propagation." ECCV 2010 130 | [2] Lu, Zhiwu, and Peng, Yuxin. "Exhaustive and efficient constraint 131 | propagation: A graph-based learning approach and its applications." IJCV 2013 132 | """ 133 | 134 | def __init__(self, alpha: float = 0.6): 135 | self.alpha = alpha 136 | 137 | def adjust_affinity(self, 138 | affinity: np.ndarray, 139 | constraint_matrix: np.ndarray) -> np.ndarray: 140 | """Adjust the affinity matrix with constraints.""" 141 | self.check_input(affinity, constraint_matrix) 142 | adjusted_affinity = np.copy(affinity) 143 | degree = np.diag(np.sum(affinity, axis=1)) 144 | degree_norm = np.diag(1 / (np.sqrt(np.diag(degree)) + EPS)) 145 | # Compute affinity_norm as D^(-1/2)AD^(-1/2) 146 | affinity_norm = degree_norm.dot(affinity).dot(degree_norm) 147 | # The closed form of the final converged constraint matrix is: 148 | # (1-alpha)^2 * (I-alpha*affinity_norm)^(-1) * constraint_matrix * 149 | # (I-alpha*affinity_norm)^(-1). 
We save (I-alpha*affinity_norm)^(-1) as a 150 | # `temp_value` for readability 151 | temp_value = np.linalg.inv( 152 | np.eye(affinity.shape[0]) - self.alpha * affinity_norm) 153 | final_constraint_matrix = ( 154 | 1 - self.alpha)**2 * temp_value.dot(constraint_matrix).dot(temp_value) 155 | # `is_positive` is a mask matrix where values of the final_constraint_matrix 156 | # are positive. The affinity matrix is adjusted by the final constraint 157 | # matrix using equation (4) in reference paper [1] 158 | is_positive = final_constraint_matrix > 0 159 | affinity1 = 1 - (1 - final_constraint_matrix * is_positive) * ( 160 | 1 - affinity * is_positive) 161 | affinity2 = (1 + final_constraint_matrix * np.invert(is_positive)) * ( 162 | affinity * np.invert(is_positive)) 163 | adjusted_affinity = affinity1 + affinity2 164 | return adjusted_affinity 165 | 166 | 167 | class ConstraintMatrix: 168 | """Constraint Matrix class.""" 169 | 170 | def __init__(self, 171 | speaker_turn_scores: typing.Sequence[float], 172 | threshold: float = 1): 173 | """Initialization of the constraint matrix arguments. 174 | 175 | Args: 176 | speaker_turn_scores: A list of speaker turn confidence scores. All score 177 | values are larger than or equal to 0. If a score is 0, there is no speaker 178 | turn. speaker_turn_scores[i+1] means the speaker turn confidence score 179 | between turn i+1 and turn i. The first score speaker_turn_scores[0] is 180 | not used. 181 | threshold: A threshold value for the speaker turn confidence score. 182 | """ 183 | if any(score < 0 for score in speaker_turn_scores): 184 | raise ValueError("Speaker turn score must be larger than or equal to 0.") 185 | self.speaker_turn_scores = speaker_turn_scores 186 | self.threshold = threshold 187 | 188 | def compute_diagonals(self) -> np.ndarray: 189 | """Compute the diagonal constraint matrix.""" 190 | num_turns = len(self.speaker_turn_scores) 191 | constraint_matrix = np.zeros((num_turns, num_turns)) 192 | for i in range(num_turns - 1): 193 | speaker_turn_score = self.speaker_turn_scores[i + 1] 194 | if speaker_turn_score != 0: 195 | if speaker_turn_score > self.threshold: 196 | constraint_matrix[i, i + 1] = -1 197 | constraint_matrix[i + 1, i] = -1 198 | else: 199 | constraint_matrix[i, i + 1] = 1 200 | constraint_matrix[i + 1, i] = 1 201 | return constraint_matrix 202 | -------------------------------------------------------------------------------- /spectralcluster/custom_distance_kmeans.py: -------------------------------------------------------------------------------- 1 | """Implement custom k-means. 2 | 3 | It supports any distance measure defined in scipy.spatial.distance. 4 | """ 5 | 6 | from dataclasses import dataclass 7 | import numpy as np 8 | from scipy.spatial import distance 9 | from sklearn.cluster import KMeans 10 | import typing 11 | 12 | 13 | def run_kmeans(spectral_embeddings: np.ndarray, 14 | n_clusters: int, 15 | custom_dist: typing.Union[str, typing.Callable], 16 | max_iter: int) -> np.ndarray: 17 | """Run CustomKMeans with custom distance measure support. 18 | 19 | Perform custom k-means clustering with any distance measure defined 20 | in scipy.spatial.distance. 21 | 22 | Args: 23 | spectral_embeddings: input spectral embedding observations 24 | n_clusters: the number of clusters to form 25 | custom_dist: str or callable. custom distance measure for k-means.
if a 26 | string, "cosine", "euclidean", "mahalanobis", or any other distance 27 | functions defined in scipy.spatial.distance can be used 28 | max_iter: the maximum number of iterations for the custom k-means 29 | 30 | Returns: 31 | labels: predicted clustering labels of all samples 32 | """ 33 | if not custom_dist: # Scikit-learn KMeans 34 | kmeans_clusterer = KMeans( 35 | n_clusters=n_clusters, init="k-means++", max_iter=300, random_state=0, 36 | n_init="auto") 37 | else: 38 | # Initialization using the k-means++ method in Scikit-learn 39 | kmeans_clusterer = KMeans( 40 | n_clusters=n_clusters, init="k-means++", max_iter=1, random_state=0, 41 | n_init="auto") 42 | kmeans_clusterer.fit(spectral_embeddings) 43 | centroids = kmeans_clusterer.cluster_centers_ 44 | # Run the custom K-means 45 | kmeans_clusterer = CustomKMeans( 46 | n_clusters=n_clusters, 47 | centroids=centroids, 48 | max_iter=max_iter, 49 | custom_dist=custom_dist) 50 | 51 | labels = kmeans_clusterer.predict(spectral_embeddings) 52 | return labels 53 | 54 | 55 | @dataclass 56 | class CustomKMeans: 57 | """Class CustomKMeans performs KMeans clustering.""" 58 | 59 | # The number of clusters to form. 60 | n_clusters: typing.Optional[int] = None 61 | 62 | # The cluster centroids. If given, they are used as the initial centroids. 63 | # If not, initial centroids are randomly chosen from the input samples. 64 | centroids: typing.Optional[np.ndarray] = None 65 | 66 | # Maximum number of iterations of the k-means algorithm to run. 67 | max_iter: int = 10 68 | 69 | # The relative increment in the results before declaring convergence. 70 | tol: float = 0.001 71 | 72 | # Custom distance measure to use. If a string, "cosine", "euclidean", 73 | # "mahalanobis", or any other distance functions 74 | # defined in scipy.spatial.distance can be used. 75 | custom_dist: typing.Union[str, typing.Callable] = "cosine" 76 | 77 | def _init_centroids(self, embeddings: np.ndarray): 78 | """Compute the initial centroids.""" 79 | 80 | n_samples = embeddings.shape[0] 81 | idx = np.random.choice( 82 | np.arange(n_samples), size=self.n_clusters, replace=False) 83 | self.centroids = embeddings[idx, :] 84 | 85 | def predict(self, embeddings: np.ndarray) -> np.ndarray: 86 | """Performs the clustering. 87 | 88 | Args: 89 | embeddings: the input observations to cluster 90 | 91 | Returns: 92 | labels: predicted clustering labels of all samples 93 | 94 | Raises: 95 | ValueError: if input observations have wrong shape 96 | """ 97 | n_samples, n_features = embeddings.shape 98 | if self.max_iter <= 0: 99 | raise ValueError("Number of iterations should be a positive number," 100 | " got %d instead" % self.max_iter) 101 | if n_samples < self.n_clusters: 102 | raise ValueError("n_samples=%d should be >= n_clusters=%d" % 103 | (n_samples, self.n_clusters)) 104 | if self.centroids is None: 105 | self._init_centroids(embeddings) 106 | else: 107 | n_centroids, c_n_features = self.centroids.shape 108 | if n_centroids != self.n_clusters: 109 | raise ValueError("The shape of the initial centroids (%s) " 110 | "does not match the number of clusters %d" % 111 | (str(self.centroids.shape), self.n_clusters)) 112 | if n_features != c_n_features: 113 | raise ValueError( 114 | "The number of features of the initial centroids %d " 115 | "does not match the number of features of the data %d."
% 116 | (c_n_features, n_features)) 117 | 118 | sample_ids = np.arange(n_samples) 119 | prev_mean_dist = 0 120 | for iter_idx in range(self.max_iter + 1): 121 | # Compute distances to all centroids and assign each sample a label 122 | # corresponding to the nearest centroid 123 | dist_to_all_centroids = distance.cdist( 124 | embeddings, self.centroids, metric=self.custom_dist) 125 | labels = dist_to_all_centroids.argmin(axis=1) 126 | distances = dist_to_all_centroids[sample_ids, labels] 127 | mean_distance = np.mean(distances) 128 | # If the difference between current mean_distance and previous 129 | # mean_distance is very small or the max iteration number is reached, 130 | # the clustering stops 131 | if mean_distance <= prev_mean_dist and mean_distance >= ( 132 | 1 - self.tol) * prev_mean_dist or iter_idx == self.max_iter: 133 | break 134 | prev_mean_dist = mean_distance 135 | # Update centroids. Iterate over self.n_clusters, since n_centroids is 136 | for each_centroid_idx in range(self.n_clusters): # only set when given. 137 | each_centroid_samples = np.where(labels == each_centroid_idx)[0] 138 | if each_centroid_samples.size: # .any() would fail for index 0 alone. 139 | self.centroids[each_centroid_idx] = np.mean( 140 | embeddings[each_centroid_samples], axis=0) 141 | return labels 142 | -------------------------------------------------------------------------------- /spectralcluster/fallback_clusterer.py: -------------------------------------------------------------------------------- 1 | """In some cases, we use a fallback clusterer instead of spectral. 2 | 3 | Spectral clustering exploits the global structure of the data. But there are 4 | cases where spectral clustering does not work as well as some other simpler 5 | clustering methods, such as when the number of embeddings is too small. 6 | 7 | See this paper for more details: 8 | Quan Wang, Yiling Huang, Han Lu, Guanlong Zhao, Ignacio Lopez Moreno, 9 | "Highly Efficient Real-Time Streaming and Fully On-Device Speaker Diarization 10 | with Multi-Stage Clustering", arXiv:2210.13690. 11 | https://arxiv.org/abs/2210.13690 12 | """ 13 | 14 | from dataclasses import dataclass 15 | import enum 16 | import numpy as np 17 | from sklearn.cluster import AgglomerativeClustering 18 | from sklearn.mixture import GaussianMixture 19 | from spectralcluster import naive_clusterer 20 | import typing 21 | 22 | 23 | class SingleClusterCondition(enum.Enum): 24 | """Which condition do we use for deciding single-vs-multi cluster(s).""" 25 | 26 | # Fit affinity values with GMM with 1-vs-2 component(s), and use 27 | # Bayesian Information Criterion (BIC) to decide whether there are 28 | # at least two clusters. 29 | # Note that this approach does not require additional parameters. 30 | AffinityGmmBic = enum.auto() 31 | 32 | # If all affinities are larger than threshold, there is only a single cluster. 33 | AllAffinity = enum.auto() 34 | 35 | # If all neighboring affinities are larger than threshold, there is only 36 | # a single cluster. 37 | NeighborAffinity = enum.auto() 38 | 39 | # If the standard deviation of all affinities is smaller than threshold, 40 | # there is only a single cluster. 41 | AffinityStd = enum.auto() 42 | 43 | # Use fallback clusterer to make the decision. If fallback clusterer 44 | # finds multiple clusters, continue with spectral clusterer. 45 | FallbackClusterer = enum.auto() 46 | 47 | 48 | class FallbackClustererType(enum.Enum): 49 | """Which fallback clusterer to use.""" 50 | 51 | # AgglomerativeClustering from scikit-learn.
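# (Bottom-up hierarchical clustering: every embedding starts as its own
# cluster, and the closest pair of clusters is repeatedly merged until the
# distance threshold is reached.)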
52 | Agglomerative = enum.auto() 53 | 54 | # Naive clustering, as described in the paper "Speaker diarization with LSTM". 55 | Naive = enum.auto() 56 | 57 | 58 | @dataclass 59 | class FallbackOptions: 60 | """Options for the fallback clusterer.""" 61 | 62 | # We only run spectral clusterer if we have at least these many embeddings; 63 | # otherwise we run fallback clusterer. 64 | spectral_min_embeddings: int = 1 65 | 66 | # How do we decide single-vs-multi cluster(s). 67 | single_cluster_condition: SingleClusterCondition = ( 68 | SingleClusterCondition.AffinityGmmBic) 69 | 70 | # Affinity threshold to decide whether there is only a single cluster. 71 | single_cluster_affinity_threshold: float = 0.75 72 | 73 | # When using AffinityGmmBic to make single-vs-multi cluster(s) decisions, 74 | # we only fit the GMM to the upper triangular matrix because the diagonal 75 | # and near-diagonal values might be very big. By default, we use a 76 | # value of 1 to only exclude diagonal values. But if embeddings are 77 | # extracted from overlapping sliding windows, this value could be larger 78 | # than 1. 79 | single_cluster_affinity_diagonal_offset: int = 1 80 | 81 | # Which fallback clusterer to use. 82 | fallback_clusterer_type: FallbackClustererType = ( 83 | FallbackClustererType.Naive) 84 | 85 | # Threshold of agglomerative clustering. 86 | agglomerative_threshold: float = 0.5 87 | 88 | # Threshold for the naive clusterer. 89 | naive_threshold: float = 0.5 90 | 91 | # Adaptation threshold for the naive clusterer. 92 | naive_adaptation_threshold: typing.Optional[float] = None 93 | 94 | 95 | class FallbackClusterer: 96 | """Fallback clusterer. 97 | 98 | The fallback clusterer is introduced in the multi-stage clustering paper 99 | (https://arxiv.org/abs/2210.13690). It supports agglomerative 100 | clustering and naive clustering. 101 | """ 102 | 103 | def __init__(self, options: FallbackOptions): 104 | """Initialization of the fallback clusterer. 105 | 106 | Args: 107 | options: an object of FallbackOptions 108 | """ 109 | self.options = options 110 | if options.fallback_clusterer_type == FallbackClustererType.Agglomerative: 111 | self.clusterer = AgglomerativeClustering( 112 | n_clusters=None, 113 | metric="cosine", 114 | linkage="average", 115 | distance_threshold=options.agglomerative_threshold) 116 | elif options.fallback_clusterer_type == FallbackClustererType.Naive: 117 | self.clusterer = naive_clusterer.NaiveClusterer( 118 | threshold=options.naive_threshold, 119 | adaptation_threshold=options.naive_adaptation_threshold) 120 | else: 121 | raise ValueError("Unsupported fallback_clusterer_type") 122 | 123 | def predict(self, embeddings: np.ndarray) -> np.ndarray: 124 | return self.clusterer.fit_predict(embeddings) 125 | 126 | 127 | def check_single_cluster(fallback_options: FallbackOptions, 128 | embeddings: typing.Optional[np.ndarray], 129 | affinity: np.ndarray) -> bool: 130 | """Check whether this is only a single cluster. 131 | 132 | This function is only called when min_clusters==1.
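For example, with SingleClusterCondition.AllAffinity and the default
threshold of 0.75, an affinity matrix whose smallest entry is 0.8 is
declared a single cluster, since no pair of embeddings looks dissimilar
enough to justify a split.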
133 | 134 | Args: 135 | fallback_options: an object of FallbackOptions 136 | embeddings: numpy array of shape (n_samples, n_features) 137 | affinity: the affinity matrix of shape (n_samples, n_samples) 138 | 139 | Returns: 140 | a boolean, where True means there is only a single cluster 141 | """ 142 | if (fallback_options.single_cluster_condition == 143 | SingleClusterCondition.AllAffinity): 144 | if (affinity.min() > 145 | fallback_options.single_cluster_affinity_threshold): 146 | return True 147 | elif (fallback_options.single_cluster_condition == 148 | SingleClusterCondition.NeighborAffinity): 149 | neighbor_affinity = np.diag(affinity, k=1) 150 | if (neighbor_affinity.min() > 151 | fallback_options.single_cluster_affinity_threshold): 152 | return True 153 | elif (fallback_options.single_cluster_condition == 154 | SingleClusterCondition.AffinityStd): 155 | if (np.std(affinity) < 156 | fallback_options.single_cluster_affinity_threshold): 157 | return True 158 | elif (fallback_options.single_cluster_condition == 159 | SingleClusterCondition.AffinityGmmBic): 160 | # Compute upper triangular matrix values to exclude diagonal values. 161 | if (fallback_options.single_cluster_affinity_diagonal_offset >= 162 | affinity.shape[0] - 1): 163 | raise ValueError( 164 | "single_cluster_affinity_diagonal_offset must be significantly " 165 | "smaller than affinity matrix dimension") 166 | upper_indices = np.triu_indices( 167 | affinity.shape[0], 168 | fallback_options.single_cluster_affinity_diagonal_offset) 169 | affinity_values = np.expand_dims(affinity[upper_indices], 1) 170 | 171 | # Fit GMM and compare BIC. 172 | gmm1 = GaussianMixture(n_components=1) 173 | gmm2 = GaussianMixture(n_components=2) 174 | gmm1.fit(affinity_values) 175 | gmm2.fit(affinity_values) 176 | bic1 = gmm1.bic(affinity_values) 177 | bic2 = gmm2.bic(affinity_values) 178 | return bic1 < bic2 179 | elif (fallback_options.single_cluster_condition == 180 | SingleClusterCondition.FallbackClusterer): 181 | temp_clusterer = FallbackClusterer(fallback_options) 182 | temp_labels = temp_clusterer.predict(embeddings) 183 | if np.unique(temp_labels).size == 1: 184 | return True 185 | else: 186 | raise TypeError("Unsupported single_cluster_condition") 187 | return False 188 | -------------------------------------------------------------------------------- /spectralcluster/laplacian.py: -------------------------------------------------------------------------------- 1 | """Laplacian matrix.""" 2 | 3 | import enum 4 | import numpy as np 5 | 6 | EPS = 1e-10 7 | 8 | 9 | class LaplacianType(enum.Enum): 10 | """Different types of Laplacian matrix.""" 11 | # The affinity matrix, not a Laplacian: W 12 | Affinity = enum.auto() 13 | 14 | # The unnormalized Laplacian: L = D - W 15 | Unnormalized = enum.auto() 16 | 17 | # The random walk view normalized Laplacian: D^{-1} * L 18 | RandomWalk = enum.auto() 19 | 20 | # The graph cut view normalized Laplacian: D^{-1/2} * L * D^{-1/2} 21 | GraphCut = enum.auto() 22 | 23 | 24 | def compute_laplacian(affinity: np.ndarray, 25 | laplacian_type: LaplacianType = LaplacianType.GraphCut, 26 | eps: float = EPS) -> np.ndarray: 27 | """Compute the Laplacian matrix.
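A small illustrative example for the unnormalized case, where the degree
matrix is D = diag(1.5, 1.5):

>>> compute_laplacian(np.array([[1.0, 0.5], [0.5, 1.0]]),
...                   LaplacianType.Unnormalized)
array([[ 0.5, -0.5],
       [-0.5,  0.5]])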
28 | 29 | Args: 30 | affinity: the affinity matrix of input data 31 | laplacian_type: a LaplacianType 32 | eps: a small value for numerical stability 33 | 34 | Returns: 35 | the Laplacian matrix 36 | 37 | Raises: 38 | TypeError: if laplacian_type is not a LaplacianType 39 | ValueError: if laplacian_type is not supported 40 | """ 41 | degree = np.diag(np.sum(affinity, axis=1)) 42 | laplacian = degree - affinity 43 | if not isinstance(laplacian_type, LaplacianType): 44 | raise TypeError("laplacian_type must be a LaplacianType") 45 | elif laplacian_type == LaplacianType.Affinity: 46 | return affinity 47 | elif laplacian_type == LaplacianType.Unnormalized: 48 | return laplacian 49 | elif laplacian_type == LaplacianType.RandomWalk: 50 | # Random walk normalized version 51 | degree_norm = np.diag(1 / (np.diag(degree) + eps)) 52 | laplacian_norm = degree_norm.dot(laplacian) 53 | return laplacian_norm 54 | elif laplacian_type == LaplacianType.GraphCut: 55 | # Graph cut normalized version 56 | degree_norm = np.diag(1 / (np.sqrt(np.diag(degree)) + eps)) 57 | laplacian_norm = degree_norm.dot(laplacian).dot(degree_norm) 58 | return laplacian_norm 59 | else: 60 | raise ValueError("Unsupported laplacian_type.") 61 | -------------------------------------------------------------------------------- /spectralcluster/multi_stage_clusterer.py: -------------------------------------------------------------------------------- 1 | """Implementation of multi-stage clustering. 2 | 3 | Multi-stage clustering class is introduced in this paper: 4 | 5 | * Quan Wang, Yiling Huang, Han Lu, Guanlong Zhao, Ignacio Lopez Moreno, 6 | "Highly efficient real-time streaming and fully on-device speaker 7 | diarization with multi-stage clustering." arXiv preprint 8 | arXiv:2210.13690 (2022). 9 | """ 10 | 11 | import enum 12 | import numpy as np 13 | from scipy import optimize 14 | from sklearn.cluster import AgglomerativeClustering 15 | from spectralcluster import fallback_clusterer 16 | from spectralcluster import spectral_clusterer 17 | from spectralcluster import utils 18 | 19 | 20 | class Deflicker(enum.Enum): 21 | """Method to deflicker the streaming output labels.""" 22 | # No deflicker. 23 | NoDeflicker = enum.auto() 24 | 25 | # Deflicker by enforcing order-based outputs. 26 | OrderBased = enum.auto() 27 | 28 | # Deflicker by matching previous output using Hungarian algorithm. 29 | Hungarian = enum.auto() 30 | 31 | 32 | def match_labels( 33 | current: np.ndarray, 34 | previous: np.ndarray) -> np.ndarray: 35 | """Match current labels with previous labels using Hungarian algorithm.""" 36 | # We can assign each label in current to one or many label(s) in previous. 37 | current = utils.enforce_ordered_labels(current).astype(np.int32) 38 | previous = previous.astype(np.int32) 39 | current_crop = current[:-1] 40 | if current_crop.shape != previous.shape: 41 | raise ValueError("current must have one more element than previous.") 42 | num_current = max(current_crop) + 1 43 | num_previous = max(max(previous) + 1, num_current) 44 | 45 | # Compute cost matrix. 46 | cost = np.zeros((num_current, num_previous), dtype=np.int32) 47 | for i in range(num_current): 48 | for j in range(num_previous): 49 | cost[i, j] = np.sum(np.logical_and(current_crop == i, previous == j)) 50 | 51 | # Solve assignment problem. 52 | row_ind, col_ind = optimize.linear_sum_assignment(cost, maximize=True) 53 | 54 | # Map labels.
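# label_map sends each current label to the previous label it overlaps with
# most; any current label left unmatched by the assignment simply keeps its
# own id in the loop below.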
55 | label_map = {} 56 | for i, j in zip(row_ind, col_ind): 57 | label_map[i] = j 58 | 59 | new_labels = current.copy() 60 | for i in range(max(current) + 1): 61 | if i in label_map: 62 | new_labels[current == i] = label_map[i] 63 | 64 | return new_labels 65 | 66 | 67 | class MultiStageClusterer: 68 | """Multi-stage clustering class.""" 69 | 70 | def __init__( 71 | self, 72 | main_clusterer: spectral_clusterer.SpectralClusterer, 73 | fallback_threshold: float = 0.5, 74 | L: int = 50, 75 | U1: int = 100, 76 | U2: int = 600, 77 | deflicker: Deflicker = Deflicker.NoDeflicker 78 | ): 79 | self.deflicker = deflicker 80 | 81 | # Main clusterer. 82 | self.main = main_clusterer 83 | 84 | if self.main.max_spectral_size: 85 | raise ValueError( 86 | "Do not set max_spectral_size for SpectralClusterer when " 87 | "using MultiStageClusterer.") 88 | 89 | # Lower bound of main clusterer. 90 | self.main.fallback_options.spectral_min_embeddings = L 91 | 92 | # Upper bound of main clusterer. 93 | self.U1 = U1 94 | 95 | # Upper bound of pre-clusterer. 96 | self.U2 = U2 97 | 98 | # Threshold for fallback AHC clusterer. 99 | self.main.fallback_options.agglomerative_threshold = fallback_threshold 100 | 101 | # Other configs for fallback. 102 | self.main.fallback_options.single_cluster_condition = ( 103 | fallback_clusterer.SingleClusterCondition.FallbackClusterer) 104 | self.main.fallback_options.fallback_clusterer_type = ( 105 | fallback_clusterer.FallbackClustererType.Agglomerative) 106 | 107 | # Pre-clusterer. 108 | self.pre = AgglomerativeClustering( 109 | n_clusters=U1, 110 | metric="cosine", 111 | linkage="complete") 112 | 113 | # All cached centroids. 114 | self.cache = None 115 | 116 | # Number of clustered embeddings. 117 | self.num_embeddings = 0 118 | 119 | # Array of shape (n_samples,), mapping from original embedding to compressed 120 | # centroid. 121 | self.compression_labels = None 122 | 123 | self.previous_output = None 124 | 125 | def streaming_predict( 126 | self, 127 | embedding: np.ndarray 128 | ) -> np.ndarray: 129 | """A streaming prediction function. 130 | 131 | Note that this is not a simple online prediction class. It not only 132 | predicts the label of the next input, but also makes corrections to 133 | previously predicted labels. 134 | """ 135 | self.num_embeddings += 1 136 | 137 | # First input. 138 | if self.num_embeddings == 1: 139 | self.cache = embedding 140 | final_labels = np.array([0]) 141 | self.previous_output = final_labels 142 | return final_labels 143 | 144 | self.cache = np.vstack([self.cache, embedding]) 145 | 146 | # Using fallback or main clusterer only. 147 | if self.num_embeddings <= self.U1: 148 | final_labels = self.main.predict(self.cache) 149 | self.previous_output = final_labels 150 | return final_labels 151 | 152 | # Run pre-clusterer. 153 | if self.compression_labels is not None: 154 | self.compression_labels = np.append( 155 | self.compression_labels, max(self.compression_labels) + 1) 156 | pre_labels = self.pre.fit_predict(self.cache) 157 | pre_centroids = utils.get_cluster_centroids(self.cache, pre_labels) 158 | main_labels = self.main.predict(pre_centroids) 159 | 160 | final_labels = utils.chain_labels( 161 | self.compression_labels, 162 | utils.chain_labels(pre_labels, main_labels)) 163 | 164 | # Dynamic compression. 165 | if self.cache.shape[0] == self.U2: 166 | self.cache = pre_centroids 167 | self.compression_labels = utils.chain_labels( 168 | self.compression_labels, pre_labels) 169 | 170 | # Deflicker.
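# Example: if the previous output was [0, 0, 1] and the new raw labels are
# [1, 1, 0, 0], Hungarian matching relabels them to [0, 0, 1, 1], so existing
# segments keep their old identities instead of flickering.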
171 | if self.num_embeddings > 1: 172 | if self.deflicker == Deflicker.OrderBased: 173 | final_labels = utils.enforce_ordered_labels( 174 | final_labels) 175 | elif self.deflicker == Deflicker.Hungarian: 176 | final_labels = match_labels( 177 | final_labels, self.previous_output) 178 | 179 | self.previous_output = final_labels 180 | return final_labels 181 | -------------------------------------------------------------------------------- /spectralcluster/naive_clusterer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import typing 3 | 4 | 5 | class NaiveCentroid: 6 | """A cluster centroid of the Naive clustering algorithm.""" 7 | 8 | def __init__(self, embedding: np.ndarray): 9 | """Create a new centroid.""" 10 | self.embedding = embedding 11 | self.count = 1 12 | 13 | def merge(self, embedding: np.ndarray): 14 | """Merge a new embedding into the centroid.""" 15 | self.embedding = (self.embedding * self.count + embedding) / ( 16 | self.count + 1) 17 | self.count += 1 18 | 19 | def cosine(self, embedding: np.ndarray) -> float: 20 | """Compute cosine similarity to a new embedding.""" 21 | return np.dot(self.embedding, embedding) / ( 22 | np.linalg.norm(self.embedding) * np.linalg.norm(embedding)) 23 | 24 | 25 | class NaiveClusterer: 26 | """Naive clustering class.""" 27 | 28 | def __init__(self, 29 | threshold: float, 30 | adaptation_threshold: typing.Optional[float] = None): 31 | """Initialize the clusterer. 32 | 33 | Note that since this is online clustering, fit_predict and predict 34 | are the same. 35 | 36 | Args: 37 | threshold: if cosine similarity is larger than this threshold, the 38 | embedding will be considered to belong to the cluster 39 | adaptation_threshold: if cosine similarity is larger than 40 | adaptation_threshold, the embedding will be merged into the cluster. 41 | If None, we use threshold as adaptation_threshold 42 | 43 | Raises: 44 | ValueError: if adaptation_threshold is smaller than threshold 45 | """ 46 | self.threshold = threshold 47 | if adaptation_threshold is None: 48 | self.adaptation_threshold = threshold 49 | elif adaptation_threshold < threshold: 50 | raise ValueError("adaptation_threshold cannot be smaller than threshold") 51 | else: 52 | self.adaptation_threshold = adaptation_threshold 53 | self.centroids = [] 54 | 55 | def reset(self): 56 | """Reset the clusterer.""" 57 | self.centroids = [] 58 | 59 | def predict_next(self, embedding: np.ndarray) -> int: 60 | """Given a new embedding, output its label. 61 | 62 | This is used for online clustering. 63 | 64 | Args: 65 | embedding: numpy array of shape (n_features,) 66 | 67 | Returns: 68 | label: an integer cluster label 69 | """ 70 | # Handle first embedding case. 71 | if len(self.centroids) == 0: 72 | self.centroids.append(NaiveCentroid(embedding)) 73 | return 0 74 | 75 | # Compute all similarities. 76 | similarities = np.array( 77 | [centroid.cosine(embedding) for centroid in self.centroids]) 78 | 79 | # New cluster. 80 | if similarities.max() < self.threshold: 81 | self.centroids.append(NaiveCentroid(embedding)) 82 | return len(self.centroids) - 1 83 | 84 | # Existing cluster. 85 | label = similarities.argmax() 86 | if similarities[label] > self.adaptation_threshold: 87 | self.centroids[label].merge(embedding) 88 | return label 89 | 90 | def predict(self, embeddings: np.ndarray) -> np.ndarray: 91 | """Given many embeddings, return all cluster labels. 92 | 93 | This is for simulating offline clustering behavior.
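A small illustrative example with threshold=0.5, where the second
embedding joins (and updates) the first centroid and the third one
starts a new cluster:

>>> clusterer = NaiveClusterer(threshold=0.5)
>>> clusterer.predict(np.array([[1.0, 0.0], [0.9, 0.1], [0.0, 1.0]]))
array([0, 0, 1])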
94 | 95 | Args: 96 | embeddings: numpy array of shape (n_samples, n_features) 97 | 98 | Returns: 99 | labels: numpy array of shape (n_samples,) 100 | """ 101 | return np.array([self.predict_next(embedding) for embedding in embeddings]) 102 | 103 | def fit_predict(self, embeddings: np.ndarray) -> np.ndarray: 104 | """Same as predict(), since this is an online clusterer.""" 105 | return self.predict(embeddings) 106 | -------------------------------------------------------------------------------- /spectralcluster/refinement.py: -------------------------------------------------------------------------------- 1 | """Affinity matrix refinement operations.""" 2 | 3 | import abc 4 | from dataclasses import dataclass 5 | import enum 6 | import numpy as np 7 | from scipy.ndimage import gaussian_filter 8 | import typing 9 | 10 | 11 | class RefinementName(enum.Enum): 12 | """The names of the refinement operations.""" 13 | CropDiagonal = enum.auto() 14 | GaussianBlur = enum.auto() 15 | RowWiseThreshold = enum.auto() 16 | Symmetrize = enum.auto() 17 | Diffuse = enum.auto() 18 | RowWiseNormalize = enum.auto() 19 | 20 | 21 | class ThresholdType(enum.Enum): 22 | """Different types of thresholding.""" 23 | # We clear values that are smaller than row_max*p_percentile 24 | RowMax = enum.auto() 25 | 26 | # We clear (p_percentile*100)% smallest values of the entire row 27 | Percentile = enum.auto() 28 | 29 | 30 | class SymmetrizeType(enum.Enum): 31 | """Different types of symmetrization operation.""" 32 | # We use max(A, A^T) 33 | Max = enum.auto() 34 | 35 | # We use 1/2(A + A^T) 36 | Average = enum.auto() 37 | 38 | 39 | class AffinityRefinementOperation(metaclass=abc.ABCMeta): 40 | """Refinement of the affinity matrix.""" 41 | 42 | def check_input(self, affinity: np.ndarray): 43 | """Check the input to the refine() method. 44 | 45 | Args: 46 | affinity: the input affinity matrix. 47 | 48 | Raises: 49 | TypeError: if affinity has wrong type 50 | ValueError: if affinity has wrong shape, etc. 51 | """ 52 | shape = affinity.shape 53 | if len(shape) != 2: 54 | raise ValueError("affinity must be 2-dimensional") 55 | if shape[0] != shape[1]: 56 | raise ValueError("affinity must be a square matrix") 57 | 58 | @abc.abstractmethod 59 | def refine(self, affinity: np.ndarray) -> np.ndarray: 60 | """An abstract method to perform the refinement operation. 61 | 62 | Args: 63 | affinity: the affinity matrix, of size (n_samples, n_samples) 64 | 65 | Returns: 66 | a matrix of the same size as affinity 67 | """ 68 | pass 69 | 70 | 71 | @dataclass 72 | class RefinementOptions: 73 | """Refinement options for the affinity matrix.""" 74 | 75 | # Sigma value of the Gaussian blur operation. 76 | gaussian_blur_sigma: int = 1 77 | 78 | # The p-percentile for the row wise thresholding. 79 | p_percentile: float = 0.95 80 | 81 | # The multiplier for soft threshold, if this value is 0, 82 | # then it's a hard thresholding. 83 | thresholding_soft_multiplier: float = 0.01 84 | 85 | # The type of thresholding operation. 86 | thresholding_type: ThresholdType = ThresholdType.RowMax 87 | 88 | # If True, we set values larger than the threshold to 1. 89 | thresholding_with_binarization: bool = False 90 | 91 | # If True, in the row wise thresholding operation, we first set the diagonals 92 | # of the affinity matrix to 0, and set the diagonals back to 1 in the end. 93 | thresholding_preserve_diagonal: bool = False 94 | 95 | # A SymmetrizeType.
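# Max keeps the stronger direction of each pair, i.e. max(A, A^T); Average
# uses (A + A^T) / 2, which is the setting used by the Turn-To-Diarize config.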
96 | symmetrize_type: SymmetrizeType = SymmetrizeType.Max 97 | 98 | # A list of RefinementName for the sequence of refinement operations to 99 | # apply on the affinity matrix. 100 | refinement_sequence: typing.Optional[typing.Sequence[RefinementName]] = None 101 | 102 | def get_refinement_operator(self, name: RefinementName) -> ( 103 | AffinityRefinementOperation): 104 | """Get the refinement operator for the affinity matrix. 105 | 106 | Args: 107 | name: a RefinementName 108 | 109 | Returns: 110 | object of the operator 111 | 112 | Raises: 113 | TypeError: if name is not a RefinementName 114 | ValueError: if name is an unknown refinement operation 115 | """ 116 | if name == RefinementName.CropDiagonal: 117 | return CropDiagonal() 118 | elif name == RefinementName.GaussianBlur: 119 | return GaussianBlur(self.gaussian_blur_sigma) 120 | elif name == RefinementName.RowWiseThreshold: 121 | return RowWiseThreshold(self.p_percentile, 122 | self.thresholding_soft_multiplier, 123 | self.thresholding_type, 124 | self.thresholding_with_binarization, 125 | self.thresholding_preserve_diagonal) 126 | elif name == RefinementName.Symmetrize: 127 | return Symmetrize(self.symmetrize_type) 128 | elif name == RefinementName.Diffuse: 129 | return Diffuse() 130 | elif name == RefinementName.RowWiseNormalize: 131 | return RowWiseNormalize() 132 | else: 133 | raise ValueError("Unknown refinement operation: {}".format(name)) 134 | 135 | 136 | class CropDiagonal(AffinityRefinementOperation): 137 | """Crop the diagonal. 138 | 139 | Replace diagonal element by the max non-diagonal value of row. 140 | After this operation, the matrix has similar properties to a standard 141 | Laplacian matrix. This also helps to avoid the bias during Gaussian blur and 142 | normalization. 143 | """ 144 | 145 | def refine(self, affinity: np.ndarray) -> np.ndarray: 146 | self.check_input(affinity) 147 | refined_affinity = np.copy(affinity) 148 | np.fill_diagonal(refined_affinity, 0.0) 149 | di = np.diag_indices(refined_affinity.shape[0]) 150 | refined_affinity[di] = refined_affinity.max(axis=1) 151 | return refined_affinity 152 | 153 | 154 | class GaussianBlur(AffinityRefinementOperation): 155 | """Apply Gaussian blur.""" 156 | 157 | def __init__(self, sigma: int = 1): 158 | self.sigma = sigma 159 | 160 | def refine(self, affinity: np.ndarray) -> np.ndarray: 161 | self.check_input(affinity) 162 | return gaussian_filter(affinity, sigma=self.sigma) 163 | 164 | 165 | class RowWiseThreshold(AffinityRefinementOperation): 166 | """Apply row wise thresholding.""" 167 | 168 | def __init__(self, 169 | p_percentile: float = 0.95, 170 | thresholding_soft_multiplier: float = 0.01, 171 | thresholding_type: ThresholdType = ThresholdType.RowMax, 172 | thresholding_with_binarization: bool = False, 173 | thresholding_preserve_diagonal: bool = False): 174 | self.p_percentile = p_percentile 175 | self.multiplier = thresholding_soft_multiplier 176 | if not isinstance(thresholding_type, ThresholdType): 177 | raise TypeError("thresholding_type must be a ThresholdType") 178 | self.thresholding_type = thresholding_type 179 | self.thresholding_with_binarization = thresholding_with_binarization 180 | self.thresholding_preserve_diagonal = thresholding_preserve_diagonal 181 | 182 | def refine(self, affinity: np.ndarray) -> np.ndarray: 183 | self.check_input(affinity) 184 | refined_affinity = np.copy(affinity) 185 | if self.thresholding_preserve_diagonal: 186 | np.fill_diagonal(refined_affinity, 0.0) 187 | if self.thresholding_type == ThresholdType.RowMax: 188 | 
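# For example, with p_percentile=0.95, every entry below 95% of its row's
# maximum is treated as small and scaled down by the soft multiplier below.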
# Row_max based thresholding 189 | row_max = refined_affinity.max(axis=1) 190 | row_max = np.expand_dims(row_max, axis=1) 191 | is_smaller = refined_affinity < (row_max * self.p_percentile) 192 | elif self.thresholding_type == ThresholdType.Percentile: 193 | # Percentile based thresholding 194 | row_percentile = np.percentile( 195 | refined_affinity, self.p_percentile * 100, axis=1) 196 | row_percentile = np.expand_dims(row_percentile, axis=1) 197 | is_smaller = refined_affinity < row_percentile 198 | else: 199 | raise ValueError("Unsupported thresholding_type") 200 | if self.thresholding_with_binarization: 201 | # For values larger than the threshold, we binarize them to 1 202 | refined_affinity = (np.ones_like( 203 | (refined_affinity)) * np.invert(is_smaller)) + ( 204 | refined_affinity * self.multiplier * is_smaller) 205 | else: 206 | refined_affinity = (refined_affinity * np.invert(is_smaller)) + ( 207 | refined_affinity * self.multiplier * is_smaller) 208 | if self.thresholding_preserve_diagonal: 209 | np.fill_diagonal(refined_affinity, 1.0) 210 | return refined_affinity 211 | 212 | 213 | class Symmetrize(AffinityRefinementOperation): 214 | """The Symmetrization operation.""" 215 | 216 | def __init__(self, symmetrize_type: SymmetrizeType = SymmetrizeType.Max): 217 | self.symmetrize_type = symmetrize_type 218 | 219 | def refine(self, affinity: np.ndarray) -> np.ndarray: 220 | self.check_input(affinity) 221 | if self.symmetrize_type == SymmetrizeType.Max: 222 | return np.maximum(affinity, np.transpose(affinity)) 223 | elif self.symmetrize_type == SymmetrizeType.Average: 224 | return 0.5 * (affinity + np.transpose(affinity)) 225 | else: 226 | raise ValueError("Unsupported symmetrize_type.") 227 | 228 | 229 | class Diffuse(AffinityRefinementOperation): 230 | """The diffusion operation.""" 231 | 232 | def refine(self, affinity: np.ndarray) -> np.ndarray: 233 | self.check_input(affinity) 234 | return np.matmul(affinity, np.transpose(affinity)) 235 | 236 | 237 | class RowWiseNormalize(AffinityRefinementOperation): 238 | """The row wise max normalization operation.""" 239 | 240 | def refine(self, affinity: np.ndarray) -> np.ndarray: 241 | self.check_input(affinity) 242 | refined_affinity = np.copy(affinity) 243 | row_max = refined_affinity.max(axis=1) 244 | refined_affinity /= np.expand_dims(row_max, axis=1) 245 | return refined_affinity 246 | -------------------------------------------------------------------------------- /spectralcluster/spectral_clusterer.py: -------------------------------------------------------------------------------- 1 | """A spectral clusterer class to perform clustering.""" 2 | 3 | import numpy as np 4 | from sklearn.cluster import AgglomerativeClustering 5 | from spectralcluster import autotune 6 | from spectralcluster import constraint 7 | from spectralcluster import custom_distance_kmeans 8 | from spectralcluster import fallback_clusterer 9 | from spectralcluster import laplacian 10 | from spectralcluster import refinement 11 | from spectralcluster import utils 12 | import typing 13 | 14 | 15 | AutoTune = autotune.AutoTune 16 | AutoTuneProxy = autotune.AutoTuneProxy 17 | ConstraintName = constraint.ConstraintName 18 | ConstraintOptions = constraint.ConstraintOptions 19 | FallbackOptions = fallback_clusterer.FallbackOptions 20 | LaplacianType = laplacian.LaplacianType 21 | RefinementName = refinement.RefinementName 22 | RefinementOptions = refinement.RefinementOptions 23 | EigenGapType = utils.EigenGapType 24 | 25 | 26 | class SpectralClusterer: 27 | """Spectral 
clustering class.""" 28 | 29 | def __init__( 30 | self, 31 | min_clusters: typing.Optional[int] = None, 32 | max_clusters: typing.Optional[int] = None, 33 | refinement_options: typing.Optional[RefinementOptions] = None, 34 | autotune: typing.Optional[AutoTune] = None, 35 | fallback_options: typing.Optional[FallbackOptions] = None, 36 | laplacian_type: typing.Optional[LaplacianType] = None, 37 | stop_eigenvalue: float = 1e-2, 38 | row_wise_renorm: bool = False, 39 | custom_dist: typing.Union[str, typing.Callable] = "cosine", 40 | max_iter: int = 300, 41 | constraint_options: typing.Optional[ConstraintOptions] = None, 42 | eigengap_type: EigenGapType = EigenGapType.Ratio, 43 | max_spectral_size: typing.Optional[int] = None, 44 | affinity_function: typing.Callable = utils.compute_affinity_matrix, 45 | post_eigen_cluster_function: typing.Callable = ( 46 | custom_distance_kmeans.run_kmeans)): 47 | """Constructor of the clusterer. 48 | 49 | Args: 50 | min_clusters: minimal number of clusters allowed (only effective if not 51 | None) 52 | max_clusters: maximal number of clusters allowed (only effective if not 53 | None), can be used together with min_clusters to fix the number of 54 | clusters 55 | refinement_options: a RefinementOptions object that contains refinement 56 | arguments for the affinity matrix. If None, we will not refine 57 | autotune: an AutoTune object to automatically search p_percentile 58 | fallback_options: a FallbackOptions object to indicate when to run 59 | fallback clusterer instead of spectral clusterer 60 | laplacian_type: a LaplacianType. If None, we do not use a laplacian matrix 61 | stop_eigenvalue: when computing the number of clusters using Eigen Gap, we 62 | do not look at eigen values smaller than this value 63 | row_wise_renorm: if True, perform row-wise re-normalization on the 64 | spectral embeddings 65 | custom_dist: str or callable. custom distance measure for k-means. If a 66 | string, "cosine", "euclidean", "mahalanobis", or any other distance 67 | functions defined in scipy.spatial.distance can be used 68 | max_iter: the maximum number of iterations for the custom k-means 69 | constraint_options: a ConstraintOptions object that contains constraint 70 | arguments 71 | eigengap_type: the type of the eigengap computation 72 | max_spectral_size: the maximal size of input to the spectral clustering 73 | algorithm. If this is set, and the actual input size is larger than 74 | this value, then we are going to first use hierarchical clustering 75 | to reduce the input size to this number. This can significantly reduce 76 | the computational cost for steps like Laplacian matrix and eigen 77 | decomposition. However, please note that this may degrade the quality 78 | of the final clustering results. This corresponds to the U1 value in 79 | the multi-stage clustering paper (https://arxiv.org/abs/2210.13690) 80 | affinity_function: a function to compute the affinity matrix from the 81 | embeddings. This defaults to (cos(x,y)+1)/2 82 | post_eigen_cluster_function: a function to cluster the spectral embeddings 83 | after the eigenvalue computations. 
This function must have the same 84 | signature as custom_distance_kmeans.run_kmeans 85 | """ 86 | self.min_clusters = min_clusters 87 | self.max_clusters = max_clusters 88 | if not refinement_options: 89 | self.refinement_options = refinement.RefinementOptions() 90 | else: 91 | self.refinement_options = refinement_options 92 | self.autotune = autotune 93 | if not fallback_options: 94 | self.fallback_options = fallback_clusterer.FallbackOptions() 95 | else: 96 | self.fallback_options = fallback_options 97 | self.laplacian_type = laplacian_type 98 | self.row_wise_renorm = row_wise_renorm 99 | self.stop_eigenvalue = stop_eigenvalue 100 | self.custom_dist = custom_dist 101 | self.max_iter = max_iter 102 | self.constraint_options = constraint_options 103 | self.eigengap_type = eigengap_type 104 | self.max_spectral_size = max_spectral_size 105 | self.affinity_function = affinity_function 106 | self.post_eigen_cluster_function = post_eigen_cluster_function 107 | 108 | def _compute_eigenvectors_ncluster( 109 | self, 110 | affinity: np.ndarray, 111 | constraint_matrix: typing.Optional[np.ndarray] = None) -> ( 112 | typing.Tuple[np.ndarray, int, float]): 113 | """Perform eigen decomposition and estimate the number of clusters. 114 | 115 | Perform affinity refinement, eigen decomposition and sort eigenvectors by 116 | the real part of eigenvalues. Estimate the number of clusters using EigenGap 117 | principle. 118 | 119 | Args: 120 | affinity: the affinity matrix of input data 121 | constraint_matrix: numpy array of shape (n_samples, n_samples). The 122 | constraint matrix with prior information 123 | 124 | Returns: 125 | eigenvectors: sorted eigenvectors. numpy array of shape 126 | (n_samples, n_samples) 127 | n_clusters: number of clusters as an integer 128 | max_delta_norm: normalized maximum eigen gap 129 | """ 130 | # Perform refinement operations on the affinity matrix. 131 | if self.refinement_options.refinement_sequence: 132 | for refinement_name in self.refinement_options.refinement_sequence: 133 | refinement_operator = self.refinement_options.get_refinement_operator( 134 | refinement_name) 135 | affinity = refinement_operator.refine(affinity) 136 | 137 | if (self.constraint_options and 138 | not self.constraint_options.apply_before_refinement and 139 | constraint_matrix is not None): 140 | # Perform the constraint operation after refinement 141 | affinity = self.constraint_options.constraint_operator.adjust_affinity( 142 | affinity, constraint_matrix) 143 | 144 | if not self.laplacian_type or self.laplacian_type == LaplacianType.Affinity: 145 | # Perform eigen decomposition. 146 | (eigenvalues, eigenvectors) = utils.compute_sorted_eigenvectors(affinity) 147 | # Get number of clusters. 148 | n_clusters, max_delta_norm = utils.compute_number_of_clusters( 149 | eigenvalues, 150 | max_clusters=self.max_clusters, 151 | stop_eigenvalue=self.stop_eigenvalue, 152 | eigengap_type=self.eigengap_type, 153 | descend=True) 154 | else: 155 | # Compute Laplacian matrix 156 | laplacian_norm = laplacian.compute_laplacian( 157 | affinity, laplacian_type=self.laplacian_type) 158 | # Perform eigen decomposition. Eigen values are sorted in an ascending 159 | # order 160 | (eigenvalues, eigenvectors) = utils.compute_sorted_eigenvectors( 161 | laplacian_norm, descend=False) 162 | # Get number of clusters.
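# (For example, ascending eigenvalues [1e-9, 0.02, 0.9, 1.1] with the default
# Ratio eigengap have their largest gap 0.9 / 0.02 between indices 1 and 2,
# so two clusters are selected.)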
Eigen values are sorted in an ascending order 163 | n_clusters, max_delta_norm = utils.compute_number_of_clusters( 164 | eigenvalues, 165 | max_clusters=self.max_clusters, 166 | eigengap_type=self.eigengap_type, 167 | descend=False) 168 | return eigenvectors, n_clusters, max_delta_norm 169 | 170 | def _reduce_size_and_predict(self, embeddings: np.ndarray) -> np.ndarray: 171 | """Reduce the input size, then run spectral clustering. 172 | 173 | Args: 174 | embeddings: numpy array of shape (n_samples, n_features) 175 | 176 | Returns: 177 | labels: numpy array of shape (n_samples,) 178 | """ 179 | # Run AHC on the input to reduce the size. 180 | # Note that linkage needs to be "complete", because "average" and "single" 181 | # do not work very well here. 182 | # Alternatively, we can use "euclidean" and "ward", but that requires 183 | # that the inputs are L2 normalized first. 184 | ahc = AgglomerativeClustering( 185 | n_clusters=self.max_spectral_size, 186 | metric="cosine", 187 | linkage="complete") 188 | ahc_labels = ahc.fit_predict(embeddings) 189 | 190 | # Compute the centroids of the AHC clusters. 191 | ahc_centroids = utils.get_cluster_centroids(embeddings, ahc_labels) 192 | 193 | # Run spectral clustering on AHC centroids. 194 | spectral_labels = self.predict(ahc_centroids) 195 | 196 | # Convert spectral labels to final labels. 197 | final_labels = utils.chain_labels(ahc_labels, spectral_labels) 198 | 199 | return final_labels 200 | 201 | def predict( 202 | self, 203 | embeddings: np.ndarray, 204 | constraint_matrix: typing.Optional[np.ndarray] = None) -> np.ndarray: 205 | """Perform spectral clustering on data embeddings. 206 | 207 | The spectral clustering is performed on an affinity matrix. 208 | 209 | Args: 210 | embeddings: numpy array of shape (n_samples, n_features) 211 | constraint_matrix: numpy array of shape (n_samples, n_samples). The 212 | constraint matrix with prior information 213 | 214 | Returns: 215 | labels: numpy array of shape (n_samples,) 216 | 217 | Raises: 218 | TypeError: if embeddings has wrong type 219 | ValueError: if embeddings has wrong shape 220 | RuntimeError: if max_spectral_size is set and constraint_matrix is given 221 | """ 222 | # Validate the input before reading its shape. 223 | if not isinstance(embeddings, np.ndarray): 224 | raise TypeError("embeddings must be a numpy array") 225 | if len(embeddings.shape) != 2: 226 | raise ValueError("embeddings must be 2-dimensional") 227 | num_embeddings = embeddings.shape[0] 228 | 229 | # Check whether we need to run fallback clusterer instead. 230 | if (num_embeddings < 231 | self.fallback_options.spectral_min_embeddings): 232 | temp_clusterer = fallback_clusterer.FallbackClusterer( 233 | self.fallback_options) 234 | return temp_clusterer.predict(embeddings) 235 | 236 | # Check whether the input size is too big for running spectral clustering. 237 | if (self.max_spectral_size is not None 238 | and num_embeddings > self.max_spectral_size): 239 | if constraint_matrix is not None: 240 | raise RuntimeError( 241 | "Cannot handle constraint_matrix when max_spectral_size is set") 242 | if (self.max_spectral_size < 2 or 243 | (self.max_clusters and self.max_spectral_size <= self.max_clusters) or 244 | (self.min_clusters and self.max_spectral_size <= self.min_clusters)): 245 | raise ValueError( 246 | "max_spectral_size should be a relatively big number") 247 | return self._reduce_size_and_predict(embeddings) 248 | 249 | # Compute affinity matrix. 250 | affinity = self.affinity_function(embeddings) 251 | 252 | # Make single-vs-multi cluster(s) decision.
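# With the default AffinityGmmBic condition, the off-diagonal affinity values
# are fitted with 1- and 2-component GMMs; if the single Gaussian has the
# lower BIC, everything is treated as one cluster.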
253 | if self.min_clusters == 1: 254 | if fallback_clusterer.check_single_cluster( 255 | self.fallback_options, embeddings, affinity): 256 | return np.array([0] * num_embeddings) 257 | 258 | # Apply constraint. 259 | if (self.constraint_options and 260 | self.constraint_options.apply_before_refinement and 261 | constraint_matrix is not None): 262 | # Perform the constraint operation before refinement 263 | affinity = self.constraint_options.constraint_operator.adjust_affinity( 264 | affinity, constraint_matrix) 265 | 266 | if self.autotune: 267 | # Use Auto-tuning method to find a good p_percentile. 268 | if (RefinementName.RowWiseThreshold 269 | not in self.refinement_options.refinement_sequence): 270 | raise ValueError( 271 | "AutoTune is only effective when the refinement sequence " 272 | "contains RowWiseThreshold") 273 | 274 | def p_percentile_to_ratio(p_percentile: float) -> ( 275 | typing.Tuple[float, np.ndarray, int]): 276 | """Compute the `ratio` given a `p_percentile` value.""" 277 | self.refinement_options.p_percentile = p_percentile 278 | (eigenvectors, n_clusters, 279 | max_delta_norm) = self._compute_eigenvectors_ncluster( 280 | affinity, constraint_matrix) 281 | if self.autotune.proxy == AutoTuneProxy.PercentileSqrtOverNME: 282 | ratio = np.sqrt(1 - p_percentile) / max_delta_norm 283 | elif self.autotune.proxy == AutoTuneProxy.PercentileOverNME: 284 | ratio = (1 - p_percentile) / max_delta_norm 285 | else: 286 | raise ValueError("Unsupported value of AutoTuneProxy") 287 | return ratio, eigenvectors, n_clusters 288 | 289 | eigenvectors, n_clusters, _ = self.autotune.tune(p_percentile_to_ratio) 290 | else: 291 | # Do not use Auto-tune. 292 | eigenvectors, n_clusters, _ = self._compute_eigenvectors_ncluster( 293 | affinity, constraint_matrix) 294 | 295 | if self.min_clusters is not None: 296 | n_clusters = max(n_clusters, self.min_clusters) 297 | 298 | # Get spectral embeddings. 299 | spectral_embeddings = eigenvectors[:, :n_clusters] 300 | 301 | if self.row_wise_renorm: 302 | # Perform row wise re-normalization. 303 | rows_norm = np.linalg.norm(spectral_embeddings, axis=1, ord=2) 304 | spectral_embeddings = spectral_embeddings / np.reshape( 305 | rows_norm, (num_embeddings, 1)) 306 | 307 | # Run clustering algorithm on spectral embeddings. This defaults 308 | # to customized K-means. 309 | labels = self.post_eigen_cluster_function( 310 | spectral_embeddings=spectral_embeddings, 311 | n_clusters=n_clusters, 312 | custom_dist=self.custom_dist, 313 | max_iter=self.max_iter) 314 | return labels 315 | -------------------------------------------------------------------------------- /spectralcluster/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions.""" 2 | 3 | import enum 4 | import numpy as np 5 | import typing 6 | 7 | EPS = 1e-10 8 | 9 | 10 | class EigenGapType(enum.Enum): 11 | """Different types of the eigengap computation.""" 12 | # Eigengap is the ratio of two eigenvalues 13 | Ratio = enum.auto() 14 | 15 | # Eigengap is the difference of two eigenvalues, and it is normalized 16 | # by the maximum eigenvalue 17 | NormalizedDiff = enum.auto() 18 | 19 | 20 | def compute_affinity_matrix(embeddings: np.ndarray) -> np.ndarray: 21 | """Compute the affinity matrix from data. 22 | 23 | Note that the range of affinity is [0, 1]. 24 | 25 | Args: 26 | embeddings: numpy array of shape (n_samples, n_features) 27 | 28 | Returns: 29 | affinity: numpy array of shape (n_samples, n_samples) 30 | """ 31 | # Normalize the data.
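# Worked example: the row [3, 4] has L2 norm 5 and normalizes to [0.6, 0.8];
# two identical rows then have cosine similarity 1.0, which maps to affinity
# (1.0 + 1.0) / 2 = 1.0, while orthogonal rows map to (0.0 + 1.0) / 2 = 0.5.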
32 | l2_norms = np.linalg.norm(embeddings, axis=1) 33 | embeddings_normalized = embeddings / l2_norms[:, None] 34 | # Compute cosine similarities. Range is [-1,1]. 35 | cosine_similarities = np.matmul(embeddings_normalized, 36 | np.transpose(embeddings_normalized)) 37 | # Compute the affinity. Range is [0,1]. 38 | # Note that this step is not mentioned in the paper! 39 | affinity = (cosine_similarities + 1.0) / 2.0 40 | 41 | return affinity 42 | 43 | 44 | def compute_sorted_eigenvectors( 45 | input_matrix: np.ndarray, 46 | descend: bool = True) -> typing.Tuple[np.ndarray, np.ndarray]: 47 | """Sort eigenvectors by the real part of eigenvalues. 48 | 49 | Args: 50 | input_matrix: the matrix to perform eigen analysis with shape (M, M) 51 | descend: sort eigenvalues in a descending order. Default is True 52 | 53 | Returns: 54 | w: sorted eigenvalues of shape (M,) 55 | v: sorted eigenvectors, where v[:, i] corresponds to the ith largest 56 | eigenvalue 57 | """ 58 | # Eigen decomposition. 59 | eigenvalues, eigenvectors = np.linalg.eig(input_matrix) 60 | eigenvalues = eigenvalues.real 61 | eigenvectors = eigenvectors.real 62 | if descend: 63 | # Sort from largest to smallest. 64 | index_array = np.argsort(-eigenvalues) 65 | else: 66 | # Sort from smallest to largest. 67 | index_array = np.argsort(eigenvalues) 68 | # Re-order. 69 | w = eigenvalues[index_array] 70 | v = eigenvectors[:, index_array] 71 | return w, v 72 | 73 | 74 | def compute_number_of_clusters(eigenvalues: np.ndarray, 75 | max_clusters: typing.Optional[int] = None, 76 | stop_eigenvalue: float = 1e-2, 77 | eigengap_type: EigenGapType = EigenGapType.Ratio, 78 | descend: bool = True, 79 | eps: float = EPS) -> typing.Tuple[int, float]: 80 | """Compute number of clusters using EigenGap principle. 81 | 82 | Use maximum EigenGap principle to find the number of clusters. 83 | 84 | Args: 85 | eigenvalues: sorted eigenvalues of the affinity matrix 86 | max_clusters: max number of clusters allowed 87 | stop_eigenvalue: we do not look at eigen values smaller than this 88 | eigengap_type: the type of the eigengap computation 89 | descend: sort eigenvalues in a descending order.
Default is True 90 | eps: a small value for numerical stability 91 | 92 | Returns: 93 | max_delta_index: number of clusters as an integer 94 | max_delta_norm: normalized maximum eigen gap 95 | """ 96 | if not isinstance(eigengap_type, EigenGapType): 97 | raise TypeError("eigengap_type must be an EigenGapType") 98 | max_delta = 0 99 | max_delta_index = 0 100 | range_end = len(eigenvalues) 101 | if max_clusters and max_clusters + 1 < range_end: 102 | range_end = max_clusters + 1 103 | 104 | if not descend: 105 | # The first eigen value is always 0 in an ascending order 106 | for i in range(1, range_end - 1): 107 | if eigengap_type == EigenGapType.Ratio: 108 | delta = eigenvalues[i + 1] / (eigenvalues[i] + eps) 109 | elif eigengap_type == EigenGapType.NormalizedDiff: 110 | delta = (eigenvalues[i + 1] - eigenvalues[i]) / np.max(eigenvalues) 111 | else: 112 | raise ValueError("Unsupported eigengap_type") 113 | if delta > max_delta: 114 | max_delta = delta 115 | max_delta_index = i + 1 # Index i means i+1 clusters 116 | else: 117 | for i in range(1, range_end): 118 | if eigenvalues[i - 1] < stop_eigenvalue: 119 | break 120 | if eigengap_type == EigenGapType.Ratio: 121 | delta = eigenvalues[i - 1] / (eigenvalues[i] + eps) 122 | elif eigengap_type == EigenGapType.NormalizedDiff: 123 | delta = (eigenvalues[i - 1] - eigenvalues[i]) / np.max(eigenvalues) 124 | else: 125 | raise ValueError("Unsupported eigengap_type") 126 | if delta > max_delta: 127 | max_delta = delta 128 | max_delta_index = i 129 | 130 | return max_delta_index, max_delta 131 | 132 | 133 | def enforce_ordered_labels(labels: np.ndarray) -> np.ndarray: 134 | """Transform the label sequence to an ordered form. 135 | 136 | This is the same type of label sequence used in the paper "Discriminative 137 | neural clustering for speaker diarisation". This makes the label sequence 138 | permutation invariant. 139 | 140 | Args: 141 | labels: an array of integers 142 | 143 | Returns: 144 | new_labels: an array of integers, where it starts with 0 and smaller 145 | labels always appear first 146 | """ 147 | new_labels = labels.copy() 148 | max_label = -1 149 | label_map = {} 150 | for element in labels.tolist(): 151 | if element not in label_map: 152 | max_label += 1 153 | label_map[element] = max_label 154 | for key in label_map: 155 | new_labels[labels == key] = label_map[key] 156 | return new_labels 157 | 158 | 159 | def get_cluster_centroids( 160 | embeddings: np.ndarray, labels: np.ndarray 161 | ) -> np.ndarray: 162 | """Get the centroids of each cluster from the embeddings. 163 | 164 | Args: 165 | embeddings: numpy array of shape (n_samples, n_features) 166 | labels: numpy array of shape (n_samples, ) 167 | 168 | Returns: 169 | numpy array of shape (n_clusters, n_features) 170 | """ 171 | n_clusters = max(labels) + 1 172 | centroids = [] 173 | for i in range(n_clusters): 174 | cluster_embeddings = embeddings[labels == i, :] 175 | cluster_centroid = np.mean(cluster_embeddings, axis=0) 176 | centroids.append(cluster_centroid) 177 | return np.stack(centroids) 178 | 179 | 180 | def chain_labels( 181 | pre_labels: typing.Optional[np.ndarray], 182 | main_labels: np.ndarray 183 | ) -> np.ndarray: 184 | """Chain the results with pre-clusterer.
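Each pre-cluster label i is replaced by main_labels[i]. A small
illustrative example (note that the output dtype is float, because the
result is initialized with np.zeros):

>>> chain_labels(np.array([0, 0, 1, 1, 2]), np.array([0, 1, 1]))
array([0., 0., 1., 1., 1.])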
185 | 186 | Args: 187 | pre_labels: labels of pre-clusterer of shape (n_samples, ); if None, 188 | simply return main_labels 189 | main_labels: labels of main clusterer of shape (U1, ) 190 | 191 | Returns: 192 | final labels of shape (n_samples, ) 193 | 194 | ValueError: if main_labels has wrong shape 195 | """ 196 | if pre_labels is None: 197 | return main_labels 198 | U1 = int(max(pre_labels) + 1) 199 | if U1 != main_labels.shape[0]: 200 | raise ValueError( 201 | "pre_labels has {} values while main_labels has {} rows.".format( 202 | U1, main_labels.shape[0])) 203 | final_labels = np.zeros(pre_labels.shape) 204 | for i in range(U1): 205 | final_labels[pre_labels == i] = main_labels[i] 206 | return final_labels 207 | -------------------------------------------------------------------------------- /tests/autotune_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from spectralcluster import autotune 4 | from spectralcluster import laplacian 5 | from spectralcluster import refinement 6 | from spectralcluster import spectral_clusterer 7 | from spectralcluster import utils 8 | import typing 9 | 10 | RefinementName = refinement.RefinementName 11 | ThresholdType = refinement.ThresholdType 12 | LaplacianType = laplacian.LaplacianType 13 | 14 | 15 | class TestAutotune(unittest.TestCase): 16 | """Tests for the AutoTune class.""" 17 | 18 | def test_get_percentile_range(self): 19 | auto_tune = autotune.AutoTune( 20 | p_percentile_min=0.60, 21 | p_percentile_max=0.66, 22 | init_search_step=0.01, 23 | search_level=1) 24 | p_percentile_range = auto_tune.get_percentile_range() 25 | expected = [0.60, 0.61, 0.62, 0.63, 0.64, 0.65, 0.66] 26 | np.testing.assert_allclose( 27 | np.array(p_percentile_range), np.array(expected), atol=0.01) 28 | 29 | def test_update_percentile_range(self): 30 | auto_tune = autotune.AutoTune( 31 | p_percentile_min=0.4, 32 | p_percentile_max=0.9, 33 | init_search_step=0.1, 34 | search_level=1) 35 | p_percentile_range = auto_tune.update_percentile_range(0.5, 0.8, 0.05) 36 | expected = [0.50, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80] 37 | np.testing.assert_allclose( 38 | np.array(p_percentile_range), np.array(expected), atol=0.01) 39 | 40 | def test_6by2matrix_tune(self): 41 | matrix = np.array([ 42 | [1.0, 0.0], 43 | [1.1, 0.1], 44 | [0.0, 1.0], 45 | [0.1, 1.0], 46 | [0.9, -0.1], 47 | [0.0, 1.2], 48 | ]) 49 | refinement_sequence = [RefinementName.RowWiseThreshold] 50 | refinement_options = refinement.RefinementOptions( 51 | thresholding_type=ThresholdType.Percentile, 52 | refinement_sequence=refinement_sequence) 53 | auto_tune = autotune.AutoTune( 54 | p_percentile_min=0.60, 55 | p_percentile_max=0.95, 56 | init_search_step=0.05, 57 | search_level=1) 58 | clusterer = spectral_clusterer.SpectralClusterer( 59 | max_clusters=2, 60 | refinement_options=refinement_options, 61 | autotune=auto_tune, 62 | laplacian_type=LaplacianType.GraphCut, 63 | row_wise_renorm=True) 64 | 65 | affinity = utils.compute_affinity_matrix(matrix) 66 | 67 | def p_percentile_to_ratio(p_percentile: float) -> ( 68 | typing.Tuple[float, np.ndarray, int]): 69 | """compute the `ratio` given a `p_percentile` value.""" 70 | clusterer.refinement_options.p_percentile = p_percentile 71 | (eigenvectors, n_clusters, 72 | max_delta_norm) = clusterer._compute_eigenvectors_ncluster(affinity) 73 | ratio = (1 - p_percentile) / max_delta_norm 74 | return ratio, eigenvectors, n_clusters 75 | 76 | eigenvectors, n_clusters, p_percentile = 
clusterer.autotune.tune( 77 | p_percentile_to_ratio) 78 | 79 | self.assertEqual((6, 6), eigenvectors.shape) 80 | self.assertEqual(n_clusters, 2) 81 | self.assertEqual(p_percentile, 0.6) 82 | 83 | 84 | if __name__ == "__main__": 85 | unittest.main() 86 | -------------------------------------------------------------------------------- /tests/configs_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from spectralcluster import configs 5 | from spectralcluster import constraint 6 | from spectralcluster import utils 7 | 8 | 9 | class Icassp2018Test(unittest.TestCase): 10 | """Tests for ICASSP 2018 configs.""" 11 | 12 | def test_1000by6_matrix(self): 13 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 400 + 14 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 300 + 15 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 200 + 16 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 100) 17 | noisy = np.random.rand(1000, 6) * 2 - 1 18 | matrix = matrix + noisy * 0.1 19 | labels = configs.icassp2018_clusterer.predict(matrix) 20 | labels = utils.enforce_ordered_labels(labels) 21 | expected = np.array([0] * 400 + [1] * 300 + [2] * 200 + [3] * 100) 22 | np.testing.assert_equal(expected, labels) 23 | 24 | 25 | class TurnToDiarizeTest(unittest.TestCase): 26 | """Tests for Turn-To-Diarize system configs.""" 27 | 28 | def test_6by2_matrix(self): 29 | matrix = np.array([ 30 | [1.0, 0.0], 31 | [1.1, 0.1], 32 | [0.0, 1.0], 33 | [0.1, 1.0], 34 | [0.9, -0.1], 35 | [0.0, 1.2], 36 | ]) 37 | speaker_turn_scores = [0, 0, 1.5, 0, 1.5, 1.5] 38 | constraint_matrix = constraint.ConstraintMatrix( 39 | speaker_turn_scores, threshold=1).compute_diagonals() 40 | labels = configs.turntodiarize_clusterer.predict( 41 | matrix, constraint_matrix) 42 | labels = utils.enforce_ordered_labels(labels) 43 | expected = np.array([0, 0, 1, 1, 0, 1]) 44 | np.testing.assert_equal(expected, labels) 45 | 46 | def test_6by2_matrix_no_constraint(self): 47 | matrix = np.array([ 48 | [1.0, 0.0], 49 | [1.1, 0.1], 50 | [0.0, 1.0], 51 | [0.1, 1.0], 52 | [0.9, -0.1], 53 | [0.0, 1.2], 54 | ]) 55 | labels = configs.turntodiarize_clusterer.predict( 56 | matrix, constraint_matrix=None) 57 | labels = utils.enforce_ordered_labels(labels) 58 | expected = np.array([0, 0, 1, 1, 0, 1]) 59 | np.testing.assert_equal(expected, labels) 60 | 61 | 62 | if __name__ == "__main__": 63 | unittest.main() 64 | -------------------------------------------------------------------------------- /tests/constraint_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from spectralcluster import constraint 4 | 5 | IntegrationType = constraint.IntegrationType 6 | 7 | 8 | class TestAffinityIntegration(unittest.TestCase): 9 | """Tests for the AffinityIntegration class.""" 10 | 11 | def test_3by3_matrix(self): 12 | affinity = np.array([[1, 0.25, 0], [0.31, 1, 0], [0, 0, 1]]) 13 | constraint_matrix = np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]]) 14 | adjusted_affinity = constraint.AffinityIntegration( 15 | integration_type=IntegrationType.Max).adjust_affinity( 16 | affinity, constraint_matrix) 17 | expected = np.array([[1, 1, 0], [1, 1, 0], [0, 0, 1]]) 18 | np.testing.assert_allclose( 19 | np.array(adjusted_affinity), np.array(expected), atol=0.01) 20 | 21 | 22 | class TestConstraintPropagation(unittest.TestCase): 23 | """Tests for the ConstraintPropagation class.""" 24 | 25 | def test_3by3_matrix(self): 26 | affinity = np.array([[1, 0.25, 0], [0.31, 
1, 0], [0, 0, 1]]) 27 | constraint_matrix = np.array([[1, 1, 0], [1, 1, 0], [0, 0, 0]]) 28 | adjusted_affinity = constraint.ConstraintPropagation( 29 | alpha=0.6).adjust_affinity(affinity, constraint_matrix) 30 | expected = np.array([[1, 0.97, 0], [1.03, 1, 0], [0, 0, 1]]) 31 | np.testing.assert_allclose( 32 | np.array(adjusted_affinity), np.array(expected), atol=0.01) 33 | 34 | 35 | class TestConstraintMatrix(unittest.TestCase): 36 | """Tests for the ConstraintMatrix class.""" 37 | 38 | def test_3by3_constraint_matrix(self): 39 | speaker_turn_scores = [0, 0, 14.308253288269043] 40 | constraint_matrix = constraint.ConstraintMatrix( 41 | speaker_turn_scores, threshold=1).compute_diagonals() 42 | expected = np.array([[0, 1, 0], [1, 0, -1], [0, -1, 0]]) 43 | np.testing.assert_equal(constraint_matrix, expected) 44 | 45 | def test_3by3_constraint_matrix_threshold(self): 46 | speaker_turn_scores = [0, 0, 0.12095779925584793] 47 | constraint_matrix = constraint.ConstraintMatrix( 48 | speaker_turn_scores, threshold=1).compute_diagonals() 49 | expected = np.array([[0, 1, 0], [1, 0, 0], [0, 0, 0]]) 50 | np.testing.assert_equal(constraint_matrix, expected) 51 | 52 | 53 | if __name__ == "__main__": 54 | unittest.main() 55 | -------------------------------------------------------------------------------- /tests/custom_distance_kmeans_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from spectralcluster import custom_distance_kmeans 4 | from spectralcluster import utils 5 | 6 | 7 | class TestCustomDistanceKmeans(unittest.TestCase): 8 | """Tests for the run_kmeans function with the CustomKMeans class.""" 9 | 10 | def setUp(self): 11 | super().setUp() 12 | pass 13 | 14 | def test_6by2_matrix_cosine_dist(self): 15 | matrix = np.array([ 16 | [1.0, 0.0], 17 | [1.1, 0.1], 18 | [0.0, 1.0], 19 | [0.1, 1.0], 20 | [0.9, -0.1], 21 | [0.0, 1.2], 22 | ]) 23 | 24 | labels = custom_distance_kmeans.run_kmeans( 25 | matrix, n_clusters=2, max_iter=300, custom_dist="cosine") 26 | labels = utils.enforce_ordered_labels(labels) 27 | expected = np.array([0, 0, 1, 1, 0, 1]) 28 | np.testing.assert_equal(expected, labels) 29 | 30 | def test_6by2_matrix_euclidean_dist(self): 31 | matrix = np.array([ 32 | [1.0, 0.0], 33 | [1.1, 0.1], 34 | [0.0, 1.0], 35 | [0.1, 1.0], 36 | [0.9, -0.1], 37 | [0.0, 1.2], 38 | ]) 39 | 40 | labels = custom_distance_kmeans.run_kmeans( 41 | matrix, n_clusters=2, max_iter=300, custom_dist="euclidean") 42 | labels = utils.enforce_ordered_labels(labels) 43 | expected = np.array([0, 0, 1, 1, 0, 1]) 44 | np.testing.assert_equal(expected, labels) 45 | 46 | def test_1000by6_matrix_cosine_dist(self): 47 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 400 + 48 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 300 + 49 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 200 + 50 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 100) 51 | noisy = np.random.rand(1000, 6) * 2 - 1 52 | matrix = matrix + noisy * 0.1 53 | 54 | labels = custom_distance_kmeans.run_kmeans( 55 | matrix, n_clusters=4, max_iter=300, custom_dist="cosine") 56 | labels = utils.enforce_ordered_labels(labels) 57 | expected = np.array([0] * 400 + [1] * 300 + [2] * 200 + [3] * 100) 58 | np.testing.assert_equal(expected, labels) 59 | 60 | def test_1000by6_matrix_euclidean_dist(self): 61 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 400 + 62 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 300 + 63 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 200 + 64 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 100) 65 | noisy = 
np.random.rand(1000, 6) * 2 - 1 66 | matrix = matrix + noisy * 0.1 67 | 68 | labels = custom_distance_kmeans.run_kmeans( 69 | matrix, n_clusters=4, max_iter=300, custom_dist="euclidean") 70 | labels = utils.enforce_ordered_labels(labels) 71 | expected = np.array([0] * 400 + [1] * 300 + [2] * 200 + [3] * 100) 72 | np.testing.assert_equal(expected, labels) 73 | 74 | 75 | if __name__ == "__main__": 76 | unittest.main() 77 | -------------------------------------------------------------------------------- /tests/fallback_clusterer_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from spectralcluster import fallback_clusterer 4 | from spectralcluster import utils 5 | 6 | FallbackOptions = fallback_clusterer.FallbackOptions 7 | FallbackClusterer = fallback_clusterer.FallbackClusterer 8 | SingleClusterCondition = fallback_clusterer.SingleClusterCondition 9 | FallbackClustererType = fallback_clusterer.FallbackClustererType 10 | 11 | 12 | class TestFallbackClusterer(unittest.TestCase): 13 | 14 | def setUp(self): 15 | super().setUp() 16 | pass 17 | 18 | def test_6by2_matrix_naive(self): 19 | matrix = np.array([ 20 | [1.0, 0.0], 21 | [1.1, 0.1], 22 | [0.0, 1.0], 23 | [0.1, 1.0], 24 | [0.9, -0.1], 25 | [0.0, 1.2], 26 | ]) 27 | options = FallbackOptions( 28 | fallback_clusterer_type=FallbackClustererType.Naive, 29 | naive_threshold=0.5, 30 | ) 31 | clusterer = FallbackClusterer(options) 32 | labels = clusterer.predict(matrix) 33 | labels = utils.enforce_ordered_labels(labels) 34 | expected = np.array([0, 0, 1, 1, 0, 1]) 35 | np.testing.assert_equal(expected, labels) 36 | 37 | def test_6by2_matrix_agglomerative(self): 38 | matrix = np.array([ 39 | [1.0, 0.0], 40 | [1.1, 0.1], 41 | [0.0, 1.0], 42 | [0.1, 1.0], 43 | [0.9, -0.1], 44 | [0.0, 1.2], 45 | ]) 46 | options = FallbackOptions( 47 | fallback_clusterer_type=FallbackClustererType.Agglomerative, 48 | agglomerative_threshold=0.5, 49 | ) 50 | clusterer = FallbackClusterer(options) 51 | labels = clusterer.predict(matrix) 52 | labels = utils.enforce_ordered_labels(labels) 53 | expected = np.array([0, 0, 1, 1, 0, 1]) 54 | np.testing.assert_equal(expected, labels) 55 | 56 | 57 | class TestCheckSingleCluster(unittest.TestCase): 58 | 59 | def test_affinity_gmm_bic1(self): 60 | affinity = np.array([[1, 0.999, 1.001], [0.999, 1, 1], [1.001, 1, 1]]) 61 | fallback_options = fallback_clusterer.FallbackOptions( 62 | single_cluster_condition=SingleClusterCondition.AffinityGmmBic) 63 | self.assertTrue( 64 | fallback_clusterer.check_single_cluster( 65 | fallback_options, None, affinity)) 66 | 67 | def test_affinity_gmm_bic2(self): 68 | affinity = np.array([[1, 2, 2], [2, 1, 1], [2, 1, 1]]) 69 | fallback_options = fallback_clusterer.FallbackOptions( 70 | single_cluster_condition=SingleClusterCondition.AffinityGmmBic) 71 | self.assertFalse( 72 | fallback_clusterer.check_single_cluster( 73 | fallback_options, None, affinity)) 74 | 75 | 76 | if __name__ == "__main__": 77 | unittest.main() 78 | -------------------------------------------------------------------------------- /tests/laplacian_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from spectralcluster import laplacian 5 | from spectralcluster import utils 6 | 7 | LaplacianType = laplacian.LaplacianType 8 | 9 | 10 | class TestComputeLaplacian(unittest.TestCase): 11 | """Tests for the compute_laplacian function.""" 12 | 13 | def 
test_affinity(self): 14 | matrix = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]]) 15 | affinity = utils.compute_affinity_matrix(matrix) 16 | result = laplacian.compute_laplacian( 17 | affinity, laplacian_type=LaplacianType.Affinity) 18 | np.testing.assert_equal(affinity, result) 19 | 20 | def test_laplacian(self): 21 | matrix = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]]) 22 | affinity = utils.compute_affinity_matrix(matrix) 23 | laplacian_matrix = laplacian.compute_laplacian( 24 | affinity, laplacian_type=LaplacianType.Unnormalized) 25 | expected = np.array([[1.5, -0.5, -1, 0], [-0.5, 1.5, -0.5, -0.5], 26 | [-1, -0.5, 1.5, 0], [0, -0.5, 0, 0.5]]) 27 | np.testing.assert_equal(expected, laplacian_matrix) 28 | 29 | def test_normalized_laplacian(self): 30 | matrix = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]]) 31 | affinity = utils.compute_affinity_matrix(matrix) 32 | laplacian_norm = laplacian.compute_laplacian( 33 | affinity, laplacian_type=LaplacianType.GraphCut) 34 | expected = np.array([[0.6, -0.2, -0.4, 0], [-0.2, 0.6, -0.2, -0.26], 35 | [-0.4, -0.2, 0.6, 0], [0, -0.26, 0, 0.33]]) 36 | np.testing.assert_allclose(expected, laplacian_norm, atol=0.01) 37 | 38 | def test_random_walk_normalized_laplacian(self): 39 | matrix = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]]) 40 | affinity = utils.compute_affinity_matrix(matrix) 41 | laplacian_norm = laplacian.compute_laplacian( 42 | affinity, laplacian_type=LaplacianType.RandomWalk) 43 | expected = np.array([[0.6, -0.2, -0.4, 0], [-0.2, 0.6, -0.2, -0.2], 44 | [-0.4, -0.2, 0.6, 0], [0, -0.33, 0, 0.33]]) 45 | np.testing.assert_allclose(expected, laplacian_norm, atol=0.01) 46 | 47 | 48 | if __name__ == "__main__": 49 | unittest.main() 50 | -------------------------------------------------------------------------------- /tests/multi_stage_clusterer_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from spectralcluster import configs 5 | from spectralcluster import multi_stage_clusterer 6 | from spectralcluster import refinement 7 | from spectralcluster import spectral_clusterer 8 | from spectralcluster import utils 9 | 10 | 11 | class TestMatchLabels(unittest.TestCase): 12 | """Tests for the match_labels function.""" 13 | def test_short(self): 14 | current = np.array([1, 0]) 15 | previous = np.array([0]) 16 | expected = np.array([0, 1]) 17 | np.testing.assert_equal(expected, multi_stage_clusterer.match_labels( 18 | current, previous)) 19 | 20 | def test_current_more(self): 21 | current = np.array([0, 1, 2, 3, 4, 5]) 22 | previous = np.array([0, 0, 0, 1, 2]) 23 | expected = np.array([0, 3, 4, 1, 2, 5]) 24 | np.testing.assert_equal(expected, multi_stage_clusterer.match_labels( 25 | current, previous)) 26 | 27 | def test_previous_more(self): 28 | current = np.array([0, 0, 0, 1, 1, 1, 2, 2]) 29 | previous = np.array([0, 0, 1, 2, 2, 3, 4]) 30 | expected = np.array([0, 0, 0, 2, 2, 2, 4, 4]) 31 | np.testing.assert_equal(expected, multi_stage_clusterer.match_labels( 32 | current, previous)) 33 | 34 | def test_medium(self): 35 | current = np.array([1, 1, 1, 0, 0, 1]) 36 | previous = np.array([0, 0, 0, 1, 1]) 37 | expected = np.array([0, 0, 0, 1, 1, 0]) 38 | np.testing.assert_equal(expected, multi_stage_clusterer.match_labels( 39 | current, previous)) 40 | 41 | def test_medium_new_speaker(self): 42 | current = np.array([1, 1, 1, 0, 0, 2]) 43 | previous = np.array([0, 0, 0, 1, 1]) 44 | expected = np.array([0, 0, 0, 1, 1, 2]) 45 | np.testing.assert_equal(expected, 
multi_stage_clusterer.match_labels( 46 | current, previous)) 47 | 48 | def test_medium_no_order_based(self): 49 | current = np.array([0, 1, 1, 0, 0, 2]) 50 | previous = np.array([0, 0, 0, 1, 1]) 51 | expected = np.array([1, 0, 0, 1, 1, 2]) 52 | np.testing.assert_equal(expected, multi_stage_clusterer.match_labels( 53 | current, previous)) 54 | 55 | def test_long(self): 56 | current = np.array([0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5]) 57 | previous = np.array([0, 0, 3, 3, 1, 1, 4, 4, 5, 5, 2]) 58 | expected = np.array([0, 0, 3, 3, 1, 1, 4, 4, 5, 5, 2, 2]) 59 | np.testing.assert_equal(expected, multi_stage_clusterer.match_labels( 60 | current, previous)) 61 | 62 | 63 | class TestMultiStageClusterer(unittest.TestCase): 64 | """Tests for the MultiStageClusterer class.""" 65 | 66 | def setUp(self): 67 | super().setUp() 68 | refinement_options = refinement.RefinementOptions( 69 | gaussian_blur_sigma=0, 70 | p_percentile=0.95, 71 | refinement_sequence=configs.ICASSP2018_REFINEMENT_SEQUENCE) 72 | main_clusterer = spectral_clusterer.SpectralClusterer( 73 | refinement_options=refinement_options) 74 | self.multi_stage = multi_stage_clusterer.MultiStageClusterer( 75 | main_clusterer=main_clusterer, 76 | fallback_threshold=0.5, 77 | L=3, 78 | U1=5, 79 | U2=7 80 | ) 81 | 82 | def test_single_input(self): 83 | embedding = np.array([[1, 2]]) 84 | labels = self.multi_stage.streaming_predict(embedding) 85 | expected = np.array([0]) 86 | np.testing.assert_equal(expected, labels) 87 | 88 | def test_fallback(self): 89 | embeddings = [[1, 2], [3, -1]] 90 | for embedding in embeddings: 91 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 92 | labels = utils.enforce_ordered_labels(labels) 93 | expected = np.array([0, 1]) 94 | np.testing.assert_equal(expected, labels) 95 | 96 | def test_main(self): 97 | embeddings = [ 98 | [1, 2], 99 | [3, -1], 100 | [1, 1], 101 | [-2, -1], 102 | ] 103 | for embedding in embeddings: 104 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 105 | labels = utils.enforce_ordered_labels(labels) 106 | expected = np.array([0, 0, 0, 1]) 107 | np.testing.assert_equal(expected, labels) 108 | 109 | def test_pre(self): 110 | embeddings = [ 111 | [1, 2], 112 | [3, -1], 113 | [1, 1], 114 | [-2, -1], 115 | [0, 1], 116 | [-2, 0], 117 | ] 118 | for embedding in embeddings: 119 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 120 | labels = utils.enforce_ordered_labels(labels) 121 | expected = np.array([0, 1, 0, 2, 3, 2]) 122 | np.testing.assert_equal(expected, labels) 123 | 124 | def test_compression(self): 125 | embeddings = [ 126 | [1, 2], 127 | [3, -1], 128 | [1, 1], 129 | [-2, -1], 130 | [0, 1], 131 | [-2, 0], 132 | [1, 2], 133 | [3, -1], 134 | ] 135 | for embedding in embeddings: 136 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 137 | labels = utils.enforce_ordered_labels(labels) 138 | expected = np.array([0, 1, 0, 2, 3, 2, 0, 1]) 139 | np.testing.assert_equal(expected, labels) 140 | 141 | def test_compression_order_based_deflicker(self): 142 | embeddings = [ 143 | [1, 2], 144 | [3, -1], 145 | [1, 1], 146 | [-2, -1], 147 | [0, 1], 148 | [-2, 0], 149 | [1, 2], 150 | [3, -1], 151 | ] 152 | self.multi_stage.deflicker = multi_stage_clusterer.Deflicker.OrderBased 153 | for embedding in embeddings: 154 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 155 | labels = utils.enforce_ordered_labels(labels) 156 | expected = np.array([0, 1, 0, 2, 3, 2, 0, 1]) 157 | np.testing.assert_equal(expected, labels) 158 | 159 | def 
test_compression_hungarian_deflicker(self): 160 | embeddings = [ 161 | [1, 2], 162 | [3, -1], 163 | [1, 1], 164 | [-2, -1], 165 | [0, 1], 166 | [-2, 0], 167 | [1, 2], 168 | [3, -1], 169 | ] 170 | self.multi_stage.deflicker = multi_stage_clusterer.Deflicker.Hungarian 171 | for embedding in embeddings: 172 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 173 | labels = utils.enforce_ordered_labels(labels) 174 | expected = np.array([0, 1, 0, 2, 3, 2, 0, 1]) 175 | np.testing.assert_equal(expected, labels) 176 | 177 | def test_double_compression(self): 178 | embeddings = [ 179 | [1, 2], 180 | [3, -1], 181 | [1, 1], 182 | [-2, -1], 183 | [0, 1], 184 | [-2, 0], 185 | [1, 2], 186 | [3, -1], 187 | [1, 1], 188 | [-2, -1], 189 | ] 190 | for embedding in embeddings: 191 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 192 | labels = utils.enforce_ordered_labels(labels) 193 | expected = np.array([0, 1, 0, 2, 3, 2, 0, 1, 0, 2]) 194 | np.testing.assert_equal(expected, labels) 195 | 196 | def test_many_compression(self): 197 | embeddings = [ 198 | [1, 2], 199 | [3, -1], 200 | [1, 1], 201 | [-2, -1], 202 | [0, 1], 203 | [-2, 0], 204 | [1, 2], 205 | [3, -1], 206 | [1, 1], 207 | [-2, -1], 208 | [0, 1], 209 | [-2, 0], 210 | [1, 2], 211 | [3, -1], 212 | [1, 1], 213 | [-2, -1], 214 | ] 215 | for embedding in embeddings: 216 | labels = self.multi_stage.streaming_predict(np.array(embedding)) 217 | labels = utils.enforce_ordered_labels(labels) 218 | expected = np.array([0, 1, 0, 2, 3, 2, 0, 1, 0, 2, 3, 2, 0, 1, 0, 2]) 219 | np.testing.assert_equal(expected, labels) 220 | 221 | def test_1000by6_matrix(self): 222 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 100 + 223 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 200 + 224 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 300 + 225 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 400) 226 | noisy = np.random.rand(1000, 6) * 2 - 1 227 | matrix = matrix + noisy * 0.02 228 | refinement_options = refinement.RefinementOptions( 229 | gaussian_blur_sigma=0, 230 | p_percentile=0.2, 231 | refinement_sequence=configs.ICASSP2018_REFINEMENT_SEQUENCE) 232 | main_clusterer = spectral_clusterer.SpectralClusterer( 233 | refinement_options=refinement_options, stop_eigenvalue=0.01) 234 | multi_stage = multi_stage_clusterer.MultiStageClusterer( 235 | main_clusterer=main_clusterer, 236 | fallback_threshold=0.5, 237 | L=50, 238 | U1=200, 239 | U2=400 240 | ) 241 | for embedding in matrix: 242 | labels = multi_stage.streaming_predict(embedding) 243 | labels = utils.enforce_ordered_labels(labels) 244 | expected = np.array([0] * 100 + [1] * 200 + [2] * 300 + [3] * 400) 245 | np.testing.assert_equal(expected, labels) 246 | 247 | 248 | if __name__ == "__main__": 249 | unittest.main() 250 | -------------------------------------------------------------------------------- /tests/naive_clusterer_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from spectralcluster import naive_clusterer 4 | from spectralcluster import utils 5 | 6 | NaiveClusterer = naive_clusterer.NaiveClusterer 7 | 8 | 9 | class TestNaiveClusterer(unittest.TestCase): 10 | 11 | def setUp(self): 12 | super().setUp() 13 | pass 14 | 15 | def test_6by2_matrix(self): 16 | matrix = np.array([ 17 | [1.0, 0.0], 18 | [1.1, 0.1], 19 | [0.0, 1.0], 20 | [0.1, 1.0], 21 | [0.9, -0.1], 22 | [0.0, 1.2], 23 | ]) 24 | clusterer = NaiveClusterer(threshold=0.5) 25 | labels = clusterer.predict(matrix) 26 | labels = utils.enforce_ordered_labels(labels) 
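# enforce_ordered_labels relabels clusters in order of first appearance, so the expected array below is permutation-invariant.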
27 | expected = np.array([0, 0, 1, 1, 0, 1]) 28 | np.testing.assert_equal(expected, labels) 29 | 30 | label = clusterer.predict_next(np.array([1.2, -0.1])) 31 | self.assertEqual(0, label) 32 | 33 | label = clusterer.predict_next(np.array([-0.1, 0.8])) 34 | self.assertEqual(1, label) 35 | 36 | clusterer.reset() 37 | label = clusterer.predict_next(np.array([-0.1, 0.8])) 38 | self.assertEqual(0, label) 39 | 40 | def test_adaptation(self): 41 | clusterer = NaiveClusterer(threshold=0.5, adaptation_threshold=1.0) 42 | label = clusterer.predict_next(np.array([1.2, -0.1])) 43 | self.assertEqual(0, label) 44 | self.assertEqual(1, clusterer.centroids[0].count) 45 | 46 | # adaptation_threshold is too big, won't adapt. 47 | label = clusterer.predict_next(np.array([1.3, 0.2])) 48 | self.assertEqual(0, label) 49 | self.assertEqual(1, clusterer.centroids[0].count) 50 | 51 | # adaptation_threshold is small, will adapt. 52 | clusterer.adaptation_threshold = 0.5 53 | label = clusterer.predict_next(np.array([1.3, 0.2])) 54 | self.assertEqual(0, label) 55 | self.assertEqual(2, clusterer.centroids[0].count) 56 | 57 | 58 | if __name__ == "__main__": 59 | unittest.main() 60 | -------------------------------------------------------------------------------- /tests/refinement_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from spectralcluster import refinement 4 | 5 | ThresholdType = refinement.ThresholdType 6 | SymmetrizeType = refinement.SymmetrizeType 7 | 8 | 9 | class TestCropDiagonal(unittest.TestCase): 10 | """Tests for the CropDiagonal class.""" 11 | 12 | def test_3by3_matrix(self): 13 | matrix = np.array([[1, 2, 3], [3, 4, 5], [4, 2, 1]]) 14 | adjusted_matrix = refinement.CropDiagonal().refine(matrix) 15 | expected = np.array([[3, 2, 3], [3, 5, 5], [4, 2, 4]]) 16 | np.testing.assert_equal(expected, adjusted_matrix) 17 | 18 | 19 | class TestGaussianBlur(unittest.TestCase): 20 | """Tests for the GaussianBlur class.""" 21 | 22 | def test_3by3_matrix(self): 23 | matrix = np.array([[1.0, 2.0, 3.0], [3.0, 4.0, 5.0], [4.0, 2.0, 1.0]]) 24 | adjusted_matrix = refinement.GaussianBlur(sigma=1).refine(matrix) 25 | expected = np.array([[2.12, 2.61, 3.10], [2.76, 2.90, 3.06], 26 | [3.16, 2.78, 2.46]]) 27 | np.testing.assert_allclose(expected, adjusted_matrix, atol=0.01) 28 | 29 | 30 | class TestRowWiseThreshold(unittest.TestCase): 31 | """Tests for the RowWiseThreshold class.""" 32 | 33 | def test_3by3_matrix_percentile(self): 34 | matrix = np.array([[0.5, 2.0, 3.0], [3.0, 4.0, 5.0], [4.0, 2.0, 1.0]]) 35 | adjusted_matrix = refinement.RowWiseThreshold( 36 | p_percentile=0.5, 37 | thresholding_soft_multiplier=0.01, 38 | thresholding_type=ThresholdType.Percentile).refine(matrix) 39 | expected = np.array([[0.005, 2.0, 3.0], [0.03, 4.0, 5.0], [4.0, 2.0, 0.01]]) 40 | self.assertTrue(np.allclose(expected, adjusted_matrix, atol=0.001)) 41 | 42 | def test_3by3_matrix_row_max(self): 43 | matrix = np.array([[0.5, 2.0, 3.0], [3.0, 4.0, 5.0], [4.0, 2.0, 1.0]]) 44 | adjusted_matrix = refinement.RowWiseThreshold( 45 | p_percentile=0.5, 46 | thresholding_soft_multiplier=0.01, 47 | thresholding_type=ThresholdType.RowMax).refine(matrix) 48 | expected = np.array([[0.005, 2.0, 3.0], [3.0, 4.0, 5.0], [4.0, 2.0, 0.01]]) 49 | np.testing.assert_allclose(expected, adjusted_matrix, atol=0.001) 50 | 51 | def test_3by3_matrix_binarization(self): 52 | matrix = np.array([[0.5, 2.0, 3.0], [3.0, 4.0, 5.0], [4.0, 2.0, 1.0]]) 53 | adjusted_matrix = 
refinement.RowWiseThreshold( 54 | p_percentile=0.5, 55 | thresholding_soft_multiplier=0.01, 56 | thresholding_type=ThresholdType.RowMax, 57 | thresholding_with_binarization=True).refine(matrix) 58 | expected = np.array([[0.005, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 0.01]]) 59 | np.testing.assert_allclose(expected, adjusted_matrix, atol=0.001) 60 | 61 | def test_3by3_matrix_preserve_diagonal(self): 62 | matrix = np.array([[0.5, 2.0, 3.0], [3.0, 4.0, 5.0], [4.0, 2.0, 1.0]]) 63 | adjusted_matrix = refinement.RowWiseThreshold( 64 | p_percentile=0.5, 65 | thresholding_soft_multiplier=0.01, 66 | thresholding_type=ThresholdType.RowMax, 67 | thresholding_with_binarization=True, 68 | thresholding_preserve_diagonal=True).refine(matrix) 69 | expected = np.array([[1.0, 1.0, 1.0], [1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]) 70 | np.testing.assert_allclose(expected, adjusted_matrix, atol=0.001) 71 | 72 | 73 | class TestSymmetrize(unittest.TestCase): 74 | """Tests for the Symmetrize class.""" 75 | 76 | def test_3by3_matrix(self): 77 | matrix = np.array([[1, 2, 3], [3, 4, 5], [4, 2, 1]]) 78 | adjusted_matrix = refinement.Symmetrize().refine(matrix) 79 | expected = np.array([[1, 3, 4], [3, 4, 5], [4, 5, 1]]) 80 | np.testing.assert_equal(expected, adjusted_matrix) 81 | 82 | def test_3by3_matrix_symmetrize_average(self): 83 | matrix = np.array([[1, 2, 3], [3, 4, 5], [4, 2, 1]]) 84 | adjusted_matrix = refinement.Symmetrize( 85 | symmetrize_type=SymmetrizeType.Average).refine(matrix) 86 | expected = np.array([[1, 2.5, 3.5], [2.5, 4, 3.5], [3.5, 3.5, 1]]) 87 | np.testing.assert_equal(expected, adjusted_matrix) 88 | 89 | 90 | class TestDiffuse(unittest.TestCase): 91 | """Tests for the Diffuse class.""" 92 | 93 | def test_2by2_matrix(self): 94 | matrix = np.array([[1, 2], [3, 4]]) 95 | adjusted_matrix = refinement.Diffuse().refine(matrix) 96 | expected = np.array([[5, 11], [11, 25]]) 97 | np.testing.assert_equal(expected, adjusted_matrix) 98 | 99 | 100 | class TestRowWiseNormalize(unittest.TestCase): 101 | """Tests for the RowWiseNormalize class.""" 102 | 103 | def test_3by3_matrix(self): 104 | matrix = np.array([[0.5, 2.0, 3.0], [3.0, 4.0, 5.0], [4.0, 2.0, 1.0]]) 105 | adjusted_matrix = refinement.RowWiseNormalize().refine(matrix) 106 | expected = np.array([[0.167, 0.667, 1.0], [0.6, 0.8, 1.0], [1.0, 0.5, 107 | 0.25]]) 108 | np.testing.assert_allclose(expected, adjusted_matrix, atol=0.001) 109 | 110 | 111 | if __name__ == "__main__": 112 | unittest.main() 113 | -------------------------------------------------------------------------------- /tests/spectral_clusterer_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from spectralcluster import autotune 4 | from spectralcluster import configs 5 | from spectralcluster import constraint 6 | from spectralcluster import fallback_clusterer 7 | from spectralcluster import laplacian 8 | from spectralcluster import refinement 9 | from spectralcluster import spectral_clusterer 10 | from spectralcluster import utils 11 | 12 | AutoTuneProxy = autotune.AutoTuneProxy 13 | RefinementName = refinement.RefinementName 14 | ThresholdType = refinement.ThresholdType 15 | SymmetrizeType = refinement.SymmetrizeType 16 | LaplacianType = laplacian.LaplacianType 17 | ConstraintName = constraint.ConstraintName 18 | IntegrationType = constraint.IntegrationType 19 | EigenGapType = utils.EigenGapType 20 | FallbackOptions = fallback_clusterer.FallbackOptions 21 | SingleClusterCondition = 
fallback_clusterer.SingleClusterCondition 22 | FallbackClustererType = fallback_clusterer.FallbackClustererType 23 | ICASSP2018_REFINEMENT_SEQUENCE = configs.ICASSP2018_REFINEMENT_SEQUENCE 24 | 25 | 26 | class TestSpectralClusterer(unittest.TestCase): 27 | """Tests for the SpectralClusterer class.""" 28 | 29 | def setUp(self): 30 | super().setUp() 31 | pass 32 | 33 | def test_6by2_matrix(self): 34 | matrix = np.array([ 35 | [1.0, 0.0], 36 | [1.1, 0.1], 37 | [0.0, 1.0], 38 | [0.1, 1.0], 39 | [0.9, -0.1], 40 | [0.0, 1.2], 41 | ]) 42 | refinement_options = refinement.RefinementOptions( 43 | gaussian_blur_sigma=0, 44 | p_percentile=0.95, 45 | refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) 46 | clusterer = spectral_clusterer.SpectralClusterer( 47 | refinement_options=refinement_options) 48 | labels = clusterer.predict(matrix) 49 | labels = utils.enforce_ordered_labels(labels) 50 | expected = np.array([0, 0, 1, 1, 0, 1]) 51 | np.testing.assert_equal(expected, labels) 52 | 53 | def test_1000by6_matrix(self): 54 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 400 + 55 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 300 + 56 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 200 + 57 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 100) 58 | noisy = np.random.rand(1000, 6) * 2 - 1 59 | matrix = matrix + noisy * 0.1 60 | refinement_options = refinement.RefinementOptions( 61 | gaussian_blur_sigma=0, 62 | p_percentile=0.2, 63 | refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) 64 | clusterer = spectral_clusterer.SpectralClusterer( 65 | refinement_options=refinement_options, stop_eigenvalue=0.01) 66 | labels = clusterer.predict(matrix) 67 | labels = utils.enforce_ordered_labels(labels) 68 | expected = np.array([0] * 400 + [1] * 300 + [2] * 200 + [3] * 100) 69 | np.testing.assert_equal(expected, labels) 70 | 71 | def test_1000by6_matrix_reduce_dimension(self): 72 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 400 + 73 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 300 + 74 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 200 + 75 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 100) 76 | noisy = np.random.rand(1000, 6) * 2 - 1 77 | matrix = matrix + noisy * 0.1 78 | refinement_options = refinement.RefinementOptions( 79 | gaussian_blur_sigma=0, 80 | p_percentile=0.2, 81 | refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) 82 | clusterer = spectral_clusterer.SpectralClusterer( 83 | refinement_options=refinement_options, 84 | stop_eigenvalue=0.01, 85 | max_spectral_size=100) 86 | labels = clusterer.predict(matrix) 87 | labels = utils.enforce_ordered_labels(labels) 88 | expected = np.array([0] * 400 + [1] * 300 + [2] * 200 + [3] * 100) 89 | np.testing.assert_equal(expected, labels) 90 | 91 | def test_6by2_matrix_eigengap_normalizeddiff(self): 92 | matrix = np.array([ 93 | [1.0, 0.0], 94 | [1.1, 0.1], 95 | [0.0, 1.0], 96 | [0.1, 1.0], 97 | [0.9, -0.1], 98 | [0.0, 1.2], 99 | ]) 100 | refinement_options = refinement.RefinementOptions( 101 | gaussian_blur_sigma=0, 102 | p_percentile=0.95, 103 | refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) 104 | clusterer = spectral_clusterer.SpectralClusterer( 105 | refinement_options=refinement_options, 106 | eigengap_type=EigenGapType.NormalizedDiff) 107 | labels = clusterer.predict(matrix) 108 | labels = utils.enforce_ordered_labels(labels) 109 | expected = np.array([0, 0, 1, 1, 0, 1]) 110 | np.testing.assert_equal(expected, labels) 111 | 112 | def test_6by2_matrix_normalized_laplacian(self): 113 | matrix = np.array([ 114 | [1.0, 0.0], 115 | [1.1, 0.1], 116 | [0.0, 1.0], 117 | [0.1, 1.0], 118 | [0.9, -0.1], 119 | 
[0.0, 1.2], 120 | ]) 121 | 122 | refinement_sequence = [] 123 | refinement_options = refinement.RefinementOptions( 124 | p_percentile=0.95, refinement_sequence=refinement_sequence) 125 | clusterer = spectral_clusterer.SpectralClusterer( 126 | max_clusters=2, 127 | refinement_options=refinement_options, 128 | laplacian_type=LaplacianType.GraphCut, 129 | row_wise_renorm=True) 130 | labels = clusterer.predict(matrix) 131 | labels = utils.enforce_ordered_labels(labels) 132 | expected = np.array([0, 0, 1, 1, 0, 1]) 133 | np.testing.assert_equal(expected, labels) 134 | 135 | def test_1000by6_matrix_normalized_laplacian(self): 136 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 400 + 137 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 300 + 138 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 200 + 139 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 100) 140 | noisy = np.random.rand(1000, 6) * 2 - 1 141 | matrix = matrix + noisy * 0.1 142 | 143 | refinement_sequence = [] 144 | refinement_options = refinement.RefinementOptions( 145 | p_percentile=0.95, refinement_sequence=refinement_sequence) 146 | clusterer = spectral_clusterer.SpectralClusterer( 147 | max_clusters=4, 148 | refinement_options=refinement_options, 149 | laplacian_type=LaplacianType.GraphCut, 150 | row_wise_renorm=True) 151 | labels = clusterer.predict(matrix) 152 | labels = utils.enforce_ordered_labels(labels) 153 | expected = np.array([0] * 400 + [1] * 300 + [2] * 200 + [3] * 100) 154 | np.testing.assert_equal(expected, labels) 155 | 156 | def test_6by2_matrix_auto_tune(self): 157 | matrix = np.array([ 158 | [1.0, 0.0], 159 | [1.1, 0.1], 160 | [0.0, 1.0], 161 | [0.1, 1.0], 162 | [0.9, -0.1], 163 | [0.0, 1.2], 164 | ]) 165 | 166 | refinement_sequence = [RefinementName.RowWiseThreshold] 167 | refinement_options = refinement.RefinementOptions( 168 | thresholding_type=ThresholdType.Percentile, 169 | refinement_sequence=refinement_sequence) 170 | auto_tune = autotune.AutoTune( 171 | p_percentile_min=0.60, 172 | p_percentile_max=0.95, 173 | init_search_step=0.05, 174 | search_level=1) 175 | clusterer = spectral_clusterer.SpectralClusterer( 176 | max_clusters=2, 177 | refinement_options=refinement_options, 178 | autotune=auto_tune, 179 | laplacian_type=LaplacianType.GraphCut, 180 | row_wise_renorm=True) 181 | labels = clusterer.predict(matrix) 182 | labels = utils.enforce_ordered_labels(labels) 183 | expected = np.array([0, 0, 1, 1, 0, 1]) 184 | np.testing.assert_equal(expected, labels) 185 | 186 | def test_2by2_matrix_auto_tune(self): 187 | matrix = np.array([ 188 | [1.0, 0.0], 189 | [0.0, 1.0], 190 | ]) 191 | refinement_sequence = [RefinementName.RowWiseThreshold] 192 | refinement_options = refinement.RefinementOptions( 193 | thresholding_type=ThresholdType.Percentile, 194 | refinement_sequence=refinement_sequence) 195 | auto_tune = autotune.AutoTune( 196 | p_percentile_min=0.60, 197 | p_percentile_max=0.95, 198 | init_search_step=0.05, 199 | search_level=1, 200 | proxy=AutoTuneProxy.PercentileOverNME) 201 | fallback_options = fallback_clusterer.FallbackOptions( 202 | spectral_min_embeddings=3) 203 | clusterer = spectral_clusterer.SpectralClusterer( 204 | max_clusters=2, 205 | refinement_options=refinement_options, 206 | autotune=auto_tune, 207 | fallback_options=fallback_options, 208 | laplacian_type=LaplacianType.GraphCut, 209 | row_wise_renorm=True) 210 | labels = clusterer.predict(matrix) 211 | labels = utils.enforce_ordered_labels(labels) 212 | expected = np.array([0, 1]) 213 | np.testing.assert_equal(expected, labels) 214 | 215 | def 
test_1000by6_matrix_auto_tune(self): 216 | matrix = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]] * 400 + 217 | [[0.0, 1.0, 0.0, 0.0, 0.0, 0.0]] * 300 + 218 | [[0.0, 0.0, 2.0, 0.0, 0.0, 0.0]] * 200 + 219 | [[0.0, 0.0, 0.0, 1.0, 0.0, 0.0]] * 100) 220 | noisy = np.random.rand(1000, 6) * 2 - 1 221 | matrix = matrix + noisy * 0.1 222 | 223 | refinement_sequence = [RefinementName.RowWiseThreshold] 224 | refinement_options = refinement.RefinementOptions( 225 | thresholding_type=ThresholdType.Percentile, 226 | refinement_sequence=refinement_sequence) 227 | auto_tune = autotune.AutoTune( 228 | p_percentile_min=0.9, 229 | p_percentile_max=0.95, 230 | init_search_step=0.03, 231 | search_level=1) 232 | clusterer = spectral_clusterer.SpectralClusterer( 233 | max_clusters=4, 234 | refinement_options=refinement_options, 235 | autotune=auto_tune, 236 | laplacian_type=LaplacianType.GraphCut, 237 | row_wise_renorm=True) 238 | labels = clusterer.predict(matrix) 239 | labels = utils.enforce_ordered_labels(labels) 240 | expected = np.array([0] * 400 + [1] * 300 + [2] * 200 + [3] * 100) 241 | np.testing.assert_equal(expected, labels) 242 | 243 | def test_6by2_matrix_affinity_integration(self): 244 | matrix = np.array([ 245 | [1.0, 0.0], 246 | [1.1, 0.1], 247 | [0.0, 1.0], 248 | [0.1, 1.0], 249 | [0.9, -0.1], 250 | [0.0, 1.2], 251 | ]) 252 | 253 | constraint_matrix = np.array([ 254 | [1, 0, 0, 0, 0, 0], 255 | [0, 1, 0, 0, 0, 0], 256 | [0, 0, 1, 1, 1, 1], 257 | [0, 0, 1, 1, 1, 1], 258 | [0, 0, 1, 1, 1, 1], 259 | [0, 0, 1, 1, 1, 1], 260 | ]) 261 | 262 | refinement_sequence = [ 263 | RefinementName.RowWiseThreshold, RefinementName.Symmetrize 264 | ] 265 | refinement_options = refinement.RefinementOptions( 266 | p_percentile=0.95, 267 | thresholding_type=ThresholdType.Percentile, 268 | thresholding_with_binarization=True, 269 | thresholding_preserve_diagonal=True, 270 | symmetrize_type=SymmetrizeType.Average, 271 | refinement_sequence=refinement_sequence) 272 | constraint_options = constraint.ConstraintOptions( 273 | constraint_name=ConstraintName.AffinityIntegration, 274 | apply_before_refinement=False, 275 | integration_type=IntegrationType.Max) 276 | clusterer = spectral_clusterer.SpectralClusterer( 277 | max_clusters=2, 278 | refinement_options=refinement_options, 279 | constraint_options=constraint_options, 280 | laplacian_type=LaplacianType.GraphCut, 281 | row_wise_renorm=True) 282 | labels = clusterer.predict(matrix, constraint_matrix) 283 | labels = utils.enforce_ordered_labels(labels) 284 | expected = np.array([0, 0, 1, 1, 1, 1]) 285 | np.testing.assert_equal(expected, labels) 286 | 287 | def test_6by2_matrix_constraint_propagation(self): 288 | matrix = np.array([ 289 | [1.0, 0.0], 290 | [1.1, 0.1], 291 | [0.0, 1.0], 292 | [0.1, 1.0], 293 | [0.9, -0.1], 294 | [0.0, 1.2], 295 | ]) 296 | 297 | constraint_matrix = np.array([ 298 | [1, 1, 0, 0, 0, 0], 299 | [1, 1, 0, 0, 0, 0], 300 | [0, 0, 1, 0, 0, 0], 301 | [0, 0, 0, 1, 0, 0], 302 | [0, 0, 0, 0, 1, -1], 303 | [0, 0, 0, 0, -1, 1], 304 | ]) 305 | refinement_sequence = [ 306 | RefinementName.RowWiseThreshold, RefinementName.Symmetrize 307 | ] 308 | refinement_options = refinement.RefinementOptions( 309 | p_percentile=0.95, 310 | thresholding_type=ThresholdType.Percentile, 311 | thresholding_with_binarization=True, 312 | thresholding_preserve_diagonal=True, 313 | symmetrize_type=SymmetrizeType.Average, 314 | refinement_sequence=refinement_sequence) 315 | constraint_options = constraint.ConstraintOptions( 316 | constraint_name=ConstraintName.ConstraintPropagation, 317 
| apply_before_refinement=True, 318 | constraint_propagation_alpha=0.6) 319 | clusterer = spectral_clusterer.SpectralClusterer( 320 | max_clusters=2, 321 | refinement_options=refinement_options, 322 | constraint_options=constraint_options, 323 | laplacian_type=LaplacianType.GraphCut, 324 | row_wise_renorm=True) 325 | labels = clusterer.predict(matrix, constraint_matrix) 326 | labels = utils.enforce_ordered_labels(labels) 327 | expected = np.array([0, 0, 1, 1, 0, 1]) 328 | np.testing.assert_equal(expected, labels) 329 | 330 | def test_6by2_matrix_single_cluster(self): 331 | matrix = np.array([ 332 | [1.0, 0.0], 333 | [1.1, 0.1], 334 | [1.0, 0.0], 335 | [1.1, 0.0], 336 | [0.9, -0.1], 337 | [1.0, 0.2], 338 | ]) 339 | refinement_options = refinement.RefinementOptions( 340 | gaussian_blur_sigma=0, 341 | p_percentile=0.95, 342 | refinement_sequence=ICASSP2018_REFINEMENT_SEQUENCE) 343 | clusterer = spectral_clusterer.SpectralClusterer( 344 | min_clusters=1, 345 | refinement_options=refinement_options) 346 | labels = clusterer.predict(matrix) 347 | labels = utils.enforce_ordered_labels(labels) 348 | expected = np.array([0, 0, 0, 0, 0, 0]) 349 | np.testing.assert_equal(expected, labels) 350 | 351 | def test_6by2_matrix_single_cluster_all_affinity(self): 352 | matrix = np.array([ 353 | [1.0, 0.0], 354 | [1.1, 0.1], 355 | [1.0, 0.0], 356 | [1.1, 0.0], 357 | [0.9, -0.1], 358 | [1.0, 0.5], 359 | ]) 360 | # High threshold. 361 | fallback_options = FallbackOptions( 362 | single_cluster_condition=SingleClusterCondition.AllAffinity, 363 | single_cluster_affinity_threshold=0.93) 364 | clusterer = spectral_clusterer.SpectralClusterer( 365 | min_clusters=1, 366 | laplacian_type=LaplacianType.GraphCut, 367 | refinement_options=None, 368 | fallback_options=fallback_options) 369 | labels = clusterer.predict(matrix) 370 | labels = utils.enforce_ordered_labels(labels) 371 | expected = np.array([0, 0, 0, 0, 0, 1]) 372 | np.testing.assert_equal(expected, labels) 373 | 374 | # Low threshold. 375 | fallback_options = FallbackOptions( 376 | single_cluster_condition=SingleClusterCondition.AllAffinity, 377 | single_cluster_affinity_threshold=0.91) 378 | clusterer = spectral_clusterer.SpectralClusterer( 379 | min_clusters=1, 380 | laplacian_type=LaplacianType.GraphCut, 381 | refinement_options=None, 382 | fallback_options=fallback_options) 383 | labels = clusterer.predict(matrix) 384 | labels = utils.enforce_ordered_labels(labels) 385 | expected = np.array([0, 0, 0, 0, 0, 0]) 386 | np.testing.assert_equal(expected, labels) 387 | 388 | def test_6by2_matrix_single_cluster_neighbor_affinity(self): 389 | matrix = np.array([ 390 | [1.0, 0.0], 391 | [1.1, 0.1], 392 | [1.0, 0.0], 393 | [1.0, 0.5], 394 | [1.1, 0.0], 395 | [0.9, -0.1], 396 | ]) 397 | # High threshold. 398 | fallback_options = FallbackOptions( 399 | single_cluster_condition=SingleClusterCondition.NeighborAffinity, 400 | single_cluster_affinity_threshold=0.96) 401 | clusterer = spectral_clusterer.SpectralClusterer( 402 | min_clusters=1, 403 | laplacian_type=LaplacianType.GraphCut, 404 | refinement_options=None, 405 | fallback_options=fallback_options) 406 | labels = clusterer.predict(matrix) 407 | labels = utils.enforce_ordered_labels(labels) 408 | expected = np.array([0, 0, 0, 1, 0, 0]) 409 | np.testing.assert_equal(expected, labels) 410 | 411 | # Low threshold. 
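# With the threshold below the weakest neighboring-pair affinity, the single-cluster condition holds and every row gets the same label.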
412 | fallback_options = FallbackOptions( 413 | single_cluster_condition=SingleClusterCondition.NeighborAffinity, 414 | single_cluster_affinity_threshold=0.94) 415 | clusterer = spectral_clusterer.SpectralClusterer( 416 | min_clusters=1, 417 | laplacian_type=LaplacianType.GraphCut, 418 | refinement_options=None, 419 | fallback_options=fallback_options) 420 | labels = clusterer.predict(matrix) 421 | labels = utils.enforce_ordered_labels(labels) 422 | expected = np.array([0, 0, 0, 0, 0, 0]) 423 | np.testing.assert_equal(expected, labels) 424 | 425 | def test_6by2_matrix_single_cluster_affinity_std(self): 426 | matrix = np.array([ 427 | [1.0, 0.0], 428 | [1.1, 0.1], 429 | [1.0, 0.0], 430 | [1.0, 0.5], 431 | [1.1, 0.0], 432 | [0.9, -0.1], 433 | ]) 434 | # Low threshold. 435 | fallback_options = FallbackOptions( 436 | single_cluster_condition=SingleClusterCondition.AffinityStd, 437 | single_cluster_affinity_threshold=0.02) 438 | clusterer = spectral_clusterer.SpectralClusterer( 439 | min_clusters=1, 440 | laplacian_type=LaplacianType.GraphCut, 441 | refinement_options=None, 442 | fallback_options=fallback_options) 443 | labels = clusterer.predict(matrix) 444 | labels = utils.enforce_ordered_labels(labels) 445 | expected = np.array([0, 0, 0, 1, 0, 0]) 446 | np.testing.assert_equal(expected, labels) 447 | 448 | # High threshold. 449 | fallback_options = FallbackOptions( 450 | single_cluster_condition=SingleClusterCondition.AffinityStd, 451 | single_cluster_affinity_threshold=0.03) 452 | clusterer = spectral_clusterer.SpectralClusterer( 453 | min_clusters=1, 454 | laplacian_type=LaplacianType.GraphCut, 455 | refinement_options=None, 456 | fallback_options=fallback_options) 457 | labels = clusterer.predict(matrix) 458 | labels = utils.enforce_ordered_labels(labels) 459 | expected = np.array([0, 0, 0, 0, 0, 0]) 460 | np.testing.assert_equal(expected, labels) 461 | 462 | def test_6by2_matrix_single_cluster_fallback_naive(self): 463 | matrix = np.array([ 464 | [1.0, 0.0], 465 | [1.1, 0.1], 466 | [1.0, 0.0], 467 | [1.0, 0.5], 468 | [1.1, 0.0], 469 | [0.9, -0.1], 470 | ]) 471 | # High threshold. 472 | fallback_options = FallbackOptions( 473 | single_cluster_condition=SingleClusterCondition.FallbackClusterer, 474 | fallback_clusterer_type=FallbackClustererType.Naive, 475 | naive_threshold=0.95) 476 | clusterer = spectral_clusterer.SpectralClusterer( 477 | min_clusters=1, 478 | laplacian_type=LaplacianType.GraphCut, 479 | refinement_options=None, 480 | fallback_options=fallback_options) 481 | labels = clusterer.predict(matrix) 482 | labels = utils.enforce_ordered_labels(labels) 483 | expected = np.array([0, 0, 0, 1, 0, 0]) 484 | np.testing.assert_equal(expected, labels) 485 | 486 | # Low threshold. 
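# With the lower naive threshold, the outlier embedding also matches the existing cluster, so the naive fallback reports a single cluster.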
487 | fallback_options = FallbackOptions( 488 | single_cluster_condition=SingleClusterCondition.FallbackClusterer, 489 | fallback_clusterer_type=FallbackClustererType.Naive, 490 | naive_threshold=0.9) 491 | clusterer = spectral_clusterer.SpectralClusterer( 492 | min_clusters=1, 493 | laplacian_type=LaplacianType.GraphCut, 494 | refinement_options=None, 495 | fallback_options=fallback_options) 496 | labels = clusterer.predict(matrix) 497 | labels = utils.enforce_ordered_labels(labels) 498 | expected = np.array([0, 0, 0, 0, 0, 0]) 499 | np.testing.assert_equal(expected, labels) 500 | 501 | 502 | if __name__ == "__main__": 503 | unittest.main() 504 | -------------------------------------------------------------------------------- /tests/utils_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import numpy as np 4 | from spectralcluster import utils 5 | 6 | 7 | class TestComputeAffinityMatrix(unittest.TestCase): 8 | """Tests for the compute_affinity_matrix function.""" 9 | 10 | def test_4by2_matrix(self): 11 | matrix = np.array([[3, 4], [-4, 3], [6, 8], [-3, -4]]) 12 | affinity = utils.compute_affinity_matrix(matrix) 13 | expected = np.array([[1, 0.5, 1, 0], [0.5, 1, 0.5, 0.5], [1, 0.5, 1, 0], 14 | [0, 0.5, 0, 1]]) 15 | np.testing.assert_equal(expected, affinity) 16 | 17 | 18 | class TestComputeSortedEigenvectors(unittest.TestCase): 19 | """Tests for the compute_sorted_eigenvectors function.""" 20 | 21 | def test_3by2_matrix(self): 22 | matrix = np.array([[1, 2], [3, 4], [1, 3]]) 23 | affinity = utils.compute_affinity_matrix(matrix) 24 | w, v = utils.compute_sorted_eigenvectors(affinity) 25 | self.assertEqual((3,), w.shape) 26 | self.assertEqual((3, 3), v.shape) 27 | self.assertGreater(w[0], w[1]) 28 | self.assertGreater(w[1], w[2]) 29 | 30 | def test_ascend(self): 31 | matrix = np.array([[1, 2], [3, 4], [1, 3]]) 32 | affinity = utils.compute_affinity_matrix(matrix) 33 | w, v = utils.compute_sorted_eigenvectors(affinity, descend=False) 34 | self.assertEqual((3,), w.shape) 35 | self.assertEqual((3, 3), v.shape) 36 | self.assertLess(w[0], w[1]) 37 | self.assertLess(w[1], w[2]) 38 | 39 | 40 | class TestComputeNumberOfClusters(unittest.TestCase): 41 | """Tests for the compute_number_of_clusters function.""" 42 | 43 | def test_5_values(self): 44 | eigenvalues = np.array([1.0, 0.9, 0.8, 0.2, 0.1]) 45 | result, max_delta_norm = utils.compute_number_of_clusters(eigenvalues) 46 | self.assertEqual(3, result) 47 | np.testing.assert_allclose(4.0, max_delta_norm, atol=0.01) 48 | 49 | def test_max_clusters(self): 50 | max_clusters = 2 51 | eigenvalues = np.array([1.0, 0.9, 0.8, 0.7, 0.6, 0.5]) 52 | 53 | result_1, max_delta_norm_1 = utils.compute_number_of_clusters(eigenvalues) 54 | self.assertEqual(5, result_1) 55 | np.testing.assert_allclose(1.2, max_delta_norm_1, atol=0.01) 56 | 57 | result_2, max_delta_norm_2 = utils.compute_number_of_clusters( 58 | eigenvalues, max_clusters=max_clusters) 59 | self.assertEqual(max_clusters, result_2) 60 | np.testing.assert_allclose(1.125, max_delta_norm_2, atol=0.01) 61 | 62 | def test_ascend(self): 63 | eigenvalues = np.array([1.0, 0.9, 0.8, 0.2, 0.1]) 64 | result, max_delta_norm = utils.compute_number_of_clusters( 65 | eigenvalues, max_clusters=3, descend=False) 66 | self.assertEqual(2, result) 67 | np.testing.assert_allclose(0.88, max_delta_norm, atol=0.01) 68 | 69 | 70 | class TestEnforceOrderedLabels(unittest.TestCase): 71 | """Tests for the enforce_ordered_labels function.""" 72 | 73 | def 
test_small_array(self): 74 | labels = np.array([2, 2, 1, 0, 3, 3, 1]) 75 | expected = np.array([0, 0, 1, 2, 3, 3, 1]) 76 | result = utils.enforce_ordered_labels(labels) 77 | np.testing.assert_equal(expected, result) 78 | 79 | 80 | class TestGetClusterCentroids(unittest.TestCase): 81 | """Tests for the get_cluster_centroids function.""" 82 | 83 | def test_get_centroids(self): 84 | embeddings = np.array([ 85 | [1, 2], 86 | [3, 4], 87 | [5, 6], 88 | [7, 8], 89 | [9, 0]]) 90 | 91 | labels = np.array([0, 1, 1, 2, 0]) 92 | 93 | expected = np.array([ 94 | [5, 1], 95 | [4, 5], 96 | [7, 8]]) 97 | 98 | centroids = utils.get_cluster_centroids(embeddings, labels) 99 | 100 | np.testing.assert_equal(expected, centroids) 101 | 102 | 103 | class TestChainLabels(unittest.TestCase): 104 | """Tests for the chain_labels function.""" 105 | 106 | def test_chain(self): 107 | pre_labels = np.array([ 108 | 0, 0, 1, 1, 2, 3, 1, 1, 5, 4, 109 | ]) 110 | main_labels = np.array([ 111 | 0, 0, 1, 1, 1, 1, 112 | ]) 113 | expected = np.array([ 114 | 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 115 | ]) 116 | results = utils.chain_labels(pre_labels, main_labels) 117 | np.testing.assert_equal(expected, results) 118 | 119 | def test_bad_shape(self): 120 | pre_labels = np.array([ 121 | 0, 0, 1, 1, 2, 3, 1, 1, 5, 4, 122 | ]) 123 | main_labels = np.array([ 124 | 0, 0, 1, 1, 1, 125 | ]) 126 | with self.assertRaises(ValueError): 127 | utils.chain_labels(pre_labels, main_labels) 128 | 129 | 130 | if __name__ == "__main__": 131 | unittest.main() 132 | --------------------------------------------------------------------------------
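A minimal usage sketch, assembled from test_6by2_matrix in tests/spectral_clusterer_test.py above. It is not a file in this snapshot; every class, option, and parameter value is taken from that test.

import numpy as np
from spectralcluster import configs
from spectralcluster import refinement
from spectralcluster import spectral_clusterer
from spectralcluster import utils

# Six 2-D embeddings clustered around two directions, as in the tests above.
matrix = np.array([
    [1.0, 0.0],
    [1.1, 0.1],
    [0.0, 1.0],
    [0.1, 1.0],
    [0.9, -0.1],
    [0.0, 1.2],
])
refinement_options = refinement.RefinementOptions(
    gaussian_blur_sigma=0,
    p_percentile=0.95,
    refinement_sequence=configs.ICASSP2018_REFINEMENT_SEQUENCE)
clusterer = spectral_clusterer.SpectralClusterer(
    refinement_options=refinement_options)
labels = utils.enforce_ordered_labels(clusterer.predict(matrix))
print(labels)  # The corresponding test expects [0 0 1 1 0 1].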