├── run_tests.sh ├── tests ├── __init__.py ├── test_notypical.py └── test_typical.py ├── examples ├── __init__.py └── fcm_2d.py ├── run_coverage.sh ├── requirements.txt ├── fuzzycmeans ├── __init__.py ├── visualization.py └── fuzzy_clustering.py ├── README.md ├── setup.py ├── .gitignore └── LICENSE /run_tests.sh: -------------------------------------------------------------------------------- 1 | pytest tests 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | -------------------------------------------------------------------------------- /examples/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.append('..') 3 | -------------------------------------------------------------------------------- /run_coverage.sh: -------------------------------------------------------------------------------- 1 | coverage run -m pytest tests 2 | coverage report -m --include="fuzzycmeans/*" 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | coverage==4.5.1 2 | pytest==7.1.2 3 | numpy==1.21.0 4 | six==1.11.0 5 | bokeh==0.12.16 6 | -------------------------------------------------------------------------------- /fuzzycmeans/__init__.py: -------------------------------------------------------------------------------- 1 | name = "fuzzycmeans" 2 | try: 3 | from .fuzzy_clustering import FCM 4 | except: 5 | from fuzzy_clustering import FCM 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fuzzy-c-means 2 | Fuzzy c-means Clustering 3 | 4 | 5 | ## Description 6 | This implementation is based on the paper 7 | **FCM: The fuzzy c-means clustering algorithm** by: *James C.Bezdek, Robert Ehrlich, and William Full* 8 | 9 | ## To run the tests 10 | `sh run_tests.sh` 11 | 12 | ## To run the coverage 13 | `sh run_coverage.sh` 14 | 15 | ## Install via pip 16 | ```pip install fuzzycmeans``` 17 | 18 | 19 | ## How to use it 20 | 1. Fit the model. This is to cluster any given data *X*. 21 | ```Python 22 | X = np.array([[1, 1], [1, 2], [2, 2], [0, 0], [0, 0]]) 23 | fcm = FCM(n_clusters=3, max_iter=1) 24 | fcm.fit(X, [0, 0, 0, 1, 2]) 25 | ``` 26 | 2. (Optional.) Use the model to assign new data points to existing clusters. Note that the predict function would return the membership as this a fuzzy clustering. 27 | ```Python 28 | Y = np.array([[1, 2], [2, 2], [3, 1], [2, 1], [6, 8]]) 29 | membership = fcm.predict(Y) 30 | ``` 31 | 32 | 33 | -------------------------------------------------------------------------------- /tests/test_notypical.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from fuzzycmeans import FCM 4 | 5 | 6 | def single_known_cluster(): 7 | X = np.array([[1, 1], [1, 2], [2, 2]]) 8 | fcm = FCM(n_clusters=3, max_iter=1) 9 | fcm.fit(X, [0, 0, 0]) 10 | 11 | 12 | def fit_from_centroids(): 13 | data_test = [178.75, 5.97390157] 14 | data_height = [170., 186., 182., 177.] 15 | data_size = [40, 42, 45, 47, 50] 16 | data_injuries = [3, 4, 1, 0] 17 | centers = [[175., 4.08], 18 | [44.8, 3.54], 19 | [2., 1.58]] 20 | fcm = FCM(n_clusters=3, max_iter=1) 21 | fcm.cluster_centers_ = np.array(centers) 22 | fcm.fit(fcm.cluster_centers_, [0, 1, 2]) 23 | membership_height = fcm.predict(np.array([data_test])) 24 | membership_height = fcm.predict(np.array([[np.average(data_height), np.std(data_height)]])) 25 | membership_size = fcm.predict(np.array([[np.average(data_size), np.std(data_size)]])) 26 | membership_injuries = fcm.predict(np.array([[np.average(data_injuries), np.std(data_injuries)]])) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | import pathlib 3 | 4 | 5 | # The directory containing this file 6 | HERE = pathlib.Path(__file__).parent 7 | 8 | 9 | # The text of the README file 10 | with open("README.md") as f: 11 | README = f.read() 12 | lines = README.split('\n') 13 | desc_lines = [line for line in lines if line[:2] != "[!"] 14 | README = "\n".join(desc_lines) 15 | 16 | 17 | setuptools.setup( 18 | name="fuzzycmeans", 19 | version="1.0.4", 20 | author="Ahmad Alobaid", 21 | author_email="aalobaid@fi.upm.es", 22 | description="Fuzzy c-means according to the research paper by James C. Bezdek et. al", 23 | long_description=README, 24 | long_description_content_type="text/markdown", 25 | url="https://github.com/oeg-upm/fuzzy-c-means", 26 | packages=setuptools.find_packages(), 27 | classifiers=[ 28 | "Programming Language :: Python :: 3.9", 29 | "License :: OSI Approved :: Apache Software License", 30 | "Operating System :: OS Independent", 31 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 32 | "Topic :: Software Development :: Libraries", 33 | ], 34 | install_requires=[ 35 | 'numpy', 'six', 'bokeh' 36 | ], 37 | ) 38 | -------------------------------------------------------------------------------- /examples/fcm_2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging 3 | 4 | import sys 5 | sys.path.append('..') 6 | 7 | 8 | from fuzzycmeans import FCM 9 | from fuzzycmeans.visualization import draw_model_2d 10 | 11 | 12 | def example(): 13 | X = np.array([[1, 1], [1, 2], [2, 2], [9, 10], [10, 10], [10, 9], [9, 9], [20,20]]) 14 | fcm = FCM(n_clusters=3) 15 | fcm.fit(X, [0, 0, 0, 1, 1, 1, 1, 2]) 16 | # fcm.fit(X) 17 | testing_data = np.array([[0, 1.9], [5, 3], [4, 4], [8, 9], [9.5, 6.5], [5, 5], [15,15], [12,12], [14,14], [19,10]]) 18 | predicted_membership = fcm.predict(testing_data) 19 | print("\n\ntesting data") 20 | print(testing_data) 21 | print("predicted membership") 22 | print(predicted_membership) 23 | print("\n\n") 24 | draw_model_2d(fcm, data=testing_data, membership=predicted_membership) 25 | # draw_model_2d(fcm, data=X, membership=fcm.u) 26 | 27 | 28 | def example_single_known_zero_filled(): 29 | X = np.array([[1, 1], [1, 2], [2, 2], [0, 0], [0, 0]]) 30 | fcm = FCM(n_clusters=3, max_iter=1) 31 | fcm.fit(X, [0, 0, 0, 1, 2]) 32 | draw_model_2d(fcm, data=X, membership=fcm.u) 33 | 34 | 35 | def example_single_known(): 36 | X = np.array([[1, 1], [1, 2], [2, 2], [0, 0], [0, 0]]) 37 | fcm = FCM(n_clusters=3, max_iter=1) 38 | fcm.fit(X, [0, 0, 0, 1, 2]) 39 | draw_model_2d(fcm, data=X, membership=fcm.u) 40 | print(fcm.u) 41 | 42 | 43 | # example() 44 | example_single_known() 45 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | output.html 3 | 4 | # pycharm 5 | .idea* 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # pyenv 82 | .python-version 83 | 84 | # celery beat schedule file 85 | celerybeat-schedule 86 | 87 | # SageMath parsed files 88 | *.sage.py 89 | 90 | # Environments 91 | .env 92 | .venv 93 | env/ 94 | venv/ 95 | ENV/ 96 | env.bak/ 97 | venv.bak/ 98 | 99 | # Spyder project settings 100 | .spyderproject 101 | .spyproject 102 | 103 | # Rope project settings 104 | .ropeproject 105 | 106 | # mkdocs documentation 107 | /site 108 | 109 | # mypy 110 | .mypy_cache/ 111 | -------------------------------------------------------------------------------- /tests/test_typical.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from fuzzycmeans import FCM 5 | 6 | 7 | def test_2d(): 8 | X = np.array([[1, 1], [1, 2], [2, 2], [9, 10], [10, 10], [10, 9], [9, 9]]) 9 | fcm = FCM() 10 | fcm.fit(X, [0, 0, 0, 1, 1, 1, 1]) 11 | assert len(fcm.cluster_centers_) == 2 12 | assert fcm.cluster_centers_[0][0] == pytest.approx(1.33333333, 0.1) 13 | assert fcm.cluster_centers_[0][1] == pytest.approx(1.66666667, 0.1) 14 | testing_data = np.array([[0, 1.9], [3, 3], [4, 4], [8, 9], [9.5, 6.5]]) 15 | predicted_membership = fcm.predict(testing_data) 16 | actual_membership = np.array([[0.98777232, 0.01222768], 17 | [0.94884591, 0.05115409], 18 | [0.82813688, 0.17186312], 19 | [0.02482074, 0.97517926], 20 | [0.0908581, 0.9091419]]) 21 | assert predicted_membership == pytest.approx(actual_membership, 0.01) 22 | 23 | 24 | def test_3d_2clus(): 25 | X = np.array([[1, 1, 1], [1, 2, 2], [2, 2, 2], [9, 10, 8], [10, 10, 10], [10, 9, 9], [9, 9, 9]]) 26 | fcm = FCM() 27 | fcm.fit(X, [0, 0, 0, 1, 1, 1, 1]) 28 | assert len(fcm.cluster_centers_) == 2 29 | # assert fcm.cluster_centers_[0][0] == pytest.approx(1.33333333, 0.1) 30 | # assert fcm.cluster_centers_[0][1] == pytest.approx(1.66666667, 0.1) 31 | testing_data = np.array([[0, 1.9, 1], [3, 3, 3], [4, 4, 4], [8, 9, 9], [9.5, 6.5, 8]]) 32 | predicted_membership = fcm.predict(testing_data) 33 | actual_membership = np.array([[0.98777232, 0.01222768], 34 | [0.94884591, 0.05115409], 35 | [0.82813688, 0.17186312], 36 | [0.02482074, 0.97517926], 37 | [0.0908581, 0.9091419]]) 38 | # assert predicted_membership == pytest.approx(actual_membership, 0.01) 39 | 40 | 41 | def test_3d_3clus(): 42 | X = np.array([[1, 1, 1], [1, 2, 2], [2, 2, 2], [9, 10, 8], [10, 10, 10], [10, 9, 9], [9, 9, 9], [20, 20, 20]]) 43 | fcm = FCM(n_clusters=3) 44 | fcm.fit(X, [0, 0, 0, 1, 1, 1, 1, 2]) 45 | assert len(fcm.cluster_centers_) == 3 46 | # assert fcm.cluster_centers_[0][0] == pytest.approx(1.33333333, 0.1) 47 | # assert fcm.cluster_centers_[0][1] == pytest.approx(1.66666667, 0.1) 48 | testing_data = np.array([[0, 1.9, 1], [3, 3, 3], [4, 4, 4], [8, 9, 9], [9.5, 6.5, 8]]) 49 | predicted_membership = fcm.predict(testing_data) 50 | actual_membership = np.array([[0.98777232, 0.01222768], 51 | [0.94884591, 0.05115409], 52 | [0.82813688, 0.17186312], 53 | [0.02482074, 0.97517926], 54 | [0.0908581, 0.9091419]]) 55 | # assert predicted_membership == pytest.approx(actual_membership, 0.01) -------------------------------------------------------------------------------- /fuzzycmeans/visualization.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.random import random 3 | from bokeh.plotting import figure, show, output_file 4 | from bokeh.palettes import Category20 as pallette 5 | import itertools 6 | try: 7 | from fuzzycmeans import FCM 8 | except: 9 | from fuzzy_clustering import FCM 10 | import logging 11 | 12 | 13 | def color_gen(): 14 | for c in itertools.cycle(pallette[10]): 15 | yield c 16 | 17 | 18 | colors = color_gen() 19 | 20 | 21 | def example(): 22 | def mscatter(p, x, y, marker): 23 | p.scatter(x, y, marker=marker, size=15, 24 | line_color="navy", fill_color="orange", alpha=0.5) 25 | 26 | def mtext(p, x, y, text): 27 | p.text(x, y, text=[text], 28 | text_color="firebrick", text_align="center", text_font_size="10pt") 29 | 30 | p = figure(title="Bokeh Markers", toolbar_location=None) 31 | p.grid.grid_line_color = None 32 | p.background_fill_color = "#eeeeee" 33 | 34 | N = 10 35 | 36 | mscatter(p, random(N) + 2, random(N) + 1, "circle") 37 | mscatter(p, random(N) + 4, random(N) + 1, "square") 38 | mscatter(p, random(N) + 6, random(N) + 1, "triangle") 39 | mscatter(p, random(N) + 8, random(N) + 1, "asterisk") 40 | 41 | mscatter(p, random(N) + 2, random(N) + 4, "circle_x") 42 | mscatter(p, random(N) + 4, random(N) + 4, "square_x") 43 | mscatter(p, random(N) + 6, random(N) + 4, "inverted_triangle") 44 | mscatter(p, random(N) + 8, random(N) + 4, "x") 45 | 46 | mscatter(p, random(N) + 2, random(N) + 7, "circle_cross") 47 | mscatter(p, random(N) + 4, random(N) + 7, "square_cross") 48 | mscatter(p, random(N) + 6, random(N) + 7, "diamond") 49 | mscatter(p, random(N) + 8, random(N) + 7, "cross") 50 | 51 | mtext(p, 2.5, 0.5, "circle / o") 52 | mtext(p, 4.5, 0.5, "square") 53 | mtext(p, 6.5, 0.5, "triangle") 54 | mtext(p, 8.5, 0.5, "asterisk / *") 55 | 56 | mtext(p, 2.5, 3.5, "circle_x / ox") 57 | mtext(p, 4.5, 3.5, "square_x") 58 | mtext(p, 6.5, 3.5, "inverted_triangle") 59 | mtext(p, 8.5, 3.5, "x") 60 | 61 | mtext(p, 2.5, 6.5, "circle_cross / o+") 62 | mtext(p, 4.5, 6.5, "square_cross") 63 | mtext(p, 6.5, 6.5, "diamond") 64 | mtext(p, 8.5, 6.5, "cross / +") 65 | 66 | output_file("markers.html", title="markers.py example") 67 | 68 | show(p) # open a browser 69 | 70 | 71 | def draw_model_2d(model, data=None, membership=None, show_figure=True): 72 | title = "draw FCM model" 73 | fig = figure(title=title, toolbar_location=None) 74 | fig.grid.grid_line_color = None 75 | fig.background_fill_color = "#eeeeee" 76 | output_p = None 77 | for clus, cc_color in enumerate(zip(model.cluster_centers_, color_gen())): 78 | cc, color = cc_color 79 | fig = draw_points_2d(np.array([cc]), fig=fig, title=title, marker="diamond", size=15, 80 | line_color="navy", fill_color=color, alpha=1.0) 81 | if data is not None and membership is not None: 82 | for idx, data_point in enumerate(data): 83 | # print idx 84 | # print clus 85 | print(membership[idx][clus]) 86 | fig = draw_points_2d(np.array([data_point]), fig=fig, title=title, marker="circle", size=10, 87 | line_color="navy", fill_color=color, alpha=membership[idx][clus]) 88 | if show_figure: 89 | show(fig) 90 | return fig 91 | 92 | 93 | def draw_points_2d(points, fig=None, title="figure 123", **kwargs): 94 | if fig is None: 95 | fig = figure(title=title, toolbar_location=None) 96 | fig.grid.grid_line_color = None 97 | fig.background_fill_color = "#eeeeee" 98 | x, y = points.T 99 | fig.scatter(x, y, **kwargs) 100 | output_file("output.html", title=title + " of outputfile") 101 | return fig 102 | 103 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /fuzzycmeans/fuzzy_clustering.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import random 4 | import logging 5 | 6 | import sys 7 | if sys.version_info >= (3, 0): 8 | xrange = range 9 | 10 | 11 | SMALL_VALUE = 0.00001 12 | 13 | 14 | class FCM: 15 | """ 16 | This algorithm is from the paper 17 | "FCM: The fuzzy c-means clustering algorithm" by James Bezdek 18 | Here we will use the Euclidean distance 19 | 20 | Pseudo code: 21 | 1) Fix c, m, A 22 | c: n_clusters 23 | m: 2 by default 24 | A: we are using Euclidean distance, so we don't need it actually 25 | 2) compute the means (cluster centers) 26 | 3) update the membership matrix 27 | 4) compare the new membership with the old one, is difference is less than a threshold, stop. otherwise 28 | return to step 2) 29 | """ 30 | 31 | def __init__(self, n_clusters=2, m=2, max_iter=10, logger=None): 32 | self.n_clusters = n_clusters 33 | self.cluster_centers_ = None 34 | self.u = None # The membership 35 | self.m = m # the fuzziness, m=1 is hard not fuzzy. see the paper for more info 36 | self.max_iter = max_iter 37 | if logger is None: 38 | self.logger = logging.getLogger(__name__) 39 | self.logger.addHandler(logging.NullHandler()) 40 | else: 41 | self.logger = logger 42 | 43 | def init_membership(self, num_of_points): 44 | self.init_membership_random(num_of_points) 45 | 46 | def init_membership_equal(self, num_of_points): 47 | """ 48 | :param num_of_points: 49 | :return: nothing 50 | 51 | # In the below for loop, due to the rounding to 2 decimals, you may think that the membership sum for 52 | # a point can be larger than 1. this can happen if number of clusters is larger than 10. 53 | # mathematical proof that this can happen: 54 | # (1) --- max_error per point membership to a single cluster is 0.01 (because of the rounding to 2 decimal 55 | # points). 56 | # (2) --- (c-1) * 0.01 >= 1/c 57 | # (3) --- c^2 - c >= 1 58 | # solving for c we get c = 10.51 (approx.) 59 | # so when c >= 11, this error may occur. 60 | 61 | But I added a check below to prevent such a thing from happening 62 | """ 63 | self.u = np.zeros((num_of_points, self.n_clusters)) 64 | for i in xrange(num_of_points): 65 | row_sum = 0.0 66 | for c in xrange(self.n_clusters): 67 | if c == self.n_clusters-1: # last iteration 68 | self.u[i][c] = 1 - row_sum 69 | else: 70 | rand_num = round(1.0/self.n_clusters, 2) 71 | if rand_num + row_sum >= 1.0: # to prevent membership sum for a point to be larger than 1.0 72 | if rand_num + row_sum - 0.01 >= 1.0: 73 | self.logger.error('Something is not right in the init_membership') 74 | return None 75 | else: 76 | self.u[i][c] = rand_num - 0.01 77 | else: 78 | self.u[i][c] = rand_num 79 | row_sum += self.u[i][c] 80 | 81 | def init_membership_random(self, num_of_points): 82 | """ 83 | :param num_of_points: 84 | :return: nothing 85 | """ 86 | self.u = np.zeros((num_of_points, self.n_clusters)) 87 | for i in xrange(num_of_points): 88 | row_sum = 0.0 89 | for c in xrange(self.n_clusters): 90 | if c == self.n_clusters-1: # last iteration 91 | self.u[i][c] = 1.0 - row_sum 92 | else: 93 | rand_clus = random.randint(0, self.n_clusters-1) 94 | rand_num = random.random() 95 | rand_num = round(rand_num, 2) 96 | if rand_num + row_sum <= 1.0: # to prevent membership sum for a point to be larger than 1.0 97 | self.u[i][rand_clus] = rand_num 98 | row_sum += self.u[i][rand_clus] 99 | 100 | def compute_cluster_centers(self, X, update_func=None): 101 | """ 102 | :param X: 103 | :return: 104 | 105 | vi = (sum of membership for cluster i ^ m * x ) / sum of membership for cluster i ^ m : for each cluster i 106 | 107 | """ 108 | num_of_points = X.shape[0] 109 | num_of_features = X.shape[1] 110 | centers = [] 111 | if update_func is None: 112 | for c in xrange(self.n_clusters): 113 | sum1_vec = np.zeros(num_of_features) 114 | sum2_vec = 0.0 115 | for i in xrange(num_of_points): 116 | interm1 = (self.u[i][c] ** self.m) 117 | interm2 = interm1 * X[i] 118 | sum1_vec += interm2 119 | sum2_vec += interm1 120 | if np.any(np.isnan(sum1_vec)): 121 | self.logger.debug("compute_cluster_centers> interm1 %s" % str(interm1)) 122 | self.logger.debug("compute_cluster_centers> interm2 %s" % str(interm2)) 123 | self.logger.debug("compute_cluster_centers> X[%d] %s" % (i, str(X[i]))) 124 | self.logger.debug("compute_cluster_centers> loop sum1_vec %s" % str(sum1_vec)) 125 | self.logger.debug("compute_cluster_centers> loop sum2_vec %s" % str(sum2_vec)) 126 | self.logger.debug("X: [%d] %s" % (i-1, X[i-1])) 127 | self.logger.debug("X: [%d] %s" % (i+1, X[i+1])) 128 | self.logger.debug("X: ") 129 | self.logger.debug(X) 130 | raise Exception("There is a nan in compute_cluster_centers method if") 131 | if sum2_vec == 0: 132 | sum2_vec = 0.000001 133 | centers.append(sum1_vec/sum2_vec) 134 | else: 135 | for c in xrange(self.n_clusters): 136 | sum1_vec = np.zeros(num_of_features) 137 | sum2_vec = 0.0 138 | for i in xrange(num_of_points): 139 | interm1 = (self.u[i][c] ** self.m) 140 | interm2 = interm1 * X[i] 141 | sum1_vec += interm2 142 | sum2_vec += interm1 143 | if np.any(np.isnan(sum1_vec)): 144 | self.logger.debug("compute_cluster_centers> interm1 %s" % str(interm1)) 145 | self.logger.debug("compute_cluster_centers> interm2 %s" % str(interm2)) 146 | self.logger.debug("compute_cluster_centers> X[%d] %s" % (i, str(X[i]))) 147 | self.logger.debug("compute_cluster_centers> loop sum1_vec %s" % str(sum1_vec)) 148 | self.logger.debug("compute_cluster_centers> loop sum2_vec %s" % str(sum2_vec)) 149 | self.logger.debug("X: [%d] %s" % (i-1, X[i-1])) 150 | self.logger.debug("X: [%d] %s" % (i+1, X[i+1])) 151 | self.logger.debug("X: ") 152 | self.logger.debug(X) 153 | raise Exception("There is a nan in compute_cluster_centers method else") 154 | if sum2_vec == 0: 155 | sum2_vec = 0.000001 156 | centers.append(sum1_vec/sum2_vec) 157 | update_func(int(c * 1.0 / self.n_clusters * 100)) 158 | update_func(100) 159 | 160 | self.cluster_centers_ = centers 161 | return centers 162 | 163 | def distance_squared(self, x, c): 164 | """ 165 | Compute the Euclidean distance 166 | :param x: is a single point from the original data X 167 | :param c: is a single point that represent a center or a cluster 168 | :return: the distance 169 | """ 170 | sum_of_sq = 0.0 171 | for i in xrange(len(x)): 172 | sum_of_sq += (x[i]-c[i]) ** 2 173 | return sum_of_sq 174 | 175 | def compute_membership_single(self, X, datapoint_idx, cluster_idx): 176 | """ 177 | :param datapoint_idx: 178 | :param cluster_idx: 179 | :return: return computer membership for the given ids 180 | """ 181 | clean_X = X 182 | d1 = self.distance_squared(clean_X[datapoint_idx], self.cluster_centers_[cluster_idx]) 183 | sum1 = 0.0 184 | for c in self.cluster_centers_: # this is to compute the sigma 185 | d2 = self.distance_squared(c, clean_X[datapoint_idx]) 186 | if d2 == 0.0: 187 | d2 = SMALL_VALUE 188 | sum1 += (d1/d2) ** (1.0/(self.m-1)) 189 | if np.any(np.isnan(sum1)): 190 | self.logger.debug("nan is found in compute_membership_single") 191 | self.logger.debug("d1: %s" % str(d1)) 192 | self.logger.debug("sum1: %s" % str(sum1)) 193 | self.logger.debug("d2: %s" % str(d2)) 194 | self.logger.debug("c: %s" % str(c)) 195 | self.logger.debug("X[%d] %s" % (datapoint_idx, str(clean_X[datapoint_idx]))) 196 | self.logger.debug("centers: %s" % str(self.cluster_centers_)) 197 | raise Exception("nan is found in computer_memberhip_single method in the inner for") 198 | if sum1 == 0: # because otherwise it will return inf 199 | return 1.0 - SMALL_VALUE 200 | if np.any(np.isnan(sum1 ** -1)): 201 | self.logger.debug("nan is found in compute_membership_single") 202 | self.logger.debug("d1: %s" % str(d1)) 203 | self.logger.debug("sum1: %s" % str(sum1)) 204 | self.logger.debug("X[%d] %s" % (datapoint_idx, str(clean_X[datapoint_idx]))) 205 | self.logger.debug("centers: %s" % str(self.cluster_centers_)) 206 | raise Exception("nan is found in computer_memberhip_single method") 207 | return sum1 ** -1 208 | 209 | def update_membership(self, X): 210 | """ 211 | update the membership matrix 212 | :param X: data points 213 | :return: nothing 214 | 215 | For performance, the distance can be computed once, before the loop instead of computing it every time 216 | """ 217 | for i in xrange(X.shape[0]): 218 | for c in xrange(len(self.cluster_centers_)): 219 | self.u[i][c] = self.compute_membership_single(X, i, c) 220 | 221 | def fit(self, X, y=None, hard=True): 222 | """ 223 | :param X: 224 | :param y: list of clusters or a membership, now only support the hard y list which will generate 225 | the membership 226 | :param hard: whether y contains a list of clusters or a membership matrix 227 | :return: self 228 | """ 229 | X = np.array(X) 230 | if y is not None: 231 | self.logger.debug("fit> y is not None") 232 | y = np.array(y) 233 | if hard: 234 | self.logger.debug("fit> y is a hard membership") 235 | self.set_membership_from_hard_cluster(X, y) 236 | self.logger.debug("fit> the membership is set") 237 | if self.cluster_centers_ is None: 238 | do_compute_cluster_centers = True 239 | self.logger.debug("fit> cluster centers will be computed") 240 | else: 241 | do_compute_cluster_centers = False 242 | self.logger.debug("fit> cluster centers is already set") 243 | if self.u is None: 244 | num_of_points = X.shape[0] 245 | self.init_membership_random(num_of_points) 246 | list_of_centers = [] 247 | # membership_history = [] 248 | # membership_history.append(self.u.copy()) 249 | for i in xrange(self.max_iter): 250 | if do_compute_cluster_centers: 251 | centers = self.compute_cluster_centers(X) 252 | if i == 0: 253 | init_centers = centers 254 | list_of_centers.append(centers) 255 | else: 256 | init_centers = self.cluster_centers_ 257 | list_of_centers = [init_centers] 258 | self.update_membership(X) 259 | # membership_history.append(self.u.copy()) 260 | self.logger.debug("updated membership is: ") 261 | self.logger.debug(self.u) 262 | return self 263 | 264 | def predict(self, X): 265 | if self.u is None: 266 | u = None 267 | else: 268 | u = self.u.copy() 269 | self.u = np.zeros((X.shape[0], self.n_clusters)) 270 | self.update_membership(X) 271 | predicted_u = self.u.copy() 272 | if np.any(np.isnan(predicted_u)): 273 | self.logger.debug("predict> has a nan") 274 | self.logger.debug("u:") 275 | self.logger.debug(u) 276 | raise Exception("There is a nan in predict method") 277 | self.u = u 278 | return predicted_u 279 | 280 | def set_membership_from_hard_cluster(self, X, y): 281 | """ 282 | Computer the membership matrix for each point from the corresponding cluster id in "y" 283 | :param X: input data points 284 | :param y: list of clusters, each correspond to the input data X 285 | :return: None 286 | """ 287 | u = np.zeros((X.shape[0], len(y))) 288 | for clus, md in enumerate(y): 289 | u[md][clus] = 1.0 290 | self.u = u.T 291 | self.logger.debug("set_membership_from_hard_cluster > membership: ") 292 | self.logger.debug(self.u) 293 | num_of_points = X.shape[0] 294 | num_of_features = X.shape[1] 295 | self.logger.debug("num of points: %s" % str(num_of_points)) 296 | self.logger.debug("num of features: %s" % str(num_of_features)) 297 | self.compute_cluster_centers(X, update_func=None) 298 | self.logger.debug("set_membership_from_hard_cluster > cluster centers: ") 299 | self.logger.debug(self.cluster_centers_) 300 | --------------------------------------------------------------------------------