├── .gitattributes
├── .gitignore
├── .spyproject
│   └── config
│       ├── backups
│       │   ├── codestyle.ini.bak
│       │   ├── encoding.ini.bak
│       │   ├── vcs.ini.bak
│       │   └── workspace.ini.bak
│       ├── codestyle.ini
│       ├── defaults
│       │   ├── defaults-codestyle-0.2.0.ini
│       │   ├── defaults-encoding-0.2.0.ini
│       │   ├── defaults-vcs-0.2.0.ini
│       │   └── defaults-workspace-0.2.0.ini
│       ├── encoding.ini
│       ├── vcs.ini
│       └── workspace.ini
├── LICENSE
├── README.md
├── SECURITY.md
├── examples
│   ├── svdd_example_KPCA.py
│   ├── svdd_example_PSO.py
│   ├── svdd_example_confusion_matrix.py
│   ├── svdd_example_cross_validation.py
│   ├── svdd_example_grid_search.py
│   ├── svdd_example_hybrid_data.py
│   ├── svdd_example_kernel.py
│   └── svdd_example_unlabeled_data.py
├── requirements.txt
└── src
    ├── BaseSVDD.py
    └── __pycache__
        ├── BananaDataset.cpython-38.pyc
        ├── BaseSVDD.cpython-38.pyc
        └── testmodel.cpython-38.pyc
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/python
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python
3 |
4 | ### Python ###
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | share/python-wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 | MANIFEST
32 |
33 | # PyInstaller
34 | # Usually these files are written by a python script from a template
35 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
36 | *.manifest
37 | *.spec
38 |
39 | # Installer logs
40 | pip-log.txt
41 | pip-delete-this-directory.txt
42 |
43 | # Unit test / coverage reports
44 | htmlcov/
45 | .tox/
46 | .nox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | *.py,cover
54 | .hypothesis/
55 | .pytest_cache/
56 | cover/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | .pybuilder/
80 | target/
81 |
82 | # Jupyter Notebook
83 | .ipynb_checkpoints
84 |
85 | # IPython
86 | profile_default/
87 | ipython_config.py
88 |
89 | # pyenv
90 | # For a library or package, you might want to ignore these files since the code is
91 | # intended to run in multiple environments; otherwise, check them in:
92 | # .python-version
93 |
94 | # pipenv
95 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
96 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
97 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
98 | # install all needed dependencies.
99 | #Pipfile.lock
100 |
101 | # poetry
102 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
103 | # This is especially recommended for binary packages to ensure reproducibility, and is more
104 | # commonly ignored for libraries.
105 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
106 | #poetry.lock
107 |
108 | # pdm
109 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
110 | #pdm.lock
111 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
112 | # in version control.
113 | # https://pdm.fming.dev/#use-with-ide
114 | .pdm.toml
115 |
116 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
117 | __pypackages__/
118 |
119 | # Celery stuff
120 | celerybeat-schedule
121 | celerybeat.pid
122 |
123 | # SageMath parsed files
124 | *.sage.py
125 |
126 | # Environments
127 | .env
128 | .venv
129 | env/
130 | venv/
131 | ENV/
132 | env.bak/
133 | venv.bak/
134 |
135 | # Spyder project settings
136 | .spyderproject
137 | .spyproject
138 |
139 | # Rope project settings
140 | .ropeproject
141 |
142 | # mkdocs documentation
143 | /site
144 |
145 | # mypy
146 | .mypy_cache/
147 | .dmypy.json
148 | dmypy.json
149 |
150 | # Pyre type checker
151 | .pyre/
152 |
153 | # pytype static type analyzer
154 | .pytype/
155 |
156 | # Cython debug symbols
157 | cython_debug/
158 |
159 | # PyCharm
160 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
161 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
162 | # and can be added to the global gitignore or merged into this file. For a more nuclear
163 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
164 | #.idea/
165 |
166 | ### Python Patch ###
167 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
168 | poetry.toml
169 |
170 | # ruff
171 | .ruff_cache/
172 |
173 | # LSP config files
174 | pyrightconfig.json
175 |
176 | # End of https://www.toptal.com/developers/gitignore/api/python
--------------------------------------------------------------------------------
/.spyproject/config/backups/codestyle.ini.bak:
--------------------------------------------------------------------------------
1 | [codestyle]
2 | indentation = True
3 | edge_line = True
4 | edge_line_columns = 79
5 |
6 | [main]
7 | version = 0.2.0
8 |
9 |
--------------------------------------------------------------------------------
/.spyproject/config/backups/encoding.ini.bak:
--------------------------------------------------------------------------------
1 | [encoding]
2 | text_encoding = utf-8
3 |
4 | [main]
5 | version = 0.2.0
6 |
7 |
--------------------------------------------------------------------------------
/.spyproject/config/backups/vcs.ini.bak:
--------------------------------------------------------------------------------
1 | [vcs]
2 | use_version_control = False
3 | version_control_system =
4 |
5 | [main]
6 | version = 0.2.0
7 |
8 |
--------------------------------------------------------------------------------
/.spyproject/config/backups/workspace.ini.bak:
--------------------------------------------------------------------------------
1 | [workspace]
2 | restore_data_on_startup = True
3 | save_data_on_exit = True
4 | save_history = True
5 | save_non_project_files = False
6 |
7 | [main]
8 | version = 0.2.0
9 | recent_files = ['src\\BaseSVDD.py']
10 |
11 |
--------------------------------------------------------------------------------
/.spyproject/config/codestyle.ini:
--------------------------------------------------------------------------------
1 | [codestyle]
2 | indentation = True
3 | edge_line = True
4 | edge_line_columns = 79
5 |
6 | [main]
7 | version = 0.2.0
8 |
9 |
--------------------------------------------------------------------------------
/.spyproject/config/defaults/defaults-codestyle-0.2.0.ini:
--------------------------------------------------------------------------------
1 | [codestyle]
2 | indentation = True
3 | edge_line = True
4 | edge_line_columns = 79
5 |
6 |
--------------------------------------------------------------------------------
/.spyproject/config/defaults/defaults-encoding-0.2.0.ini:
--------------------------------------------------------------------------------
1 | [encoding]
2 | text_encoding = utf-8
3 |
4 |
--------------------------------------------------------------------------------
/.spyproject/config/defaults/defaults-vcs-0.2.0.ini:
--------------------------------------------------------------------------------
1 | [vcs]
2 | use_version_control = False
3 | version_control_system =
4 |
5 |
--------------------------------------------------------------------------------
/.spyproject/config/defaults/defaults-workspace-0.2.0.ini:
--------------------------------------------------------------------------------
1 | [workspace]
2 | restore_data_on_startup = True
3 | save_data_on_exit = True
4 | save_history = True
5 | save_non_project_files = False
6 |
7 |
--------------------------------------------------------------------------------
/.spyproject/config/encoding.ini:
--------------------------------------------------------------------------------
1 | [encoding]
2 | text_encoding = utf-8
3 |
4 | [main]
5 | version = 0.2.0
6 |
7 |
--------------------------------------------------------------------------------
/.spyproject/config/vcs.ini:
--------------------------------------------------------------------------------
1 | [vcs]
2 | use_version_control = False
3 | version_control_system =
4 |
5 | [main]
6 | version = 0.2.0
7 |
8 |
--------------------------------------------------------------------------------
/.spyproject/config/workspace.ini:
--------------------------------------------------------------------------------
1 | [workspace]
2 | restore_data_on_startup = True
3 | save_data_on_exit = True
4 | save_history = True
5 | save_non_project_files = False
6 |
7 | [main]
8 | version = 0.2.0
9 | recent_files = []
10 |
11 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Kepeng Qiu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Support Vector Data Description (SVDD)
6 |
7 | Python code for anomaly detection or fault detection using Support Vector Data Description (SVDD)
8 | Version 1.1, 11-NOV-2021
9 | Email: iqiukp@outlook.com
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 | ## Main features
24 |
25 | - SVDD BaseEstimator based on sklearn.base for one-class or binary classification
26 | - Multiple kinds of kernel functions (linear, gaussian, polynomial, sigmoid)
27 | - Visualization of decision boundaries for 2D data
28 |
29 | ## Requirements
30 |
31 | - cvxopt
32 | - matplotlib
33 | - numpy
34 | - scikit_learn
35 | - scikit-opt (optional, only used for parameter optimization)
36 |
37 | ## Notices
38 |
39 | - The label must be 1 for a positive sample or -1 for a negative sample (see the sketch after this list).
40 | - For detailed applications, please see the examples.
41 | - This code is for reference only.
42 |
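A minimal sketch of the expected input format (hypothetical toy data; per the checks in `BaseSVDD`, `X` and `y` must be 2-D `numpy.ndarray`s of matching length, with `y` a column vector):

```Python
import numpy as np
from src.BaseSVDD import BaseSVDD

# toy data: labels form a column vector of +1 (positive) / -1 (negative)
X = np.random.randn(60, 2)                        # shape (n_samples, n_features)
y = np.append(np.ones((40, 1), dtype=np.int64),   # 40 positive samples
              -np.ones((20, 1), dtype=np.int64),  # 20 negative samples
              axis=0)                             # shape (60, 1)

svdd = BaseSVDD(C=0.9, gamma=0.3, kernel='rbf', display='off').fit(X, y)
```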
43 | ## Examples
44 |
45 | ### 01. svdd_example_unlabeled_data.py
46 |
47 | An example for SVDD model fitting using unlabeled data.
48 |
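The full script is `examples/svdd_example_unlabeled_data.py`; its core steps are:

```Python
import sys
sys.path.append("..")
import numpy as np
from src.BaseSVDD import BaseSVDD

# create 100 points with 2 dimensions
n = 100
dim = 2
X = np.r_[np.random.randn(n, dim)]

# svdd object using rbf kernel
svdd = BaseSVDD(C=0.9, gamma=0.3, kernel='rbf', display='on')

# fit the SVDD model
svdd.fit(X)

# predict the label
y_predict = svdd.predict(X)

# plot the boundary
svdd.plot_boundary(X)

# plot the distance
radius = svdd.radius
distance = svdd.get_distance(X)
svdd.plot_distance(radius, distance)
```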
49 |
50 |
51 |
52 |
53 |
54 | ### 02. svdd_example_hybrid_data.py
55 |
56 | An example for SVDD model fitting with negative samples.
57 |
58 | ```Python
59 | import sys
60 | sys.path.append("..")
61 | from sklearn.datasets import load_wine
62 | from src.BaseSVDD import BaseSVDD, BananaDataset
63 |
64 | # Banana-shaped dataset generation and partitioning
65 | X, y = BananaDataset.generate(number=100, display='on')
66 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
67 |
68 | #
69 | svdd = BaseSVDD(C=0.9, gamma=0.3, kernel='rbf', display='on')
70 |
71 | #
72 | svdd.fit(X_train, y_train)
73 |
74 | #
75 | svdd.plot_boundary(X_train, y_train)
76 |
77 | #
78 | y_test_predict = svdd.predict(X_test, y_test)
79 |
80 | #
81 | radius = svdd.radius
82 | distance = svdd.get_distance(X_test)
83 | svdd.plot_distance(radius, distance)
84 | ```
85 |
86 |
87 |
88 |
89 |
90 |
91 | ### 03. svdd_example_kernel.py
92 |
93 | An example for SVDD model fitting using different kernels.
94 |
95 | ```Python
96 | import sys
97 | sys.path.append("..")
98 | from src.BaseSVDD import BaseSVDD, BananaDataset
99 |
100 | # Banana-shaped dataset generation and partitioning
101 | X, y = BananaDataset.generate(number=100, display='on')
102 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
103 |
104 | # kernel list
105 | kernelList = {"1": BaseSVDD(C=0.9, kernel='rbf', gamma=0.3, display='on'),
106 |               "2": BaseSVDD(C=0.9, kernel='poly', degree=2, display='on'),
107 | "3": BaseSVDD(C=0.9, kernel='linear', display='on')
108 | }
109 |
110 | #
111 | for i in range(len(kernelList)):
112 | svdd = kernelList.get(str(i+1))
113 | svdd.fit(X_train, y_train)
114 | svdd.plot_boundary(X_train, y_train)
115 | ```
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 | ### 04. svdd_example_KPCA.py
125 |
126 | An example for SVDD model fitting using nonlinear principal components.
127 |
128 | The KPCA algorithm is used to reduce the dimension of the original data.
129 |
130 | ```Python
131 | import sys
132 | sys.path.append("..")
133 | import numpy as np
134 | from src.BaseSVDD import BaseSVDD
135 | from sklearn.decomposition import KernelPCA
136 |
137 |
138 | # create 100 points with 5 dimensions
139 | X = np.r_[np.random.randn(50, 5) + 1, np.random.randn(50, 5)]
140 | y = np.append(np.ones((50, 1), dtype=np.int64),
141 | -np.ones((50, 1), dtype=np.int64),
142 | axis=0)
143 |
144 | # number of the dimensionality
145 | kpca = KernelPCA(n_components=2, kernel="rbf", gamma=0.1, fit_inverse_transform=True)
146 | X_kpca = kpca.fit_transform(X)
147 |
148 | # fit the SVDD model
149 | svdd = BaseSVDD(C=0.9, gamma=10, kernel='rbf', display='on')
150 |
151 | # fit and predict
152 | svdd.fit(X_kpca, y)
153 | y_test_predict = svdd.predict(X_kpca, y)
154 |
155 | # plot the distance curve
156 | radius = svdd.radius
157 | distance = svdd.get_distance(X_kpca)
158 | svdd.plot_distance(radius, distance)
159 |
160 | # plot the boundary
161 | svdd.plot_boundary(X_kpca, y)
162 | ```
163 |
164 |
165 |
166 |
167 |
168 |
169 | ### 05. svdd_example_PSO.py
170 |
171 | An example for parameter optimization using PSO.
172 |
173 | "scikit-opt" is required in this example.
174 |
175 | https://github.com/guofei9987/scikit-opt
176 |
177 |
178 | ```Python
179 | import sys
180 | sys.path.append("..")
181 | from src.BaseSVDD import BaseSVDD, BananaDataset
182 | from sko.PSO import PSO
183 | import matplotlib.pyplot as plt
184 |
185 |
186 | # Banana-shaped dataset generation and partitioning
187 | X, y = BananaDataset.generate(number=100, display='off')
188 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
189 |
190 | # objective function
191 | def objective_func(x):
192 | x1, x2 = x
193 | svdd = BaseSVDD(C=x1, gamma=x2, kernel='rbf', display='off')
194 | y = 1-svdd.fit(X_train, y_train).accuracy
195 | return y
196 |
197 | # Do PSO
198 | pso = PSO(func=objective_func, n_dim=2, pop=10, max_iter=20,
199 | lb=[0.01, 0.01], ub=[1, 3], w=0.8, c1=0.5, c2=0.5)
200 | pso.run()
201 |
202 | print('best_x is', pso.gbest_x)
203 | print('best_y is', pso.gbest_y)
204 |
205 | # plot the result
206 | fig = plt.figure(figsize=(6, 4))
207 | ax = fig.add_subplot(1, 1, 1)
208 | ax.plot(pso.gbest_y_hist)
209 | ax.yaxis.grid()
210 | plt.show()
211 | ```
212 |
213 |
214 |
215 |
216 |
217 | ### 06. svdd_example_confusion_matrix.py
218 |
219 | An example for drawing the confusion matrix and ROC curve.
220 |
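The full script is `examples/svdd_example_confusion_matrix.py`; its key steps (figure-styling boilerplate omitted) are:

```Python
import sys
sys.path.append("..")
import numpy as np
from src.BaseSVDD import BaseSVDD
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.metrics import roc_curve, auc
from sklearn.model_selection import train_test_split

# generate data
n = 100
dim = 5
X = np.r_[np.random.randn(n, dim) + 1, np.random.randn(n, dim)]
y = np.append(np.ones((n, 1), dtype=np.int64),
              -np.ones((n, 1), dtype=np.int64),
              axis=0)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)

# SVDD model
svdd = BaseSVDD(C=0.9, gamma=0.1, kernel='rbf', display='on')
svdd.fit(X_train, y_train)
y_test_predict = svdd.predict(X_test, y_test)

# confusion matrix, and ROC curve from the signed decision scores
cm = confusion_matrix(y_test, y_test_predict)
ConfusionMatrixDisplay(cm).plot()
y_score = svdd.decision_function(X_test)
fpr, tpr, _ = roc_curve(y_test, y_score)
roc_auc = auc(fpr, tpr)
```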
221 |
222 |
223 |
224 |
225 |
226 | ### 07. svdd_example_cross_validation.py
227 |
228 | An example for cross validation.
229 |
230 | ```Python
231 | import sys
232 | sys.path.append("..")
233 | from src.BaseSVDD import BaseSVDD, BananaDataset
234 | from sklearn.model_selection import cross_val_score
235 |
236 |
237 | # Banana-shaped dataset generation and partitioning
238 | X, y = BananaDataset.generate(number=100, display='on')
239 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
240 |
241 | #
242 | svdd = BaseSVDD(C=0.9, gamma=0.3, kernel='rbf', display='on')
243 |
244 |
245 | # cross validation (k-fold)
246 | k = 5
247 | scores = cross_val_score(svdd, X_train, y_train, cv=k, scoring='accuracy')
248 |
249 | #
250 | print("Cross validation scores:")
251 | for scores_ in scores:
252 | print(scores_)
253 |
254 | print("Mean cross validation score: {:4f}".format(scores.mean()))
255 | ```
256 | Results
257 | ```
258 | Cross validation scores:
259 | 0.5714285714285714
260 | 0.75
261 | 0.9642857142857143
262 | 1.0
263 | 1.0
264 | Mean cross validation score: 0.857143
265 | ```
266 |
267 | ### 08. svdd_example_grid_search.py
268 |
269 | An example for parameter selection using grid search.
270 |
271 | ```Python
272 | import sys
273 | sys.path.append("..")
274 | from sklearn.datasets import load_wine
275 | from src.BaseSVDD import BaseSVDD, BananaDataset
276 | from sklearn.model_selection import KFold, LeaveOneOut, ShuffleSplit
277 | from sklearn.model_selection import learning_curve, GridSearchCV
278 |
279 | # Banana-shaped dataset generation and partitioning
280 | X, y = BananaDataset.generate(number=100, display='off')
281 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
282 |
283 | param_grid = [
284 | {"kernel": ["rbf"], "gamma": [0.1, 0.2, 0.5], "C": [0.1, 0.5, 1]},
285 | {"kernel": ["linear"], "C": [0.1, 0.5, 1]},
286 | {"kernel": ["poly"], "C": [0.1, 0.5, 1], "degree": [2, 3, 4, 5]},
287 | ]
288 |
289 | svdd = GridSearchCV(BaseSVDD(display='off'), param_grid, cv=5, scoring="accuracy")
290 | svdd.fit(X_train, y_train)
291 | print("best parameters:")
292 | print(svdd.best_params_)
293 | print("\n")
294 |
295 | #
296 | best_model = svdd.best_estimator_
297 | means = svdd.cv_results_["mean_test_score"]
298 | stds = svdd.cv_results_["std_test_score"]
299 |
300 | for mean, std, params in zip(means, stds, svdd.cv_results_["params"]):
301 | print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
302 | print()
303 |
304 | ```
305 | Results
306 | ```Python
307 | best parameters:
308 | {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}
309 |
310 |
311 | 0.921 (+/-0.159) for {'C': 0.1, 'gamma': 0.1, 'kernel': 'rbf'}
312 | 0.893 (+/-0.192) for {'C': 0.1, 'gamma': 0.2, 'kernel': 'rbf'}
313 | 0.857 (+/-0.296) for {'C': 0.1, 'gamma': 0.5, 'kernel': 'rbf'}
314 | 0.950 (+/-0.086) for {'C': 0.5, 'gamma': 0.1, 'kernel': 'rbf'}
315 | 0.921 (+/-0.131) for {'C': 0.5, 'gamma': 0.2, 'kernel': 'rbf'}
316 | 0.864 (+/-0.273) for {'C': 0.5, 'gamma': 0.5, 'kernel': 'rbf'}
317 | 0.950 (+/-0.086) for {'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}
318 | 0.921 (+/-0.131) for {'C': 1, 'gamma': 0.2, 'kernel': 'rbf'}
319 | 0.864 (+/-0.273) for {'C': 1, 'gamma': 0.5, 'kernel': 'rbf'}
320 | 0.807 (+/-0.246) for {'C': 0.1, 'kernel': 'linear'}
321 | 0.821 (+/-0.278) for {'C': 0.5, 'kernel': 'linear'}
322 | 0.793 (+/-0.273) for {'C': 1, 'kernel': 'linear'}
323 | 0.879 (+/-0.184) for {'C': 0.1, 'degree': 2, 'kernel': 'poly'}
324 | 0.836 (+/-0.305) for {'C': 0.1, 'degree': 3, 'kernel': 'poly'}
325 | 0.771 (+/-0.416) for {'C': 0.1, 'degree': 4, 'kernel': 'poly'}
326 | 0.757 (+/-0.448) for {'C': 0.1, 'degree': 5, 'kernel': 'poly'}
327 | 0.871 (+/-0.224) for {'C': 0.5, 'degree': 2, 'kernel': 'poly'}
328 | 0.814 (+/-0.311) for {'C': 0.5, 'degree': 3, 'kernel': 'poly'}
329 | 0.800 (+/-0.390) for {'C': 0.5, 'degree': 4, 'kernel': 'poly'}
330 | 0.764 (+/-0.432) for {'C': 0.5, 'degree': 5, 'kernel': 'poly'}
331 | 0.871 (+/-0.224) for {'C': 1, 'degree': 2, 'kernel': 'poly'}
332 | 0.850 (+/-0.294) for {'C': 1, 'degree': 3, 'kernel': 'poly'}
333 | 0.800 (+/-0.390) for {'C': 1, 'degree': 4, 'kernel': 'poly'}
334 | 0.771 (+/-0.416) for {'C': 1, 'degree': 5, 'kernel': 'poly'}
335 | ```
336 |
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 |
3 | ## Supported Versions
4 |
5 | Use this section to tell people about which versions of your project are
6 | currently being supported with security updates.
7 |
8 | | Version | Supported |
9 | | ------- | ------------------ |
10 | | 5.1.x | :white_check_mark: |
11 | | 5.0.x | :x: |
12 | | 4.0.x | :white_check_mark: |
13 | | < 4.0 | :x: |
14 |
15 | ## Reporting a Vulnerability
16 |
17 | Use this section to tell people how to report a vulnerability.
18 |
19 | Tell them where to go, how often they can expect to get an update on a
20 | reported vulnerability, what to expect if the vulnerability is accepted or
21 | declined, etc.
22 |
--------------------------------------------------------------------------------
/examples/svdd_example_KPCA.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | An example for SVDD model fitting using nonlinear principal components.
5 |
6 | The KPCA algorithm is used to reduce the dimension of the original data.
7 |
8 | """
9 |
10 | import sys
11 | sys.path.append("..")
12 | import numpy as np
13 | from src.BaseSVDD import BaseSVDD
14 | from sklearn.decomposition import KernelPCA
15 |
16 |
17 | # create 100 points with 5 dimensions
18 | X = np.r_[np.random.randn(50, 5) + 1, np.random.randn(50, 5)]
19 | y = np.append(np.ones((50, 1), dtype=np.int64),
20 | -np.ones((50, 1), dtype=np.int64),
21 | axis=0)
22 |
23 | # number of the dimensionality
24 | kpca = KernelPCA(n_components=2, kernel="rbf", gamma=0.1, fit_inverse_transform=True)
25 | X_kpca = kpca.fit_transform(X)
26 |
27 | # fit the SVDD model
28 | svdd = BaseSVDD(C=0.9, gamma=10, kernel='rbf', display='on')
29 |
30 | # fit and predict
31 | svdd.fit(X_kpca, y)
32 | y_test_predict = svdd.predict(X_kpca, y)
33 |
34 | # plot the distance curve
35 | radius = svdd.radius
36 | distance = svdd.get_distance(X_kpca)
37 | svdd.plot_distance(radius, distance)
38 |
39 | # plot the boundary
40 | svdd.plot_boundary(X_kpca, y)
41 |
42 |
--------------------------------------------------------------------------------
/examples/svdd_example_PSO.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | An example for parameter optimization using PSO.
5 |
6 | "scikit-opt" is required in this examples.
7 |
8 | https://github.com/guofei9987/scikit-opt
9 |
10 | """
11 |
12 | import sys
13 | sys.path.append("..")
14 | from src.BaseSVDD import BaseSVDD, BananaDataset
15 | from sko.PSO import PSO
16 | import matplotlib.pyplot as plt
17 |
18 |
19 | # Banana-shaped dataset generation and partitioning
20 | X, y = BananaDataset.generate(number=100, display='off')
21 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
22 |
23 | # objective function
24 | def objective_func(x):
25 | x1, x2 = x
26 | svdd = BaseSVDD(C=x1, gamma=x2, kernel='rbf', display='off')
27 | y = 1-svdd.fit(X_train, y_train).accuracy
28 | return y
29 |
30 | # Do PSO
31 | pso = PSO(func=objective_func, n_dim=2, pop=10, max_iter=20,
32 | lb=[0.01, 0.01], ub=[1, 3], w=0.8, c1=0.5, c2=0.5)
33 | pso.run()
34 |
35 | print('best_x is', pso.gbest_x)
36 | print('best_y is', pso.gbest_y)
37 |
38 | # plot the result
39 | fig = plt.figure(figsize=(6, 4))
40 | ax = fig.add_subplot(1, 1, 1)
41 | ax.plot(pso.gbest_y_hist)
42 | ax.yaxis.grid()
43 | plt.show()
44 |
--------------------------------------------------------------------------------
/examples/svdd_example_confusion_matrix.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | An example for drawing the confusion matrix and ROC curve
4 |
5 | """
6 | import sys
7 | sys.path.append("..")
8 | import numpy as np
9 | import matplotlib.pyplot as plt
10 | from src.BaseSVDD import BaseSVDD
11 | from sklearn.metrics import confusion_matrix
12 | from sklearn.metrics import ConfusionMatrixDisplay
13 | from sklearn.metrics import roc_curve, auc
14 | from sklearn.model_selection import train_test_split
15 |
16 | # generate data
17 | n = 100
18 | dim = 5
19 | X = np.r_[np.random.randn(n, dim) + 1, np.random.randn(n, dim)]
20 | y = np.append(np.ones((n, 1), dtype=np.int64),
21 | -np.ones((n, 1), dtype=np.int64),
22 | axis=0)
23 |
24 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
25 |
26 | # SVDD model
27 | svdd = BaseSVDD(C=0.9, gamma=0.1, kernel='rbf', display='on')
28 | svdd.fit(X_train, y_train)
29 | y_test_predict = svdd.predict(X_test, y_test)
30 |
31 | # plot the distance curve
32 | radius = svdd.radius
33 | distance = svdd.get_distance(X_test)
34 | svdd.plot_distance(radius, distance)
35 |
36 | # confusion matrix and ROC curve
37 | cm = confusion_matrix(y_test, y_test_predict)
38 | cm_display = ConfusionMatrixDisplay(cm).plot()
39 | y_score = svdd.decision_function(X_test)
40 |
41 | fpr, tpr, _ = roc_curve(y_test, y_score)
42 | roc_auc = auc(fpr, tpr)
43 |
44 | plt.figure()
45 | plt.plot(fpr, tpr, color="darkorange", lw=3, label="ROC curve (area = %0.2f)" % roc_auc)
46 | plt.plot([0, 1], [0, 1], color="navy", lw=3, linestyle="--")
47 | plt.xlim([0.0, 1.0])
48 | plt.ylim([0.0, 1.05])
49 | plt.xlabel("False Positive Rate")
50 | plt.ylabel("True Positive Rate")
51 | plt.title("Receiver operating characteristic")
52 | plt.legend(loc="lower right")
53 | plt.grid()
54 | plt.show()
55 |
--------------------------------------------------------------------------------
/examples/svdd_example_cross_validation.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | An example for cross validation
4 |
5 | """
6 | import sys
7 | sys.path.append("..")
8 | from src.BaseSVDD import BaseSVDD, BananaDataset
9 | from sklearn.model_selection import cross_val_score
10 |
11 |
12 | # Banana-shaped dataset generation and partitioning
13 | X, y = BananaDataset.generate(number=100, display='on')
14 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
15 |
16 | #
17 | svdd = BaseSVDD(C=0.9, gamma=0.3, kernel='rbf', display='on')
18 |
19 |
20 | # cross validation (k-fold)
21 | k = 5
22 | scores = cross_val_score(svdd, X_train, y_train, cv=k, scoring='accuracy')
23 |
24 | #
25 | print("Cross validation scores:")
26 | for scores_ in scores:
27 | print(scores_)
28 |
29 | print("Mean cross validation score: {:4f}".format(scores.mean()))
30 |
31 |
--------------------------------------------------------------------------------
/examples/svdd_example_grid_search.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | An example for parameter selection using grid search
5 |
6 | """
7 | import sys
8 | sys.path.append("..")
9 | from sklearn.datasets import load_wine
10 | from src.BaseSVDD import BaseSVDD, BananaDataset
11 | from sklearn.model_selection import KFold, LeaveOneOut, ShuffleSplit
12 | from sklearn.model_selection import learning_curve, GridSearchCV
13 |
14 | # Banana-shaped dataset generation and partitioning
15 | X, y = BananaDataset.generate(number=100, display='off')
16 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
17 |
18 | param_grid = [
19 | {"kernel": ["rbf"], "gamma": [0.1, 0.2, 0.5], "C": [0.1, 0.5, 1]},
20 | {"kernel": ["linear"], "C": [0.1, 0.5, 1]},
21 | {"kernel": ["poly"], "C": [0.1, 0.5, 1], "degree": [2, 3, 4, 5]},
22 | ]
23 |
24 | svdd = GridSearchCV(BaseSVDD(display='off'), param_grid, cv=5, scoring="accuracy")
25 | svdd.fit(X_train, y_train)
26 | print("best parameters:")
27 | print(svdd.best_params_)
28 | print("\n")
29 |
30 | #
31 | best_model = svdd.best_estimator_
32 | means = svdd.cv_results_["mean_test_score"]
33 | stds = svdd.cv_results_["std_test_score"]
34 |
35 | for mean, std, params in zip(means, stds, svdd.cv_results_["params"]):
36 | print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))
37 | print()
38 |
--------------------------------------------------------------------------------
/examples/svdd_example_hybrid_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | An example for SVDD model fitting with negative samples
5 |
6 | """
7 | import sys
8 | sys.path.append("..")
9 | from sklearn.datasets import load_wine
10 | from src.BaseSVDD import BaseSVDD, BananaDataset
11 |
12 | # Banana-shaped dataset generation and partitioning
13 | X, y = BananaDataset.generate(number=100, display='on')
14 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
15 |
16 | #
17 | svdd = BaseSVDD(C=0.9, gamma=0.3, kernel='rbf', display='on')
18 |
19 | #
20 | svdd.fit(X_train, y_train)
21 |
22 | #
23 | svdd.plot_boundary(X_train, y_train)
24 |
25 | #
26 | y_test_predict = svdd.predict(X_test, y_test)
27 |
28 | #
29 | radius = svdd.radius
30 | distance = svdd.get_distance(X_test)
31 | svdd.plot_distance(radius, distance)
--------------------------------------------------------------------------------
/examples/svdd_example_kernel.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | An example for SVDD model fitting using different kernels
5 |
6 | """
7 | import sys
8 | sys.path.append("..")
9 | from src.BaseSVDD import BaseSVDD, BananaDataset
10 |
11 | # Banana-shaped dataset generation and partitioning
12 | X, y = BananaDataset.generate(number=100, display='on')
13 | X_train, X_test, y_train, y_test = BananaDataset.split(X, y, ratio=0.3)
14 |
15 | # kernel list
16 | kernelList = {"1": BaseSVDD(C=0.9, kernel='rbf', gamma=0.3, display='on'),
17 |                 "2": BaseSVDD(C=0.9, kernel='poly', degree=2, display='on'),
18 | "3": BaseSVDD(C=0.9, kernel='linear', display='on')
19 | }
20 |
21 | #
22 | for i in range(len(kernelList)):
23 | svdd = kernelList.get(str(i+1))
24 | svdd.fit(X_train, y_train)
25 | svdd.plot_boundary(X_train, y_train)
26 |
27 |
28 |
29 |
30 |
--------------------------------------------------------------------------------
/examples/svdd_example_unlabeled_data.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 |
4 | An example for SVDD model fitting using unlabeled data
5 |
6 | """
7 | import sys
8 | sys.path.append("..")
9 | import numpy as np
10 | from src.BaseSVDD import BaseSVDD
11 |
12 | # create 100 points with 2 dimensions
13 | n = 100
14 | dim = 2
15 | X = np.r_[np.random.randn(n, dim)]
16 |
17 | # svdd object using rbf kernel
18 | svdd = BaseSVDD(C=0.9, gamma=0.3, kernel='rbf', display='on')
19 |
20 | # fit the SVDD model
21 | svdd.fit(X)
22 |
23 | # predict the label
24 | y_predict = svdd.predict(X)
25 |
26 | # plot the boundary
27 | svdd.plot_boundary(X)
28 |
29 | # plot the distance
30 | radius = svdd.radius
31 | distance = svdd.get_distance(X)
32 | svdd.plot_distance(radius, distance)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | cvxopt==1.2.7
2 | matplotlib==3.4.2
3 | numpy==1.22.0
4 | scikit_learn==1.0.1
5 |
--------------------------------------------------------------------------------
/src/BaseSVDD.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sun Feb 27 00:37:30 2022
4 |
5 | @author: iqiukp
6 | """
7 |
8 | import numpy as np
9 | import time
10 | import matplotlib.pyplot as plt
11 | from collections import defaultdict
12 | import warnings
13 | from cvxopt import matrix, solvers
14 | from sklearn.base import BaseEstimator, OutlierMixin
15 | from sklearn.metrics import accuracy_score
16 | from sklearn.metrics.pairwise import pairwise_kernels
17 | from sklearn.model_selection import train_test_split
18 |
19 | class BaseSVDD(BaseEstimator, OutlierMixin):
20 |     """One-class classification using Support Vector Data Description (SVDD).
21 |
22 | Parameters
23 | ----------
24 |     C : float, default=0.9
25 | Regularization parameter. The strength of the regularization is
26 | inversely proportional to C. Must be strictly positive. The penalty
27 | is a squared l2 penalty.
28 | kernel : {'linear', 'poly', 'rbf', 'sigmoid'}, default='rbf'
29 | Specifies the kernel type to be used in the algorithm.
30 | It must be one of 'linear', 'poly', 'rbf', 'sigmoid'.
31 | degree : int, default=3
32 | Degree of the polynomial kernel function ('poly').
33 | Ignored by all other kernels.
34 |     gamma : {'scale', 'auto'} or float, default=None (treated as 'scale')
35 | Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.
36 | - if ``gamma='scale'`` (default) is passed then it uses
37 | 1 / (n_features * X.var()) as value of gamma,
38 | - if 'auto', uses 1 / n_features.
39 |     coef0 : float, default=1.0
40 | Independent term in kernel function.
41 | It is only significant in 'poly' and 'sigmoid'.
42 |
43 | n_jobs : int, default=None
44 | The number of parallel jobs to run.
45 | ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
46 |         ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
47 | for more details.
48 |
49 | """
50 |
51 | def __init__(self,
52 | C=0.9,
53 | kernel='rbf',
54 | degree=3,
55 | gamma=None,
56 | coef0=1,
57 | display='on',
58 | n_jobs=None):
59 |
60 | self.C = C
61 | self.kernel = kernel
62 | self.degree = degree
63 | self.gamma = gamma
64 | self.coef0 = coef0
65 | self.n_jobs = n_jobs
66 | self.display = display
67 | self.X = None
68 | self.y = None
69 | self.weight = None
70 | self.exist_label = True
71 | self.label_type = None
72 | self.support_vectors = None
73 | self.support_vector_indices = None
74 | self.n_support_vectors = None
75 | self.n_iterations = None
76 | self.object_value = None
77 | self.alpha = None
78 | self.alpha_tolerance = 1e-6
79 | self.support_vector_alpha = None
80 | self.n_support_vectors_ratio = None
81 | self.radius = None
82 | self.center = None
83 | self.offset = None
84 | self.distance = None
85 | self.accuracy = None
86 | self.predicted_y = None
87 | self.running_time = None
88 | self.boundary_indices = None
89 | self.classes_ = None
90 |
91 |
92 |
93 | @property
94 | def n_samples(self):
95 | return self.X.shape[0]
96 | @property
97 | def n_features(self):
98 | return self.X.shape[1]
99 | @property
100 | def n_positive_samples(self):
101 | return np.sum(self.y == 1)
102 | @property
103 | def n_negative_samples(self):
104 | return np.sum(self.y == -1)
105 |
106 |
107 | def fit(self, X, y=None, weight=None):
108 | """Fit the model from data in X.
109 |
110 | Parameters
111 | ----------
112 | X : {array-like, sparse matrix}, shape (n_samples, n_features)
113 | The training input samples.
114 | y : array-like, shape (n_samples, 1)
115 | The target values (class labels in classification,
116 | 1 for positive samples and -1 for negative samples)
117 | weight : array-like of shape (n_samples, 1), default=None
118 |
119 | Returns
120 | -------
121 | self : object
122 | Returns self.
123 | """
124 |
125 | start_time = time.time()
126 | # parameter preprocessing
127 | self.X, self.y, self.y_type, self.exist_y = self._check_X_y(X, y)
128 |
129 | if self.y_type == 'single':
130 | self.C = [self.C, 1]
131 |
132 | if self.y_type == 'hybrid':
133 | self.C = [self.C, 2/self.n_negative_samples]
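        # Per-class box constraints: C[0] bounds the alphas of the positive
        # samples and C[1] those of the negative samples (2/n_negative_samples
        # by default when negatives are present); see the h vector built in
        # solve_problem().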
134 |
135 | if weight is None:
136 | self.weight = np.ones((self.n_samples, 1), dtype=np.int64)
137 | else:
138 | self.weight = weight
139 |
140 | # check 'gamma'
141 | if self.gamma == 0:
142 | raise ValueError(
143 | "The gamma value of 0.0 is invalid. Use 'auto' to set"
144 | " gamma to a value of 1 / n_features.")
145 | if self.gamma is None:
146 | self.gamma = 'scale'
147 | if isinstance(self.gamma, str):
148 | if self.gamma == "scale":
149 | X_var = X.var()
150 | self.gamma = 1.0 / (X.shape[1] * X_var) if X_var != 0 else 1.0
151 | elif self.gamma == "auto":
152 | self.gamma = 1.0 / X.shape[1]
153 | else:
154 | raise ValueError(
155 | "When 'gamma' is a string, it should be either 'scale' or 'auto'.")
156 |
157 | # get SVDD model
158 | self.get_model()
159 | display_ = self.display
160 | self.display = 'off'
161 | self.predicted_y_ = self.predict(self.X, self.y)
162 | self.accuracy = accuracy_score(self.y, self.predicted_y_)
163 | self.display = display_
164 | end_time = time.time()
165 | self.running_time = end_time - start_time
166 |
167 | # display
168 | if self.display == 'on':
169 | self.display_fit()
170 | return self
171 |
172 | def get_model(self):
173 | #
174 | K = self._get_kernel(self.X, self.X)
175 | self.solve_problem(K)
176 |
177 | def _get_kernel(self, X, Y=None):
178 | # get kernel matrix
179 | if callable(self.kernel):
180 |             params = getattr(self, "kernel_params", None) or {}  # kernel_params is never set in __init__
181 | else:
182 | params = {"gamma": self.gamma, "degree": self.degree, "coef0": self.coef0}
183 |
184 | return pairwise_kernels(
185 | X, Y, metric=self.kernel, filter_params=True, n_jobs=self.n_jobs, **params)
186 |
187 | def solve_problem(self, K):
188 | """
189 | DESCRIPTION
190 |
191 | Solve the Lagrange dual problem using cvxopt
192 |
193 |
194 | minimize (1/2)*x'*P*x + q'*x
195 | subject to G*x <= h
196 | A*x = b
197 | --------------------------------------------------
198 |
199 | """
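        # Correspondence with the arrays built below: after the labels are
        # folded into the kernel via K <- (y y') .* K, the QP data are
        #   P = K + K',   q = -y .* diag(K),
        #   G = [-I; I],  h = [0; C-bounds]   (0 <= y_i*alpha_i <= C_i),
        #   A = 1',       b = 1               (sum_i y_i*alpha_i = 1),
        # and the solver variable x satisfies x_i = y_i * alpha_i.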
200 |
201 | solvers.options['show_progress'] = False
202 | K = np.multiply(self.y * self.y.T, K)
203 |
204 | # P
205 | n = K.shape[0]
206 | P = K + K.T
207 |
208 | # q
209 | q = -np.multiply(self.y, np.mat(np.diagonal(K)).T)
210 |
211 | # G
212 | G1 = -np.eye(n)
213 | G2 = np.eye(n)
214 | G = np.append(G1, G2, axis=0)
215 |
216 | # h
217 | h1 = np.zeros([n, 1])
218 | h2 = np.ones([n, 1])
219 |
220 | if self.y_type == 'single':
221 | h2[self.y == 1] = self.C[0] * self.weight[self.y == 1]
222 |
223 | if self.y_type == 'hybrid':
224 | h2[self.y == 1] = self.C[0] * self.weight[self.y == 1]
225 | h2[self.y == -1] = self.C[1] * self.weight[self.y == -1]
226 |
227 | h = np.append(h1, h2, axis=0)
228 | h2_ = h2
229 |
230 | # A, b
231 | A = np.ones([n, 1]).T
232 | b = np.ones([1, 1])
233 |
234 | #
235 | P = matrix(P)
236 | q = matrix(q)
237 | G = matrix(G)
238 | h = matrix(h)
239 | A = matrix(A)
240 | b = matrix(b)
241 |
242 | #
243 | sol = solvers.qp(P, q, G, h, A, b)
244 |
245 | self.object_value = np.array(sol['dual objective'])
246 | self.n_iterations = np.array(sol['iterations'])
247 |
248 | if len(np.array(sol['x'])) == 0:
249 | warnings.warn("No solution for the SVDD model could be found.\n")
250 | self.alpha = np.zeros((self.n_samples, 1))
251 | self.alpha[0][0] = 1
252 | else:
253 | self.alpha = np.array(sol['x'])
254 |
255 | self.alpha = self.y * self.alpha
256 | self.support_vector_indices = np.where(np.abs(self.alpha) > self.alpha_tolerance)[0][:]
257 |
258 | # boundary indices
259 | tmp_1 = self.alpha[self.support_vector_indices, 0]
260 | tmp_2 = h2_[self.support_vector_indices, 0]
261 | tmp_3 = np.where(tmp_1 < tmp_2)[0][:]
262 | tmp_4 = np.where(tmp_1 > self.alpha_tolerance)[0][:]
263 | self.boundary_indices = self.support_vector_indices[np.array(list(set(tmp_3) & set(tmp_4)))]
264 |
265 | # support vectors
266 | self.alpha[np.where(np.abs(self.alpha) < self.alpha_tolerance)[0][:]] = 0
267 | self.support_vectors = self.X[self.support_vector_indices, :]
268 | self.support_vector_alpha = self.alpha[self.support_vector_indices]
269 | self.n_support_vectors = self.support_vector_indices.shape[0]
270 | self.n_support_vectors_ratio = self.n_support_vectors/self.n_samples
271 |
272 | if self.n_support_vectors_ratio > 0.5:
273 | warnings.warn("The fitted SVDD model may be overfitting.\n")
274 |
275 | # offset, center, radius
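        # The squared radius is evaluated at the boundary support vectors
        # (alphas strictly between 0 and their box bound) and averaged over
        # all of them, reducing sensitivity to any single support vector.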
276 | tmp_5 = np.dot(np.ones((self.n_samples, 1)), self.alpha.T)
277 | tmp_6 = np.multiply(tmp_5, K)
278 | tmp_ = -2*np.sum(tmp_6, axis=1, keepdims=True)
279 | self.offset = np.sum(np.multiply(np.dot(self.alpha, self.alpha.T), K))
280 | self.center = np.dot(self.alpha.T, self.X)
281 | self.radius = np.sqrt(np.mean(np.diag(K)[self.boundary_indices]) + self.offset + np.mean(tmp_[self.boundary_indices, 0]))
282 |
283 | def predict(self, X, y=None):
284 | """Predict the class labels for the provided data.
285 |
286 | Parameters
287 | ----------
288 | X : array-like of shape (n_queries, n_features)
289 | Test samples.
290 | y : (optional) array-like, shape (n_samples, 1)
291 | The target values (class labels in classification,
292 | 1 for positive samples and -1 for negative samples)
293 |
294 | Returns
295 | -------
296 | predicted_y : array-like, shape (n_samples, 1)
297 | The predicted target values
298 | """
299 |
300 | start_time = time.time()
301 | results = {}
302 | results['X'], results['y'], results['y_type'], results['exist_y'] = self._check_X_y(X, y)
303 | results['n_samples'] = results['X'].shape[0]
304 | results['distance'] = self.get_distance(X)
305 | results['predicted_y'] = np.mat(np.ones(results['n_samples'])).T
306 | index_ = results['distance'] > self.radius
307 | results['predicted_y'][index_] = -1
308 | results['predicted_y'] = np.asarray(results['predicted_y'])
309 | results['n_alarm'] = np.sum(index_==True)
310 |
311 | if results['exist_y'] == True:
312 | results['accuracy'] = accuracy_score(results['y'], results['predicted_y'])
313 |
314 | end_time = time.time()
315 | results['running_time'] = end_time - start_time
316 | # display
317 | if self.display == 'on':
318 | self.display_predict(results)
319 | return results['predicted_y']
320 |
321 | def get_distance(self, X):
322 | # compute the distance between the samples and the center
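        # For each sample z the squared distance to the center is
        #   d(z)^2 = K(z, z) - 2 * sum_i alpha_i * K(z, x_i) + self.offset,
        # where self.offset is the constant center-norm term cached at fit time.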
323 | K = self._get_kernel(X, self.X)
324 | K_ = self._get_kernel(X, X)
325 | tmp_1 = np.dot(np.ones((X.shape[0], 1), dtype=np.int64), self.alpha.T)
326 | tmp_2 = np.multiply(tmp_1, K)
327 | tmp_ = -2*np.sum(tmp_2, axis=1, keepdims=True)
328 | distance = np.sqrt(np.mat(np.diag(K_)).T+self.offset+tmp_)
329 | return distance
330 |
331 | def fit_predict(self, X, y=None, weight=None):
332 | # Perform fit on X and returns labels for X.
333 | self.fit(X, y, weight)
334 | return self.predict(X, y)
335 |
336 | def decision_function(self, X):
337 | """Signed distance to the separating hyperplane.
338 | Signed distance is positive for an inlier and negative for an outlier.
339 | Parameters
340 | ----------
341 | X : array-like of shape (n_samples, n_features)
342 | The data matrix.
343 | Returns
344 | -------
345 | dec : ndarray of shape (n_samples, 1)
346 | Returns the decision function of the samples.
347 | The anomaly score of the input samples. The lower,
348 | the more abnormal. Negative scores represent outliers,
349 | positive scores represent inliers.
350 |
351 | """
352 | return np.asarray(self.radius-self.get_distance(X))
353 |
354 | def get_params(self, deep=True):
355 | """
356 | Get parameters for this estimator.
357 | Parameters
358 | ----------
359 | deep : bool, default=True
360 | If True, will return the parameters for this estimator and
361 | contained subobjects that are estimators.
362 | Returns
363 | -------
364 | params : dict
365 | Parameter names mapped to their values.
366 | """
367 | out = dict()
368 | for key in self._get_param_names():
369 | value = getattr(self, key)
370 | if deep and hasattr(value, "get_params"):
371 | deep_items = value.get_params().items()
372 | out.update((key + "__" + k, val) for k, val in deep_items)
373 | out[key] = value
374 | return out
375 |
376 | def set_params(self, **params):
377 | """
378 | Set the parameters of this estimator.
379 | The method works on simple estimators as well as on nested objects
380 | (such as :class:`~sklearn.pipeline.Pipeline`). The latter have
381 | parameters of the form ``__`` so that it's
382 | possible to update each component of a nested object.
383 | Parameters
384 | ----------
385 | **params : dict
386 | Estimator parameters.
387 | Returns
388 | -------
389 | self : estimator instance
390 | Estimator instance.
391 | """
392 | if not params:
393 | # Simple optimization to gain speed (inspect is slow)
394 | return self
395 | valid_params = self.get_params(deep=True)
396 |
397 | nested_params = defaultdict(dict) # grouped by prefix
398 | for key, value in params.items():
399 | key, delim, sub_key = key.partition("__")
400 | if key not in valid_params:
401 | raise ValueError(
402 | "Invalid parameter %s for estimator %s. "
403 | "Check the list of available parameters "
404 | "with `estimator.get_params().keys()`." % (key, self)
405 | )
406 |
407 | if delim:
408 | nested_params[key][sub_key] = value
409 | else:
410 | setattr(self, key, value)
411 | valid_params[key] = value
412 |
413 | for key, sub_params in nested_params.items():
414 | valid_params[key].set_params(**sub_params)
415 |
416 | return self
417 |
418 | def _check_X_y(self, X, y):
419 |
420 | # check for labels
421 | if y is None:
422 | y = np.ones((X.shape[0], 1))
423 | exist_y = False
424 | else:
425 | exist_y = True
426 |
427 | # check for object type (numpy.ndarray)
428 | if type(X) is not np.ndarray or type(y) is not np.ndarray:
429 | raise SyntaxError("The type of X and y must be 'numpy.ndarray'.\n")
430 |
431 | # check for data dimensionality
432 | if len(X.shape) != 2 or len(y.shape) != 2:
433 | raise SyntaxError("The X and y must be 2D.\n")
434 |
435 | # check for data length
436 | if X.shape[0] != y.shape[0]:
437 |             raise SyntaxError("The length of X and y must be the same.\n")
438 |
439 | # check for label values
440 | tmp_ = np.unique(y)
441 | if np.all(tmp_ == np.array([1])) or np.all(tmp_ == np.array([-1])):
442 | y_type = 'single'
443 |
444 | elif np.all(tmp_ == np.array([1, -1])) or np.all(tmp_ == np.array([-1, 1])):
445 | y_type = 'hybrid'
446 |
447 | else:
448 | errorText = "SVDD is only supported for one-class or binary classification. "\
449 | "The label must be 1 for positive samples or -1 for negative samples.\n"
450 | raise SyntaxError(errorText)
451 |
452 | self.classes_ = np.unique(y)
453 |
454 | return X, y, y_type, exist_y
455 |
456 | def display_fit(self):
457 | # display the fitting results
458 | print('\n')
459 | print('*** Fitting of the SVDD model is completed. ***\n')
460 | print('running time = %.4f seconds' % self.running_time)
461 | print('kernel function = %s' % self.kernel)
462 | print('iterations = %d' % self.n_iterations)
463 | print('number of samples = %d' % self.n_samples)
464 | print('number of features = %d' % self.n_features)
465 | print('number of SVs = %d' % self.n_support_vectors)
466 | print('ratio of SVs = %.4f %%' % (100*self.n_support_vectors_ratio))
467 | print('accuracy = %.4f %%' % (100*self.accuracy))
468 | print('\n')
469 |
470 | def display_predict(self, results):
471 | # display test results
472 | print('\n')
473 | print('*** Prediction of the provided data is completed. ***\n')
474 | print('running time = %.4f seconds' % results['running_time'])
475 | print('number of samples = %d' % results['n_samples'])
476 | print('number of alarm = %d' % results['n_alarm'])
477 | if results['exist_y'] == True:
478 | print('accuracy = %.4f %%' % (100*results['accuracy']))
479 | print('\n')
480 |
481 | def plot_distance(self, radius, distance):
482 | """
483 | DESCRIPTION
484 |
485 | Plot the curve of distance
486 | ---------------------------------------------------------------
487 |
488 | """
489 |
490 | n = distance.shape[0]
491 | fig = plt.figure(figsize=(10, 6))
492 | ax = fig.add_subplot(1, 1, 1)
493 | radius = np.ones((n, 1))*radius
494 |
495 | ax.plot(radius,
496 | color='r',
497 | linestyle='-',
498 | marker='None',
499 | linewidth=3,
500 | markeredgecolor='k',
501 | markerfacecolor='w',
502 | markersize=6)
503 |
504 | ax.plot(distance,
505 | color='k',
506 | linestyle=':',
507 | marker='o',
508 | linewidth=1,
509 | markeredgecolor='k',
510 | markerfacecolor='C4',
511 | markersize=6)
512 |
513 | ax.set_xlabel('Samples')
514 | ax.set_ylabel('Distance')
515 |
516 | ax.legend(["Radius", "Distance"],
517 | ncol=1, loc=0,
518 | edgecolor='black',
519 | markerscale=1, fancybox=True)
520 | ax.yaxis.grid()
521 | plt.show()
522 |
523 | def plot_boundary(self, X, y=None, expand_ratio=0.2, n_grids=50,
524 | color_map='RdYlBu', n_level=6):
525 | """
526 | DESCRIPTION
527 |
528 | Plot the boundary
529 | ---------------------------------------------------------------
530 |
531 | """
532 | start_time = time.time()
533 | dim = X.shape[1]
534 | if dim != 2:
535 |             raise SyntaxError('Visualization of the decision boundary is only supported for 2D data')
536 | x_range = np.zeros(shape=(n_grids, 2))
537 | for i in range(2):
538 | _tmp_ = (np.max(X[:, i])-np.min(X[:, i]))*expand_ratio
539 | xlim_1 = np.min(X[:, i])-_tmp_
540 | xlim_2 = np.max(X[:, i])+_tmp_
541 | x_range[:, i] = np.linspace(xlim_1, xlim_2, n_grids)
542 |
543 | # grid
544 | xv, yv = np.meshgrid(x_range[:, 0], x_range[:, 1])
545 | num1 = xv.shape[0]
546 | num2 = yv.shape[0]
547 | print('Calculating the grid scores (%04d*%04d)...\n' %(num1, num2))
548 | distance_ = self.get_distance(np.c_[xv.ravel(), yv.ravel()])
549 | distance = distance_.reshape(xv.shape)
550 | end_time = time.time()
551 | print('Calculation of the grid scores is completed. Time cost %.4f seconds\n' % (end_time-start_time))
552 |
553 | fig = plt.figure(figsize=(20, 6))
554 |
555 | # figure 1: the 3D contour
556 | ax1 = fig.add_subplot(1, 3, 1, projection='3d')
557 | ax1.plot_surface(xv, yv, distance, cmap=color_map)
558 | ax1.contourf(xv, yv, distance.A, n_level, zdir='z', offset=np.min(distance)*0.9, cmap=color_map)
559 | ax1.set_zlim(np.min(distance)*0.9, np.max(distance)*1.05)
560 |
561 | # figure 2: the 2D contour
562 | ax2 = fig.add_subplot(1, 3, 2)
563 | ctf1 = ax2.contourf(xv, yv, distance, n_level, alpha=0.8, cmap=color_map)
564 | ctf2 = ax2.contour(xv, yv, distance, n_level, colors='black', linewidths=1)
565 | plt.clabel(ctf2, inline=True)
566 | plt.colorbar(ctf1)
567 |
568 | # figure 3: the 2D contour and data
569 | ax3 = fig.add_subplot(1, 3, 3)
570 | _, y, _, _ = self._check_X_y(X, y)
571 | tmp_1 = y == 1
572 | tmp_2 = y == -1
573 | positive_indices = tmp_1[:, 0]
574 | negative_indices = tmp_2[:, 0]
575 |
576 | if self.y_type == 'single':
577 |
578 | ax3.scatter(X[:, 0],
579 | X[:, 1],
580 | facecolor='C0', marker='o', s=100, linewidths=2,
581 | edgecolor='black', zorder=2)
582 |
583 | ax3.scatter(X[self.support_vector_indices, 0],
584 | X[self.support_vector_indices, 1],
585 | facecolor='C2', marker='o', s=144, linewidths=2,
586 | edgecolor='black', zorder=2)
587 |
588 | ax3.contour(xv, yv, distance, levels=[self.radius],
589 | colors='C3', linewidths=7, zorder=1)
590 |
591 | ax3.legend(["Data", "Support vectors"],
592 | ncol=1, loc='upper left', edgecolor='black',
593 | markerscale=1.2, fancybox=True)
594 |
595 | else:
596 | ax3.scatter(X[positive_indices, 0],
597 | X[positive_indices, 1],
598 | facecolor='C0', marker='o', s=100, linewidths=2,
599 | edgecolor='black', zorder=2)
600 |
601 | ax3.scatter(X[negative_indices, 0],
602 | X[negative_indices, 1],
603 | facecolor='C4', marker='s', s =100, linewidths=2,
604 | edgecolor='black', zorder=2)
605 |
606 | ax3.scatter(X[self.support_vector_indices, 0],
607 | X[self.support_vector_indices, 1],
608 | facecolor='C2', marker='o', s=144, linewidths=2,
609 | edgecolor='black', zorder=2)
610 |
611 | ax3.contour(xv, yv, distance, levels=[self.radius],
612 | colors='C3', linewidths=7, zorder=1)
613 |
614 | ax3.legend(["Data (+)", "Data (-)", "Support vectors"],
615 | ncol=1, loc='upper left', edgecolor='black',
616 | markerscale=1.2, fancybox=True)
617 |
618 | plt.grid()
619 | plt.show()
620 |
621 | class BananaDataset():
622 | """
623 | Banana-shaped dataset generation and partitioning.
624 |
625 | """
626 | def generate(**kwargs):
627 | # Banana-shaped dataset generation
628 | number = kwargs['number']
629 | display = kwargs['display']
630 |
631 | # parameters for banana-shaped dataset
632 | sizeBanana = 3
633 | varBanana = 1.2
634 | param_1 = 0.02
635 | param_2 = 0.02
636 | param_3 = 0.98
637 |         param_4 = -0.8 # x-axis shift
638 | # generate
639 | class_p = param_1 * np.pi+np.random.rand(number, 1) * param_3 * np.pi
640 | data_p = np.append(sizeBanana * np.sin(class_p), sizeBanana * np.cos(class_p), axis=1)
641 | data_p = data_p + np.random.rand(number, 2) * varBanana
642 | data_p[:, 0] = data_p[:, 0] - sizeBanana * 0.5
643 | label_p = np.ones((number, 1), dtype=np.int64)
644 |
645 | class_n = param_2 * np.pi - np.random.rand(number, 1) * param_3 * np.pi
646 | data_n = np.append(sizeBanana * np.sin(class_n), sizeBanana * np.cos(class_n), axis=1)
647 | data_n = data_n + np.random.rand(number, 2)*varBanana
648 | data_n = data_n + np.ones((number, 1)) * [sizeBanana * param_4, sizeBanana * param_4]
649 | data_n[:, 0] = data_n[:, 0] + sizeBanana * 0.5
650 | label_n = -np.ones((number, 1), dtype=np.int64)
651 |
652 | # banana-shaped dataset
653 | data = np.append(data_p, data_n, axis=0)
654 | label = np.append(label_p, label_n, axis=0)
655 |
656 | if display == 'on':
657 | pIndex = label == 1
658 | nIndex = label == -1
659 | fig = plt.figure(figsize=(10, 6))
660 | ax = fig.add_subplot(1, 1, 1)
661 | ax.scatter(data[pIndex[:, 0], 0], data[pIndex[:, 0], 1],
662 | facecolor='C0', marker='o', s=100, linewidths=2,
663 | edgecolor='black', zorder=2)
664 |
665 | ax.scatter(data[nIndex[:, 0], 0], data[nIndex[:, 0], 1],
666 | facecolor='C3', marker='o', s = 100, linewidths=2,
667 | edgecolor='black', zorder=2)
668 |
669 | ax.set_xlim([-6, 5])
670 | ax.set_ylim([-7, 7])
671 |
672 | return data, label
673 |
674 | def split(data, label, **kwargs):
675 | # Banana-shaped dataset partitioning.
676 |
677 | ratio = kwargs['ratio']
678 | X_train, X_test, y_train, y_test = train_test_split(data, label, test_size=ratio,
679 | random_state=None, shuffle=True, stratify=label)
680 | pIndex = y_train == 1
681 | nIndex = y_train == -1
682 | X_train = np.append(X_train[pIndex[:, 0], :], X_train[nIndex[:, 0], :], axis=0)
683 | y_train = np.append(y_train[pIndex[:, 0], :], y_train[nIndex[:, 0], :], axis=0)
684 |
685 | pIndex = y_test == 1
686 | nIndex = y_test == -1
687 | X_test = np.append(X_test[pIndex[:, 0], :], X_test[nIndex[:, 0], :], axis=0)
688 | y_test = np.append(y_test[pIndex[:, 0], :], y_test[nIndex[:, 0], :], axis=0)
689 |
690 |         return X_train, X_test, y_train, y_test
--------------------------------------------------------------------------------
/src/__pycache__/BananaDataset.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iqiukp/SVDD-Python/e24837efaed0ed2054da151aeb558b6ece23e23c/src/__pycache__/BananaDataset.cpython-38.pyc
--------------------------------------------------------------------------------
/src/__pycache__/BaseSVDD.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iqiukp/SVDD-Python/e24837efaed0ed2054da151aeb558b6ece23e23c/src/__pycache__/BaseSVDD.cpython-38.pyc
--------------------------------------------------------------------------------
/src/__pycache__/testmodel.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/iqiukp/SVDD-Python/e24837efaed0ed2054da151aeb558b6ece23e23c/src/__pycache__/testmodel.cpython-38.pyc
--------------------------------------------------------------------------------