├── tests ├── __init__.py └── tests.py ├── scikit_roughsets ├── __init__.py ├── rs_reduction.py └── roughsets.py ├── setup.cfg ├── .travis.yml ├── .gitignore ├── setup.py ├── .github └── workflows │ └── python-package.yml ├── LICENSE └── README.rst /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scikit_roughsets/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file=README.rst 3 | 4 | [bdist_wheel] 5 | universal=1 -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "2.7" 4 | - "3.6" 5 | - "3.7" 6 | - "3.8" 7 | - "3.9" 8 | install: 9 | - "pip install -e ." 10 | script: 11 | - "nosetests tests" 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | */.ipynb_checkpoints 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # IntelliJ project files 12 | *.idea 13 | *.iml 14 | out 15 | gen 16 | 17 | build 18 | dist 19 | *.egg-info 20 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | 2 | from setuptools import setup 3 | from codecs import open 4 | from os import path 5 | 6 | here = path.abspath(path.dirname(__file__)) 7 | 8 | # Get the long description from the README file 9 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f: 10 | long_description = f.read() 11 | 12 | setup( 13 | name='scikit-roughsets', 14 | version='1.0', 15 | description='Feature reduction using rough set theory', 16 | long_description=long_description, 17 | url='http://www.github.com/paudan/scikit-roughsets', 18 | author='Paulius Danenas', 19 | author_email='danpaulius@gmail.com', 20 | license='MIT', 21 | keywords='machine_learning', 22 | packages=['scikit_roughsets'], 23 | package_dir={'scikit_roughsets': 'scikit_roughsets'}, 24 | install_requires=['numpy', 'scikit-learn'], 25 | ) -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests with a variety of Python versions 2 | 3 | name: Test package 4 | 5 | on: 6 | push: 7 | branches: [ "master" ] 8 | pull_request: 9 | branches: [ "master" ] 10 | 11 | jobs: 12 | build: 13 | 14 | runs-on: ubuntu-latest 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: ["3.6", "3.8", "3.9", "3.10"] 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - name: Set up Python ${{ matrix.python-version }} 23 | uses: actions/setup-python@v3 24 | with: 25 | python-version: ${{ matrix.python-version }} 26 | - name: Install dependencies 27 | run: | 28 | python -m pip install --upgrade pip 29 | python -m pip install pytest scikit-learn 30 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi 31 | - name: Test with pytest 32 | run: | 33 | pytest tests/* 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Paulius Danenas 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scikit_roughsets/rs_reduction.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.base import BaseEstimator 3 | from sklearn.feature_selection import SelectorMixin 4 | from scikit_roughsets.roughsets import RoughSetsReducer 5 | 6 | 7 | class RoughSetsSelector(BaseEstimator, SelectorMixin): 8 | 9 | def _get_support_mask(self): 10 | return self.mask_ 11 | 12 | def fit(self, X, y=None): 13 | # Missing values are not supported yet! 14 | if np.isnan(X).any(): 15 | raise ValueError("X must not contain any missing values") 16 | if np.isnan(y).any(): 17 | raise ValueError("y must not contain any missing values") 18 | # Check that X and Y contains only integer values 19 | if not np.all(np.equal(np.mod(X, 1), 0)): 20 | raise ValueError("X must contain only integer values") 21 | if not np.all(np.equal(np.mod(y, 1), 0)): 22 | raise ValueError("y must contain only integer values") 23 | 24 | reducer = RoughSetsReducer() 25 | selected_ = reducer.reduce(X, y) 26 | B_unique_sorted, B_idx = np.unique(np.array(range(X.shape[1])), return_index=True) 27 | B_unique_sorted = B_unique_sorted + 1 # Shift elements by one, as RS index array starts by one 28 | self.mask_ = np.in1d(B_unique_sorted, selected_, assume_unique=True) 29 | 30 | if self.mask_.size == 0: 31 | raise ValueError("No features were selected by rough sets reducer") 32 | return self 33 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | scikit-roughsets 2 | ================ 3 | .. image:: https://travis-ci.org/paudan/scikit-roughsets.svg?branch=master 4 | :target: https://travis-ci.org/paudan/scikit-roughsets 5 | 6 | This is an implementation of rough sets feature reduction algorithm, based on MATLAB code from 7 | `Dingyu Xue, YangQuan Chen. Solving applied mathematical problems with MATLAB `_. Integration with *scikit-learn* package is also provided. 8 | 9 | 10 | Installation 11 | ------------ 12 | 13 | The package can be easily installed using Python's ``pip`` utility: 14 | 15 | .. code:: shell 16 | 17 | pip install git+https://github.com/paudan/scikit-roughsets.git 18 | 19 | 20 | Usage 21 | ----- 22 | 23 | The usage is very straightforward, identical to ``scikit`` feature selection module: 24 | 25 | .. code:: python 26 | 27 | from scikit_roughsets.rs_reduction import RoughSetsSelector 28 | import numpy as np 29 | 30 | y = np.array([[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]]).T 31 | X = np.array([[1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1], 32 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 33 | [1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0], 34 | [0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1], 35 | [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0], 36 | [0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1], 37 | [1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1], 38 | [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1], 39 | [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1], 40 | [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1], 41 | [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1], 42 | [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1]]) 43 | 44 | selector = RoughSetsSelector() 45 | X_selected = selector.fit(X, y).transform(X) 46 | 47 | Several restrictions apply to its current use: 48 | 49 | - *X* must be an integer matrix, and *y* must must be an integer array 50 | - It does not work with NaN values, thus, initial preprocessing must be performed by the user 51 | 52 | Tests 53 | ----- 54 | 55 | Tests can be run using ``pytest`` tool: 56 | 57 | .. code:: shell 58 | 59 | pytest tests/tests.py 60 | 61 | 62 | -------------------------------------------------------------------------------- /tests/tests.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from scikit_roughsets.roughsets import RoughSetsReducer 4 | 5 | class TestRoughsets(unittest.TestCase): 6 | 7 | red = RoughSetsReducer() 8 | S = np.array([[0, 0], [0, 0], [0, 0], [0, 1], [1, 1], [1, 1], [1, 1], [1, 2], [2, 2], [2, 2]]) 9 | X = np.array([1, 2, 3, 4, 5]) 10 | a = np.array([1, 2]) 11 | 12 | D = np.array([[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]]).T 13 | C = np.array([[1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1], 14 | [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], 15 | [1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0], 16 | [0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1], 17 | [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0], 18 | [0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1], 19 | [1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1], 20 | [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1], 21 | [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1], 22 | [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1], 23 | [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1], 24 | [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1]]) 25 | 26 | 27 | def test_indiscernibility(self): 28 | np.array_equal([[1]], self.red.indisc(self.a, self.X)) 29 | 30 | def test_indiscernibility2(self): 31 | result = np.array([[ 1, 2, 3, 0, 0, 0, 0, 0, 0, 0], 32 | [ 0, 0, 0, 4, 0, 0, 0, 0, 0, 0], 33 | [ 0, 0, 0, 0, 5, 6, 7, 0, 0, 0], 34 | [ 0, 0, 0, 0, 0, 0, 0, 8, 0, 0], 35 | [ 0, 0, 0, 0, 0, 0, 0, 0, 9, 10]]) 36 | self.assertTrue(np.array_equal(result, self.red.indisc(self.a, self.S))) 37 | 38 | def test_rslower(self): 39 | self.assertListEqual([1, 2, 3, 4], self.red.rslower(self.X, self.a, self.S).tolist()) 40 | 41 | def test_rsupper(self): 42 | self.assertListEqual([1, 2, 3, 4, 5, 6, 7], self.red.rsupper(self.X, self.a, self.S).tolist()) 43 | 44 | def test_core(self): 45 | C = np.array([[1, 1, 1, 1, 1, 1, 0], [0, 1, 1, 0, 0, 0, 0], [1, 1, 0, 1, 1, 0, 1], 46 | [1, 1, 1, 1, 0, 0, 1], [0, 1, 1, 0, 0, 1, 1], [1, 0, 1, 1, 0, 1, 1], 47 | [1, 0, 1, 1, 1, 1, 1], [1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1], 48 | [1, 1, 1, 1, 0, 1, 1]]) 49 | D = np.array([range(0, 10)]).T 50 | self.assertListEqual([1, 2, 5, 6, 7], self.red.core(C, D).tolist()) 51 | 52 | def test_reduct(self): 53 | self.assertListEqual([], self.red.core(self.C, self.D).tolist()) 54 | Y = self.red.reduce(self.C, self.D).tolist() 55 | self.assertListEqual([3, 4], Y) 56 | 57 | def test_scikit(self): 58 | from scikit_roughsets.rs_reduction import RoughSetsSelector 59 | selector = RoughSetsSelector() 60 | X_selected = selector.fit(self.C, self.D).transform(self.C) 61 | self.assertEqual(X_selected.shape[1], 2) -------------------------------------------------------------------------------- /scikit_roughsets/roughsets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class RoughSetsReducer: 4 | 5 | def __size(self, x): 6 | return (1, x.shape[0]) if x.ndim == 1 else x.shape 7 | 8 | ''' 9 | Calculates indiscernibility relation 10 | ''' 11 | def indisc(self, a, x): 12 | 13 | def codea(a, x, b): 14 | yy = 0 15 | for i in range(0, a): 16 | yy += x[i] * b**(a-(i+1)) 17 | return yy 18 | 19 | p, q = self.__size(x) 20 | ap, aq = self.__size(a) 21 | z = [e for e in range(1, q+1)] 22 | tt = np.setdiff1d(z, a) 23 | tt_ind = np.setdiff1d(z, tt)-1 24 | if x.ndim == 1: 25 | x = x[tt_ind] 26 | else: 27 | x = x[:, tt_ind] 28 | y = x 29 | v = [codea(aq, y, 10) for i in range(0, p)] if y.ndim == 1 \ 30 | else [codea(aq, y[i, :], 10) for i in range(0, p)] 31 | y = np.transpose(v) 32 | if y.shape[0] == 1 and len(y.shape) == 1: 33 | I, yy = [1], [y] 34 | y = np.hstack((y, I)) 35 | b, k, l = [y], [1], [1] 36 | else: 37 | ax = 1 if y.ndim > 1 else 0 38 | yy = np.sort(y, axis=ax) 39 | I = y.argsort(axis=ax) 40 | y = np.hstack((yy, I)) 41 | b, k, l = np.unique(yy, return_index=True, return_inverse=True) 42 | y = np.hstack((l, I)) 43 | m = np.max(l) 44 | aa = np.zeros((m+1, p), dtype=int) 45 | for ii in range(0, m+1): 46 | for j in range(0, p): 47 | if l[j] == ii: 48 | aa[ii, j] = I[j]+1 49 | return aa 50 | 51 | ''' 52 | Calculates lower approximation set of y 53 | ''' 54 | def rslower(self, y, a, T): 55 | z = self.indisc(a, T) 56 | w = [] 57 | p, q = self.__size(z) 58 | for u in range(0, p): 59 | zz = np.setdiff1d(z[u, :], 0) 60 | if np.in1d(zz, y).all(): 61 | w = np.hstack((w, zz)) 62 | return w.astype(dtype=int) 63 | 64 | ''' 65 | Calculates upper approximation set of y 66 | ''' 67 | def rsupper(self, y, a, T): 68 | z = self.indisc(a, T) 69 | w = [] 70 | p, q = self.__size(z) 71 | for u in range(0, p): 72 | zz = np.setdiff1d(z[u, :], 0) 73 | zzz = np.intersect1d(zz, y) 74 | if len(zzz) > 0: 75 | w = np.hstack((w, zz)) 76 | return w.astype(dtype=int) 77 | 78 | 79 | def __pospq(self, p, q): 80 | pm, pn = self.__size(p) 81 | qm, qn = self.__size(q) 82 | num = 0 83 | pp, qq = [[]] * pm, [[]] * qm 84 | for i in range(0, pm): 85 | pp[i] = np.unique(p[i, :]) 86 | for j in range(0, qm): 87 | qq[j] = np.unique(q[j, :]) 88 | b = [] 89 | for i in range(0, qm): 90 | for j in range(0, pm): 91 | if np.in1d(pp[j], qq[i]).all(): 92 | num += 1 93 | b = np.hstack((b, pp[j])) 94 | bb = np.unique(b) 95 | if bb.size == 0: 96 | dd = 1 97 | else: 98 | _, dd = self.__size(bb) 99 | y = float(dd - 1)/pn if 0 in bb else float(dd)/pn 100 | b = np.setdiff1d(bb, 0) 101 | return y, b 102 | 103 | ''' 104 | Extract core set from C to D 105 | ''' 106 | def core(self, C, D): 107 | x = np.hstack((C, D)) 108 | c = np.array(range(1, C.shape[1]+1)) 109 | d = np.array([C.shape[1]+1]) 110 | cp, cq = self.__size(c) 111 | q = self.indisc(d, x) 112 | pp = self.indisc(c, x) 113 | b, w = self.__pospq(pp, q) 114 | a, k, kk, p = ([[]] * cq for i in range(4)) 115 | y = [] 116 | for u in range(0, cq): 117 | ind = u+1 118 | a[u] = np.setdiff1d(c, ind) 119 | p[u] = self.indisc(a[u], x) 120 | k[u], kk[u] = self.__pospq(p[u], q) 121 | if k[u] != b: 122 | y = np.hstack((y, ind)) 123 | return np.array(y) 124 | 125 | def __sgf(self, a, r, d, x): 126 | pr = self.indisc(r, x) 127 | q = self.indisc(d, x) 128 | b = np.hstack((r, a)) 129 | pb = self.indisc(b, x) 130 | p1, _ = self.__pospq(pb, q) 131 | p2, _ = self.__pospq(pr, q) 132 | return p1 - p2 133 | 134 | ''' 135 | Return the set of irreducible attributes 136 | ''' 137 | def reduce(self, C, D): 138 | 139 | def redu2(i, re, c, d, x): 140 | yre = re 141 | re1, re2 = self.__size(re) 142 | q = self.indisc(d, x) 143 | p = self.indisc(c, x) 144 | pos_cd, _ = self.__pospq(p, q) 145 | y, j = None, None 146 | for qi in range(i, re2): 147 | re = np.setdiff1d(re, re[qi]) 148 | red = self.indisc(re, x) 149 | pos_red, _ = self.__pospq(red, q) 150 | if np.array_equal(pos_cd, pos_red): 151 | y = re 152 | j = i 153 | break 154 | else: 155 | y = yre 156 | j = i + 1 157 | break 158 | return y, j 159 | 160 | x = np.hstack((C, D)) 161 | c = np.array(range(1, C.shape[1]+1)) 162 | d = np.array([C.shape[1]+1]) 163 | y = self.core(C, D) 164 | q = self.indisc(d, x) 165 | p = self.indisc(c, x) 166 | pos_cd, _ = self.__pospq(p, q) 167 | re = y 168 | red = self.indisc(y, x) 169 | pos_red, _ = self.__pospq(red, q) 170 | while pos_cd != pos_red: 171 | cc = np.setdiff1d(c, re) 172 | c1, c2 = self.__size(cc) 173 | yy = [0] * c2 174 | for i in range(0, c2): 175 | yy[i] = self.__sgf(cc[i], re, d, x) 176 | cd = np.setdiff1d(c, y) 177 | d1, d2 = self.__size(cd) 178 | for i in range(d2, c2, -1): 179 | yy[i] = [] 180 | ii = np.argsort(yy) 181 | for v1 in range(c2-1, -1, -1): 182 | v2 = ii[v1] 183 | re = np.hstack((re, cc[v2])) 184 | red = self.indisc(re, x) 185 | pos_red, _ = self.__pospq(red, q) 186 | re1, re2 = self.__size(re) 187 | core = y 188 | for qi in range(re2-1, -1, -1): 189 | if re[qi] in core: 190 | y = re 191 | break 192 | re = np.setdiff1d(re, re[qi]) 193 | red = self.indisc(re, x) 194 | pos_red, _ = self.__pospq(red, q) 195 | if np.array_equal(pos_cd, pos_red): 196 | y = re 197 | y1, y2 = self.__size(y) 198 | j = 0 199 | for i in range(0, y2): 200 | y, j = redu2(j, y, c, d, x) 201 | return y 202 | 203 | 204 | --------------------------------------------------------------------------------