├── tests
    ├── __init__.py
    └── tests.py
├── scikit_roughsets
    ├── __init__.py
    ├── rs_reduction.py
    └── roughsets.py
├── setup.cfg
├── .travis.yml
├── .gitignore
├── setup.py
├── .github
    └── workflows
    │   └── python-package.yml
├── LICENSE
└── README.rst


/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scikit_roughsets/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file=README.rst
3 | 
4 | [bdist_wheel]
5 | universal=1


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |     - "2.7"
 4 |     - "3.6"
 5 |     - "3.7"
 6 |     - "3.8"
 7 |     - "3.9"
 8 | install:
 9 |     - "pip install -e ."
10 | script:
11 |     - "nosetests tests"
12 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | ### Python template
 2 | # Byte-compiled / optimized / DLL files
 3 | */.ipynb_checkpoints
 4 | __pycache__/
 5 | *.py[cod]
 6 | *$py.class
 7 | 
 8 | # C extensions
 9 | *.so
10 | 
11 | # IntelliJ project files
12 | *.idea
13 | *.iml
14 | out
15 | gen
16 | 
17 | build
18 | dist
19 | *.egg-info
20 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | 
 2 | from setuptools import setup
 3 | from codecs import open
 4 | from os import path
 5 | 
 6 | here = path.abspath(path.dirname(__file__))
 7 | 
 8 | # Get the long description from the README file
 9 | with open(path.join(here, 'README.rst'), encoding='utf-8') as f:
10 |     long_description = f.read()
11 | 
12 | setup(
13 |     name='scikit-roughsets',
14 |     version='1.0',
15 |     description='Feature reduction using rough set theory',
16 |     long_description=long_description,
17 |     url='http://www.github.com/paudan/scikit-roughsets',
18 |     author='Paulius Danenas',
19 |     author_email='danpaulius@gmail.com',
20 |     license='MIT',
21 |     keywords='machine_learning',
22 |     packages=['scikit_roughsets'],
23 |     package_dir={'scikit_roughsets': 'scikit_roughsets'},
24 |     install_requires=['numpy', 'scikit-learn'],
25 | )


--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests with a variety of Python versions
 2 | 
 3 | name: Test package
 4 | 
 5 | on:
 6 |   push:
 7 |     branches: [ "master" ]
 8 |   pull_request:
 9 |     branches: [ "master" ]
10 | 
11 | jobs:
12 |   build:
13 | 
14 |     runs-on: ubuntu-latest
15 |     strategy:
16 |       fail-fast: false
17 |       matrix:
18 |         python-version: ["3.6", "3.8", "3.9", "3.10"]
19 | 
20 |     steps:
21 |     - uses: actions/checkout@v3
22 |     - name: Set up Python ${{ matrix.python-version }}
23 |       uses: actions/setup-python@v3
24 |       with:
25 |         python-version: ${{ matrix.python-version }}
26 |     - name: Install dependencies
27 |       run: |
28 |         python -m pip install --upgrade pip
29 |         python -m pip install pytest scikit-learn
30 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
31 |     - name: Test with pytest
32 |       run: |
33 |         pytest tests/*
34 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Paulius Danenas
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/scikit_roughsets/rs_reduction.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from sklearn.base import BaseEstimator
 3 | from sklearn.feature_selection import SelectorMixin
 4 | from scikit_roughsets.roughsets import RoughSetsReducer
 5 | 
 6 | 
 7 | class RoughSetsSelector(BaseEstimator, SelectorMixin):
 8 | 
 9 |     def _get_support_mask(self):
10 |         return self.mask_
11 | 
12 |     def fit(self, X, y=None):
13 |         # Missing values are not supported yet!
14 |         if np.isnan(X).any():
15 |             raise ValueError("X must not contain any missing values")
16 |         if np.isnan(y).any():
17 |             raise ValueError("y must not contain any missing values")
18 |         # Check that X and Y contains only integer values
19 |         if not np.all(np.equal(np.mod(X, 1), 0)):
20 |             raise ValueError("X must contain only integer values")
21 |         if not np.all(np.equal(np.mod(y, 1), 0)):
22 |             raise ValueError("y must contain only integer values")
23 | 
24 |         reducer = RoughSetsReducer()
25 |         selected_ = reducer.reduce(X, y)
26 |         B_unique_sorted, B_idx = np.unique(np.array(range(X.shape[1])), return_index=True)
27 |         B_unique_sorted = B_unique_sorted + 1  # Shift elements by one, as RS index array starts by one
28 |         self.mask_ = np.in1d(B_unique_sorted, selected_, assume_unique=True)
29 | 
30 |         if self.mask_.size == 0:
31 |             raise ValueError("No features were selected by rough sets reducer")
32 |         return self
33 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | scikit-roughsets
 2 | ================
 3 | .. image:: https://travis-ci.org/paudan/scikit-roughsets.svg?branch=master
 4 |     :target: https://travis-ci.org/paudan/scikit-roughsets
 5 | 
 6 | This is an implementation of the rough sets feature reduction algorithm, based on MATLAB code from
 7 | `Dingyu Xue, YangQuan Chen. Solving applied mathematical problems with MATLAB <https://books.google.lt/books?id=V4vulPEc29kC>`_. Integration with *scikit-learn* package is also provided.
 8 | 
 9 | 
10 | Installation
11 | ------------
12 | 
13 | The package can be easily installed using Python's ``pip`` utility:
14 | 
15 | .. code:: shell
16 |     
17 |     pip install git+https://github.com/paudan/scikit-roughsets.git
18 |     
19 | 
20 | Usage
21 | -----
22 | 
23 | The usage is straightforward and identical to that of the ``scikit-learn`` feature selection module:
24 | 
25 | .. code:: python
26 | 
27 |     from scikit_roughsets.rs_reduction import RoughSetsSelector
28 |     import numpy as np
29 | 
30 |     y = np.array([[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]]).T
31 |     X = np.array([[1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1],
32 |                   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
33 |                   [1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
34 |                   [0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1],
35 |                   [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0],
36 |                   [0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1],
37 |                   [1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1],
38 |                   [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1],
39 |                   [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1],
40 |                   [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1],
41 |                   [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1],
42 |                   [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1]])
43 | 
44 |     selector = RoughSetsSelector()
45 |     X_selected = selector.fit(X, y).transform(X)
46 | 
47 | Several restrictions apply to its current use:
48 | 
49 | - *X* must be an integer matrix, and *y* must be an integer array
50 | - It does not work with NaN values, so missing values must be handled by the user beforehand
51 | 
52 | Tests
53 | -----
54 | 
55 | Tests can be run using the ``pytest`` tool:
56 | 
57 | .. code:: shell
58 | 
59 |     pytest tests/tests.py
60 | 
61 | 
62 | 


--------------------------------------------------------------------------------
/tests/tests.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | import numpy as np
 3 | from scikit_roughsets.roughsets import RoughSetsReducer
 4 | 
 5 | class TestRoughsets(unittest.TestCase):
 6 | 
 7 |     red = RoughSetsReducer()
 8 |     S = np.array([[0, 0], [0, 0], [0, 0], [0, 1], [1, 1], [1, 1], [1, 1], [1, 2], [2, 2], [2, 2]])
 9 |     X = np.array([1, 2, 3, 4, 5])
10 |     a = np.array([1, 2])
11 | 
12 |     D = np.array([[1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]]).T
13 |     C = np.array([[1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1],
14 |                   [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
15 |                   [1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0],
16 |                   [0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1],
17 |                   [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0],
18 |                   [0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1],
19 |                   [1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1],
20 |                   [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1],
21 |                   [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1],
22 |                   [1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1],
23 |                   [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1],
24 |                   [1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1]])
25 | 
26 | 
27 |     def test_indiscernibility(self):
28 |         np.array_equal([[1]], self.red.indisc(self.a, self.X))
29 | 
30 |     def test_indiscernibility2(self):
31 |         result = np.array([[ 1, 2, 3, 0, 0, 0, 0, 0, 0, 0],
32 |                           [ 0, 0, 0, 4, 0, 0, 0, 0, 0, 0],
33 |                           [ 0, 0, 0, 0, 5, 6, 7, 0, 0, 0],
34 |                           [ 0, 0, 0, 0, 0, 0, 0, 8, 0, 0],
35 |                           [ 0, 0, 0, 0, 0, 0, 0, 0, 9, 10]])
36 |         self.assertTrue(np.array_equal(result, self.red.indisc(self.a, self.S)))
37 | 
38 |     def test_rslower(self):
39 |         self.assertListEqual([1, 2, 3, 4], self.red.rslower(self.X, self.a, self.S).tolist())
40 | 
41 |     def test_rsupper(self):
42 |         self.assertListEqual([1, 2, 3, 4, 5, 6, 7], self.red.rsupper(self.X, self.a, self.S).tolist())
43 | 
44 |     def test_core(self):
45 |         C = np.array([[1, 1, 1, 1, 1, 1, 0], [0, 1, 1, 0, 0, 0, 0], [1, 1, 0, 1, 1, 0, 1],
46 |                       [1, 1, 1, 1, 0, 0, 1], [0, 1, 1, 0, 0, 1, 1], [1, 0, 1, 1, 0, 1, 1],
47 |                       [1, 0, 1, 1, 1, 1, 1], [1, 1, 1, 0, 0, 0, 0], [1, 1, 1, 1, 1, 1, 1],
48 |                       [1, 1, 1, 1, 0, 1, 1]])
49 |         D = np.array([range(0, 10)]).T
50 |         self.assertListEqual([1, 2, 5, 6, 7], self.red.core(C, D).tolist())
51 | 
52 |     def test_reduct(self):
53 |         self.assertListEqual([], self.red.core(self.C, self.D).tolist())
54 |         Y = self.red.reduce(self.C, self.D).tolist()
55 |         self.assertListEqual([3, 4], Y)
56 | 
57 |     def test_scikit(self):
58 |         from scikit_roughsets.rs_reduction import RoughSetsSelector
59 |         selector = RoughSetsSelector()
60 |         X_selected = selector.fit(self.C, self.D).transform(self.C)
61 |         self.assertEqual(X_selected.shape[1], 2)


--------------------------------------------------------------------------------
/scikit_roughsets/roughsets.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | class RoughSetsReducer:
  4 | 
  5 |     def __size(self, x):
  6 |         return (1, x.shape[0]) if x.ndim == 1 else x.shape
  7 | 
  8 |     '''
  9 |     Calculates indiscernibility relation
 10 |     '''
 11 |     def indisc(self, a, x):
 12 | 
 13 |         def codea(a, x, b):
 14 |             yy = 0
 15 |             for i in range(0, a):
 16 |                 yy += x[i] * b**(a-(i+1))
 17 |             return yy
 18 | 
 19 |         p, q = self.__size(x)
 20 |         ap, aq = self.__size(a)
 21 |         z = [e for e in range(1, q+1)]
 22 |         tt = np.setdiff1d(z, a)
 23 |         tt_ind = np.setdiff1d(z, tt)-1
 24 |         if x.ndim == 1:
 25 |             x = x[tt_ind]
 26 |         else:
 27 |             x = x[:, tt_ind]
 28 |         y = x
 29 |         v = [codea(aq, y, 10) for i in range(0, p)] if y.ndim == 1 \
 30 |             else [codea(aq, y[i, :], 10) for i in range(0, p)]
 31 |         y = np.transpose(v)
 32 |         if y.shape[0] == 1 and len(y.shape) == 1:
 33 |             I, yy = [1], [y]
 34 |             y = np.hstack((y, I))
 35 |             b, k, l = [y], [1], [1]
 36 |         else:
 37 |             ax = 1 if y.ndim > 1 else 0
 38 |             yy = np.sort(y, axis=ax)
 39 |             I = y.argsort(axis=ax)
 40 |             y = np.hstack((yy, I))
 41 |             b, k, l = np.unique(yy, return_index=True, return_inverse=True)
 42 |         y = np.hstack((l, I))
 43 |         m = np.max(l)
 44 |         aa = np.zeros((m+1, p), dtype=int)
 45 |         for ii in range(0, m+1):
 46 |             for j in range(0, p):
 47 |                 if l[j] == ii:
 48 |                     aa[ii, j] = I[j]+1
 49 |         return aa
 50 | 
 51 |     '''
 52 |     Calculates lower approximation set of y
 53 |     '''
 54 |     def rslower(self, y, a, T):
 55 |         z = self.indisc(a, T)
 56 |         w = []
 57 |         p, q = self.__size(z)
 58 |         for u in range(0, p):
 59 |             zz = np.setdiff1d(z[u, :], 0)
 60 |             if np.in1d(zz, y).all():
 61 |                 w = np.hstack((w, zz))
 62 |         return w.astype(dtype=int)
 63 | 
 64 |     '''
 65 |     Calculates upper approximation set of y
 66 |     '''
 67 |     def rsupper(self, y, a, T):
 68 |         z = self.indisc(a, T)
 69 |         w = []
 70 |         p, q = self.__size(z)
 71 |         for u in range(0, p):
 72 |             zz = np.setdiff1d(z[u, :], 0)
 73 |             zzz = np.intersect1d(zz, y)
 74 |             if len(zzz) > 0:
 75 |                 w = np.hstack((w, zz))
 76 |         return w.astype(dtype=int)
 77 | 
 78 | 
 79 |     def __pospq(self, p, q):
 80 |         pm, pn = self.__size(p)
 81 |         qm, qn = self.__size(q)
 82 |         num = 0
 83 |         pp, qq = [[]] * pm, [[]] * qm
 84 |         for i in range(0, pm):
 85 |             pp[i] = np.unique(p[i, :])
 86 |         for j in range(0, qm):
 87 |             qq[j] = np.unique(q[j, :])
 88 |         b = []
 89 |         for i in range(0, qm):
 90 |             for j in range(0, pm):
 91 |                 if np.in1d(pp[j], qq[i]).all():
 92 |                     num += 1
 93 |                     b = np.hstack((b, pp[j]))
 94 |         bb = np.unique(b)
 95 |         if bb.size == 0:
 96 |             dd = 1
 97 |         else:
 98 |             _, dd = self.__size(bb)
 99 |         y = float(dd - 1)/pn if 0 in bb else float(dd)/pn
100 |         b = np.setdiff1d(bb, 0)
101 |         return y, b
102 | 
103 |     '''
104 |     Extract core set from C to D
105 |     '''
106 |     def core(self, C, D):
107 |         x = np.hstack((C, D))
108 |         c = np.array(range(1, C.shape[1]+1))
109 |         d = np.array([C.shape[1]+1])
110 |         cp, cq = self.__size(c)
111 |         q = self.indisc(d, x)
112 |         pp = self.indisc(c, x)
113 |         b, w = self.__pospq(pp, q)
114 |         a, k, kk, p = ([[]] * cq for i in range(4))
115 |         y = []
116 |         for u in range(0, cq):
117 |             ind = u+1
118 |             a[u] = np.setdiff1d(c, ind)
119 |             p[u] = self.indisc(a[u], x)
120 |             k[u], kk[u] = self.__pospq(p[u], q)
121 |             if k[u] != b:
122 |                 y = np.hstack((y, ind))
123 |         return np.array(y)
124 | 
125 |     def __sgf(self, a, r, d, x):
126 |         pr = self.indisc(r, x)
127 |         q = self.indisc(d, x)
128 |         b = np.hstack((r, a))
129 |         pb = self.indisc(b, x)
130 |         p1, _ = self.__pospq(pb, q)
131 |         p2, _ = self.__pospq(pr, q)
132 |         return p1 - p2
133 | 
134 |     '''
135 |     Return the set of irreducible attributes
136 |     '''
137 |     def reduce(self, C, D):
138 | 
139 |         def redu2(i, re, c, d, x):
140 |             yre = re
141 |             re1, re2 = self.__size(re)
142 |             q = self.indisc(d, x)
143 |             p = self.indisc(c, x)
144 |             pos_cd, _ = self.__pospq(p, q)
145 |             y, j = None, None
146 |             for qi in range(i, re2):
147 |                 re = np.setdiff1d(re, re[qi])
148 |                 red = self.indisc(re, x)
149 |                 pos_red, _ = self.__pospq(red, q)
150 |                 if np.array_equal(pos_cd, pos_red):
151 |                     y = re
152 |                     j = i
153 |                     break
154 |                 else:
155 |                     y = yre
156 |                     j = i + 1
157 |                     break
158 |             return y, j
159 | 
160 |         x = np.hstack((C, D))
161 |         c = np.array(range(1, C.shape[1]+1))
162 |         d = np.array([C.shape[1]+1])
163 |         y = self.core(C, D)
164 |         q = self.indisc(d, x)
165 |         p = self.indisc(c, x)
166 |         pos_cd, _ = self.__pospq(p, q)
167 |         re = y
168 |         red = self.indisc(y, x)
169 |         pos_red, _ = self.__pospq(red, q)
170 |         while pos_cd != pos_red:
171 |             cc = np.setdiff1d(c, re)
172 |             c1, c2 = self.__size(cc)
173 |             yy = [0] * c2
174 |             for i in range(0, c2):
175 |                 yy[i] = self.__sgf(cc[i], re, d, x)
176 |             cd = np.setdiff1d(c, y)
177 |             d1, d2 = self.__size(cd)
178 |             for i in range(d2, c2, -1):
179 |                 yy[i] = []
180 |             ii = np.argsort(yy)
181 |             for v1 in range(c2-1, -1, -1):
182 |                 v2 = ii[v1]
183 |                 re = np.hstack((re, cc[v2]))
184 |                 red = self.indisc(re, x)
185 |                 pos_red, _ = self.__pospq(red, q)
186 |         re1, re2 = self.__size(re)
187 |         core = y
188 |         for qi in range(re2-1, -1, -1):
189 |             if re[qi] in core:
190 |                 y = re
191 |                 break
192 |             re = np.setdiff1d(re, re[qi])
193 |             red = self.indisc(re, x)
194 |             pos_red, _ = self.__pospq(red, q)
195 |             if np.array_equal(pos_cd, pos_red):
196 |                 y = re
197 |         y1, y2 = self.__size(y)
198 |         j = 0
199 |         for i in range(0, y2):
200 |             y, j = redu2(j, y, c, d, x)
201 |         return y
202 | 
203 | 
204 | 


--------------------------------------------------------------------------------