├── .ipynb_checkpoints ├── FeatureWeightedFuzzyX-checkpoint.ipynb ├── Sample_Weighting-checkpoint.ipynb └── Untitled-checkpoint.ipynb ├── Literature ├── featureweightlearning.pdf └── yeung2002(2).pdf ├── Notebooks ├── .ipynb_checkpoints │ ├── FeatureWeightedFuzzyX-checkpoint.ipynb │ ├── Module_Test-checkpoint.ipynb │ ├── Sample_Weighting-checkpoint.ipynb │ └── Untitled1-checkpoint.ipynb ├── FeatureWeightedFuzzyX.ipynb ├── Module_Test.ipynb ├── NoiseReduction.png ├── Sample_Weighting.ipynb ├── Untitled.ipynb └── Untitled1.ipynb ├── README.md ├── figures ├── Iris_W.png ├── Iris_WO.png ├── Loss.png ├── NoiseReduction.png ├── fuzzymatrix.png ├── gradient.png └── solve_for_beta.png └── src ├── __init__.py ├── __pycache__ └── feature_learning.cpython-36.pyc └── feature_learning.py /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Feature Weighted Fuzzy C-means Classifier\n", 8 | "This project was originally meant to be a part of the D-BOM order mining tool, but was foreseen becoming the bulk of the actual work. For that reason, I gave it a stand alone repo. This notebook will explore how I will turn this into a library. I will use the iris dataset for testing. This notebook will first attemp to build fuzzy c-means from scratch, and then it will attempt to introduce the feature weighted learning component. \n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 17, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "from sklearn.datasets import load_iris\n", 18 | "import numpy as np\n", 19 | "iris = load_iris()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 18, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "array([[5.1, 3.5, 1.4, 0.2],\n", 31 | " [4.9, 3. , 1.4, 0.2],\n", 32 | " [4.7, 3.2, 1.3, 0.2],\n", 33 | " [4.6, 3.1, 1.5, 0.2],\n", 34 | " [5. , 3.6, 1.4, 0.2],\n", 35 | " [5.4, 3.9, 1.7, 0.4],\n", 36 | " [4.6, 3.4, 1.4, 0.3],\n", 37 | " [5. 
, 3.4, 1.5, 0.2],\n", 38 | " [4.4, 2.9, 1.4, 0.2],\n", 39 | " [4.9, 3.1, 1.5, 0.1]])" 40 | ] 41 | }, 42 | "execution_count": 18, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "X = iris.data\n", 49 | "X[:10, :]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 41, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "scales = np.amax(X, axis = 0)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 44, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# Basic Fuzzy c-means clustering \n", 68 | "c = 3\n", 69 | "epsilon = .05\n", 70 | "m = 2\n", 71 | "V = np.empty((c,4))\n", 72 | "for i in np.arange(0,c):\n", 73 | " c_ = np.random.random((c,1))* scales[i]\n", 74 | " V[:, i:i+1] = c_\n", 75 | " \n", 76 | "fuzz = np.random.random(X.shape) " 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 45, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": [ 87 | "array([[6.90067416e-001, 4.03567345e+000, 1.96562883e+000,\n", 88 | " 6.95024462e-310],\n", 89 | " [4.36526976e+000, 1.52127116e+000, 6.67822419e+000,\n", 90 | " 6.95024462e-310],\n", 91 | " [5.17583961e+000, 2.75369530e+000, 2.57638666e+000,\n", 92 | " 6.95028791e-310]])" 93 | ] 94 | }, 95 | "execution_count": 45, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "V" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 46, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "dist_weights = [1,1,1,1]\n", 111 | "def euclidean(X, V, weights):\n", 112 | " dists = X- V\n", 113 | " return np.sqrt((dists * weights) **2)\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 47, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def update(X, V):\n", 123 | " for v_i in V.shape[0]:\n", 124 | " numerator = fuzz[i]**m * X\n", 125 | " denominator = fuzz[i]**m\n", 126 | " v_i = numerator/denominator\n", 127 | " \n", 128 | " return \n", 129 | " " 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [] 138 | } 139 | ], 140 | "metadata": { 141 | "kernelspec": { 142 | "display_name": "Python 3", 143 | "language": "python", 144 | "name": "python3" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 3 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython3", 156 | "version": "3.6.6" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 2 161 | } 162 | -------------------------------------------------------------------------------- /Literature/featureweightlearning.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/Literature/featureweightlearning.pdf -------------------------------------------------------------------------------- /Literature/yeung2002(2).pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/Literature/yeung2002(2).pdf -------------------------------------------------------------------------------- 
/Notebooks/.ipynb_checkpoints/Module_Test-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Module Testing\n", 8 | "This notebook is meant to test the code repo built and ensure it runs accurately" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import sys \n", 18 | "sys.path.insert(0, '/home/colin/Desktop/FWFCM/src')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import feature_learning\n", 28 | "from sklearn.datasets import load_iris\n", 29 | "import numpy as np\n", 30 | "from sklearn.preprocessing import StandardScaler\n", 31 | "iris = load_iris()\n", 32 | "X = iris.data\n", 33 | "X[:10, :]\n", 34 | "X = StandardScaler().fit_transform(X)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "99 Iterations Required\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "max_iter = 100\n", 52 | "threshold= .00001\n", 53 | "w = feature_learning.return_weighted_distance(X, threshold = threshold, max_iter = max_iter, n = 15)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "fcm = feature_learning.c_means(threshold = .03, max_iter = 100)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "fcm.fit(X, w)" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": 6, 77 | "metadata": {}, 78 | "outputs": [ 79 | { 80 | "data": { 81 | "text/plain": [ 82 | "array([[ 0.12570625, -0.62975002, 0.33890973, 0.25467352],\n", 83 | " [ 1.08686586, -0.05200863, 1.10661401, 1.1292369 ],\n", 84 | " [-1.01263132, 0.83811185, -1.29881138, -1.24810065]])" 85 | ] 86 | }, 87 | "execution_count": 6, 88 | "metadata": {}, 89 | "output_type": "execute_result" 90 | } 91 | ], 92 | "source": [ 93 | "fcm.cluster_centers" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 7, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "from itertools import combinations\n", 103 | "import matplotlib.pyplot as plt\n", 104 | "def to_color(u):\n", 105 | " colors = []\n", 106 | " for i in range(0, u.shape[0]):\n", 107 | " colors.append(tuple((u[i, 0], u[i, 1], u[i, 2])))\n", 108 | " return colors \n", 109 | " \n", 110 | "\n", 111 | "def _2D_Plot(X_, V, colorlist, columns):\n", 112 | " fig, ax = plt.subplots(2,3, figsize = (15,10))\n", 113 | " axlist = [ (i,j) for i in np.arange(0,3) for j in np.arange(0,3)]\n", 114 | " for idx, axcombo in enumerate(combinations([0,1,2,3], 2)):\n", 115 | " axl = axlist[idx]\n", 116 | " axl = ax[axl[0], axl[1]]\n", 117 | " axl.scatter(X_[:, axcombo[0]], X_[:, axcombo[1]], c = colorlist, alpha = .5)\n", 118 | " axl.scatter(V[:, axcombo[0]], V[:, axcombo[1]], marker ='x', c=['red', 'green', 'blue'], s =200)\n", 119 | " axl.set_xlabel(columns[axcombo[0]])\n", 120 | " axl.set_ylabel(columns[axcombo[1]])\n", 121 | " #plt.show()\n", 122 | " return fig\n", 123 | "V = fcm.cluster_centers\n", 124 | "u = fcm.fuzzy_partition\n", 125 | "colorlist = to_color(u)\n", 126 | "columns = iris['feature_names']\n", 127 | "fig = _2D_Plot(X, 
V,colorlist, columns)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 8, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "0.863453892472934" 139 | ] 140 | }, 141 | "execution_count": 8, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "fcm.f_p_coeff" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 9, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/plain": [ 158 | "array([[0. , 0. , 1. , 0.01548039]])" 159 | ] 160 | }, 161 | "execution_count": 9, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "fcm.weights" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 10, 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/plain": [ 178 | "array([[ 0.12570625, -0.62975002, 0.33890973, 0.25467352],\n", 179 | " [ 1.08686586, -0.05200863, 1.10661401, 1.1292369 ],\n", 180 | " [-1.01263132, 0.83811185, -1.29881138, -1.24810065]])" 181 | ] 182 | }, 183 | "execution_count": 10, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "fcm.cluster_centers" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 11, 195 | "metadata": {}, 196 | "outputs": [ 197 | { 198 | "data": { 199 | "text/plain": [ 200 | "array([[6.08053787e-04, 2.86345031e-04, 9.99105601e-01],\n", 201 | " [6.08053787e-04, 2.86345031e-04, 9.99105601e-01],\n", 202 | " [3.18794184e-03, 1.53258878e-03, 9.95279469e-01],\n", 203 | " [9.07528881e-05, 4.18132408e-05, 9.99867434e-01],\n", 204 | " [6.08053787e-04, 2.86345031e-04, 9.99105601e-01],\n", 205 | " [7.24899867e-03, 3.18385129e-03, 9.89567150e-01],\n", 206 | " [6.08040121e-04, 2.86336135e-04, 9.99105624e-01],\n", 207 | " [9.07528881e-05, 4.18132408e-05, 9.99867434e-01],\n", 208 | " [6.08053787e-04, 2.86345031e-04, 9.99105601e-01],\n", 209 | " [9.39396592e-05, 4.32820022e-05, 9.99862778e-01],\n", 210 | " [9.07528881e-05, 4.18132408e-05, 9.99867434e-01],\n", 211 | " [2.12399565e-03, 9.56175533e-04, 9.96919829e-01],\n", 212 | " [6.11005073e-04, 2.87737773e-04, 9.99101257e-01],\n", 213 | " [1.28746051e-02, 6.42946841e-03, 9.80695926e-01],\n", 214 | " [7.39984381e-03, 3.62762008e-03, 9.88972536e-01],\n", 215 | " [9.38400131e-05, 4.32348219e-05, 9.99862925e-01],\n", 216 | " [3.19079899e-03, 1.53394138e-03, 9.95275260e-01],\n", 217 | " [6.08040121e-04, 2.86336135e-04, 9.99105624e-01],\n", 218 | " [7.24519464e-03, 3.18221452e-03, 9.89572591e-01],\n", 219 | " [9.07195587e-05, 4.17974757e-05, 9.99867483e-01],\n", 220 | " [7.24493691e-03, 3.18214184e-03, 9.89572921e-01],\n", 221 | " [9.38400131e-05, 4.32348219e-05, 9.99862925e-01],\n", 222 | " [1.92925307e-02, 9.80517703e-03, 9.70902292e-01],\n", 223 | " [7.25634926e-03, 3.18705221e-03, 9.89556599e-01],\n", 224 | " [2.90350853e-02, 1.20799423e-02, 9.58884972e-01],\n", 225 | " [2.12399565e-03, 9.56175533e-04, 9.96919829e-01],\n", 226 | " [2.12744162e-03, 9.57707248e-04, 9.96914851e-01],\n", 227 | " [9.07528881e-05, 4.18132408e-05, 9.99867434e-01],\n", 228 | " [6.08053787e-04, 2.86345031e-04, 9.99105601e-01],\n", 229 | " [2.12399565e-03, 9.56175533e-04, 9.96919829e-01],\n", 230 | " [2.12399565e-03, 9.56175533e-04, 9.96919829e-01],\n", 231 | " [9.38400131e-05, 4.32348219e-05, 9.99862925e-01],\n", 232 | " [9.39396592e-05, 4.32820022e-05, 9.99862778e-01],\n", 233 | " [6.08053787e-04, 2.86345031e-04, 
9.99105601e-01],\n", 234 | " [9.07528881e-05, 4.18132408e-05, 9.99867434e-01],\n", 235 | " [7.39984381e-03, 3.62762008e-03, 9.88972536e-01],\n", 236 | " [3.18794184e-03, 1.53258878e-03, 9.95279469e-01],\n", 237 | " [6.11005073e-04, 2.87737773e-04, 9.99101257e-01],\n", 238 | " [3.18794184e-03, 1.53258878e-03, 9.95279469e-01],\n", 239 | " [9.07528881e-05, 4.18132408e-05, 9.99867434e-01],\n", 240 | " [3.18801011e-03, 1.53261003e-03, 9.95279380e-01],\n", 241 | " [3.18801011e-03, 1.53261003e-03, 9.95279380e-01],\n", 242 | " [3.18794184e-03, 1.53258878e-03, 9.95279469e-01],\n", 243 | " [2.14432872e-03, 9.65296742e-04, 9.96890375e-01],\n", 244 | " [2.90415247e-02, 1.20822484e-02, 9.58876227e-01],\n", 245 | " [6.08040121e-04, 2.86336135e-04, 9.99105624e-01],\n", 246 | " [2.12399565e-03, 9.56175533e-04, 9.96919829e-01],\n", 247 | " [6.08053787e-04, 2.86345031e-04, 9.99105601e-01],\n", 248 | " [9.07528881e-05, 4.18132408e-05, 9.99867434e-01],\n", 249 | " [6.08053787e-04, 2.86345031e-04, 9.99105601e-01],\n", 250 | " [8.85150875e-01, 1.04692160e-01, 1.01569646e-02],\n", 251 | " [9.83333380e-01, 1.43868671e-02, 2.27975312e-03],\n", 252 | " [6.73545028e-01, 3.09378704e-01, 1.70762682e-02],\n", 253 | " [9.40892685e-01, 4.06181668e-02, 1.84891479e-02],\n", 254 | " [9.47309302e-01, 4.68414506e-02, 5.84924782e-03],\n", 255 | " [9.83339378e-01, 1.43811052e-02, 2.27951716e-03],\n", 256 | " [8.85078271e-01, 1.04761541e-01, 1.01601882e-02],\n", 257 | " [6.55699649e-01, 1.25997651e-01, 2.18302700e-01],\n", 258 | " [9.47324295e-01, 4.68263691e-02, 5.84933544e-03],\n", 259 | " [9.10453411e-01, 5.76612590e-02, 3.18853301e-02],\n", 260 | " [7.53174635e-01, 1.13051212e-01, 1.33774154e-01],\n", 261 | " [9.86470603e-01, 1.03712087e-02, 3.15818832e-03],\n", 262 | " [9.40817045e-01, 4.06657047e-02, 1.85172504e-02],\n", 263 | " [8.85150875e-01, 1.04692160e-01, 1.01569646e-02],\n", 264 | " [7.97393846e-01, 1.02370983e-01, 1.00235171e-01],\n", 265 | " [9.98531093e-01, 1.22533234e-03, 2.43575028e-04],\n", 266 | " [9.83333380e-01, 1.43868671e-02, 2.27975312e-03],\n", 267 | " [9.66595067e-01, 2.43228311e-02, 9.08210139e-03],\n", 268 | " [9.83333380e-01, 1.43868671e-02, 2.27975312e-03],\n", 269 | " [9.10416139e-01, 5.76786502e-02, 3.19052111e-02],\n", 270 | " [7.93165196e-01, 1.92588782e-01, 1.42460228e-02],\n", 271 | " [9.40892685e-01, 4.06181668e-02, 1.84891479e-02],\n", 272 | " [6.73545028e-01, 3.09378704e-01, 1.70762682e-02],\n", 273 | " [8.85145526e-01, 1.04692989e-01, 1.01614841e-02],\n", 274 | " [9.98135960e-01, 1.49479468e-03, 3.69245368e-04],\n", 275 | " [9.98531093e-01, 1.22533234e-03, 2.43575028e-04],\n", 276 | " [7.93427146e-01, 1.92335609e-01, 1.42372449e-02],\n", 277 | " [5.34064714e-01, 4.48034516e-01, 1.79007702e-02],\n", 278 | " [9.83333380e-01, 1.43868671e-02, 2.27975312e-03],\n", 279 | " [7.53174635e-01, 1.13051212e-01, 1.33774154e-01],\n", 280 | " [8.76114566e-01, 7.41739555e-02, 4.97114790e-02],\n", 281 | " [8.38340731e-01, 8.92836851e-02, 7.23755836e-02],\n", 282 | " [9.10438176e-01, 5.76667363e-02, 3.18950875e-02],\n", 283 | " [3.90519659e-01, 5.92974633e-01, 1.65057083e-02],\n", 284 | " [9.83333380e-01, 1.43868671e-02, 2.27975312e-03],\n", 285 | " [9.83301024e-01, 1.44150461e-02, 2.28393013e-03],\n", 286 | " [8.85124319e-01, 1.04718072e-01, 1.01576087e-02],\n", 287 | " [9.98523495e-01, 1.23164175e-03, 2.44863430e-04],\n", 288 | " [9.66682815e-01, 2.42613932e-02, 9.05579202e-03],\n", 289 | " [9.40892685e-01, 4.06181668e-02, 1.84891479e-02],\n", 290 | " [9.98497885e-01, 1.25297294e-03, 2.49142102e-04],\n", 
291 | " [9.47326948e-01, 4.68249353e-02, 5.84811704e-03],\n", 292 | " [9.40878449e-01, 4.06265320e-02, 1.84950189e-02],\n", 293 | " [6.55699649e-01, 1.25997651e-01, 2.18302700e-01],\n", 294 | " [9.86473954e-01, 1.03680870e-02, 3.15795938e-03],\n", 295 | " [9.86454214e-01, 1.03829121e-02, 3.16287369e-03],\n", 296 | " [9.86473954e-01, 1.03680870e-02, 3.15795938e-03],\n", 297 | " [9.98135960e-01, 1.49479468e-03, 3.69245368e-04],\n", 298 | " [4.90965003e-01, 1.23053684e-01, 3.85981312e-01],\n", 299 | " [9.66682815e-01, 2.42613932e-02, 9.05579202e-03],\n", 300 | " [3.10766664e-02, 9.64815480e-01, 4.10785333e-03],\n", 301 | " [3.90252558e-01, 5.93246302e-01, 1.65011393e-02],\n", 302 | " [1.56356171e-02, 9.82458258e-01, 1.90612468e-03],\n", 303 | " [7.10038829e-03, 9.92252756e-01, 6.46855758e-04],\n", 304 | " [4.30972439e-03, 9.95209128e-01, 4.81147750e-04],\n", 305 | " [1.33540360e-01, 8.40840915e-01, 2.56187253e-02],\n", 306 | " [9.83249096e-01, 1.44600914e-02, 2.29081212e-03],\n", 307 | " [8.43632824e-02, 9.01930968e-01, 1.37057498e-02],\n", 308 | " [4.33887700e-03, 9.95176819e-01, 4.84304272e-04],\n", 309 | " [4.84226556e-02, 9.44683417e-01, 6.89392720e-03],\n", 310 | " [3.90174096e-01, 5.93324383e-01, 1.65015209e-02],\n", 311 | " [1.53485088e-01, 8.37141414e-01, 9.37349812e-03],\n", 312 | " [3.09254345e-02, 9.66570900e-01, 2.50366525e-03],\n", 313 | " [5.33778451e-01, 4.48317023e-01, 1.79045260e-02],\n", 314 | " [3.89913388e-01, 5.93574040e-01, 1.65125726e-02],\n", 315 | " [1.53414194e-01, 8.37210239e-01, 9.37556676e-03],\n", 316 | " [3.09940662e-02, 9.66497331e-01, 2.50860314e-03],\n", 317 | " [1.47935268e-01, 8.22273695e-01, 2.97910367e-02],\n", 318 | " [1.73653196e-01, 7.88138780e-01, 3.82080240e-02],\n", 319 | " [5.34240849e-01, 4.47856053e-01, 1.79030981e-02],\n", 320 | " [5.55045814e-05, 9.99938869e-01, 5.62690068e-06],\n", 321 | " [6.73081101e-01, 3.09830889e-01, 1.70880099e-02],\n", 322 | " [1.47940740e-01, 8.22268053e-01, 2.97912065e-02],\n", 323 | " [6.73298694e-01, 3.09621483e-01, 1.70798229e-02],\n", 324 | " [1.49953367e-05, 9.99983485e-01, 1.51992272e-06],\n", 325 | " [3.10344038e-02, 9.64864708e-01, 4.10088776e-03],\n", 326 | " [7.93165196e-01, 1.92588782e-01, 1.42460228e-02],\n", 327 | " [6.73298694e-01, 3.09621483e-01, 1.70798229e-02],\n", 328 | " [7.04599991e-03, 9.92311971e-01, 6.42029245e-04],\n", 329 | " [4.42621469e-03, 9.95079750e-01, 4.94035204e-04],\n", 330 | " [4.83769394e-02, 9.44737488e-01, 6.88557270e-03],\n", 331 | " [1.01657954e-01, 8.80818410e-01, 1.75236354e-02],\n", 332 | " [7.06025094e-03, 9.92296359e-01, 6.43390023e-04],\n", 333 | " [3.90619214e-01, 5.92871650e-01, 1.65091354e-02],\n", 334 | " [7.39953763e-03, 9.91926391e-01, 6.74071042e-04],\n", 335 | " [4.83788706e-02, 9.44734250e-01, 6.88687904e-03],\n", 336 | " [7.13727884e-03, 9.92212157e-01, 6.50564623e-04],\n", 337 | " [3.09940662e-02, 9.66497331e-01, 2.50860314e-03],\n", 338 | " [7.93165196e-01, 1.92588782e-01, 1.42460228e-02],\n", 339 | " [7.79165729e-02, 9.16557038e-01, 5.52638894e-03],\n", 340 | " [7.13727884e-03, 9.92212157e-01, 6.50564623e-04],\n", 341 | " [3.89970579e-01, 5.93521040e-01, 1.65083809e-02],\n", 342 | " [3.90252558e-01, 5.93246302e-01, 1.65011393e-02],\n", 343 | " [1.56625913e-02, 9.82427762e-01, 1.90964704e-03],\n", 344 | " [1.52701815e-04, 9.99831814e-01, 1.54839538e-05],\n", 345 | " [2.59089559e-01, 7.27555956e-01, 1.33544854e-02],\n", 346 | " [5.33876792e-01, 4.48220900e-01, 1.79023080e-02],\n", 347 | " [2.59192873e-01, 7.27456801e-01, 1.33503261e-02],\n", 348 | " 
[7.79457049e-02, 9.16524102e-01, 5.53019287e-03],\n", 349 | " [3.90336315e-01, 5.93161975e-01, 1.65017101e-02]])" 350 | ] 351 | }, 352 | "execution_count": 11, 353 | "metadata": {}, 354 | "output_type": "execute_result" 355 | } 356 | ], 357 | "source": [ 358 | "fcm.fuzzy_partition" 359 | ] 360 | } 361 | ], 362 | "metadata": { 363 | "kernelspec": { 364 | "display_name": "Python 3", 365 | "language": "python", 366 | "name": "python3" 367 | }, 368 | "language_info": { 369 | "codemirror_mode": { 370 | "name": "ipython", 371 | "version": 3 372 | }, 373 | "file_extension": ".py", 374 | "mimetype": "text/x-python", 375 | "name": "python", 376 | "nbconvert_exporter": "python", 377 | "pygments_lexer": "ipython3", 378 | "version": "3.6.6" 379 | } 380 | }, 381 | "nbformat": 4, 382 | "nbformat_minor": 2 383 | } 384 | -------------------------------------------------------------------------------- /Notebooks/.ipynb_checkpoints/Sample_Weighting-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Feature Weighted Fuzzy C-means Classifier Visuals" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import matplotlib.pyplot as plt\n", 17 | "import numpy as np\n", 18 | "from sklearn.cluster import KMeans\n", 19 | "\n", 20 | "np.random.seed(0)\n", 21 | "n_points_per_cluster = 250\n", 22 | "\n", 23 | "C1 = [-.5, -1.5] + .4 * np.random.randn(n_points_per_cluster, 2)\n", 24 | "C2 = [.5,0] + .25 * np.random.randn(n_points_per_cluster, 2)\n", 25 | "C3 = [0, 1.5] + .4 * np.random.randn(n_points_per_cluster, 2)\n", 26 | "\n", 27 | "C1 = np.hstack((C1, 1 * np.random.randn(n_points_per_cluster, 1)))\n", 28 | "C2 = np.hstack((C2, 1 * np.random.randn(n_points_per_cluster, 1)))\n", 29 | "C3 = np.hstack((C3, 1 * np.random.randn(n_points_per_cluster, 1)))\n", 30 | "X_ = np.vstack((C1, C2, C3))\n", 31 | "from mpl_toolkits.mplot3d import Axes3D\n", 32 | "fig3D = plt.figure(figsize=(20, 20))\n", 33 | "ax = fig3D.add_subplot(221, projection='3d')\n", 34 | "ax.scatter(C1[:, 0], C1[:, 1],C1[:, 2], c= 'red', alpha = .5)\n", 35 | "ax.scatter(C2[:, 0], C2[:, 1],C2[:, 2], c= 'green', alpha = .5)\n", 36 | "ax.scatter(C3[:, 0], C3[:, 1],C3[:, 2], c= 'blue', alpha = .5)\n", 37 | "ax.set_ylim(-3,3)\n", 38 | "ax.set_xlim(-3,3)\n", 39 | "ax.set_title(\"Clusters With Noise Feature\")\n", 40 | "ax2 = fig3D.add_subplot(222, projection='3d')\n", 41 | "zeros = np.zeros((n_points_per_cluster, 1))\n", 42 | "ax2.scatter(C1[:, 0], C1[:, 1],zeros, c= 'red', alpha = .5)\n", 43 | "ax2.scatter(C2[:, 0], C2[:, 1],zeros, c= 'green', alpha = .5)\n", 44 | "ax2.scatter(C3[:, 0], C3[:, 1],zeros, c= 'blue', alpha = .5)\n", 45 | "ax2.set_ylim(-3,3)\n", 46 | "ax2.set_xlim(-3,3)\n", 47 | "ax2.set_zlim(-3,3)\n", 48 | "ax2.set_title(\"Clusters Without Noise Feature\")\n", 49 | "k = KMeans(3)\n", 50 | "k.fit(X_)\n", 51 | "l = k.labels_\n", 52 | "ax3 = fig3D.add_subplot(223, projection='3d')\n", 53 | "ax3.scatter(X_[:, 0], X_[:, 1],X_[:, 2], c=l, alpha = .5)\n", 54 | "ax3.set_ylim(-3,3)\n", 55 | "ax3.set_xlim(-3,3)\n", 56 | "ax3.set_zlim(-3,3)\n", 57 | "ax3.set_title(\"K-means With Noise Features\")\n", 58 | "\n", 59 | "import sys \n", 60 | "sys.path.insert(0, '/home/colin/Desktop/FWFCM/src')\n", 61 | "from feature_learning import return_weighted_distance\n", 62 | "p = return_weighted_distance(X_, threshold = 0.00005, n = 20)\n", 63 | 
"\n", 64 | "for i in range(3):\n", 65 | " X_[:, i] = X_[:, i] * p[:, i] * 1.5\n", 66 | "k.fit(X_)\n", 67 | "l = k.labels_\n", 68 | "zeros = np.zeros((n_points_per_cluster*3, 1))\n", 69 | "ax4 = fig3D.add_subplot(224, projection='3d')\n", 70 | "ax4.set_title(\"K-means With Learned and Scaled Features\")\n", 71 | "ax4.scatter(X_[:, 0], X_[:, 1],X_[:, 2], c=l, alpha = .5)\n", 72 | "ax4.set_ylim(-3,3)\n", 73 | "ax4.set_xlim(-3,3)\n", 74 | "ax4.set_zlim(-3,3)\n", 75 | "fig3D.savefig(\"NoiseReduction.png\")\n", 76 | "fig3D.show()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "p" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "plt.show()" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python 3", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.6.6" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /Notebooks/.ipynb_checkpoints/Untitled1-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /Notebooks/Module_Test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Module Testing\n", 8 | "This notebook is meant to test the code repo built and ensure it runs accurately" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "import sys \n", 18 | "sys.path.insert(0, '/home/colin/Desktop/Projects/feature_learning/src')" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "import feature_learning\n", 28 | "from sklearn.datasets import load_iris\n", 29 | "import numpy as np\n", 30 | "from sklearn.preprocessing import StandardScaler\n", 31 | "iris = load_iris()\n", 32 | "X = iris.data\n", 33 | "X[:10, :]\n", 34 | "X = StandardScaler().fit_transform(X)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [ 42 | { 43 | "name": "stdout", 44 | "output_type": "stream", 45 | "text": [ 46 | "1 Iterations Required\n" 47 | ] 48 | } 49 | ], 50 | "source": [ 51 | "max_iter = 100\n", 52 | "threshold= .00001\n", 53 | "w = feature_learning.return_weighted_distance(X, threshold = threshold, max_iter = max_iter, n = 15)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 4, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "fcm = feature_learning.c_means(threshold = .03, max_iter = 100)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | 
"execution_count": 5, 68 | "metadata": {}, 69 | "outputs": [ 70 | { 71 | "ename": "ValueError", 72 | "evalue": "Input contains NaN, infinity or a value too large for dtype('float64').", 73 | "output_type": "error", 74 | "traceback": [ 75 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 76 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 77 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfcm\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 78 | "\u001b[0;32m~/Desktop/Projects/feature_learning/src/feature_learning.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X, weights, log_argmax)\u001b[0m\n\u001b[1;32m 277\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0mnum_iter\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0mmax_iter\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 278\u001b[0m \u001b[0mu2\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mu\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 279\u001b[0;31m \u001b[0mV\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mJm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mu2\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mweights\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 280\u001b[0m \u001b[0mJ\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhstack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mJ\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mJm\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0mnum_iter\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 79 | "\u001b[0;32m~/Desktop/Projects/feature_learning/src/feature_learning.py\u001b[0m in \u001b[0;36mupdate\u001b[0;34m(X, u2, m, weights, n, c)\u001b[0m\n\u001b[1;32m 173\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;31m# update distance matrix\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 175\u001b[0;31m \u001b[0md\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpairwise_distances\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mV\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetric\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mweighted_euclidean\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'weights'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0mweights\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 176\u001b[0m \u001b[0;31m#update fuzziness\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 177\u001b[0m \u001b[0;32mfor\u001b[0m 
\u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 80 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mpairwise_distances\u001b[0;34m(X, Y, metric, n_jobs, **kwds)\u001b[0m\n\u001b[1;32m 1404\u001b[0m \u001b[0mfunc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpartial\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdistance\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcdist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmetric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1405\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1406\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_parallel_pairwise\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_jobs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1407\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1408\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 81 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36m_parallel_pairwise\u001b[0;34m(X, Y, func, n_jobs, **kwds)\u001b[0m\n\u001b[1;32m 1065\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1066\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0meffective_n_jobs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mn_jobs\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1067\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1068\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1069\u001b[0m \u001b[0;31m# TODO: in some cases, backend='threading' may be appropriate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 82 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36m_pairwise_callable\u001b[0;34m(X, Y, metric, **kwds)\u001b[0m\n\u001b[1;32m 1079\u001b[0m \"\"\"Handle the callable case for pairwise_{distances,kernels}\n\u001b[1;32m 1080\u001b[0m \"\"\"\n\u001b[0;32m-> 1081\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_pairwise_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1082\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1083\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mX\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 83 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/metrics/pairwise.py\u001b[0m in 
\u001b[0;36mcheck_pairwise_arrays\u001b[0;34m(X, Y, precomputed, dtype)\u001b[0m\n\u001b[1;32m 111\u001b[0m warn_on_dtype=warn_on_dtype, estimator=estimator)\n\u001b[1;32m 112\u001b[0m Y = check_array(Y, accept_sparse='csr', dtype=dtype,\n\u001b[0;32m--> 113\u001b[0;31m warn_on_dtype=warn_on_dtype, estimator=estimator)\n\u001b[0m\u001b[1;32m 114\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 115\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mprecomputed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 84 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, warn_on_dtype, estimator)\u001b[0m\n\u001b[1;32m 571\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mforce_all_finite\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 572\u001b[0m _assert_all_finite(array,\n\u001b[0;32m--> 573\u001b[0;31m allow_nan=force_all_finite == 'allow-nan')\n\u001b[0m\u001b[1;32m 574\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 575\u001b[0m \u001b[0mshape_repr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_shape_repr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 85 | "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36m_assert_all_finite\u001b[0;34m(X, allow_nan)\u001b[0m\n\u001b[1;32m 54\u001b[0m not allow_nan and not np.isfinite(X).all()):\n\u001b[1;32m 55\u001b[0m \u001b[0mtype_err\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'infinity'\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mallow_nan\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;34m'NaN, infinity'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 56\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmsg_err\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtype_err\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 86 | "\u001b[0;31mValueError\u001b[0m: Input contains NaN, infinity or a value too large for dtype('float64')." 
87 | ] 88 | } 89 | ], 90 | "source": [ 91 | "fcm.fit(X, w)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": {}, 98 | "outputs": [], 99 | "source": [ 100 | "fcm.cluster_centers" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "from itertools import combinations\n", 110 | "import matplotlib.pyplot as plt\n", 111 | "def to_color(u):\n", 112 | " colors = []\n", 113 | " for i in range(0, u.shape[0]):\n", 114 | " colors.append(tuple((u[i, 0], u[i, 1], u[i, 2])))\n", 115 | " return colors \n", 116 | " \n", 117 | "\n", 118 | "def _2D_Plot(X_, V, colorlist, columns):\n", 119 | " fig, ax = plt.subplots(2,3, figsize = (15,10))\n", 120 | " axlist = [ (i,j) for i in np.arange(0,3) for j in np.arange(0,3)]\n", 121 | " for idx, axcombo in enumerate(combinations([0,1,2,3], 2)):\n", 122 | " axl = axlist[idx]\n", 123 | " axl = ax[axl[0], axl[1]]\n", 124 | " axl.scatter(X_[:, axcombo[0]], X_[:, axcombo[1]], c = colorlist, alpha = .5)\n", 125 | " axl.scatter(V[:, axcombo[0]], V[:, axcombo[1]], marker ='x', c=['red', 'green', 'blue'], s =200)\n", 126 | " axl.set_xlabel(columns[axcombo[0]])\n", 127 | " axl.set_ylabel(columns[axcombo[1]])\n", 128 | " #plt.show()\n", 129 | " return fig\n", 130 | "V = fcm.cluster_centers\n", 131 | "u = fcm.fuzzy_partition\n", 132 | "colorlist = to_color(u)\n", 133 | "columns = iris['feature_names']\n", 134 | "fig = _2D_Plot(X, V,colorlist, columns)" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "fcm.f_p_coeff" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "fcm.weights" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": {}, 159 | "outputs": [], 160 | "source": [ 161 | "fcm.cluster_centers" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "fcm.fuzzy_partition" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": null, 176 | "metadata": {}, 177 | "outputs": [], 178 | "source": [] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": {}, 198 | "outputs": [], 199 | "source": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": {}, 205 | "outputs": [], 206 | "source": [] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "Python 3", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.6.7" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 2 230 | } 231 | -------------------------------------------------------------------------------- /Notebooks/NoiseReduction.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/Notebooks/NoiseReduction.png -------------------------------------------------------------------------------- /Notebooks/Sample_Weighting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Feature Weighted Fuzzy C-means Classifier Visuals" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [ 15 | { 16 | "name": "stdout", 17 | "output_type": "stream", 18 | "text": [ 19 | "30 Iterations Required\n" 20 | ] 21 | } 22 | ], 23 | "source": [ 24 | "import matplotlib.pyplot as plt\n", 25 | "import numpy as np\n", 26 | "from sklearn.cluster import KMeans\n", 27 | "\n", 28 | "np.random.seed(0)\n", 29 | "n_points_per_cluster = 250\n", 30 | "\n", 31 | "C1 = [-.5, -1.5] + .4 * np.random.randn(n_points_per_cluster, 2)\n", 32 | "C2 = [.5,0] + .25 * np.random.randn(n_points_per_cluster, 2)\n", 33 | "C3 = [0, 1.5] + .4 * np.random.randn(n_points_per_cluster, 2)\n", 34 | "\n", 35 | "C1 = np.hstack((C1, 1 * np.random.randn(n_points_per_cluster, 1)))\n", 36 | "C2 = np.hstack((C2, 1 * np.random.randn(n_points_per_cluster, 1)))\n", 37 | "C3 = np.hstack((C3, 1 * np.random.randn(n_points_per_cluster, 1)))\n", 38 | "X_ = np.vstack((C1, C2, C3))\n", 39 | "from mpl_toolkits.mplot3d import Axes3D\n", 40 | "fig3D = plt.figure(figsize=(20, 20))\n", 41 | "ax = fig3D.add_subplot(221, projection='3d')\n", 42 | "ax.scatter(C1[:, 0], C1[:, 1],C1[:, 2], c= 'red', alpha = .5)\n", 43 | "ax.scatter(C2[:, 0], C2[:, 1],C2[:, 2], c= 'green', alpha = .5)\n", 44 | "ax.scatter(C3[:, 0], C3[:, 1],C3[:, 2], c= 'blue', alpha = .5)\n", 45 | "ax.set_ylim(-3,3)\n", 46 | "ax.set_xlim(-3,3)\n", 47 | "ax.set_title(\"Clusters With Noise Feature\")\n", 48 | "ax2 = fig3D.add_subplot(222, projection='3d')\n", 49 | "zeros = np.zeros((n_points_per_cluster, 1))\n", 50 | "ax2.scatter(C1[:, 0], C1[:, 1],zeros, c= 'red', alpha = .5)\n", 51 | "ax2.scatter(C2[:, 0], C2[:, 1],zeros, c= 'green', alpha = .5)\n", 52 | "ax2.scatter(C3[:, 0], C3[:, 1],zeros, c= 'blue', alpha = .5)\n", 53 | "ax2.set_ylim(-3,3)\n", 54 | "ax2.set_xlim(-3,3)\n", 55 | "ax2.set_zlim(-3,3)\n", 56 | "ax2.set_title(\"Clusters Without Noise Feature\")\n", 57 | "k = KMeans(3)\n", 58 | "k.fit(X_)\n", 59 | "l = k.labels_\n", 60 | "ax3 = fig3D.add_subplot(223, projection='3d')\n", 61 | "ax3.scatter(X_[:, 0], X_[:, 1],X_[:, 2], c=l, alpha = .5)\n", 62 | "ax3.set_ylim(-3,3)\n", 63 | "ax3.set_xlim(-3,3)\n", 64 | "ax3.set_zlim(-3,3)\n", 65 | "ax3.set_title(\"K-means With Noise Features\")\n", 66 | "\n", 67 | "import sys \n", 68 | "sys.path.insert(0, '/home/colin/Desktop/FWFCM/src')\n", 69 | "from feature_learning import return_weighted_distance\n", 70 | "p = return_weighted_distance(X_, threshold = 0.00005, n = 20)\n", 71 | "\n", 72 | "for i in range(3):\n", 73 | " X_[:, i] = X_[:, i] * p[:, i] * 1.5\n", 74 | "k.fit(X_)\n", 75 | "l = k.labels_\n", 76 | "zeros = np.zeros((n_points_per_cluster*3, 1))\n", 77 | "ax4 = fig3D.add_subplot(224, projection='3d')\n", 78 | "ax4.set_title(\"K-means With Learned and Scaled Features\")\n", 79 | "ax4.scatter(X_[:, 0], X_[:, 1],X_[:, 2], c=l, alpha = .5)\n", 80 | "ax4.set_ylim(-3,3)\n", 81 | "ax4.set_xlim(-3,3)\n", 82 | "ax4.set_zlim(-3,3)\n", 83 | 
"fig3D.savefig(\"NoiseReduction.png\")\n", 84 | "fig3D.show()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 2, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/plain": [ 95 | "array([[0.11590097, 1. , 0.16391792]])" 96 | ] 97 | }, 98 | "execution_count": 2, 99 | "metadata": {}, 100 | "output_type": "execute_result" 101 | } 102 | ], 103 | "source": [ 104 | "p" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 3, 110 | "metadata": {}, 111 | "outputs": [], 112 | "source": [ 113 | "plt.show()" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [] 122 | } 123 | ], 124 | "metadata": { 125 | "kernelspec": { 126 | "display_name": "Python 3", 127 | "language": "python", 128 | "name": "python3" 129 | }, 130 | "language_info": { 131 | "codemirror_mode": { 132 | "name": "ipython", 133 | "version": 3 134 | }, 135 | "file_extension": ".py", 136 | "mimetype": "text/x-python", 137 | "name": "python", 138 | "nbconvert_exporter": "python", 139 | "pygments_lexer": "ipython3", 140 | "version": "3.6.6" 141 | } 142 | }, 143 | "nbformat": 4, 144 | "nbformat_minor": 2 145 | } 146 | -------------------------------------------------------------------------------- /Notebooks/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Feature Weighted Fuzzy C-means Classifier\n", 8 | "This project was originally meant to be a part of the D-BOM order mining tool, but was foreseen becoming the bulk of the actual work. For that reason, I gave it a stand alone repo. This notebook will explore how I will turn this into a library. I will use the iris dataset for testing. This notebook will first attemp to build fuzzy c-means from scratch, and then it will attempt to introduce the feature weighted learning component. \n" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 17, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "from sklearn.datasets import load_iris\n", 18 | "import numpy as np\n", 19 | "iris = load_iris()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 18, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "array([[5.1, 3.5, 1.4, 0.2],\n", 31 | " [4.9, 3. , 1.4, 0.2],\n", 32 | " [4.7, 3.2, 1.3, 0.2],\n", 33 | " [4.6, 3.1, 1.5, 0.2],\n", 34 | " [5. , 3.6, 1.4, 0.2],\n", 35 | " [5.4, 3.9, 1.7, 0.4],\n", 36 | " [4.6, 3.4, 1.4, 0.3],\n", 37 | " [5. 
, 3.4, 1.5, 0.2],\n", 38 | " [4.4, 2.9, 1.4, 0.2],\n", 39 | " [4.9, 3.1, 1.5, 0.1]])" 40 | ] 41 | }, 42 | "execution_count": 18, 43 | "metadata": {}, 44 | "output_type": "execute_result" 45 | } 46 | ], 47 | "source": [ 48 | "X = iris.data\n", 49 | "X[:10, :]" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 41, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "scales = np.amax(X, axis = 0)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 44, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "# Basic Fuzzy c-means clustering \n", 68 | "c = 3\n", 69 | "epsilon = .05\n", 70 | "m = 2\n", 71 | "V = np.empty((c,4))\n", 72 | "for i in np.arange(0,c):\n", 73 | " c_ = np.random.random((c,1))* scales[i]\n", 74 | " V[:, i:i+1] = c_\n", 75 | " \n", 76 | "fuzz = np.random.random(X.shape) " 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": 45, 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "text/plain": [ 87 | "array([[6.90067416e-001, 4.03567345e+000, 1.96562883e+000,\n", 88 | " 6.95024462e-310],\n", 89 | " [4.36526976e+000, 1.52127116e+000, 6.67822419e+000,\n", 90 | " 6.95024462e-310],\n", 91 | " [5.17583961e+000, 2.75369530e+000, 2.57638666e+000,\n", 92 | " 6.95028791e-310]])" 93 | ] 94 | }, 95 | "execution_count": 45, 96 | "metadata": {}, 97 | "output_type": "execute_result" 98 | } 99 | ], 100 | "source": [ 101 | "V" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": 46, 107 | "metadata": {}, 108 | "outputs": [], 109 | "source": [ 110 | "dist_weights = [1,1,1,1]\n", 111 | "def euclidean(X, V, weights):\n", 112 | " dists = X- V\n", 113 | " return np.sqrt((dists * weights) **2)\n" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 47, 119 | "metadata": {}, 120 | "outputs": [], 121 | "source": [ 122 | "def update(X, V):\n", 123 | " #update fuzziness matrix\n", 124 | " for v_i in V.shape[0]:\n", 125 | " numerator = fuzz[i]**m * X\n", 126 | " denominator = fuzz[i]**m\n", 127 | " v_i = numerator/denominator\n", 128 | " \n", 129 | " #update distance matrix\n", 130 | " d = np.empty(X.shape)\n", 131 | " for d_j in np.arange(0,X.shape[0]):\n", 132 | " x_jk = X[d_j, :]\n", 133 | " for d_k in np.arange(0,c)\n", 134 | " \n", 135 | " \n", 136 | " return \n", 137 | " " 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": {}, 144 | "outputs": [], 145 | "source": [] 146 | } 147 | ], 148 | "metadata": { 149 | "kernelspec": { 150 | "display_name": "Python 3", 151 | "language": "python", 152 | "name": "python3" 153 | }, 154 | "language_info": { 155 | "codemirror_mode": { 156 | "name": "ipython", 157 | "version": 3 158 | }, 159 | "file_extension": ".py", 160 | "mimetype": "text/x-python", 161 | "name": "python", 162 | "nbconvert_exporter": "python", 163 | "pygments_lexer": "ipython3", 164 | "version": "3.6.6" 165 | } 166 | }, 167 | "nbformat": 4, 168 | "nbformat_minor": 2 169 | } 170 | -------------------------------------------------------------------------------- /Notebooks/Untitled1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Feature Weighted Fuzzy C-Means Classifier 2 | 3 | When trying to 
cluster data into separate groups, it is often difficult to know which features to include and how much importance to give to each. In this sense, feature weight assignment can be seen as a generalization of feature selection. The purpose of this library is to take a data set and learn a weight in [0,1] for each feature. The learned weights are then evaluated with a weighted fuzzy c-means classifier on the Iris data set from the UCI Machine Learning Repository. 4 | 5 | ## Algorithm 6 | 7 | The guiding principle of similarity-based clustering is that similar objects fall within the same cluster and dissimilar objects fall in different clusters, which is no different from the goal of most conventional clustering algorithms. With similarity-based clustering, a measure must be defined to determine how similar two objects are. This similarity measure is based on distance, and different distance metrics can be employed, but it always yields a value in [0,1], where 0 means no similarity and 1 means the objects are identical. To measure feature importance, we use a weighted Euclidean distance function. The similarity measure is defined as follows: 8 | 9 | ![Fuzzy Equation](figures/fuzzymatrix.png) 10 | 11 | Here β is a value we must solve for, w represents the feature weight vector used in the distance function, and d represents the pairwise distances between objects. To solve for β, we use the assumption that, with the standard weights (all 1's), the similarity matrix should be uniformly distributed over [0,1] and therefore have a mean of 0.5. So to find β, we solve the equation: 12 | 13 | ![Beta Equation](figures/solve_for_beta.png) 14 | 15 | With a weighted Euclidean distance, this similarity matrix can be used to identify which features introduce noise and which ones are important to clustering. The ultimate goal is to minimize the "fuzziness" of the similarity matrix, pushing values in the middle (i.e., near 0.5) toward either 1 or 0. For this purpose we use the loss metric: 16 | 17 | ![Loss Equation](figures/Loss.png) 18 | 19 | 20 | Here (1) represents the base weights (all 1's), and ρ represents the fuzzy similarity matrix obtained when a given weight vector is used in the Euclidean distance between points p and q.  21 | We can then use gradient descent on this loss function to minimize it with respect to the weights. Gradient descent is one of the most common optimization algorithms in machine learning; it finds good parameters of a function by following its gradient, the vector of partial derivatives. By taking steps proportional to the negative of the gradient, we move toward a local minimum of the function. The weights are updated until either the maximum number of iterations is reached or the loss converges, using the partial derivative of the loss with respect to the weights:  22 | ![Gradient Descent Update](figures/gradient.png) where n is the learning rate. The learning rate is an important parameter: too small and the optimization requires excessive computation, too large and it may never converge.  23 | If you think of the data as a 3D graph, the effect is like stretching or shrinking each axis so that the points form tighter groups that sit further away from each other. We are not actually changing the locations of the data; we are only transforming how we measure the distances that drive our similarity metrics.
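To make the weight-learning loop concrete, here is a minimal sketch of the procedure described above. This is not the implementation in src/feature_learning.py: the exponential similarity, the simple fuzziness loss, the numerical gradient, and the helper names (similarity_matrix, solve_beta, learn_weights) are all illustrative assumptions standing in for the equations shown in the figures.

```
# Illustrative sketch only; not the library's implementation in src/feature_learning.py.
import numpy as np
from sklearn.metrics import pairwise_distances


def similarity_matrix(X, weights, beta):
    """Similarity in [0, 1] from a weighted Euclidean distance (weights scale the columns)."""
    d = pairwise_distances(X * weights)
    return np.exp(-beta * d)


def solve_beta(X, target_mean=0.5):
    """Choose beta so that exp(-beta * mean pairwise distance) equals target_mean."""
    d = pairwise_distances(X)
    return -np.log(target_mean) / d.mean()


def fuzziness(s):
    """Loss that is largest when similarities sit near 0.5 and zero when they reach 0 or 1."""
    return np.mean(1.0 - (2.0 * s - 1.0) ** 2)


def learn_weights(X, lr=0.05, n_iter=200, eps=1e-3, tol=1e-5):
    """Gradient descent on the fuzziness loss with respect to the feature weights."""
    w = np.ones(X.shape[1])
    beta = solve_beta(X)
    for _ in range(n_iter):
        base = fuzziness(similarity_matrix(X, w, beta))
        grad = np.zeros_like(w)
        for j in range(w.size):  # simple numerical gradient, one feature at a time
            w_eps = w.copy()
            w_eps[j] += eps
            grad[j] = (fuzziness(similarity_matrix(X, w_eps, beta)) - base) / eps
        w_new = np.clip(w - lr * grad, 0.0, None)  # step against the gradient, keep weights non-negative
        if np.abs(w_new - w).max() < tol:  # stop once the weights stop moving
            w = w_new
            break
        w = w_new
    return w / max(w.max(), 1e-12)  # normalize so the largest weight is 1
```

The idea is that features which only add noise get down-weighted, as in the constructed example below; the library's own return_weighted_distance, used in the Usage section further down, is what the notebooks in this repository actually call.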
24 | Here is a constructed example where I introduce 3 clusters with separate centroids on the first two variables and add a third noise variable that makes the clustering more difficult. Points are colored by the true cluster labels assigned when the data was created. When the noise variable is removed, the clusters are much easier to identify.  25 | 26 | ![Noise Reduction](figures/NoiseReduction.png) 27 | 28 | As you can see, k-means had a harder time identifying the true clusters because it had to incorporate the noise feature.  29 | 30 | 31 | ## Measuring Improvement 32 | A good way to demonstrate the effectiveness of the learned weights is fuzzy c-means, a relative of the commonly used k-means algorithm. It works in a very similar fashion to k-means, but produces a fuzzy partition matrix rather than a single cluster label per point.  33 | The fuzzy partition matrix is a set of membership weights that measure how close each point is to each cluster center, much like the similarity matrix used earlier. It can also be computed with a weighted distance metric, into which we can feed our newly learned optimal weights; these memberships then feed back into updating the cluster centers. As in k-means, the cluster centers shift with each iteration until the maximum number of iterations or an improvement threshold is reached. 34 | In fuzzy c-means the goal is very similar to our original loss function: we want less "fuzziness", with every point as close as possible to its own cluster center and far from the others. A good measure of the fuzzy clustering result is Dunn's partition coefficient, a sum over the components of the fuzzy partition matrix.  35 | Let's try fuzzy c-means on the Iris data set with and without our learned feature weights. Here is the output of fuzzy c-means plotted for every pair of variables, assuming 3 clusters (which we know from the data set).  36 | ![Iris Without Weights](figures/Iris_WO.png) 37 | 38 | 39 | 40 | Notice how the boundaries between some clusters are poorly defined; because multiple features are weighted equally, the boundaries can be blurred. Now, applying the feature weight learning approach, we get normalized distance weights of: 41 | {'sepal length': 0.0, 'sepal width': 0.0, 'petal length': 1.0, 'petal width': 0.0258} 42 | 43 | ![Iris With Weights](figures/Iris_W.png) 44 | 45 | There are still fuzzy boundaries, mostly on the features we assigned zero weight, but the algorithm put a major focus on petal length. We obtained similar clusters, stronger boundaries (on some feature pairs), and the fuzzy partition coefficient increased from .70 to .86, an increase of ~23%! 46 | We also now know that if we wanted to generate rules for classifying the flowers, we could focus on 2 features instead of 4. 47 | 48 | 49 | ## Usage 50 | 51 | To use this module, first learn the feature weights by calling return_weighted_distance, then run the fuzzy c-means classifier by fitting it on the data with the returned weights. 52 | 53 | 54 | ``` 55 | from feature_learning import return_weighted_distance, c_means 56 | w = return_weighted_distance(X) 57 | fcm = c_means() 58 | fcm.fit(X, w) 59 | 60 | ``` 61 | 62 | The resulting class has the following attributes:
62 | The resulting class has the following Attributes:
63 | cluster_centers - location of cluster centers
64 | fuzzy_partition - fuzzy partition matrix
65 | f_p_coeff - fuzzy partition coefficient 66 | -------------------------------------------------------------------------------- /figures/Iris_W.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/figures/Iris_W.png -------------------------------------------------------------------------------- /figures/Iris_WO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/figures/Iris_WO.png -------------------------------------------------------------------------------- /figures/Loss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/figures/Loss.png -------------------------------------------------------------------------------- /figures/NoiseReduction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/figures/NoiseReduction.png -------------------------------------------------------------------------------- /figures/fuzzymatrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/figures/fuzzymatrix.png -------------------------------------------------------------------------------- /figures/gradient.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/figures/gradient.png -------------------------------------------------------------------------------- /figures/solve_for_beta.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/figures/solve_for_beta.png -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/src/__init__.py -------------------------------------------------------------------------------- /src/__pycache__/feature_learning.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Csinclair0/feature_learning/25afe1538f040c0a314712fdeae58c52c6c12c80/src/__pycache__/feature_learning.cpython-36.pyc -------------------------------------------------------------------------------- /src/feature_learning.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import pairwise_distances 3 | import math 4 | import warnings 5 | from scipy.sparse import triu 6 | 7 | warnings.filterwarnings('ignore') 8 | 9 | 10 | def weighted_euclidean(X, V, weights): 11 | """Weighted euclidean distance function 12 | 13 | Parameters 14 | ---------- 15 | X : array 16 | first object 17 | V : array 18 | second object 19 | weights : array 20 | feature weights 21 | 22 | Returns 23 | ------- 24 | float 25 | 
weighted distance
26 | 
27 |     """
28 |     dists = X - V
29 |     return np.sqrt(np.sum((dists * weights) ** 2))
30 | 
31 | 
32 | def single_delta(X, V, F):
33 |     """Distance along a single feature
34 | 
35 |     Parameters
36 |     ----------
37 |     X : array
38 |         first object
39 |     V : array
40 |         second object
41 |     F : int
42 |         index of the feature to compare
43 | 
44 |     Returns
45 |     -------
46 |     float
47 |         difference on the single feature F
48 | 
49 |     """
50 |     d = X[F] - V[F]
51 |     return d
52 | 
53 | 
54 | def calc_beta(X, d):
55 |     """Calculate the beta value for feature weight learning
56 | 
57 |     Parameters
58 |     ----------
59 |     X : array
60 |         data set
61 |     d : array
62 |         distance matrix
63 | 
64 |     Returns
65 |     -------
66 |     float
67 |         beta value
68 | 
69 |     """
70 |     n = X.shape[0]
71 |     for b in np.linspace(0, 1, 10000):
72 |         p = 1/(1+b*d)
73 |         p = triu(p, 1).toarray()  # keep each pair only once
74 |         if (2 / (n*(n-1))) * np.sum(p) < .5:  # mean similarity has dropped below 0.5
75 |             return b
76 | 
77 | 
78 | def return_weights(X, b, d, mincols, threshold, learning_rate, max_iter):
79 |     """Returns learned feature weights, given the data set, beta, the distance matrix and the minimum number of columns
80 | 
81 |     Parameters
82 |     ----------
83 |     X : array
84 |         data set
85 |     b : float
86 |         beta value
87 |     d : array
88 |         distance matrix
89 |     mincols : int
90 |         minimum number of columns to return that have weights
91 |     threshold : float
92 |         minimum improvement below which learning stops
93 |     learning_rate : float
94 |         learning rate for gradient descent
95 | 
96 |     Returns
97 |     -------
98 |     array
99 |         learned feature weights
100 | 
101 |     """
102 | 
103 |     w = np.empty((1, X.shape[1]))
104 |     w.fill(1)
105 |     p_1 = 1/(1+b*d)  # baseline similarity matrix with all weights equal to 1
106 |     n = X.shape[0]
107 |     E_old = 1
108 |     for i in np.arange(0, max_iter):
109 |         d = pairwise_distances(X, X, metric = weighted_euclidean, **{'weights':w})
110 |         grad_w = np.empty((1, X.shape[1]))
111 |         part_pq = -b/((1+b*d)**2)  # partial derivative of the similarity w.r.t. the distance
112 |         p = 1/(1+b*d)  # similarity matrix under the current weights
113 |         E = (2/(n*(n-1))) * np.sum(triu(.5*((p*(1-p_1) + p_1*(1-p))), 1).toarray())  # fuzziness loss
114 |         if E_old - E < threshold:
115 |             break
116 |         E_old = E
117 |         part_eq = (1-2*p_1)
118 |         w_valid = np.where(w > 0)[1]  # features that still carry a non-zero weight
119 | 
120 |         if w_valid.shape[0] == mincols:
121 |             break
122 | 
123 |         for j in w_valid:
124 |             d_w = pairwise_distances(X, X, metric = single_delta, **{'F':j})
125 |             part_w = w[0, j]*(d_w)**2 / d  # zero distances give nan; handled by np.nansum below
126 |             part_w = triu(part_w, 1).toarray()
127 |             grad_w_j = 1/(n*(n-1)) * part_eq * part_pq * part_w
128 |             grad_w_j = triu(grad_w_j, 1).toarray()
129 |             grad_w[0, j] = np.nansum(grad_w_j)
130 |         grad_w = grad_w * learning_rate
131 |         w = w - grad_w
132 |         w = w.clip(min=0)
133 |         #if i % 100 == 0: #and i > 0:
134 |             #print("Iteration {} Finished".format(i))
135 |             #print("Weights : {} ".format(w))
136 |             #print("Function Improvement : {}".format(E))
137 | 
138 |     wmax = np.max(w)
139 |     w = w / wmax  # normalize so the largest weight is 1
140 |     print("{} Iterations Required".format(i))
141 |     return w
142 | 
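# update() below performs one iteration of the standard weighted fuzzy c-means
# equations described in the README:
#   cluster centers : V_k = sum_i(u_ik ** m * X_i) / sum_i(u_ik ** m)
#   memberships     : u_ik = 1 / sum_j((d_ik / d_ij) ** (2 / (m - 1)))
# where d is the weighted Euclidean distance between points and centers.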
143 | def update(X, u2, m, weights, n, c):
144 |     """Perform one update of the fuzzy c-means process
145 | 
146 |     Parameters
147 |     ----------
148 |     X : array
149 |         data being clustered
150 |     u2 : array
151 |         current fuzzy partition (membership) matrix
152 |     m : float
153 |         fuzzification factor
154 |     weights : array
155 |         feature weights for the distance metric
156 |     n : int
157 |         sample size
158 |     c : int
159 |         number of clusters
160 | 
161 |     Returns
162 |     -------
163 |     V, u, J, d : updated cluster centers, fuzzy partition matrix, loss and distance matrix
164 | 
165 |     """
166 |     u = u2.copy()
167 |     um = u ** m
168 |     # update cluster centers matrix
169 |     numerator = um.T.dot(X)
170 |     denominator = um.T.sum(axis = 1)
171 |     V = numerator.T/(denominator)
172 |     V = V.T
173 | 
174 |     # update distance matrix
175 |     d = pairwise_distances(X, V, metric = weighted_euclidean, **{'weights':weights})
176 |     # update fuzzy memberships
177 |     for i in np.arange(0, n):
178 |         for j in np.arange(0, c):
179 |             newdenom = (d[i, j] / d[i, :]) ** (2/(m-1))
180 |             u[i, j] = 1 / np.sum(newdenom, axis = 0)
181 | 
182 |     # update loss
183 |     J = (u * d**2).sum()
184 | 
185 |     return V, u, J, d
186 | 
187 | def return_weighted_distance(X, mincols = 0, sample_size = 1, threshold = .0005, n = 1, max_iter = 1000):
188 |     """Takes in a data set and completes the entire process of feature weight learning.
189 |     1. Calculate Pairwise Distances
190 |     2. Calculate Beta
191 |     3. Learn Feature weights through gradient descent
192 | 
193 |     Parameters
194 |     ----------
195 |     X : array
196 |         data set
197 |     mincols : int
198 |         minimum number of columns to return
199 |     sample_size : float
200 |         fraction of dataset to use in feature weight learning
201 |     threshold : float
202 |         minimum improvement below which learning stops
203 | 
204 |     Returns
205 |     -------
206 |     array
207 |         learned feature weights
208 | 
209 |     """
210 |     numsample = math.ceil(sample_size * X.shape[0])
211 |     sample = np.random.choice(X.shape[0], numsample, replace = False)
212 |     X_S = X  #X[sample] : row subsampling is currently disabled
213 |     d = pairwise_distances(X_S, X_S, metric = 'euclidean')
214 |     b = calc_beta(X_S, d)
215 |     w = return_weights(X_S, b, d, mincols, threshold, n, max_iter)
216 |     w = w.reshape(1,-1)
217 |     return w
218 | 
219 | 
220 | class c_means():
221 |     """Fuzzy c-means class
222 | 
223 |     Parameters
224 |     ----------
225 |     c : int
226 |         number of clusters
227 |     m : float
228 |         fuzzification index
229 |     max_iter : int
230 |         maximum iterations
231 |     threshold : float
232 |         threshold of improvement
233 | 
234 |     Attributes
235 |     ----------
236 |     cluster_centers : locations of the cluster centers (set by fit)
237 |     fuzzy_partition : fuzzy partition matrix (set by fit)
238 |     f_p_coeff : fuzzy partition coefficient (set by fit)
239 |     loss : loss value per iteration (set by fit)
240 | 
241 |     """
242 |     def __init__(self, c = 3, m = 2, max_iter = 1000, threshold = .01):
243 |         self.c_ = c
244 |         self.max_iter = max_iter
245 |         self.threshold = threshold
246 |         self.m = m
247 | 
248 |     def fit(self, X, weights, log_argmax = False):
249 |         """Performs clustering using the given weights
250 | 
251 |         Parameters
252 |         ----------
253 |         X : array
254 |             data to be clustered
255 |         weights : array
256 |             feature weights for the distance metric
257 | 
258 |         Returns
259 |         -------
260 |         None
261 |             results are stored as attributes of the class
262 | 
263 |         """
264 |         self.weights = weights
265 |         c = self.c_
266 |         m = self.m
267 |         self.u_ = np.empty((X.shape[0], c))
268 |         d = X.shape[1]
269 |         n = X.shape[0]
270 |         V = np.random.random((c, d))
271 |         u_0 = np.random.random((n, c))
272 |         J = np.zeros(0)
273 |         num_iter = 0
274 |         u = u_0
275 |         max_iter = self.max_iter
276 |         threshold = self.threshold
277 |         while num_iter < max_iter - 1:
278 |             u2 = u.copy()
279 |             V, u, Jm, d = update(X, u2, m, weights, n, c)
280 |             J = np.hstack((J, Jm))
281 |             num_iter += 1
282 | 
283 |             if np.linalg.norm(u - u2) < threshold:
284 |                 break
285 | 
286 |         self.error_improvement = np.linalg.norm(u - u_0)
287 |         self.f_p_coeff = np.trace(u.dot(u.T)) / float(n)  # Dunn's fuzzy partition coefficient
288 |         self.cluster_centers = V
289 |         self.fuzzy_partition = u
290 |         self.loss = J
291 | 
292 |         if log_argmax == True:
293 |             self.cluster = np.argmax(self.fuzzy_partition, axis = 1)  # hard cluster labels from the fuzzy partition matrix
294 | 
295 | 
296 | 
297 | 
--------------------------------------------------------------------------------