├── .ipynb_checkpoints
│   ├── ATest!-checkpoint.ipynb
│   ├── CV_BMatrix-checkpoint.ipynb
│   ├── Ensemble Model-checkpoint.ipynb
│   ├── TFlearnVersion-checkpoint.ipynb
│   ├── Untitled-checkpoint.ipynb
│   ├── Untitled1-checkpoint.ipynb
│   ├── XGBoost-checkpoint.ipynb
│   └── testHaarcascades-checkpoint.ipynb
├── ATest!.ipynb
├── CV_BMatrix.ipynb
├── Ensemble Model.ipynb
├── MakeNewData.ipynb
├── Note-Part1.ipynb
├── Note-Part2 (Ensemble).ipynb
├── Preprocess - Should be ran after labeled csv file has been generated.ipynb
├── README.md
├── TFlearnVersion.ipynb
├── XGBoost.ipynb
├── randomLasso.py
├── run_model
│   ├── Q_run_AsthmaAcos_NoSmokeAge.py
│   ├── Q_run_AsthmaCOPD_NoSmokeAge.py
│   ├── Q_run_COPDAcos_NoSmokeAge.py
│   ├── rerun_AsthmaAcos.py
│   ├── rerun_AsthmaCOPD.py
│   └── rerun_COPDAcos.py
├── supporting_files
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── dfs2.py
│   ├── dfs2.pyc
│   ├── helpers.py
│   ├── helpers.pyc
│   ├── nncomponents.py
│   ├── nncomponents.pyc
│   ├── sda.py
│   └── sda.pyc
├── weights
│   ├── Q_indexes_xgboost_All_AsthmaCOPD.npy
│   ├── Q_weights_AsthmaCOPD.npy
│   ├── indexes_xgboost.npy
│   ├── indexes_xgboost_rerun.npy
│   ├── indexes_xgboost_rerun_All.npy
│   ├── indexes_xgboost_rerun_All_AsAc.npy
│   ├── indexes_xgboost_rerun_All_AsC.npy
│   ├── indexes_xgboost_rerun_All_CAc.npy
│   ├── weights-0-10-NEW-mean.npy
│   ├── weights-10-20-NEW-mean.npy
│   ├── weights-20-30-NEW-mean.npy
│   ├── weights-NEW-mean.npy
│   ├── weights_AsthmaAcos_rerun.npy
│   ├── weights_AsthmaCOPD_rerun.npy
│   └── weights_COPDAcos_rerun.npy
└── xgboost_result.mat
/.ipynb_checkpoints/CV_BMatrix-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Ensemble Model-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.preprocessing import normalize\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "weights_0_10 = np.load(\"weights-0-10-NEW-mean.npy\")\n", 15 | "weights_10_20 = np.load(\"weights-10-20-NEW-mean.npy\")\n", 16 | "weights_20_30 = np.load(\"weights-20-30-NEW-mean.npy\")\n", 17 | "indexes_xgboost = np.load(\"indexes_xgboost.npy\")\n", 18 | "\n", 19 | "weights = np.concatenate((weights_0_10, weights_10_20, weights_20_30))\n", 20 | "\n", 21 | "np.save(\"weights-NEW-mean\", weights)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from scipy import io as sio\n", 33 | "\n", 34 | "ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/B_mean_2labels.mat\")\n", 35 | "\n", 36 | "inputX = ourdata['X']\n", 37 | "inputX = normalize(inputX, axis=0)\n", 38 | "inputY = ourdata['Y'][0,:]\n", 39 | "columnNames = ourdata['columnNames']" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 12, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "weights = abs(weights)\n", 51 | "averagedWeight = normalize(weights).sum(axis=0)\n", 52 | "indexes_average_dfs = np.argsort(averagedWeight)[::-1]\n", 53 | "\n", 54 | "def unionDFSfeatures(n):\n", 55 | " indexes_union = []\n", 56 | " for i in xrange(30):\n", 57 | " indexes_union.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 58 |
" \n", 59 | " union = reduce(np.union1d, indexes_union).tolist()\n", 60 | " print(\"Number of union features:\", len(union))\n", 61 | " return inputX[:, union], union\n", 62 | "\n", 63 | "def intersectDFSfeatures(n):\n", 64 | " indexes_intersect = []\n", 65 | " for i in xrange(30):\n", 66 | " indexes_intersect.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 67 | " \n", 68 | " intersected = reduce(np.intersect1d, indexes_intersect).tolist()\n", 69 | " print(\"Number of intersected features:\",len(intersected))\n", 70 | " return inputX[:, intersected]\n", 71 | "\n", 72 | "def topXGBoostfeatures(a,b):\n", 73 | " return inputX[:, indexes_xgboost.tolist()[a:b]], indexes_xgboost.tolist()[a:b]\n", 74 | "\n", 75 | "def topAveDFSfeatures(a,b):\n", 76 | " return inputX[:, indexes_average_dfs.tolist()[a:b]], indexes_average_dfs.tolist()[a:b]\n", 77 | "\n", 78 | "def pickOneDFSfeatures(a,b,n):\n", 79 | " indexx = np.argsort(weights[n])[::-1]\n", 80 | " return inputX[:, indexx.tolist()[a:b]]\n", 81 | "\n", 82 | "def topDFSTemp(a,b):\n", 83 | " temp = np.argsort(abs(dfsMLP.selected_ws[0]))[::-1]\n", 84 | " return inputX[:, temp.tolist()[a:b]]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 36, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "('Number of intersected features:', 9)\n", 99 | "('Intersect:', 0.97940872141117929)\n", 100 | "('Number of union features:', 27)\n" 101 | ] 102 | }, 103 | { 104 | "ename": "ValueError", 105 | "evalue": "Found arrays with inconsistent numbers of samples: [ 2 10684]", 106 | "output_type": "error", 107 | "traceback": [ 108 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 109 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 110 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mkeke\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munionDFSfeatures\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msvm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeke\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Union:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 111 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)\u001b[0m\n\u001b[1;32m 1420\u001b[0m \u001b[0mArray\u001b[0m \u001b[0mof\u001b[0m \u001b[0mscores\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mestimator\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mrun\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcross\u001b[0m 
\u001b[0mvalidation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1421\u001b[0m \"\"\"\n\u001b[0;32m-> 1422\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1424\u001b[0m \u001b[0mcv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_cv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclassifier\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 112 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mindexable\u001b[0;34m(*iterables)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0mcheck_consistent_length\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 113 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mcheck_consistent_length\u001b[0;34m(*arrays)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m raise ValueError(\"Found arrays with inconsistent numbers of samples: \"\n\u001b[0;32m--> 176\u001b[0;31m \"%s\" % str(uniques))\n\u001b[0m\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 114 | "\u001b[0;31mValueError\u001b[0m: Found arrays with inconsistent numbers of samples: [ 2 10684]" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "from sklearn.svm import LinearSVC\n", 120 | "from sklearn.metrics import accuracy_score\n", 121 | "from sklearn.linear_model import LogisticRegression\n", 122 | "from sklearn.cross_validation import cross_val_score\n", 123 | "\n", 124 | "svm = LinearSVC()\n", 125 | "\n", 126 | "keke = intersectDFSfeatures(200)\n", 127 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 128 | "print(\"Intersect:\", np.mean(scores))\n", 129 | "\n", 130 | "keke = unionDFSfeatures(10)\n", 131 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 132 | "print(\"Union:\", np.mean(scores))\n", 133 | "\n", 134 | "keke = topAveDFSfeatures(1,27)\n", 135 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 136 | "print(\"Ave:\", np.mean(scores))\n", 137 | "\n", 138 | "keke = topXGBoostfeatures(0,27)\n", 139 | "scores = 
cross_val_score(svm, keke, inputY, cv=5)\n", 140 | "print(\"XGBoost:\", np.mean(scores))\n", 141 | "\n", 142 | "print(\"Pick one TOP DFS features from 30\")\n", 143 | "for i in xrange(0,30):\n", 144 | " keke = pickOneDFSfeatures(0,27,i)\n", 145 | " scores = cross_val_score(svm, keke, inputY, cv=5)\n", 146 | " print(np.mean(scores))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 65, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "" 160 | ], 161 | "text/plain": [ 162 | "" 163 | ] 164 | }, 165 | "execution_count": 65, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "import plotly.plotly as py\n", 172 | "import plotly.graph_objs as go\n", 173 | "import plotly\n", 174 | "\n", 175 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 176 | "\n", 177 | "data = [\n", 178 | " go.Heatmap(\n", 179 | " z=np.abs(weights)\n", 180 | " )\n", 181 | "]\n", 182 | "\n", 183 | "py.iplot(data, filename='30 Weights')" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 91, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "[u'Phe9_2534',\n", 197 | " u'Phe9_491',\n", 198 | " u'Phe9_4912',\n", 199 | " u'Phe9_492',\n", 200 | " u'Phe9_4928',\n", 201 | " u'Phe9_493',\n", 202 | " u'Phe9_4930',\n", 203 | " u'Phe9_4931',\n", 204 | " u'Phe9_4932',\n", 205 | " u'Phe9_4938',\n", 206 | " u'Phe9_4939',\n", 207 | " u'Phe9_494',\n", 208 | " u'Phe9_4940',\n", 209 | " u'Phe9_496',\n", 210 | " u'Phe9_5343',\n", 211 | " u'Phe9_V146',\n", 212 | " u'Phe10_E236',\n", 213 | " u'Phe10_I23',\n", 214 | " u'Phe10_J44',\n", 215 | " u'Phe10_J449',\n", 216 | " u'Phe10_J45',\n", 217 | " u'Phe10_J452',\n", 218 | " u'Phe10_J453',\n", 219 | " u'Phe10_J454',\n", 220 | " u'Phe10_J459',\n", 221 | " u'Phe10_N08',\n", 222 | " u'Phe10_S060']" 223 | ] 224 | }, 225 | "execution_count": 91, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "indexes_union = []\n", 232 | "for i in xrange(30):\n", 233 | " indexes_union.append(np.argsort(weights[i])[::-1][:10].tolist())\n", 234 | "union = reduce(np.union1d, indexes_union).tolist()\n", 235 | "\n", 236 | "\n", 237 | "[i[0] for i in columnNames.reshape(7205,)[union]]" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 78, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "def getSelectedFeatureNames(which, topN):\n", 249 | " if which == \"Ave\":\n", 250 | " indexes = indexes_average_dfs\n", 251 | " elif which == \"Union\":\n", 252 | " indexes = \"\"\n", 253 | " elif which == \"Intersect\"\n", 254 | " \n", 255 | " featureNames = [i[0] for i in columnNames.reshape(7205,)[indexes[:topN]]]\n", 256 | " \n", 257 | " return featureNames" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 99, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "('Number of union features:', 1)\n", 272 | "('Union:', 0.94487121332880197)\n", 273 | "('Number of union features:', 10)\n", 274 | "('Union:', 0.94346676284389586)\n", 275 | "('Number of union features:', 12)\n", 276 | "('Union:', 0.9591910107244368)\n", 277 | "('Number of union features:', 17)\n", 278 | "('Union:', 
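[Annotation] The `ValueError` stored above is an API mismatch, not a data problem: `intersectDFSfeatures` returns only a matrix, while `unionDFSfeatures`, `topAveDFSfeatures`, and `topXGBoostfeatures` each return a `(matrix, indices)` tuple. Passing the tuple straight to `cross_val_score` makes sklearn coerce it into a length-2 array, hence "inconsistent numbers of samples: [ 2 10684]". Unpacking fixes the failing calls (a sketch against the notebook's own helpers):

```python
keke, _ = unionDFSfeatures(10)          # keep only the feature matrix
scores = cross_val_score(svm, keke, inputY, cv=5)

keke, _ = topAveDFSfeatures(1, 27)      # these helpers also return tuples
keke, _ = topXGBoostfeatures(0, 27)
```

Separately, `getSelectedFeatureNames` in the cell above is syntactically incomplete: the `elif which == "Intersect"` line lacks a colon and a body, and the `"Union"` branch assigns an empty string, so the function cannot run as written. A hedged completion, assuming `indexes_average_dfs`, `indexes_xgboost`, and `columnNames` from the earlier cells; the union and intersect orderings are never defined in the notebook, so they stay explicit placeholders:

```python
def getSelectedFeatureNames(which, topN):
    # 'Union'/'Intersect' rankings are not defined anywhere in the notebook;
    # they must be supplied before use.
    rankings = {"Ave": indexes_average_dfs,
                "XGBoost": indexes_xgboost,
                "Union": None, "Intersect": None}
    indexes = rankings[which]
    if indexes is None:
        raise ValueError("no ranking defined for %r" % which)
    return [i[0] for i in columnNames.reshape(7205,)[indexes[:topN]]]
```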
0.97145272318043541)\n", 279 | "('Number of union features:', 21)\n", 280 | "('Union:', 0.97182712357021495)\n", 281 | "('Number of union features:', 23)\n", 282 | "('Union:', 0.99204408956348877)\n", 283 | "('Number of union features:', 26)\n", 284 | "('Union:', 0.99166968909173525)\n", 285 | "('Number of union features:', 26)\n", 286 | "('Union:', 0.99166968909173525)\n", 287 | "('Number of union features:', 26)\n", 288 | "('Union:', 0.99166968909173525)\n", 289 | "('Number of union features:', 27)\n", 290 | "('Union:', 0.99166960150243777)\n", 291 | "('Number of union features:', 28)\n", 292 | "('Union:', 0.99073384134732279)\n", 293 | "('Number of union features:', 31)\n", 294 | "('Union:', 0.98418163686619731)\n", 295 | "('Number of union features:', 31)\n", 296 | "('Union:', 0.98418163686619731)\n", 297 | "('Number of union features:', 31)\n", 298 | "('Union:', 0.98418163686619731)\n", 299 | "('Number of union features:', 34)\n", 300 | "('Union:', 0.98446240429717535)\n", 301 | "('Number of union features:', 36)\n", 302 | "('Union:', 0.98474339076287709)\n", 303 | "('Number of union features:', 40)\n", 304 | "('Union:', 0.98596013738369381)\n", 305 | "('Number of union features:', 45)\n", 306 | "('Union:', 0.98558578076806957)\n", 307 | "('Number of union features:', 51)\n", 308 | "('Union:', 0.98876816262448164)\n", 309 | "('Number of union features:', 55)\n", 310 | "('Union:', 0.98895538476792955)\n", 311 | "('Number of union features:', 61)\n", 312 | "('Union:', 0.98876816262448164)\n", 313 | "('Number of union features:', 67)\n", 314 | "('Union:', 0.98839398118745181)\n", 315 | "('Number of union features:', 73)\n", 316 | "('Union:', 0.9883001730500558)\n", 317 | "('Number of union features:', 77)\n", 318 | "('Union:', 0.98801936180393546)\n", 319 | "('Number of union features:', 86)\n", 320 | "('Union:', 0.98783209588633247)\n", 321 | "('Number of union features:', 89)\n", 322 | "('Union:', 0.98811295098858187)\n", 323 | "('Number of union features:', 102)\n", 324 | "('Union:', 0.98596048790483126)\n", 325 | "('Number of union features:', 108)\n", 326 | "('Union:', 0.98567980810413758)\n", 327 | "('Number of union features:', 120)\n", 328 | "('Union:', 0.98418233778551067)\n", 329 | "('Number of union features:', 129)\n", 330 | "('Union:', 0.98277810621236728)\n", 331 | "('Number of union features:', 139)\n", 332 | "('Union:', 0.98193567247400682)\n", 333 | "('Number of union features:', 151)\n", 334 | "('Union:', 0.98184238987239447)\n", 335 | "('Number of union features:', 162)\n", 336 | "('Union:', 0.98109350142156404)\n", 337 | "('Number of union features:', 173)\n", 338 | "('Union:', 0.98090641072354268)\n", 339 | "('Number of union features:', 182)\n", 340 | "('Union:', 0.9806255118471382)\n", 341 | "('Number of union features:', 192)\n", 342 | "('Union:', 0.97968944506800215)\n", 343 | "('Number of union features:', 202)\n", 344 | "('Union:', 0.97997030012926456)\n", 345 | "('Number of union features:', 214)\n", 346 | "('Union:', 0.97940863386286914)\n", 347 | "('Number of union features:', 224)\n", 348 | "('Union:', 0.9790342334321025)\n", 349 | "('Number of union features:', 234)\n", 350 | "('Union:', 0.97837897798106077)\n", 351 | "('Number of union features:', 242)\n", 352 | "('Union:', 0.97809829809839288)\n", 353 | "('Number of union features:', 255)\n", 354 | "('Union:', 0.97837889035077619)\n", 355 | "('Number of union features:', 265)\n", 356 | "('Union:', 0.97791085700219527)\n", 357 | "('Number of union features:', 269)\n", 358 | "('Union:', 
0.9780979915153587)\n", 359 | "('Number of union features:', 275)\n", 360 | "('Union:', 0.9780979915153587)\n", 361 | "('Number of union features:', 280)\n", 362 | "('Union:', 0.9776299143516356)\n", 363 | "('Number of union features:', 289)\n", 364 | "('Union:', 0.97781744303713047)\n", 365 | "('Number of union features:', 297)\n", 366 | "('Union:', 0.97706864221658452)\n", 367 | "('Number of union features:', 304)\n", 368 | "('Union:', 0.97716231894954109)\n", 369 | "('Number of union features:', 309)\n", 370 | "('Union:', 0.97678783088849008)\n", 371 | "('Number of union features:', 318)\n", 372 | "('Union:', 0.97660078396462391)\n", 373 | "('Number of union features:', 322)\n", 374 | "('Union:', 0.97669424174483088)\n", 375 | "('Number of union features:', 325)\n", 376 | "('Union:', 0.97688159525173113)\n", 377 | "('Number of union features:', 331)\n", 378 | "('Union:', 0.97697509680609307)\n", 379 | "('Number of union features:', 340)\n", 380 | "('Union:', 0.97631975376575397)\n", 381 | "('Number of union features:', 347)\n", 382 | "('Union:', 0.97660069641631364)\n", 383 | "('Number of union features:', 356)\n", 384 | "('Union:', 0.97678804988222689)\n", 385 | "('Number of union features:', 369)\n", 386 | "('Union:', 0.97678796229292963)\n", 387 | "('Number of union features:', 375)\n", 388 | "('Union:', 0.97632001653364586)\n", 389 | "('Number of union features:', 386)\n", 390 | "('Union:', 0.97697522825151961)\n", 391 | "('Number of union features:', 393)\n", 392 | "('Union:', 0.97688150766243387)\n", 393 | "('Number of union features:', 400)\n", 394 | "('Union:', 0.97716231890855398)\n", 395 | "('Number of union features:', 407)\n", 396 | "('Union:', 0.97706877353904975)\n", 397 | "('Number of union features:', 410)\n", 398 | "('Union:', 0.97706872976489456)\n", 399 | "('Number of union features:', 419)\n", 400 | "('Union:', 0.97678796229292963)\n", 401 | "('Number of union features:', 422)\n", 402 | "('Union:', 0.97697509680609307)\n", 403 | "('Number of union features:', 433)\n", 404 | "('Union:', 0.9766944169644125)\n", 405 | "('Number of union features:', 442)\n", 406 | "('Union:', 0.976787787114335)\n", 407 | "('Number of union features:', 446)\n", 408 | "('Union:', 0.97660060882701638)\n", 409 | "('Number of union features:', 451)\n", 410 | "('Union:', 0.97669415419652061)\n", 411 | "('Number of union features:', 457)\n", 412 | "('Union:', 0.97669411042236542)\n", 413 | "('Number of union features:', 460)\n", 414 | "('Union:', 0.97669406660722335)\n", 415 | "('Number of union features:', 467)\n", 416 | "('Union:', 0.97678761201771458)\n", 417 | "('Number of union features:', 475)\n", 418 | "('Union:', 0.9765069320940597)\n", 419 | "('Number of union features:', 482)\n", 420 | "('Union:', 0.97669402287405516)\n", 421 | "('Number of union features:', 487)\n", 422 | "('Union:', 0.97660052127870611)\n", 423 | "('Number of union features:', 496)\n", 424 | "('Union:', 0.97622607711477138)\n", 425 | "('Number of union features:', 506)\n", 426 | "('Union:', 0.97613248793012508)\n", 427 | "('Number of union features:', 517)\n", 428 | "('Union:', 0.9762260770737845)\n", 429 | "('Number of union features:', 530)\n", 430 | "('Union:', 0.97575804372520347)\n", 431 | "('Number of union features:', 539)\n", 432 | "('Union:', 0.97538381851401856)\n", 433 | "('Number of union features:', 550)\n", 434 | "('Union:', 0.97547740761669088)\n", 435 | "('Number of union features:', 559)\n", 436 | "('Union:', 0.97547745143183295)\n", 437 | "('Number of union features:', 572)\n", 438 | "('Union:', 
0.97547740761669088)\n", 439 | "('Number of union features:', 581)\n", 440 | "('Union:', 0.97538377465788939)\n", 441 | "('Number of union features:', 595)\n", 442 | "('Union:', 0.97547740765767776)\n", 443 | "('Number of union features:', 605)\n", 444 | "('Union:', 0.97529014174007478)\n", 445 | "('Number of union features:', 620)\n", 446 | "('Union:', 0.97538377465788939)\n", 447 | "('Number of union features:', 630)\n", 448 | "('Union:', 0.97491591648790299)\n", 449 | "('Number of union features:', 638)\n", 450 | "('Union:', 0.97519681536430747)\n", 451 | "('Number of union features:', 647)\n", 452 | "('Union:', 0.97529031691866952)\n", 453 | "('Number of union features:', 656)\n", 454 | "('Union:', 0.97538390606232883)\n", 455 | "('Number of union features:', 666)\n", 456 | "('Union:', 0.97510318240550597)\n", 457 | "('Number of union features:', 677)\n", 458 | "('Union:', 0.97482241497452793)\n", 459 | "('Number of union features:', 692)\n", 460 | "('Union:', 0.97510300726789834)\n", 461 | "('Number of union features:', 705)\n", 462 | "('Union:', 0.97482232738523056)\n", 463 | "('Number of union features:', 713)\n", 464 | "('Union:', 0.97472873824157136)\n", 465 | "('Number of union features:', 724)\n", 466 | "('Union:', 0.97444788318030895)\n", 467 | "('Number of union features:', 738)\n", 468 | "('Union:', 0.97444792699545124)\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "scores_union = []\n", 474 | "for i in xrange(1,100):\n", 475 | " keke = unionDFSfeatures(i)\n", 476 | " scores_union.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))\n", 477 | " print(\"Union:\", scores_union[-1])" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 29, 483 | "metadata": { 484 | "collapsed": false 485 | }, 486 | "outputs": [ 487 | { 488 | "name": "stdout", 489 | "output_type": "stream", 490 | "text": [ 491 | "('Number of union features:', 23)\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "_, union = unionDFSfeatures(6)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 59, 502 | "metadata": { 503 | "collapsed": false 504 | }, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "1\n", 511 | "2\n" 512 | ] 513 | }, 514 | { 515 | "ename": "IndexError", 516 | "evalue": "list index out of range", 517 | "output_type": "error", 518 | "traceback": [ 519 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 520 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", 521 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtopXGBoostfeatures\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mtemp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintersect1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mave\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mxg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m 
\u001b[0mnum\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mnum\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mkeke\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minputX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 522 | "\u001b[0;31mIndexError\u001b[0m: list index out of range" 523 | ] 524 | } 525 | ], 526 | "source": [ 527 | "ave, xg = 0, 0\n", 528 | "num = []\n", 529 | "acc = []\n", 530 | "for i in xrange(1,100):\n", 531 | " print i\n", 532 | " _, ave = topAveDFSfeatures(0,i)\n", 533 | " _, xg = topXGBoostfeatures(0,i)\n", 534 | " temp = np.intersect1d(ave,xg)\n", 535 | " if temp.shape[0] > 0:\n", 536 | " num.append(temp.shape[0])\n", 537 | " keke = inputX[:,temp.tolist()]\n", 538 | " acc.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 58, 544 | "metadata": { 545 | "collapsed": false 546 | }, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "[0.94992524665396139,\n", 552 | " 0.94992524665396139,\n", 553 | " 0.94992524665396139,\n", 554 | " 0.94992524665396139,\n", 555 | " 0.94992524665396139,\n", 556 | " 0.94393448928154644,\n", 557 | " 0.95994038105985702,\n", 558 | " 0.95816201190581274,\n", 559 | " 0.95984692323866305,\n", 560 | " 0.95984692323866305,\n", 561 | " 0.96012760312133083,\n", 562 | " 0.96555599322606267,\n", 563 | " 0.96555599322606267,\n", 564 | " 0.96555599322606267,\n", 565 | " 0.96555599322606267,\n", 566 | " 0.96555599322606267,\n", 567 | " 0.96555599322606267,\n", 568 | " 0.96555599322606267,\n", 569 | " 0.96555599322606267,\n", 570 | " 0.96555599322606267,\n", 571 | " 0.98109310729021981,\n", 572 | " 0.98109310729021981,\n", 573 | " 0.98109310729021981,\n", 574 | " 0.98109310729021981,\n", 575 | " 0.98109310729021981,\n", 576 | " 0.98109310729021981,\n", 577 | " 0.98118660884458198,\n", 578 | " 0.9806250301674837,\n", 579 | " 0.98062511775678107,\n", 580 | " 0.97940823973152469,\n", 581 | " 0.97940823973152469,\n", 582 | " 0.97940823973152469,\n", 583 | " 0.97940823973152469,\n", 584 | " 0.97940823973152469,\n", 585 | " 0.97940823973152469,\n", 586 | " 0.97978303425264845,\n", 587 | " 0.97978303425264845,\n", 588 | " 0.97978303425264845,\n", 589 | " 0.97978303425264845,\n", 590 | " 0.97978303425264845,\n", 591 | " 0.97978307810877774,\n", 592 | " 0.97978307810877774,\n", 593 | " 0.97978307810877774,\n", 594 | " 0.97950217923237326,\n", 595 | " 0.97950217923237326,\n", 596 | " 0.97950217923237326,\n", 597 | " 0.97950217923237326,\n", 598 | " 0.97950217923237326,\n", 599 | " 0.97950217923237326,\n", 600 | " 0.97950217923237326,\n", 601 | " 0.97950217923237326,\n", 602 | " 0.97950217923237326,\n", 603 | " 0.97950217923237326,\n", 604 | " 0.97950217923237326,\n", 605 | " 0.97950217923237326,\n", 606 | " 0.97950217923237326,\n", 
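[Annotation] The `IndexError` above is a guard-ordering bug: on the first iteration where the intersection is non-empty, `num` is still empty, so `num[-1]` is evaluated before anything has been appended. Checking for emptiness first (short-circuit `and`) fixes the loop body:

```python
if temp.shape[0] > 0 and (not num or num[-1] != temp.shape[0]):
    num.append(temp.shape[0])
    keke = inputX[:, temp.tolist()]
    acc.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))
```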
607 | " 0.97950217923237326,\n", 608 | " 0.97950217923237326,\n", 609 | " 0.97950217923237326,\n", 610 | " 0.97950217923237326,\n", 611 | " 0.97950217923237326,\n", 612 | " 0.97950217923237326,\n", 613 | " 0.97950217923237326,\n", 614 | " 0.97950217923237326,\n", 615 | " 0.97950217923237326,\n", 616 | " 0.97950217923237326,\n", 617 | " 0.97950217923237326,\n", 618 | " 0.97950217923237326,\n", 619 | " 0.97950217923237326,\n", 620 | " 0.97622585791609962,\n", 621 | " 0.97622585791609962,\n", 622 | " 0.97622585791609962,\n", 623 | " 0.97622585791609962,\n", 624 | " 0.97622585791609962,\n", 625 | " 0.97622585791609962,\n", 626 | " 0.97622585791609962,\n", 627 | " 0.97622585791609962,\n", 628 | " 0.97622585791609962,\n", 629 | " 0.97622585791609962,\n", 630 | " 0.97622585791609962,\n", 631 | " 0.97622585791609962,\n", 632 | " 0.97622585791609962,\n", 633 | " 0.97622585791609962,\n", 634 | " 0.97622585791609962,\n", 635 | " 0.97622585791609962,\n", 636 | " 0.97622585791609962,\n", 637 | " 0.97622585791609962,\n", 638 | " 0.97622585791609962,\n", 639 | " 0.97622585791609962,\n", 640 | " 0.97622585791609962,\n", 641 | " 0.97734914679769658,\n", 642 | " 0.97734914679769658,\n", 643 | " 0.97734914679769658,\n", 644 | " 0.97734914679769658,\n", 645 | " 0.97734914679769658,\n", 646 | " 0.97734914679769658,\n", 647 | " 0.97734914679769658,\n", 648 | " 0.97734914679769658]" 649 | ] 650 | }, 651 | "execution_count": 58, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "acc" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 53, 663 | "metadata": { 664 | "collapsed": false 665 | }, 666 | "outputs": [ 667 | { 668 | "data": { 669 | "text/plain": [ 670 | "0.98109310729021981" 671 | ] 672 | }, 673 | "execution_count": 53, 674 | "metadata": {}, 675 | "output_type": "execute_result" 676 | } 677 | ], 678 | "source": [] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": 54, 683 | "metadata": { 684 | "collapsed": false 685 | }, 686 | "outputs": [ 687 | { 688 | "data": { 689 | "text/plain": [ 690 | "(9,)" 691 | ] 692 | }, 693 | "execution_count": 54, 694 | "metadata": {}, 695 | "output_type": "execute_result" 696 | } 697 | ], 698 | "source": [ 699 | "temp.shape" 700 | ] 701 | } 702 | ], 703 | "metadata": { 704 | "kernelspec": { 705 | "display_name": "Python 2", 706 | "language": "python", 707 | "name": "python2" 708 | }, 709 | "language_info": { 710 | "codemirror_mode": { 711 | "name": "ipython", 712 | "version": 2 713 | }, 714 | "file_extension": ".py", 715 | "mimetype": "text/x-python", 716 | "name": "python", 717 | "nbconvert_exporter": "python", 718 | "pygments_lexer": "ipython2", 719 | "version": "2.7.12" 720 | } 721 | }, 722 | "nbformat": 4, 723 | "nbformat_minor": 0 724 | } 725 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/TFlearnVersion-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "import numpy as np\n", 16 | "from sklearn.datasets import make_classification\n", 17 | "from sklearn.preprocessing import normalize\n", 18 | "import tflearn\n", 19 | 
"import tensorflow as tf\n", 20 | "from nncomponents import One2OneInputLayer\n", 21 | "\n", 22 | "ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 23 | "# ourdataB = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 24 | "\n", 25 | "inputX = ourdataB['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdataB['Y'][0,:]\n", 28 | "columnNames = ourdataB['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)\n", 31 | "Y_train, Y_test = tflearn.data_utils.to_categorical(y_train, 2), tflearn.data_utils.to_categorical(y_test, 2)\n", 32 | "\n", 33 | "indexes = sio.loadmat(\"xgboost_result\")['importance_rank']\n", 34 | "\n", 35 | "X_train500, X_test500 = X_train[:, indexes.tolist()[0][:500]], X_test[:, indexes.tolist()[0][:500]]\n", 36 | "X_train100, X_test100 = X_train[:, indexes.tolist()[0][:100]], X_test[:, indexes.tolist()[0][:100]]\n", 37 | "X_train10, X_test10 = X_train[:, indexes.tolist()[0][:10]], X_test[:, indexes.tolist()[0][:10]]\n", 38 | "X_train50, X_test50 = X_train[:, indexes.tolist()[0][:50]], X_test[:, indexes.tolist()[0][:50]]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 54, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "def dfs(lambda1, n_epoch, size=None):\n", 50 | " with tf.Graph().as_default():\n", 51 | " sess = tf.Session()\n", 52 | " \n", 53 | " if size is None:\n", 54 | " size = 7203\n", 55 | "\n", 56 | " input_data = tflearn.input_data(shape=[None, size])\n", 57 | " input_layer = One2OneInputLayer(input_data)\n", 58 | "\n", 59 | " tflearn.helpers.regularizer.add_weights_regularizer(input_layer.w, loss='L1', \\\n", 60 | " weight_decay=lambda1, add_to_collection=None)\n", 61 | "\n", 62 | " dense = tflearn.fully_connected(input_layer.output, 50, activation='tanh')\n", 63 | " \n", 64 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax')\n", 65 | " \n", 66 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 67 | " model = tflearn.DNN(net)\n", 68 | "\n", 69 | " sess.run(tf.initialize_all_variables())\n", 70 | " \n", 71 | "# variables = tflearn.variables.get_all_trainable_variable()\n", 72 | "# for i in xrange(1,4):\n", 73 | "# sess.run(variables[i].assign(initial_values[i]))\n", 74 | " if size == 500:\n", 75 | " X_train, X_test = X_train500, X_test500\n", 76 | " elif size == 100:\n", 77 | " X_train, X_test = X_train100, X_test100\n", 78 | " elif size == 50:\n", 79 | " X_train, X_test = X_train50, X_test50\n", 80 | " elif size == 10:\n", 81 | " X_train, X_test = X_train10, X_test10\n", 82 | " \n", 83 | " model.fit(X_train, Y_train, n_epoch=n_epoch, show_metric=True, validation_set=(X_test, Y_test), batch_size=50)\n", 84 | "\n", 85 | " selected_w = sess.run(input_layer.w)\n", 86 | "\n", 87 | " return selected_w" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 57, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "Training Step: 3419 | total loss: \u001b[1m\u001b[32m0.10726\u001b[0m\u001b[0m\n", 102 | "\u001b[2K\r", 103 | "| Adam | epoch: 019 | loss: 0.10726 - acc: 0.9731 -- iter: 8500/8547\n" 104 | ] 105 | }, 106 | { 107 | "ename": "KeyboardInterrupt", 108 | "evalue": "", 109 | "output_type": "error", 110 | "traceback": [ 111 | 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 112 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 113 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;34m/=\u001b[0m \u001b[0;36m10000.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mweights\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m500\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 114 | "\u001b[0;32m\u001b[0m in \u001b[0;36mdfs\u001b[0;34m(lambda1, n_epoch, size)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_train10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshow_metric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_set\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mselected_w\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_layer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 115 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/models/dnn.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X_inputs, Y_targets, n_epoch, validation_set, show_metric, batch_size, shuffle, snapshot_epoch, snapshot_step, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0mdaug_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdaug_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0mexcl_trainops\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexcl_trainops\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m run_id=run_id)\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 116 | 
"\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0msnapshot_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0msnapshot_step\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m show_metric)\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0mglobal_loss\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macc_value\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mglobal_acc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 117 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, training_step, snapshot_epoch, snapshot_step, show_metric)\u001b[0m\n\u001b[1;32m 722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 724\u001b[0;31m \u001b[0me\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluate_flow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_dflow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 725\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 118 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mevaluate_flow\u001b[0;34m(session, ops_to_evaluate, dataflow)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 847\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m 
\u001b[0mcurrent_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mfeed_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mr\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_samples\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 119 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/data_flow.pyc\u001b[0m in \u001b[0;36mnext\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 126\u001b[0m \"\"\"\n\u001b[1;32m 127\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata_status\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed_dict_queue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreset_status\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 120 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/Queue.pyc\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_empty\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 121 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/threading.pyc\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 340\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 341\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__debug__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_note\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"%s.wait(): got it\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 122 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "weights = []\n", 128 | "for lambda1 in xrange(0, 50, 5):\n", 129 | " lambda1 /= 10000.\n", 130 | " weights.append(dfs(0, 20, 500))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 34, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [ 140 | { 141 | "ename": "NameError", 142 | "evalue": "name 'model' is not defined", 143 | "output_type": "error", 144 | "traceback": [ 145 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 146 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 147 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 148 | "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "type(model)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 37, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "def get_inits():\n", 165 | " with tf.Graph().as_default():\n", 166 | " sess = tf.Session()\n", 167 | "\n", 168 | " input_data = tflearn.input_data(shape=[None, 7203])\n", 169 | " input_layer = One2OneInputLayer(input_data)\n", 170 | "\n", 171 | " dense = tflearn.fully_connected(input_layer.output, 500, activation='tanh', name='dense')\n", 172 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax', name='sofmax')\n", 173 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 174 | " model = tflearn.DNN(net)\n", 175 | " \n", 176 | " print(type(model))\n", 177 | "\n", 178 | " sess.run(tf.initialize_all_variables())\n", 179 | "\n", 180 | "# model.fit(X_train, Y_train, n_epoch=10, show_metric=True, validation_set=(X_test, Y_test))\n", 181 | "\n", 182 | " variables = tflearn.variables.get_all_trainable_variable()\n", 183 | " \n", 184 | " values = []\n", 185 | " for i in xrange(4):\n", 186 | " values.append(sess.run(variables[i]))\n", 187 | "\n", 188 | " return values" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 38, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "initial_values = get_inits()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "initial_values[0]" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 
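[Annotation] The `KeyboardInterrupt` above is just a manual stop mid-training, but the sweep cell has a real bug: `lambda1` is computed (0.0, 0.0005, ..., 0.0045) and then never used, so `dfs(0, 20, 500)` trains all ten models with no L1 penalty at all. A corrected version of the loop:

```python
weights = []
for step in xrange(0, 50, 5):            # Python 2: xrange
    lambda1 = step / 10000.              # 0.0, 0.0005, ..., 0.0045
    weights.append(dfs(lambda1, 20, 500))  # actually pass the penalty
```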
225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "tflearn.input_data(tf.Variable(initial_values[0]))" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "dense_vars = tflearn.variables.get_all_variables()\n", 241 | "print(\"Dense1 layer weights:\")\n", 242 | "print(model.get_weights(dense_vars[0]))\n", 243 | "# Or using generic tflearn function:\n", 244 | "print(\"Dense1 layer biases:\")\n", 245 | "with model.session.as_default():\n", 246 | " print(tflearn.variables.get_value(dense_vars[1]))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "dense_vars" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 32, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~tonyabracadabra/0 or inside your plot.ly account where it is named 'basic-heatmap'\n" 272 | ] 273 | }, 274 | { 275 | "data": { 276 | "text/html": [ 277 | "" 278 | ], 279 | "text/plain": [ 280 | "" 281 | ] 282 | }, 283 | "execution_count": 32, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "import plotly.plotly as py\n", 290 | "import plotly.graph_objs as go\n", 291 | "import plotly\n", 292 | "\n", 293 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 294 | "\n", 295 | "data = [\n", 296 | " go.Heatmap(\n", 297 | " z=np.abs(weights)\n", 298 | " )\n", 299 | "]\n", 300 | "\n", 301 | "py.iplot(data, filename='basic-heatmap')" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "type(inputlayer)" 313 | ] 314 | } 315 | ], 316 | "metadata": { 317 | "kernelspec": { 318 | "display_name": "Python 2", 319 | "language": "python", 320 | "name": "python2" 321 | }, 322 | "language_info": { 323 | "codemirror_mode": { 324 | "name": "ipython", 325 | "version": 2 326 | }, 327 | "file_extension": ".py", 328 | "mimetype": "text/x-python", 329 | "name": "python", 330 | "nbconvert_exporter": "python", 331 | "pygments_lexer": "ipython2", 332 | "version": "2.7.11" 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 0 337 | } 338 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import os\n", 13 | "\n", 14 | "os.chdir(\"/Users/xupeng.tong/Documents/Data/OriginalData\")\n", 15 | "\n", 16 | "df = pd.read_csv(\"QMatrix_label.csv\")" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 4, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "df.drop(['Unnamed: 0'], axis = 1 , inplace= True,errors= 'ignore')\n", 28 | "labels = df[\"PatientID\"]" 29 | ] 30 | }, 31 | { 32 | "cell_type": 
"code", 33 | "execution_count": 7, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import numpy as np\n", 40 | "\n", 41 | "mapping = {j:i for i,j in enumerate(np.unique(labels))}\n", 42 | "numericLabels = np.array([mapping[i] for i in labels])" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 13, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "6096" 56 | ] 57 | }, 58 | "execution_count": 13, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "len(numericLabels[numericLabels==1])" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 25, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "from sklearn.preprocessing import Imputer\n", 76 | "\n", 77 | "imp = Imputer(missing_values='NaN', strategy='median', axis=0)\n", 78 | "imp.fit(dataToBeImputed)\n", 79 | "imputedData = imp.transform(dataToBeImputed)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 17, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "subgroups = df[\"Smoking_Sub_Group\"].values\n", 91 | "\n", 92 | "mappingSubgroup = {j:i for i,j in enumerate(np.unique(subgroups))}\n", 93 | "numericSubgroups = np.array([mappingSubgroup[i] for i in subgroups])" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 19, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "df[\"Smoking_Sub_Group\"] = numericSubgroups" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 23, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "dataToBeImputed = df.values" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 27, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "import scipy.io as sio\n", 127 | "\n", 128 | "sio.savemat(\"ourdataQ_3labels_unstandardized\", {'X':imputedData,'Y':numericLabels, 'columnNames':df.columns.values})" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 28, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "from sklearn.preprocessing import scale\n", 140 | "\n", 141 | "imputedData = scale(imputedData)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 30, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "sio.savemat(\"ourdataQ_3labels_standardized\", {'X':imputedData,'Y':numericLabels, 'columnNames':df.columns.values})" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 41, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "index1 = np.argwhere(numericLabels==1).reshape(6096,).tolist()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 43, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "index2 = np.argwhere(numericLabels==2).reshape(4593,).tolist()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 47, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "array([1, 1, 1, ..., 2, 2, 2])" 188 | ] 189 | }, 190 | 
"execution_count": 47, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "numericLabels[index1+index2]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 57, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "no0 = [i for i,j in enumerate(numericLabels) if j != 0]" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 58, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "sio.savemat(\"Q_2labels_standardized\", {'X':imputedData[no0],'Y':numericLabels[no0], 'columnNames':df.columns.values})" 219 | ] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 2", 225 | "language": "python", 226 | "name": "python2" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 2 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython2", 238 | "version": "2.7.12" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 0 243 | } 244 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled1-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/XGBoost-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "from dfs2 import DeepFeatureSelectionNew\n", 16 | "import numpy as np\n", 17 | "from sklearn.datasets import make_classification\n", 18 | "from sklearn.preprocessing import normalize\n", 19 | "\n", 20 | "# ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 21 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 22 | "ourdata = sio.loadmat(\"./B_mean_2labels.mat\")\n", 23 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/Q_2labels_unstandardized.mat\")\n", 24 | "\n", 25 | "inputX = ourdata['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdata['Y'][0,:]\n", 28 | "columnNames = ourdata['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "from sklearn.ensemble import RandomForestClassifier\n", 42 | "from sklearn.metrics import accuracy_score\n", 43 | "import xgboost as xgb\n", 44 | "\n", 45 | "# rf = RandomForestClassifier(criterion=\"entropy\", n_estimators = 300, max_depth = 100)\n", 46 | "# rf.fit(X_train, y_train)\n", 47 | "\n", 48 | "# y_pred = rf.predict(X_test)\n", 49 | "\n", 50 | "gbm = xgb.XGBClassifier(max_depth=3, 
n_estimators=400, learning_rate=0.05).fit(X_train, y_train)\n", 51 | "y_pred = gbm.predict(X_test)\n", 52 | "\n", 53 | "# featurescores = gbm.feature_importances_\n", 54 | "\n", 55 | "print(accuracy_score(y_test, y_pred))\n", 56 | "\n", 57 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 58 | "\n", 59 | "np.save(\"indexes_xgboost\",indexes_xgboost)" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 2", 66 | "language": "python", 67 | "name": "python2" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 2 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython2", 79 | "version": "2.7.12" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 0 84 | } 85 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/testHaarcascades-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import cv2\n", 13 | "\n", 14 | "HAAR_PATH = \"/Users/xupeng.tong/opencv/data/haarcascades/\"  # trailing slash so the cascade file names below resolve\n", 15 | "IMG_PATH = \"/Users/xupeng.tong/State-Farm-Distracted-Driver-Detection/Images/imgs/train/\"\n", 16 | "\n", 17 | "upperbody_cascade = cv2.CascadeClassifier(HAAR_PATH+'haarcascade_upperbody.xml')\n", 18 | "eye_cascade = cv2.CascadeClassifier(HAAR_PATH+'haarcascade_eye.xml')  # used by the detection cell below\n", 19 | "img = cv2.imread(IMG_PATH+'c0/img_100050.jpg')\n", 20 | "gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 5, 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "'/Users/xupeng.tong/State-Farm-Distracted-Driver-Detection/model'" 34 | ] 35 | }, 36 | "execution_count": 5, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "faces = upperbody_cascade.detectMultiScale(gray, 1.3, 5)\n", 43 | "\n", 44 | "for (x,y,w,h) in faces:\n", 45 | " cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)\n", 46 | " roi_gray = gray[y:y+h, x:x+w]\n", 47 | " roi_color = img[y:y+h, x:x+w]\n", 48 | " eyes = eye_cascade.detectMultiScale(roi_gray)\n", 49 | " \n", 50 | " for (ex,ey,ew,eh) in eyes:\n", 51 | " cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)\n", 52 | " \n", 53 | " cv2.imshow('testing',img)\n", 54 | " cv2.waitKey(0)\n", 55 | " cv2.destroyAllWindows()\n", 56 | " \n", 57 | "import os\n", 58 | "\n", 59 | "os.getcwd()" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 2", 66 | "language": "python", 67 | "name": "python2" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 2 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython2", 79 | "version": "2.7.12" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 0 84 | } 85 | -------------------------------------------------------------------------------- /CV_BMatrix.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 
| "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "from dfs2 import DeepFeatureSelectionNew\n", 16 | "import numpy as np\n", 17 | "from sklearn.datasets import make_classification\n", 18 | "from sklearn.preprocessing import normalize\n", 19 | "from __future__ import print_function\n", 20 | "\n", 21 | "# ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 22 | "ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 23 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/Q_2labels_standardized.mat\")\n", 24 | "\n", 25 | "inputX = ourdata['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdata['Y'][0,:]\n", 28 | "columnNames = ourdata['columnNames']" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 4, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "epoch 0: global loss = 0.733293890953\n", 43 | "('Train accuracy:', 0.42763543)\n", 44 | "('Test accuracy:', 0.43799719)\n", 45 | "epoch 10: global loss = 0.476914912462\n", 46 | "('Train accuracy:', 0.83935887)\n", 47 | "('Test accuracy:', 0.84277022)\n", 48 | "epoch 20: global loss = 0.129701793194\n", 49 | "('Train accuracy:', 0.96197498)\n", 50 | "('Test accuracy:', 0.95273751)\n", 51 | "epoch 30: global loss = 0.052871208638\n", 52 | "('Train accuracy:', 0.98350298)\n", 53 | "('Test accuracy:', 0.97707069)\n", 54 | "epoch 40: global loss = 0.0323026739061\n", 55 | "('Train accuracy:', 0.98993796)\n", 56 | "('Test accuracy:', 0.98596162)\n", 57 | "('Final train accuracy:', 0.99216098)\n", 58 | "('Final test accuracy:', 0.99064106)\n", 59 | "Train finised for random state:0\n", 60 | "epoch 0: global loss = 0.751039803028\n", 61 | "('Train accuracy:', 0.42939043)\n", 62 | "('Test accuracy:', 0.430978)\n", 63 | "epoch 10: global loss = 0.507127285004\n", 64 | "('Train accuracy:', 0.85257983)\n", 65 | "('Test accuracy:', 0.86008424)\n", 66 | "epoch 20: global loss = 0.141821071506\n", 67 | "('Train accuracy:', 0.95401895)\n", 68 | "('Test accuracy:', 0.95320541)\n", 69 | "epoch 30: global loss = 0.0590038299561\n", 70 | "('Train accuracy:', 0.98256701)\n", 71 | "('Test accuracy:', 0.98408985)\n", 72 | "epoch 40: global loss = 0.0350771062076\n", 73 | "('Train accuracy:', 0.98993796)\n", 74 | "('Test accuracy:', 0.98642957)\n", 75 | "('Final train accuracy:', 0.992863)\n", 76 | "('Final test accuracy:', 0.99110901)\n", 77 | "Train finised for random state:1\n", 78 | "epoch 0: global loss = 0.759872674942\n", 79 | "('Train accuracy:', 0.42857143)\n", 80 | "('Test accuracy:', 0.43425363)\n", 81 | "epoch 10: global loss = 0.506673395634\n", 82 | "('Train accuracy:', 0.85059083)\n", 83 | "('Test accuracy:', 0.84604585)\n", 84 | "epoch 20: global loss = 0.154908597469\n", 85 | "('Train accuracy:', 0.94793493)\n", 86 | "('Test accuracy:', 0.94478238)\n", 87 | "epoch 30: global loss = 0.0784661099315\n", 88 | "('Train accuracy:', 0.97542995)\n", 89 | "('Test accuracy:', 0.97145534)\n", 90 | "epoch 40: global loss = 0.0517223998904\n", 91 | "('Train accuracy:', 0.98432201)\n", 92 | "('Test accuracy:', 0.98455781)\n", 93 | "('Final train accuracy:', 0.98982102)\n", 94 | "('Final test accuracy:', 
0.98736548)\n", 95 | "Train finised for random state:2\n", 96 | "epoch 0: global loss = 0.748433232307\n", 97 | "('Train accuracy:', 0.43009242)\n", 98 | "('Test accuracy:', 0.42817032)\n", 99 | "epoch 10: global loss = 0.509174644947\n", 100 | "('Train accuracy:', 0.83315784)\n", 101 | "('Test accuracy:', 0.83575106)\n", 102 | "epoch 20: global loss = 0.141662657261\n", 103 | "('Train accuracy:', 0.95495498)\n", 104 | "('Test accuracy:', 0.96443611)\n", 105 | "epoch 30: global loss = 0.0590850003064\n", 106 | "('Train accuracy:', 0.98198199)\n", 107 | "('Test accuracy:', 0.98408985)\n", 108 | "epoch 40: global loss = 0.0356163904071\n", 109 | "('Train accuracy:', 0.98888499)\n", 110 | "('Test accuracy:', 0.9897052)\n", 111 | "('Final train accuracy:', 0.99321401)\n", 112 | "('Final test accuracy:', 0.99204493)\n", 113 | "Train finised for random state:3\n", 114 | "epoch 0: global loss = 0.733057200909\n", 115 | "('Train accuracy:', 0.43360242)\n", 116 | "('Test accuracy:', 0.41413197)\n", 117 | "epoch 10: global loss = 0.49149876833\n", 118 | "('Train accuracy:', 0.83163685)\n", 119 | "('Test accuracy:', 0.83153951)\n", 120 | "epoch 20: global loss = 0.134568467736\n", 121 | "('Train accuracy:', 0.95858198)\n", 122 | "('Test accuracy:', 0.94431448)\n", 123 | "epoch 30: global loss = 0.0583451613784\n", 124 | "('Train accuracy:', 0.98291796)\n", 125 | "('Test accuracy:', 0.97707069)\n", 126 | "epoch 40: global loss = 0.0373480655253\n", 127 | "('Train accuracy:', 0.98853397)\n", 128 | "('Test accuracy:', 0.98502576)\n", 129 | "('Final train accuracy:', 0.99145901)\n", 130 | "('Final test accuracy:', 0.99064106)\n", 131 | "Train finised for random state:4\n", 132 | "epoch 0: global loss = 0.736807286739\n", 133 | "('Train accuracy:', 0.43056044)\n", 134 | "('Test accuracy:', 0.42629856)\n", 135 | "epoch 10: global loss = 0.50833773613\n", 136 | "('Train accuracy:', 0.82917982)\n", 137 | "('Test accuracy:', 0.83902669)\n", 138 | "epoch 20: global loss = 0.138045296073\n", 139 | "('Train accuracy:', 0.95612496)\n", 140 | "('Test accuracy:', 0.95975667)\n", 141 | "epoch 30: global loss = 0.0586299747229\n", 142 | "('Train accuracy:', 0.982099)\n", 143 | "('Test accuracy:', 0.98315394)\n", 144 | "epoch 40: global loss = 0.0354525335133\n", 145 | "('Train accuracy:', 0.989353)\n", 146 | "('Test accuracy:', 0.98689753)\n", 147 | "('Final train accuracy:', 0.992863)\n", 148 | "('Final test accuracy:', 0.99017316)\n", 149 | "Train finised for random state:5\n", 150 | "epoch 0: global loss = 0.75368309021\n", 151 | "('Train accuracy:', 0.43009242)\n", 152 | "('Test accuracy:', 0.42817032)\n", 153 | "epoch 10: global loss = 0.509149491787\n", 154 | "('Train accuracy:', 0.84942085)\n", 155 | "('Test accuracy:', 0.84464204)\n", 156 | "epoch 20: global loss = 0.138327404857\n", 157 | "('Train accuracy:', 0.95682698)\n", 158 | "('Test accuracy:', 0.95507723)\n", 159 | "epoch 30: global loss = 0.056102283299\n", 160 | "('Train accuracy:', 0.983971)\n", 161 | "('Test accuracy:', 0.97987831)\n", 162 | "epoch 40: global loss = 0.0324657447636\n", 163 | "('Train accuracy:', 0.99063998)\n", 164 | "('Test accuracy:', 0.98596162)\n", 165 | "('Final train accuracy:', 0.99414998)\n", 166 | "('Final test accuracy:', 0.98923725)\n", 167 | "Train finised for random state:6\n", 168 | "epoch 0: global loss = 0.727672696114\n", 169 | "('Train accuracy:', 0.42611444)\n", 170 | "('Test accuracy:', 0.44408047)\n", 171 | "epoch 10: global loss = 0.47494405508\n", 172 | "('Train accuracy:', 0.84052885)\n", 173 | "('Test 
accuracy:', 0.83809078)\n", 174 | "epoch 20: global loss = 0.126760289073\n", 175 | "('Train accuracy:', 0.96045399)\n", 176 | "('Test accuracy:', 0.95741695)\n", 177 | "epoch 30: global loss = 0.0550770014524\n", 178 | "('Train accuracy:', 0.983854)\n", 179 | "('Test accuracy:', 0.97894245)\n", 180 | "epoch 40: global loss = 0.0347224362195\n", 181 | "('Train accuracy:', 0.98982102)\n", 182 | "('Test accuracy:', 0.98502576)\n", 183 | "('Final train accuracy:', 0.99298)\n", 184 | "('Final test accuracy:', 0.98736548)\n", 185 | "Train finised for random state:7\n", 186 | "epoch 0: global loss = 0.753837943077\n", 187 | "('Train accuracy:', 0.42903942)\n", 188 | "('Test accuracy:', 0.43238184)\n", 189 | "epoch 10: global loss = 0.528392732143\n", 190 | "('Train accuracy:', 0.84532583)\n", 191 | "('Test accuracy:', 0.83996254)\n", 192 | "epoch 20: global loss = 0.140707731247\n", 193 | "('Train accuracy:', 0.95635897)\n", 194 | "('Test accuracy:', 0.95741695)\n", 195 | "epoch 30: global loss = 0.0585461705923\n", 196 | "('Train accuracy:', 0.982099)\n", 197 | "('Test accuracy:', 0.98128217)\n", 198 | "epoch 40: global loss = 0.0387108251452\n", 199 | "('Train accuracy:', 0.98806602)\n", 200 | "('Test accuracy:', 0.98689753)\n", 201 | "('Final train accuracy:', 0.99251199)\n", 202 | "('Final test accuracy:', 0.98923725)\n", 203 | "Train finised for random state:8\n", 204 | "epoch 0: global loss = 0.745738267899\n", 205 | "('Train accuracy:', 0.42810342)\n", 206 | "('Test accuracy:', 0.4361254)\n", 207 | "epoch 10: global loss = 0.502628087997\n", 208 | "('Train accuracy:', 0.83678484)\n", 209 | "('Test accuracy:', 0.8404305)\n", 210 | "epoch 20: global loss = 0.140836164355\n", 211 | "('Train accuracy:', 0.95425296)\n", 212 | "('Test accuracy:', 0.95367336)\n", 213 | "epoch 30: global loss = 0.0612846538424\n", 214 | "('Train accuracy:', 0.98151398)\n", 215 | "('Test accuracy:', 0.98034626)\n", 216 | "epoch 40: global loss = 0.0394038744271\n", 217 | "('Train accuracy:', 0.987481)\n", 218 | "('Test accuracy:', 0.9836219)\n", 219 | "('Final train accuracy:', 0.99157602)\n", 220 | "('Final test accuracy:', 0.99064106)\n", 221 | "Train finised for random state:9\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "weights_0_10 = []\n", 227 | "for random_state in xrange(10):\n", 228 | " X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)\n", 229 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[50], learning_rate=0.01, \\\n", 230 | " lambda1=0, lambda2=1, alpha1=0, alpha2=0, activation='tanh', \\\n", 231 | " weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)\n", 232 | " dfsMLP.train(batch_size=2000)\n", 233 | " print(\"Train finished for random state:\" + str(random_state))\n", 234 | " weights_0_10.append(dfsMLP.selected_ws[0])" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 5, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "np.save(\"weights-0-10-new\", weights_0_10)\n", 246 | "weights_10_20 = np.load(\"weights-10-20.npy\")  # saved by an earlier session; needed for the concatenation below\n", 247 | "weights_20_30 = np.load(\"weights-20-30.npy\")\n" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 29, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "allweights = np.concatenate((weights_0_10,weights_10_20,weights_20_30))" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | 
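Each pass of the loop above contributes one first-layer weight vector per random state; a common way to collapse such a stack into a single feature ranking is the mean absolute weight. A minimal numpy sketch with a synthetic stand-in for the saved `(n_runs, n_features)` array:

```python
import numpy as np

rng = np.random.RandomState(0)
all_w = rng.randn(30, 7205)              # 30 runs x 7205 features, synthetic

importance = np.abs(all_w).mean(axis=0)  # mean |weight| per feature
ranking = np.argsort(importance)[::-1]   # most important feature first
print(ranking[:10])
```

Taking the absolute value before averaging avoids sign cancellation between runs; averaging first and only then taking the absolute value can underrate features whose weight sign flips across splits.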
"execution_count": 32, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "np.save(\"allweights-0-30\", allweights)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 9, 275 | "metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "weights = np.array(weights)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 31, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~tonyabracadabra/0 or inside your plot.ly account where it is named 'basic-heatmap'\n" 295 | ] 296 | }, 297 | { 298 | "data": { 299 | "text/html": [ 300 | "" 301 | ], 302 | "text/plain": [ 303 | "" 304 | ] 305 | }, 306 | "execution_count": 31, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "import plotly.plotly as py\n", 313 | "import plotly.graph_objs as go\n", 314 | "import plotly\n", 315 | "\n", 316 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 317 | "\n", 318 | "data = [\n", 319 | " go.Heatmap(\n", 320 | " z=np.abs(allweights)\n", 321 | " )\n", 322 | "]\n", 323 | "\n", 324 | "py.iplot(data, filename='basic-heatmap')" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 17, 330 | "metadata": { 331 | "collapsed": false 332 | }, 333 | "outputs": [], 334 | "source": [ 335 | "averagedWeight = np.abs(weights.sum(axis=0)/10)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "collapsed": true 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "for random_state in xrange(10,20):\n", 347 | " X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)\n", 348 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[50], learning_rate=0.01, \\\n", 349 | " lambda1=0, lambda2=1, alpha1=0.0001, alpha2=0, activation='tanh', \\\n", 350 | " weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)\n", 351 | " dfsMLP.train(batch_size=2000)\n", 352 | " print(\"Train finised for random state:\" + str(random_state))\n", 353 | " weights.append(dfsMLP.selected_ws[0])" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 2", 360 | "language": "python", 361 | "name": "python2" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 2 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython2", 373 | "version": "2.7.12" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 0 378 | } 379 | -------------------------------------------------------------------------------- /Ensemble Model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.preprocessing import normalize\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "weights_0_10 = np.load(\"weights-0-10-NEW-mean.npy\")\n", 15 | "weights_10_20 = 
np.load(\"weights-10-20-NEW-mean.npy\")\n", 16 | "weights_20_30 = np.load(\"weights-20-30-NEW-mean.npy\")\n", 17 | "indexes_xgboost = np.load(\"indexes_xgboost.npy\")\n", 18 | "\n", 19 | "weights = np.concatenate((weights_0_10, weights_10_20, weights_20_30))\n", 20 | "\n", 21 | "np.save(\"weights-NEW-mean\", weights)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from scipy import io as sio\n", 33 | "\n", 34 | "ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/B_mean_2labels.mat\")\n", 35 | "\n", 36 | "inputX = ourdata['X']\n", 37 | "inputX = normalize(inputX, axis=0)\n", 38 | "inputY = ourdata['Y'][0,:]\n", 39 | "columnNames = ourdata['columnNames']" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 12, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "weights = abs(weights)\n", 51 | "averagedWeight = normalize(weights).sum(axis=0)\n", 52 | "indexes_average_dfs = np.argsort(averagedWeight)[::-1]\n", 53 | "\n", 54 | "def unionDFSfeatures(n):\n", 55 | " indexes_union = []\n", 56 | " for i in xrange(30):\n", 57 | " indexes_union.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 58 | " \n", 59 | " union = reduce(np.union1d, indexes_union).tolist()\n", 60 | " print(\"Number of union features:\", len(union))\n", 61 | " return inputX[:, union], union\n", 62 | "\n", 63 | "def intersectDFSfeatures(n):\n", 64 | " indexes_intersect = []\n", 65 | " for i in xrange(30):\n", 66 | " indexes_intersect.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 67 | " \n", 68 | " intersected = reduce(np.intersect1d, indexes_intersect).tolist()\n", 69 | " print(\"Number of intersected features:\",len(intersected))\n", 70 | " return inputX[:, intersected]\n", 71 | "\n", 72 | "def topXGBoostfeatures(a,b):\n", 73 | " return inputX[:, indexes_xgboost.tolist()[a:b]], indexes_xgboost.tolist()[a:b]\n", 74 | "\n", 75 | "def topAveDFSfeatures(a,b):\n", 76 | " return inputX[:, indexes_average_dfs.tolist()[a:b]], indexes_average_dfs.tolist()[a:b]\n", 77 | "\n", 78 | "def pickOneDFSfeatures(a,b,n):\n", 79 | " indexx = np.argsort(weights[n])[::-1]\n", 80 | " return inputX[:, indexx.tolist()[a:b]]\n", 81 | "\n", 82 | "def topDFSTemp(a,b):\n", 83 | " temp = np.argsort(abs(dfsMLP.selected_ws[0]))[::-1]\n", 84 | " return inputX[:, temp.tolist()[a:b]]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 36, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "('Number of intersected features:', 9)\n", 99 | "('Intersect:', 0.97940872141117929)\n", 100 | "('Number of union features:', 27)\n" 101 | ] 102 | }, 103 | { 104 | "ename": "ValueError", 105 | "evalue": "Found arrays with inconsistent numbers of samples: [ 2 10684]", 106 | "output_type": "error", 107 | "traceback": [ 108 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 109 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 110 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mkeke\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munionDFSfeatures\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m 
\u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msvm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeke\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Union:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 111 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)\u001b[0m\n\u001b[1;32m 1420\u001b[0m \u001b[0mArray\u001b[0m \u001b[0mof\u001b[0m \u001b[0mscores\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mestimator\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mrun\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcross\u001b[0m \u001b[0mvalidation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1421\u001b[0m \"\"\"\n\u001b[0;32m-> 1422\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1424\u001b[0m \u001b[0mcv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_cv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclassifier\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 112 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mindexable\u001b[0;34m(*iterables)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0mcheck_consistent_length\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 113 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mcheck_consistent_length\u001b[0;34m(*arrays)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m 
\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m raise ValueError(\"Found arrays with inconsistent numbers of samples: \"\n\u001b[0;32m--> 176\u001b[0;31m \"%s\" % str(uniques))\n\u001b[0m\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 114 | "\u001b[0;31mValueError\u001b[0m: Found arrays with inconsistent numbers of samples: [ 2 10684]" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "from sklearn.svm import LinearSVC\n", 120 | "from sklearn.metrics import accuracy_score\n", 121 | "from sklearn.linear_model import LogisticRegression\n", 122 | "from sklearn.cross_validation import cross_val_score\n", 123 | "\n", 124 | "svm = LinearSVC()\n", 125 | "\n", 126 | "keke = intersectDFSfeatures(200)\n", 127 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 128 | "print(\"Intersect:\", np.mean(scores))\n", 129 | "\n", 130 | "keke = unionDFSfeatures(10)\n", 131 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 132 | "print(\"Union:\", np.mean(scores))\n", 133 | "\n", 134 | "keke = topAveDFSfeatures(1,27)\n", 135 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 136 | "print(\"Ave:\", np.mean(scores))\n", 137 | "\n", 138 | "keke = topXGBoostfeatures(0,27)\n", 139 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 140 | "print(\"XGBoost:\", np.mean(scores))\n", 141 | "\n", 142 | "print(\"Pick one TOP DFS features from 30\")\n", 143 | "for i in xrange(0,30):\n", 144 | " keke = pickOneDFSfeatures(0,27,i)\n", 145 | " scores = cross_val_score(svm, keke, inputY, cv=5)\n", 146 | " print(np.mean(scores))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 65, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "" 160 | ], 161 | "text/plain": [ 162 | "" 163 | ] 164 | }, 165 | "execution_count": 65, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "import plotly.plotly as py\n", 172 | "import plotly.graph_objs as go\n", 173 | "import plotly\n", 174 | "\n", 175 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 176 | "\n", 177 | "data = [\n", 178 | " go.Heatmap(\n", 179 | " z=np.abs(weights)\n", 180 | " )\n", 181 | "]\n", 182 | "\n", 183 | "py.iplot(data, filename='30 Weights')" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 91, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "[u'Phe9_2534',\n", 197 | " u'Phe9_491',\n", 198 | " u'Phe9_4912',\n", 199 | " u'Phe9_492',\n", 200 | " u'Phe9_4928',\n", 201 | " u'Phe9_493',\n", 202 | " u'Phe9_4930',\n", 203 | " u'Phe9_4931',\n", 204 | " u'Phe9_4932',\n", 205 | " u'Phe9_4938',\n", 206 | " u'Phe9_4939',\n", 207 | " u'Phe9_494',\n", 208 | " u'Phe9_4940',\n", 209 | " u'Phe9_496',\n", 210 | " u'Phe9_5343',\n", 211 | " u'Phe9_V146',\n", 212 | " u'Phe10_E236',\n", 213 | " u'Phe10_I23',\n", 214 | " u'Phe10_J44',\n", 215 | " u'Phe10_J449',\n", 216 | " u'Phe10_J45',\n", 217 | " u'Phe10_J452',\n", 218 | " u'Phe10_J453',\n", 219 | " u'Phe10_J454',\n", 220 | " u'Phe10_J459',\n", 221 | " u'Phe10_N08',\n", 222 | " u'Phe10_S060']" 223 | ] 224 | }, 225 | "execution_count": 91, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "indexes_union = []\n", 232 | "for i in xrange(30):\n", 233 | " 
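The `ValueError` recorded above is raised because `unionDFSfeatures`, `topAveDFSfeatures`, and `topXGBoostfeatures` return a `(matrix, index_list)` tuple, and the two-element tuple itself is handed to `cross_val_score` as `X`. Unpacking the tuple avoids it; a sketch, assuming the helper definitions, `svm`, and `inputY` from the cells above have been run:

```python
# not: keke = unionDFSfeatures(10)  -- that binds the whole (matrix, indexes) tuple
X_union, union_idx = unionDFSfeatures(10)
print("Union:", np.mean(cross_val_score(svm, X_union, inputY, cv=5)))

X_ave, ave_idx = topAveDFSfeatures(1, 27)
print("Ave:", np.mean(cross_val_score(svm, X_ave, inputY, cv=5)))

X_xgb, xgb_idx = topXGBoostfeatures(0, 27)
print("XGBoost:", np.mean(cross_val_score(svm, X_xgb, inputY, cv=5)))
```

`intersectDFSfeatures` returns only the matrix, which is why the Intersect score above printed successfully before the traceback.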
indexes_union.append(np.argsort(weights[i])[::-1][:10].tolist())\n", 234 | "union = reduce(np.union1d, indexes_union).tolist()\n", 235 | "\n", 236 | "\n", 237 | "[i[0] for i in columnNames.reshape(7205,)[union]]" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 78, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "def getSelectedFeatureNames(which, topN):\n", 249 | " if which == \"Ave\":\n", 250 | " indexes = indexes_average_dfs\n", 251 | " elif which == \"Union\":\n", 252 | " indexes = np.array(union)  # the union index list computed in the cell above\n", 253 | " elif which == \"Intersect\":\n", 254 | " indexes = np.array(intersected)  # assumes the intersected index list has been kept around\n", 255 | " featureNames = [i[0] for i in columnNames.reshape(7205,)[indexes[:topN]]]\n", 256 | " \n", 257 | " return featureNames" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 99, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "('Number of union features:', 1)\n", 272 | "('Union:', 0.94487121332880197)\n", 273 | "('Number of union features:', 10)\n", 274 | "('Union:', 0.94346676284389586)\n", 275 | "('Number of union features:', 12)\n", 276 | "('Union:', 0.9591910107244368)\n", 277 | "('Number of union features:', 17)\n", 278 | "('Union:', 0.97145272318043541)\n", 279 | "('Number of union features:', 21)\n", 280 | "('Union:', 0.97182712357021495)\n", 281 | "('Number of union features:', 23)\n", 282 | "('Union:', 0.99204408956348877)\n", 283 | "('Number of union features:', 26)\n", 284 | "('Union:', 0.99166968909173525)\n", 285 | "('Number of union features:', 26)\n", 286 | "('Union:', 0.99166968909173525)\n", 287 | "('Number of union features:', 26)\n", 288 | "('Union:', 0.99166968909173525)\n", 289 | "('Number of union features:', 27)\n", 290 | "('Union:', 0.99166960150243777)\n", 291 | "('Number of union features:', 28)\n", 292 | "('Union:', 0.99073384134732279)\n", 293 | "('Number of union features:', 31)\n", 294 | "('Union:', 0.98418163686619731)\n", 295 | "('Number of union features:', 31)\n", 296 | "('Union:', 0.98418163686619731)\n", 297 | "('Number of union features:', 31)\n", 298 | "('Union:', 0.98418163686619731)\n", 299 | "('Number of union features:', 34)\n", 300 | "('Union:', 0.98446240429717535)\n", 301 | "('Number of union features:', 36)\n", 302 | "('Union:', 0.98474339076287709)\n", 303 | "('Number of union features:', 40)\n", 304 | "('Union:', 0.98596013738369381)\n", 305 | "('Number of union features:', 45)\n", 306 | "('Union:', 0.98558578076806957)\n", 307 | "('Number of union features:', 51)\n", 308 | "('Union:', 0.98876816262448164)\n", 309 | "('Number of union features:', 55)\n", 310 | "('Union:', 0.98895538476792955)\n", 311 | "('Number of union features:', 61)\n", 312 | "('Union:', 0.98876816262448164)\n", 313 | "('Number of union features:', 67)\n", 314 | "('Union:', 0.98839398118745181)\n", 315 | "('Number of union features:', 73)\n", 316 | "('Union:', 0.9883001730500558)\n", 317 | "('Number of union features:', 77)\n", 318 | "('Union:', 0.98801936180393546)\n", 319 | "('Number of union features:', 86)\n", 320 | "('Union:', 0.98783209588633247)\n", 321 | "('Number of union features:', 89)\n", 322 | "('Union:', 0.98811295098858187)\n", 323 | "('Number of union features:', 102)\n", 324 | "('Union:', 0.98596048790483126)\n", 325 | "('Number of union features:', 108)\n", 326 | "('Union:', 0.98567980810413758)\n", 327 | "('Number of union features:', 120)\n", 328 | "('Union:', 0.98418233778551067)\n", 329 | 
"('Number of union features:', 129)\n", 330 | "('Union:', 0.98277810621236728)\n", 331 | "('Number of union features:', 139)\n", 332 | "('Union:', 0.98193567247400682)\n", 333 | "('Number of union features:', 151)\n", 334 | "('Union:', 0.98184238987239447)\n", 335 | "('Number of union features:', 162)\n", 336 | "('Union:', 0.98109350142156404)\n", 337 | "('Number of union features:', 173)\n", 338 | "('Union:', 0.98090641072354268)\n", 339 | "('Number of union features:', 182)\n", 340 | "('Union:', 0.9806255118471382)\n", 341 | "('Number of union features:', 192)\n", 342 | "('Union:', 0.97968944506800215)\n", 343 | "('Number of union features:', 202)\n", 344 | "('Union:', 0.97997030012926456)\n", 345 | "('Number of union features:', 214)\n", 346 | "('Union:', 0.97940863386286914)\n", 347 | "('Number of union features:', 224)\n", 348 | "('Union:', 0.9790342334321025)\n", 349 | "('Number of union features:', 234)\n", 350 | "('Union:', 0.97837897798106077)\n", 351 | "('Number of union features:', 242)\n", 352 | "('Union:', 0.97809829809839288)\n", 353 | "('Number of union features:', 255)\n", 354 | "('Union:', 0.97837889035077619)\n", 355 | "('Number of union features:', 265)\n", 356 | "('Union:', 0.97791085700219527)\n", 357 | "('Number of union features:', 269)\n", 358 | "('Union:', 0.9780979915153587)\n", 359 | "('Number of union features:', 275)\n", 360 | "('Union:', 0.9780979915153587)\n", 361 | "('Number of union features:', 280)\n", 362 | "('Union:', 0.9776299143516356)\n", 363 | "('Number of union features:', 289)\n", 364 | "('Union:', 0.97781744303713047)\n", 365 | "('Number of union features:', 297)\n", 366 | "('Union:', 0.97706864221658452)\n", 367 | "('Number of union features:', 304)\n", 368 | "('Union:', 0.97716231894954109)\n", 369 | "('Number of union features:', 309)\n", 370 | "('Union:', 0.97678783088849008)\n", 371 | "('Number of union features:', 318)\n", 372 | "('Union:', 0.97660078396462391)\n", 373 | "('Number of union features:', 322)\n", 374 | "('Union:', 0.97669424174483088)\n", 375 | "('Number of union features:', 325)\n", 376 | "('Union:', 0.97688159525173113)\n", 377 | "('Number of union features:', 331)\n", 378 | "('Union:', 0.97697509680609307)\n", 379 | "('Number of union features:', 340)\n", 380 | "('Union:', 0.97631975376575397)\n", 381 | "('Number of union features:', 347)\n", 382 | "('Union:', 0.97660069641631364)\n", 383 | "('Number of union features:', 356)\n", 384 | "('Union:', 0.97678804988222689)\n", 385 | "('Number of union features:', 369)\n", 386 | "('Union:', 0.97678796229292963)\n", 387 | "('Number of union features:', 375)\n", 388 | "('Union:', 0.97632001653364586)\n", 389 | "('Number of union features:', 386)\n", 390 | "('Union:', 0.97697522825151961)\n", 391 | "('Number of union features:', 393)\n", 392 | "('Union:', 0.97688150766243387)\n", 393 | "('Number of union features:', 400)\n", 394 | "('Union:', 0.97716231890855398)\n", 395 | "('Number of union features:', 407)\n", 396 | "('Union:', 0.97706877353904975)\n", 397 | "('Number of union features:', 410)\n", 398 | "('Union:', 0.97706872976489456)\n", 399 | "('Number of union features:', 419)\n", 400 | "('Union:', 0.97678796229292963)\n", 401 | "('Number of union features:', 422)\n", 402 | "('Union:', 0.97697509680609307)\n", 403 | "('Number of union features:', 433)\n", 404 | "('Union:', 0.9766944169644125)\n", 405 | "('Number of union features:', 442)\n", 406 | "('Union:', 0.976787787114335)\n", 407 | "('Number of union features:', 446)\n", 408 | "('Union:', 0.97660060882701638)\n", 409 | 
"('Number of union features:', 451)\n", 410 | "('Union:', 0.97669415419652061)\n", 411 | "('Number of union features:', 457)\n", 412 | "('Union:', 0.97669411042236542)\n", 413 | "('Number of union features:', 460)\n", 414 | "('Union:', 0.97669406660722335)\n", 415 | "('Number of union features:', 467)\n", 416 | "('Union:', 0.97678761201771458)\n", 417 | "('Number of union features:', 475)\n", 418 | "('Union:', 0.9765069320940597)\n", 419 | "('Number of union features:', 482)\n", 420 | "('Union:', 0.97669402287405516)\n", 421 | "('Number of union features:', 487)\n", 422 | "('Union:', 0.97660052127870611)\n", 423 | "('Number of union features:', 496)\n", 424 | "('Union:', 0.97622607711477138)\n", 425 | "('Number of union features:', 506)\n", 426 | "('Union:', 0.97613248793012508)\n", 427 | "('Number of union features:', 517)\n", 428 | "('Union:', 0.9762260770737845)\n", 429 | "('Number of union features:', 530)\n", 430 | "('Union:', 0.97575804372520347)\n", 431 | "('Number of union features:', 539)\n", 432 | "('Union:', 0.97538381851401856)\n", 433 | "('Number of union features:', 550)\n", 434 | "('Union:', 0.97547740761669088)\n", 435 | "('Number of union features:', 559)\n", 436 | "('Union:', 0.97547745143183295)\n", 437 | "('Number of union features:', 572)\n", 438 | "('Union:', 0.97547740761669088)\n", 439 | "('Number of union features:', 581)\n", 440 | "('Union:', 0.97538377465788939)\n", 441 | "('Number of union features:', 595)\n", 442 | "('Union:', 0.97547740765767776)\n", 443 | "('Number of union features:', 605)\n", 444 | "('Union:', 0.97529014174007478)\n", 445 | "('Number of union features:', 620)\n", 446 | "('Union:', 0.97538377465788939)\n", 447 | "('Number of union features:', 630)\n", 448 | "('Union:', 0.97491591648790299)\n", 449 | "('Number of union features:', 638)\n", 450 | "('Union:', 0.97519681536430747)\n", 451 | "('Number of union features:', 647)\n", 452 | "('Union:', 0.97529031691866952)\n", 453 | "('Number of union features:', 656)\n", 454 | "('Union:', 0.97538390606232883)\n", 455 | "('Number of union features:', 666)\n", 456 | "('Union:', 0.97510318240550597)\n", 457 | "('Number of union features:', 677)\n", 458 | "('Union:', 0.97482241497452793)\n", 459 | "('Number of union features:', 692)\n", 460 | "('Union:', 0.97510300726789834)\n", 461 | "('Number of union features:', 705)\n", 462 | "('Union:', 0.97482232738523056)\n", 463 | "('Number of union features:', 713)\n", 464 | "('Union:', 0.97472873824157136)\n", 465 | "('Number of union features:', 724)\n", 466 | "('Union:', 0.97444788318030895)\n", 467 | "('Number of union features:', 738)\n", 468 | "('Union:', 0.97444792699545124)\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "scores_union = []\n", 474 | "for i in xrange(1,100):\n", 475 | " keke = unionDFSfeatures(i)\n", 476 | " scores_union.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))\n", 477 | " print(\"Union:\", scores_union[-1])" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 29, 483 | "metadata": { 484 | "collapsed": false 485 | }, 486 | "outputs": [ 487 | { 488 | "name": "stdout", 489 | "output_type": "stream", 490 | "text": [ 491 | "('Number of union features:', 23)\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "_, union = unionDFSfeatures(6)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 66, 502 | "metadata": { 503 | "collapsed": false 504 | }, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "10\n", 511 | "20\n", 
512 | "30\n", 513 | "40\n", 514 | "50\n", 515 | "60\n", 516 | "70\n", 517 | "80\n", 518 | "90\n", 519 | "100\n", 520 | "110\n", 521 | "120\n", 522 | "130\n", 523 | "140\n", 524 | "150\n", 525 | "160\n", 526 | "170\n", 527 | "180\n", 528 | "190\n", 529 | "200\n", 530 | "210\n", 531 | "220\n", 532 | "230\n", 533 | "240\n", 534 | "250\n", 535 | "260\n", 536 | "270\n", 537 | "280\n", 538 | "290\n", 539 | "300\n", 540 | "310\n", 541 | "320\n", 542 | "330\n", 543 | "340\n", 544 | "350\n", 545 | "360\n", 546 | "370\n", 547 | "380\n", 548 | "390\n", 549 | "400\n", 550 | "410\n", 551 | "420\n", 552 | "430\n", 553 | "440\n", 554 | "450\n", 555 | "460\n", 556 | "470\n", 557 | "480\n", 558 | "490\n" 559 | ] 560 | } 561 | ], 562 | "source": [ 563 | "ave, xg = 0, 0\n", 564 | "num = []\n", 565 | "acc = []\n", 566 | "for i in xrange(1,500):\n", 567 | " if i % 10 == 0:\n", 568 | " print i\n", 569 | " _, ave = topAveDFSfeatures(0,i)\n", 570 | " _, xg = topXGBoostfeatures(0,i)\n", 571 | " temp = np.intersect1d(ave,xg)\n", 572 | " if temp.shape[0] > 0:\n", 573 | " if len(num) == 0 or num[-1] != temp.shape[0]:\n", 574 | " num.append(temp.shape[0])\n", 575 | " keke = inputX[:,temp.tolist()]\n", 576 | " acc.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": 100, 582 | "metadata": { 583 | "collapsed": false 584 | }, 585 | "outputs": [ 586 | { 587 | "name": "stdout", 588 | "output_type": "stream", 589 | "text": [ 590 | "('Number of union features:', 102)\n", 591 | "(17,)\n" 592 | ] 593 | }, 594 | { 595 | "data": { 596 | "text/plain": [ 597 | "0.98034465678587579" 598 | ] 599 | }, 600 | "execution_count": 100, 601 | "metadata": {}, 602 | "output_type": "execute_result" 603 | } 604 | ], 605 | "source": [ 606 | "_, ave = topAveDFSfeatures(0,102)\n", 607 | "_, xg = topXGBoostfeatures(0,102)\n", 608 | "_, union = unionDFSfeatures(27)\n", 609 | "temp = np.intersect1d(np.intersect1d(ave,union),xg)\n", 610 | "print temp.shape\n", 611 | "\n", 612 | "keke = inputX[:,temp.tolist()]\n", 613 | "np.mean(cross_val_score(svm, keke, inputY, cv=5))" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 78, 619 | "metadata": { 620 | "collapsed": false 621 | }, 622 | "outputs": [ 623 | { 624 | "data": { 625 | "text/plain": [ 626 | "[1385,\n", 627 | " 1373,\n", 628 | " 1378,\n", 629 | " 1377,\n", 630 | " 4885,\n", 631 | " 1365,\n", 632 | " 4886,\n", 633 | " 1376,\n", 634 | " 4883,\n", 635 | " 4888,\n", 636 | " 4884,\n", 637 | " 1367,\n", 638 | " 1379,\n", 639 | " 4882,\n", 640 | " 1370,\n", 641 | " 1372,\n", 642 | " 1375,\n", 643 | " 4880,\n", 644 | " 1374,\n", 645 | " 1380,\n", 646 | " 4877,\n", 647 | " 4879,\n", 648 | " 4881,\n", 649 | " 1369,\n", 650 | " 2,\n", 651 | " 7025,\n", 652 | " 4887,\n", 653 | " 7024,\n", 654 | " 0,\n", 655 | " 651,\n", 656 | " 649,\n", 657 | " 4874,\n", 658 | " 6431,\n", 659 | " 4889,\n", 660 | " 3004,\n", 661 | " 2511,\n", 662 | " 1340,\n", 663 | " 4891,\n", 664 | " 3908,\n", 665 | " 3859,\n", 666 | " 4835,\n", 667 | " 4660,\n", 668 | " 4830,\n", 669 | " 1336,\n", 670 | " 4876,\n", 671 | " 3,\n", 672 | " 6197,\n", 673 | " 4926,\n", 674 | " 6281,\n", 675 | " 1726,\n", 676 | " 3907,\n", 677 | " 6245,\n", 678 | " 2510,\n", 679 | " 6196,\n", 680 | " 1368,\n", 681 | " 1371,\n", 682 | " 6469,\n", 683 | " 4822,\n", 684 | " 4454,\n", 685 | " 6411,\n", 686 | " 5203,\n", 687 | " 6183,\n", 688 | " 6287,\n", 689 | " 1,\n", 690 | " 4665,\n", 691 | " 1773,\n", 692 | " 4725,\n", 693 | " 1153,\n", 694 | " 
7195,\n", 695 | " 5628,\n", 696 | " 4578,\n", 697 | " 6468,\n", 698 | " 1727,\n", 699 | " 4571,\n", 700 | " 784,\n", 701 | " 1500,\n", 702 | " 4572,\n", 703 | " 6283,\n", 704 | " 4823,\n", 705 | " 3272,\n", 706 | " 3455,\n", 707 | " 4577,\n", 708 | " 1359,\n", 709 | " 1228,\n", 710 | " 750,\n", 711 | " 5200,\n", 712 | " 5627,\n", 713 | " 2244,\n", 714 | " 1098,\n", 715 | " 5692,\n", 716 | " 4645,\n", 717 | " 1501,\n", 718 | " 4722,\n", 719 | " 5695,\n", 720 | " 786,\n", 721 | " 3962,\n", 722 | " 5813,\n", 723 | " 1154,\n", 724 | " 1672,\n", 725 | " 4798]" 726 | ] 727 | }, 728 | "execution_count": 78, 729 | "metadata": {}, 730 | "output_type": "execute_result" 731 | } 732 | ], 733 | "source": [ 734 | "ave" 735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": 106, 740 | "metadata": { 741 | "collapsed": false 742 | }, 743 | "outputs": [], 744 | "source": [ 745 | "[str(i[0]) for i in columnNames[0][temp]]\n", 746 | "\n", 747 | "import pandas as pd\n", 748 | "\n", 749 | "dictionary = pd.read_csv(\"/Users/xupeng.tong/Documents/Data/OriginalData/BinaryTraitMatrix_V2_F50K_DD_20160523.csv\")" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 116, 755 | "metadata": { 756 | "collapsed": false 757 | }, 758 | "outputs": [ 759 | { 760 | "name": "stdout", 761 | "output_type": "stream", 762 | "text": [ 763 | "('Number of union features:', 23)\n" 764 | ] 765 | }, 766 | { 767 | "data": { 768 | "text/plain": [ 769 | "[432,\n", 770 | " 1365,\n", 771 | " 1367,\n", 772 | " 1370,\n", 773 | " 1373,\n", 774 | " 1374,\n", 775 | " 1375,\n", 776 | " 1376,\n", 777 | " 1377,\n", 778 | " 1378,\n", 779 | " 1379,\n", 780 | " 1380,\n", 781 | " 1385,\n", 782 | " 1501,\n", 783 | " 3859,\n", 784 | " 4571,\n", 785 | " 4880,\n", 786 | " 4882,\n", 787 | " 4883,\n", 788 | " 4884,\n", 789 | " 4885,\n", 790 | " 4886,\n", 791 | " 4888]" 792 | ] 793 | }, 794 | "execution_count": 116, 795 | "metadata": {}, 796 | "output_type": "execute_result" 797 | } 798 | ], 799 | "source": [ 800 | "_, union = unionDFSfeatures(6)\n", 801 | "\n", 802 | "union" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 110, 808 | "metadata": { 809 | "collapsed": true 810 | }, 811 | "outputs": [], 812 | "source": [ 813 | "codemapping = {i:j for i, j in zip(dictionary[\"FIELD_NAME\"].values, dictionary[\"FIELD_DESCRIPTION\"].values)}" 814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": 122, 819 | "metadata": { 820 | "collapsed": false 821 | }, 822 | "outputs": [ 823 | { 824 | "name": "stdout", 825 | "output_type": "stream", 826 | "text": [ 827 | "Age at time of last encounter with the health system\n", 828 | "Derived categorical smoking status\n", 829 | "ICD9 3D: Nondependent abuse of drugs\n", 830 | "ICD9 4D: Tobacco use disorder\n", 831 | "ICD9 3D: Chronic bronchitis\n", 832 | "ICD9 4D: Obstructive Chronic Bronchitis\n", 833 | "ICD9 3D: Asthma\n", 834 | "ICD9 4D: Chronic Obstructive Asthma\n", 835 | "ICD9 4D: Asthma, Unspecified\n", 836 | "ICD9 3D: Bronchiectasis\n", 837 | "ICD9 3D: Chronic airway obstruction, not elsewhere classified\n", 838 | "ICD10 3D: Emphysema\n", 839 | "ICD10 4D: Emphysema, unspecified\n", 840 | "ICD10 3D: Asthma\n", 841 | "ICD10 4D: Other and unspecified asthma\n", 842 | "ICD10 4D: Dyspnea\n", 843 | "ICD10 3D: Problems related to lifestyle\n" 844 | ] 845 | } 846 | ], 847 | "source": [ 848 | "the23 = [codemapping[str(i[0])] for i in columnNames[0][temp]]\n", 849 | "\n", 850 | "for i in the23:\n", 851 | " print i" 852 | ] 853 | }, 854 | 
{ 855 | "cell_type": "code", 856 | "execution_count": null, 857 | "metadata": { 858 | "collapsed": true 859 | }, 860 | "outputs": [], 861 | "source": [] 862 | } 863 | ], 864 | "metadata": { 865 | "kernelspec": { 866 | "display_name": "Python 2", 867 | "language": "python", 868 | "name": "python2" 869 | }, 870 | "language_info": { 871 | "codemirror_mode": { 872 | "name": "ipython", 873 | "version": 2 874 | }, 875 | "file_extension": ".py", 876 | "mimetype": "text/x-python", 877 | "name": "python", 878 | "nbconvert_exporter": "python", 879 | "pygments_lexer": "ipython2", 880 | "version": "2.7.12" 881 | } 882 | }, 883 | "nbformat": 4, 884 | "nbformat_minor": 0 885 | } 886 | -------------------------------------------------------------------------------- /MakeNewData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 31, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from scipy import io as sio\n", 12 | "\n", 13 | "# Load the data\n", 14 | "ourdata = sio.loadmat(\"./data/B_3labels_mean_scaled.mat\")\n", 15 | "columnNames = ourdata['columnNames']" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# ICD-9 codes mapping\n", 27 | "import pandas as pd\n", 28 | "\n", 29 | "dictionary = pd.read_csv(\"./data/BinaryTraitMatrix_V2.2_F60K_DD_20160722.csv\")\n", 30 | "codemapping = {i:j for i, j in zip(dictionary[\"FIELD_NAME\"].values, dictionary[\"FIELD_DESCRIPTION\"].values)}\n", 31 | "\n", 32 | "def getFeatureNames(indexes):\n", 33 | " return [codemapping[str(i).rstrip()] for i in columnNames[indexes]]" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## A list of features should be eliminated" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 28, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "a = [i[0] for i in codemapping.items() if 'Emphysema' in i[1]]\n", 52 | "b = [i[0] for i in codemapping.items() if 'emphysema' in i[1]]\n", 53 | "c = [i[0] for i in codemapping.items() if 'asthma' in i[1]]\n", 54 | "d = [i[0] for i in codemapping.items() if 'Asthma' in i[1]]\n", 55 | "e = [i[0] for i in codemapping.items() if 'Chronic bronchitis' in i[1]]\n", 56 | "f = [i[0] for i in codemapping.items() if 'chronic bronchitis' in i[1]]\n", 57 | "g = [i[0] for i in codemapping.items() if 'Chronic Obstructive' in i[1]]\n", 58 | "h = [i[0] for i in codemapping.items() if 'chronic obstructive' in i[1]]\n", 59 | "j = [i[0] for i in codemapping.items() if 'smoking' in i[1]]\n", 60 | "k = [i[0] for i in codemapping.items() if 'Gender' == i[1]]\n", 61 | "u = [i[0] for i in codemapping.items() if 'Age at time of last encounter with the health system' == i[1]]\n", 62 | "s = [i[0] for i in codemapping.items() if 'Indicator if patient is alive or deceased' == i[1]]\n", 63 | "# w = [i for i in codemapping.items() if 'bronc' in i[1]]\n", 64 | "i = [i[0] for i in codemapping.items() if 'Tobacco' in i[1]]\n", 65 | "\n", 66 | "# Should add more" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 26, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "icd9_eliminated = a+b+c+d+e+f+g+h+i+j+k+u+s" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 22, 83 | "metadata": { 
84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "index_eliminated = [i for i, j in enumerate(columnNames) if str(j.rstrip()) in icd9_eliminated]\n", 89 | "index_keep = [i for i in xrange(len(columnNames)) if i not in index_eliminated]" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## New column names is generated with index_keep" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 8, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "newColumnNames = ourdata['columnNames'][index_keep]" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Create New data that does not contain the features eliminated above" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 87, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# Change the file names and generate new files\n", 126 | "\n", 127 | "# Load old data with original columns\n", 128 | "ourdata = sio.loadmat(\"./data/B_AsthmaAcos_mean_scaled.mat\")\n", 129 | "\n", 130 | "ourdata['X'] = ourdata['X'][:,index_keep]\n", 131 | "ourdata['columnNames'] = newColumnNames\n", 132 | "\n", 133 | "# Save new Data with new columns (name end with number of new columns)\n", 134 | "sio.savemat(\"./data/B_AsthmaAcos_mean_scaled_\"+str(len(newColumnNames))\".mat\", ourdata)" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 2", 141 | "language": "python", 142 | "name": "python2" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 2 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython2", 154 | "version": "2.7.12" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 0 159 | } 160 | -------------------------------------------------------------------------------- /Note-Part1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": false 7 | }, 8 | "source": [ 9 | "# Read The Data" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from supporting_files.dfs2 import DeepFeatureSelectionNew\n", 21 | "from sklearn.cross_validation import train_test_split\n", 22 | "from sklearn import datasets\n", 23 | "from scipy import io as sio\n", 24 | "from tensorflow.python.framework import ops\n", 25 | "import numpy as np\n", 26 | "from sklearn.datasets import make_classification\n", 27 | "from sklearn.preprocessing import normalize\n", 28 | "\n", 29 | "ourdata = sio.loadmat(\"./data/B_AsthmaCOPD_mean_scaled_7159.mat\")\n", 30 | "\n", 31 | "inputX = ourdata['X']\n", 32 | "inputY = ourdata['Y'][0,:]\n", 33 | "\n", 34 | "columnNames = ourdata['columnNames']" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# Run the Deep Feature Selection\n", 42 | "## Changing lambda1 slightly" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false, 50 | "scrolled": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# Reset the graph\n", 55 | 
"ops.reset_default_graph()\n", 56 | "\n", 57 | "weights_tuning_lamda1 = []\n", 58 | "for lambda1 in xrange(0, 10, 1):\n", 59 | " # Should be modified for different datasets, similar things should be done for alpha1\n", 60 | " lambda1 /= 10000.\n", 61 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[50], learning_rate=0.01, \\\n", 62 | " lambda1=0.0001, lambda2=1, alpha1=0.00001, alpha2=0, activation='tanh', \\\n", 63 | " weight_init='uniform',epochs=20, optimizer='Adam', print_step=1)\n", 64 | " dfsMLP.train(batch_size=2000)\n", 65 | " print(\"Train finised for lambda1:\" + str(lambda1))\n", 66 | " weights_tuning_lamda1.append(dfsMLP.selected_ws[0])" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "# Run different random states in order to select features given selected set of parameters chosen above " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "weights_randomstates = []\n", 85 | "\n", 86 | "for random_state in xrange(20):\n", 87 | " # Resplit the data\n", 88 | " X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)\n", 89 | " \n", 90 | " # Change number of epochs to control the training time\n", 91 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[30], learning_rate=0.01, \\\n", 92 | " lambda1=0.0001, lambda2=1, alpha1=0.0001, alpha2=0, activation='tanh', \\\n", 93 | " weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)\n", 94 | " dfsMLP.train(batch_size=2000)\n", 95 | " print(\"Train finised for random state:\" + str(random_state))\n", 96 | " weights_randomstates.append(dfsMLP.selected_ws[0])\n", 97 | "\n", 98 | "# The generated weights will be in the weights folder\n", 99 | "np.save(\"./weights/weights_randomstates\", weights_randomstates)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# The below code is for single model testing / parameter discovering" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false, 114 | "scrolled": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=2)\n", 119 | "\n", 120 | "dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[5], learning_rate=0.012, \\\n", 121 | " lambda1=0.002, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \\\n", 122 | " weight_init='uniform',epochs=200, optimizer='Adam', print_step=1)\n", 123 | "dfsMLP.train(batch_size=2000)\n", 124 | "\n", 125 | "# More layers might cause overfitting problems, but certainly change the alpha1 and lambda1 accordingly would \n", 126 | "# set the problem" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "ourdata = sio.loadmat(\"./data/B_COPDAcos_mean_scaled_7169.mat\")\n", 138 | "inputX = ourdata['X']\n", 139 | "inputY = ourdata['Y'][0,:]\n", 140 | "columnNames = ourdata['columnNames']\n", 141 | "\n", 142 | "index_Acos = np.where(inputY==0)[0]\n", 143 | "index_COPD = np.where(inputY==1)[0]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 
149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "ourdata = sio.loadmat(\"./data/B_COPDAcos_mean_scaled_7169.mat\")\n", 155 | "inputX = ourdata['X']\n", 156 | "inputY = ourdata['Y'][0,:]\n", 157 | "columnNames = ourdata['columnNames']\n", 158 | "\n", 159 | "index_Acos = np.where(inputY==0)[0]\n", 160 | "index_COPD = np.where(inputY==1)[0]" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "weights = []\n", 172 | "for i in xrange(1):\n", 173 | " # made random choice of asthma patients\n", 174 | " choice = np.random.choice(a=len(index_COPD), size=len(index_Acos))\n", 175 | " index_COPD_chosen = index_Asthma[choice]\n", 176 | "\n", 177 | " # Concatenate the indexes for Asthma and Acos patients\n", 178 | " indexes = np.array(index_Acos.tolist()+index_COPD_chosen.tolist())\n", 179 | " # Shuffle the indexes\n", 180 | " np.random.shuffle(indexes)\n", 181 | " indexes = indexes.tolist()\n", 182 | "\n", 183 | " # inputX and inputY for this round\n", 184 | " inputX_ = inputX[indexes,:]\n", 185 | " inputY_ = inputY[indexes]\n", 186 | " \n", 187 | " X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)\n", 188 | " \n", 189 | " # Change number of epochs to control the training time\n", 190 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[10], learning_rate=0.01, \\\n", 191 | " lambda1=0.01, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \\\n", 192 | " weight_init='uniform',epochs=30, optimizer='Adam', print_step=1)\n", 193 | " dfsMLP.train(batch_size=500)\n", 194 | " print(\"Train finised for random state:\" + str(random_state))\n", 195 | " weights.append(dfsMLP.selected_ws[0])" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "# Run XGBoost Model" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "import scipy.io as sio\n", 214 | "from sklearn.ensemble import RandomForestClassifier\n", 215 | "from sklearn.metrics import accuracy_score\n", 216 | "import xgboost as xgb\n", 217 | "import numpy as np\n", 218 | "\n", 219 | "# COPD Acos\n", 220 | "ourdata = sio.loadmat(\"./data/B_COPDAcos_mean_scaled_7159.mat\")\n", 221 | "inputX = ourdata['X']\n", 222 | "inputY = ourdata['Y'][0,:]\n", 223 | "\n", 224 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(inputX, inputY)\n", 225 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 226 | "\n", 227 | "np.save(\"./weights/indexes_xgboost_rerun_All_CAc\",indexes_xgboost)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": true 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "ourdata = sio.loadmat(\"./data/B_AsthmaCOPD_mean_scaled_7159.mat\")\n", 239 | "inputX = ourdata['X']\n", 240 | "inputY = ourdata['Y'][0,:]\n", 241 | "\n", 242 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(inputX, inputY)\n", 243 | "# y_pred = gbm.predict(X_test)\n", 244 | "\n", 245 | "# featurescores = gbm.feature_importances_\n", 246 | "\n", 247 | "# print(accuracy_score(y_test, y_pred))\n", 248 | "\n", 249 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 250 | 
"np.save(\"./weights/indexes_xgboost_rerun_All_AsC\",indexes_xgboost)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": true 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# Asthma Acos\n", 262 | "ourdata = sio.loadmat(\"./data/B_AsthmaAcos_mean_scaled_7159.mat\")\n", 263 | "inputX = ourdata['X']\n", 264 | "inputY = ourdata['Y'][0,:]\n", 265 | "\n", 266 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(inputX, inputY)\n", 267 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 268 | "\n", 269 | "np.save(\"./weights/indexes_xgboost_rerun_All_AsAc\",indexes_xgboost)" 270 | ] 271 | } 272 | ], 273 | "metadata": { 274 | "kernelspec": { 275 | "display_name": "Python 2", 276 | "language": "python", 277 | "name": "python2" 278 | }, 279 | "language_info": { 280 | "codemirror_mode": { 281 | "name": "ipython", 282 | "version": 2 283 | }, 284 | "file_extension": ".py", 285 | "mimetype": "text/x-python", 286 | "name": "python", 287 | "nbconvert_exporter": "python", 288 | "pygments_lexer": "ipython2", 289 | "version": "2.7.12" 290 | } 291 | }, 292 | "nbformat": 4, 293 | "nbformat_minor": 0 294 | } 295 | -------------------------------------------------------------------------------- /Preprocess - Should be ran after labeled csv file has been generated.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from scipy import io as sio\n", 12 | "import pandas as pd\n", 13 | "import numpy as np\n", 14 | "\n", 15 | "# Process Q and B data separately\n", 16 | "\n", 17 | "filePath = \"./data/QMatrix_label.csv\"\n", 18 | "if 'B' in filePath:\n", 19 | " dataType = 'B'\n", 20 | "else:\n", 21 | " dataType = 'Q'\n", 22 | "\n", 23 | "impute_strategy = 'mean'\n", 24 | " \n", 25 | "df = pd.read_csv(filePath)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# See what does the data looks like\n", 37 | "df" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "# Read the labels\n", 49 | "labels = df.ix[:,1].values" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "# Convert alphabet subgroups to numbers\n", 61 | "mappingSub = {j:i for i,j in enumerate(np.unique(df['Smoking_Sub_Group']))}\n", 62 | "numericSub = np.array([mappingSub[i] for i in df['Smoking_Sub_Group']])\n", 63 | "df['Smoking_Sub_Group'] = numericSub" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# Convert string labels to numbers\n", 75 | "mappingLabels = {j:i for i,j in enumerate(np.unique(labels))}\n", 76 | "numericLabels = np.array([mappingLabels[i] for i in labels])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# Drop first two columns\n", 88 | "df.drop(df.columns[[0,1]], axis=1, inplace=True)" 89 | ] 90 | }, 91 | { 92 | 
"cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "# Data to be imputed\n", 100 | "dataToBeImputed = df.values\n", 101 | "\n", 102 | "# Get column names\n", 103 | "columnNames = df.columns.values.astype('U')\n", 104 | "# columnNames = [i for i in columnNames]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Get the info from the data, for each feature and each label, calculate their NA rate and store the non-NA values for further analysis, like box-plot" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "info = {}\n", 123 | "for column in xrange(len(columnNames)):\n", 124 | " if column % 100 == 0:\n", 125 | " print column\n", 126 | " info[columnNames[column]] = {}\n", 127 | " for label in xrange(3):\n", 128 | " info[columnNames[column]][label] = {}\n", 129 | " indexes = list(np.where(numericLabels==label)[0])\n", 130 | " temp = df.ix[indexes,column]\n", 131 | " info[columnNames[column]][label]['NA rate'] = temp.isnull().values.sum()*1.0/len(indexes)\n", 132 | " info[columnNames[column]][label]['non-NA data'] = temp[temp.notnull().values].values\n", 133 | "\n", 134 | "info['labelMap'] = {0:'Acos',1:'Asthma',2:'COPD'}" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "np.save('./data/Info_' + dataType + '.npy', info)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Data Imputation with simply mean/median, advanced methods will be attached as well" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "from sklearn.preprocessing import Imputer\n", 164 | "\n", 165 | "imp = Imputer(missing_values='NaN', strategy=impute_strategy, axis=0)\n", 166 | "imp.fit(dataToBeImputed)\n", 167 | "imputedData = imp.transform(dataToBeImputed)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## Scale the data from 0 to 1" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "from sklearn.preprocessing import MinMaxScaler\n", 186 | "\n", 187 | "mm = MinMaxScaler(feature_range=(0, 1))\n", 188 | "scaledData = mm.fit_transform(imputedData)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "# Read the patient list generated from the last file\n", 200 | "\n", 201 | "patientList = np.load(\"./data/patientList\"+dataType+\".npy\")" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "# Save the file\n", 213 | "sio.savemat(\"./data/\" + dataType + \"_3labels_mean_scaled.mat\", \\\n", 214 | " {'X':scaledData,'Y':numericLabels,'patients':patientList, 'columnNames':columnNames})" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | 
"collapsed": false 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "labels2_list = ['AsthmaCOPD','AcosCOPD','AcosAsthma']\n", 226 | "for i in xrange(len(labels2_list)):\n", 227 | " # Create 2-classes patients list\n", 228 | " indexes = np.where(numericLabels!=i)\n", 229 | " Y = numericLabels[indexes]\n", 230 | " if i == 0:\n", 231 | " Y = Y-1\n", 232 | " elif i == 1:\n", 233 | " Y = np.array([j if j == 0 else 1 for j in Y])\n", 234 | " X, p = scaledData[indexes,:], patientList[indexes]\n", 235 | " \n", 236 | " sio.savemat(\"./data/\" + dataType + \"_\" + labels2_list[i] + \"_mean_scaled.mat\", \\\n", 237 | " {'X':X, 'Y':Y, 'patients':p, 'columnNames':columnNames})" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | "kernelspec": { 243 | "display_name": "Python 2", 244 | "language": "python", 245 | "name": "python2" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 2 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython2", 257 | "version": "2.7.12" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 0 262 | } 263 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepFeatureSelection--Tensorflow 2 | Deep Feature Selection Framework implemented in Tensorflow based on the paper from 3 | http://link.springer.com/chapter/10.1007%2F978-3-319-16706-0_20 4 | -------------------------------------------------------------------------------- /TFlearnVersion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "import numpy as np\n", 16 | "from sklearn.datasets import make_classification\n", 17 | "from sklearn.preprocessing import normalize\n", 18 | "import tflearn\n", 19 | "import tensorflow as tf\n", 20 | "from nncomponents import One2OneInputLayer\n", 21 | "\n", 22 | "ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 23 | "# ourdataB = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 24 | "\n", 25 | "inputX = ourdataB['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdataB['Y'][0,:]\n", 28 | "columnNames = ourdataB['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)\n", 31 | "Y_train, Y_test = tflearn.data_utils.to_categorical(y_train, 2), tflearn.data_utils.to_categorical(y_test, 2)\n", 32 | "\n", 33 | "indexes = sio.loadmat(\"xgboost_result\")['importance_rank']\n", 34 | "\n", 35 | "X_train500, X_test500 = X_train[:, indexes.tolist()[0][:500]], X_test[:, indexes.tolist()[0][:500]]\n", 36 | "X_train100, X_test100 = X_train[:, indexes.tolist()[0][:100]], X_test[:, indexes.tolist()[0][:100]]\n", 37 | "X_train10, X_test10 = X_train[:, indexes.tolist()[0][:10]], X_test[:, indexes.tolist()[0][:10]]\n", 38 | "X_train50, X_test50 = X_train[:, indexes.tolist()[0][:50]], X_test[:, indexes.tolist()[0][:50]]" 39 | ] 40 | 
}, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 64, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "def dfs(lambda1, n_epoch, size=None):\n", 50 | " with tf.Graph().as_default():\n", 51 | " sess = tf.Session()\n", 52 | " \n", 53 | " if size is None:\n", 54 | " size = 7203\n", 55 | "\n", 56 | " input_data = tflearn.input_data(shape=[None, size])\n", 57 | " input_layer = One2OneInputLayer(input_data)\n", 58 | "\n", 59 | " tflearn.helpers.regularizer.add_weights_regularizer(input_layer.w, loss='L1', \\\n", 60 | " weight_decay=lambda1, add_to_collection=None)\n", 61 | "\n", 62 | " dense = tflearn.fully_connected(input_layer.output, 50, activation='tanh')\n", 63 | " \n", 64 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax')\n", 65 | " \n", 66 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 67 | " model = tflearn.DNN(net)\n", 68 | "\n", 69 | " sess.run(tf.initialize_all_variables())\n", 70 | " \n", 71 | "# variables = tflearn.variables.get_all_trainable_variable()\n", 72 | "# for i in xrange(1,4):\n", 73 | "# sess.run(variables[i].assign(initial_values[i]))\n", 74 | " if size == 500:\n", 75 | " X_train, X_test = X_train500, X_test500\n", 76 | " elif size == 100:\n", 77 | " X_train, X_test = X_train100, X_test100\n", 78 | " elif size == 50:\n", 79 | " X_train, X_test = X_train50, X_test50\n", 80 | " elif size == 10:\n", 81 | " X_train, X_test = X_train10, X_test10\n", 82 | " \n", 83 | " model.fit(X_train, Y_train, n_epoch=n_epoch, show_metric=True, validation_set=(X_test, Y_test), batch_size=100)\n", 84 | "\n", 85 | " selected_w = sess.run(input_layer.w)\n", 86 | "\n", 87 | " return selected_w" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 67, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "Training Step: 2407 | total loss: \u001b[1m\u001b[32m0.21137\u001b[0m\u001b[0m\n", 102 | "\u001b[2K\r", 103 | "| Adam | epoch: 027 | loss: 0.21137 - acc: 0.9473 -- iter: 8500/8547\n" 104 | ] 105 | }, 106 | { 107 | "ename": "KeyboardInterrupt", 108 | "evalue": "", 109 | "output_type": "error", 110 | "traceback": [ 111 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 112 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 113 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m500\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;34m/=\u001b[0m \u001b[0;36m10000.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mweights\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 114 | "\u001b[0;32m\u001b[0m in \u001b[0;36mdfs\u001b[0;34m(lambda1, n_epoch, size)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_train10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshow_metric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_set\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mselected_w\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_layer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 115 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/models/dnn.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X_inputs, Y_targets, n_epoch, validation_set, show_metric, batch_size, shuffle, snapshot_epoch, snapshot_step, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0mdaug_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdaug_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0mexcl_trainops\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexcl_trainops\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m run_id=run_id)\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 116 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0msnapshot_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0msnapshot_step\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m show_metric)\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0mglobal_loss\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macc_value\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mglobal_acc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 117 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, training_step, snapshot_epoch, snapshot_step, show_metric)\u001b[0m\n\u001b[1;32m 
722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 724\u001b[0;31m \u001b[0me\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluate_flow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_dflow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 725\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 118 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mevaluate_flow\u001b[0;34m(session, ops_to_evaluate, dataflow)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 847\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mcurrent_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mfeed_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mr\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_samples\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 119 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/data_flow.pyc\u001b[0m in \u001b[0;36mnext\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 126\u001b[0m \"\"\"\n\u001b[1;32m 127\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata_status\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed_dict_queue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreset_status\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 120 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/Queue.pyc\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_empty\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 121 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/threading.pyc\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 340\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 341\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__debug__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_note\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"%s.wait(): got it\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 122 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "weights = []\n", 128 | "for lambda1 in xrange(0, 500, 5):\n", 129 | " print(lambda1)\n", 130 | " lambda1 /= 10000.\n", 131 | " weights.append(dfs(0, 30, 100))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 34, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [ 141 | { 142 | "ename": "NameError", 143 | "evalue": "name 'model' is not defined", 144 | "output_type": "error", 145 | "traceback": [ 146 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 147 | 
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 148 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 149 | "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "type(model)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 37, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "def get_inits():\n", 166 | " with tf.Graph().as_default():\n", 167 | " sess = tf.Session()\n", 168 | "\n", 169 | " input_data = tflearn.input_data(shape=[None, 7203])\n", 170 | " input_layer = One2OneInputLayer(input_data)\n", 171 | "\n", 172 | " dense = tflearn.fully_connected(input_layer.output, 500, activation='tanh', name='dense')\n", 173 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax', name='sofmax')\n", 174 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 175 | " model = tflearn.DNN(net)\n", 176 | " \n", 177 | " print(type(model))\n", 178 | "\n", 179 | " sess.run(tf.initialize_all_variables())\n", 180 | "\n", 181 | "# model.fit(X_train, Y_train, n_epoch=10, show_metric=True, validation_set=(X_test, Y_test))\n", 182 | "\n", 183 | " variables = tflearn.variables.get_all_trainable_variable()\n", 184 | " \n", 185 | " values = []\n", 186 | " for i in xrange(4):\n", 187 | " values.append(sess.run(variables[i]))\n", 188 | "\n", 189 | " return values" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 38, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "initial_values = get_inits()" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "initial_values[0]" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "tflearn.input_data(tf.Variable(initial_values[0]))" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "collapsed": false 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "dense_vars = tflearn.variables.get_all_variables()\n", 242 | "print(\"Dense1 layer weights:\")\n", 243 | "print(model.get_weights(dense_vars[0]))\n", 244 | "# Or using generic tflearn function:\n", 245 | "print(\"Dense1 layer biases:\")\n", 246 | "with model.session.as_default():\n", 247 | " print(tflearn.variables.get_value(dense_vars[1]))" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "dense_varsa" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 68, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "High five! You successfuly sent some data to your account on plotly. 
View your plot in your browser at https://plot.ly/~tonyabracadabra/0 or inside your plot.ly account where it is named 'basic-heatmap'\n" 273 | ] 274 | }, 275 | { 276 | "data": { 277 | "text/html": [ 278 | "" 279 | ], 280 | "text/plain": [ 281 | "" 282 | ] 283 | }, 284 | "execution_count": 68, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "import plotly.plotly as py\n", 291 | "import plotly.graph_objs as go\n", 292 | "import plotly\n", 293 | "\n", 294 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 295 | "\n", 296 | "data = [\n", 297 | " go.Heatmap(\n", 298 | " z=np.abs(weights)\n", 299 | " )\n", 300 | "]\n", 301 | "\n", 302 | "py.iplot(data, filename='basic-heatmap')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 1, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [ 312 | { 313 | "ename": "NameError", 314 | "evalue": "name 'weights' is not defined", 315 | "output_type": "error", 316 | "traceback": [ 317 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 318 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 319 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mweights\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 320 | "\u001b[0;31mNameError\u001b[0m: name 'weights' is not defined" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "weights" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": true 333 | }, 334 | "outputs": [], 335 | "source": [ 336 | "import pydendroheatmap as pdh\n", 337 | "import scipy.cluster.hierarchy as sch\n", 338 | "\n", 339 | "\n", 340 | "heatmap_array = pickle.load(open('some_data_file.pickle'))#a numpy.ndarray or numpy.matrix, for this example, let's say mxn array\n", 341 | "top_dendrogram = pickle.load(open('another_data_file.pickle'))#a (n-1) x 4 array\n", 342 | "side_dendrogram = pickle.load(open('a_third_data_file.pickle'))#a (m-1) x 4 array\n", 343 | "\n", 344 | "heatmap = pdh.DendroHeatMap(heat_map_data=heatmap_array, left_dendrogram=side_dendrogram, top_dendrogram=top_dendrogram)\n", 345 | "heatmap.title = 'This is an example'\n", 346 | "heatmap.show()\n", 347 | "\n", 348 | "heatmap.colormap = heatmap.yellowBlackBlue\n", 349 | "\n", 350 | "heatmap.show()\n", 351 | "\n", 352 | "heatmap.row_labels = ['some', 'row','labels'] #must have the same number of rows in heat_map_data\n", 353 | "\n", 354 | "heatmap.reset_plot()\n", 355 | "heatmap.show()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 72, 361 | "metadata": { 362 | "collapsed": false 363 | }, 364 | "outputs": [ 365 | { 366 | "data": { 367 | "text/plain": [ 368 | "R object with classes: ('list',) mapped to:\n", 369 | "\n", 370 | "[IntVector, IntVector, RNULLType, RNULLType]\n", 371 | " rowInd: \n", 372 | " R object with classes: ('integer',) mapped to:\n", 373 | "\n", 374 | "[ 16, 9, 59, ..., 53, 52, 39]\n", 375 | " colInd: \n", 376 | " R object with classes: ('integer',) mapped to:\n", 377 | "\n", 378 | "[ 94, 68, 93, ..., 31, 55, 61]\n", 379 | " Rowv: \n", 380 | " rpy2.rinterface.NULL\n", 381 | " Colv: \n", 382 | " rpy2.rinterface.NULL" 383 | ] 384 | }, 385 | "execution_count": 72, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "import scipy.io as sio\n", 392 | "from 
rpy2.robjects import r\n", 393 | "import rpy2.robjects.numpy2ri\n", 394 | "\n", 395 | "rpy2.robjects.numpy2ri.activate()\n", 396 | "\n", 397 | "data = np.random.random((10,10))\n", 398 | "r.heatmap(np.array(weights)) " 399 | ] 400 | } 401 | ], 402 | "metadata": { 403 | "kernelspec": { 404 | "display_name": "Python 2", 405 | "language": "python", 406 | "name": "python2" 407 | }, 408 | "language_info": { 409 | "codemirror_mode": { 410 | "name": "ipython", 411 | "version": 2 412 | }, 413 | "file_extension": ".py", 414 | "mimetype": "text/x-python", 415 | "name": "python", 416 | "nbconvert_exporter": "python", 417 | "pygments_lexer": "ipython2", 418 | "version": "2.7.12" 419 | } 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 0 423 | } 424 | -------------------------------------------------------------------------------- /XGBoost.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "from dfs2 import DeepFeatureSelectionNew\n", 16 | "import numpy as np\n", 17 | "from sklearn.datasets import make_classification\n", 18 | "from sklearn.preprocessing import normalize\n", 19 | "\n", 20 | "# ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 21 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 22 | "ourdata = sio.loadmat(\"./B_mean_2labels.mat\")\n", 23 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/Q_2labels_unstandardized.mat\")\n", 24 | "\n", 25 | "inputX = ourdata['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdata['Y'][0,:]\n", 28 | "columnNames = ourdata['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "from sklearn.ensemble import RandomForestClassifier\n", 42 | "from sklearn.metrics import accuracy_score\n", 43 | "import xgboost as xgb\n", 44 | "\n", 45 | "# rf = RandomForestClassifier(criterion=\"entropy\", n_estimators = 300, max_depth = 100)\n", 46 | "# rf.fit(X_train, y_train)\n", 47 | "\n", 48 | "# y_pred = rf.predict(X_test)\n", 49 | "\n", 50 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(X_train, y_train)\n", 51 | "y_pred = gbm.predict(X_test)\n", 52 | "\n", 53 | "# featurescores = gbm.feature_importances_\n", 54 | "\n", 55 | "print(accuracy_score(y_test, y_pred))\n", 56 | "\n", 57 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 58 | "\n", 59 | "np.save(\"indexes_xgboost\",indexes_xgboost)" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 2", 66 | "language": "python", 67 | "name": "python2" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 2 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython2", 79 | "version": "2.7.12" 80 | } 81 | 
},
"nbformat": 4,
"nbformat_minor": 0
}
-------------------------------------------------------------------------------- /randomLasso.py: --------------------------------------------------------------------------------
from sklearn.linear_model import RandomizedLasso
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

# ourdataB = sio.loadmat("/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat")
ourdataB = sio.loadmat("/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat")
# ourdataB = sio.loadmat("/home/REGENERON/xupeng.tong/newDataB_2labels.mat")

inputX = ourdataB['X']
inputX = normalize(inputX, axis=0)
inputY = ourdataB['Y'][0,:]
columnNames = ourdataB['columnNames']

X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)

randomized_lasso = RandomizedLasso()
randomized_lasso.fit(X_train, y_train)

featureMask = randomized_lasso.get_support()

X_train_lasso = X_train[:,featureMask]
X_test_lasso = X_test[:,featureMask]

# Peek at (up to) the first 100 selected feature names
print(columnNames[0][featureMask][:100])

sio.savemat('RandomLasso-result', {'X_train_lasso':X_train_lasso, \
    'X_test_lasso':X_test_lasso, 'featureMask':featureMask})
-------------------------------------------------------------------------------- /run_model/Q_run_AsthmaAcos_NoSmokeAge.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

# Assumed data file, following the naming convention of the sibling Q_run scripts
ourdata = sio.loadmat("./data/Q_AsthmaAcos_NoAgeSmoke.mat")
inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

index_Acos = np.where(inputY==0)[0]
index_Asthma = np.where(inputY==1)[0]

weights = []
for i in xrange(50):
    # Make a random choice of asthma patients, matching the Acos group size
    choice = np.random.choice(a=len(index_Asthma), size=len(index_Acos))
    index_Asthma_chosen = index_Asthma[choice]

    # Concatenate the indexes for Asthma and Acos patients
    indexes = np.array(index_Acos.tolist()+index_Asthma_chosen.tolist())
    # Shuffle the indexes
    np.random.shuffle(indexes)
    indexes = indexes.tolist()

    # inputX and inputY for this round
    inputX_ = inputX[indexes,:]
    inputY_ = inputY[indexes]

    X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[150], learning_rate=0.01, \
                                     lambda1=0.005, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)
    dfsMLP.train(batch_size=500)
    print("Train finished for round: " + str(i))
    weights.append(dfsMLP.selected_ws[0])

np.save("./weights/Q_weights_AsthmaAcos", weights)
-------------------------------------------------------------------------------- /run_model/Q_run_AsthmaCOPD_NoSmokeAge.py:
--------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

ourdata = sio.loadmat("./data/Q_AsthmaCOPD_NoAgeSmoke.mat")

inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

weights = []

for random_state in xrange(50):
    # Resplit the data
    X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[5], learning_rate=0.01, \
                                     lambda1=0.001, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=100, optimizer='Adam', print_step=10)
    dfsMLP.train(batch_size=2000)
    print("Train finished for random state: " + str(random_state))
    weights.append(dfsMLP.selected_ws[0])

# The generated weights will be saved in the weights folder
np.save("./weights/Q_weights_AsthmaCOPD", weights)
-------------------------------------------------------------------------------- /run_model/Q_run_COPDAcos_NoSmokeAge.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

ourdata = sio.loadmat("./data/Q_COPDAcos_NoAgeSmoke.mat")
inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

index_Acos = np.where(inputY==0)[0]
index_COPD = np.where(inputY==1)[0]

weights = []
for i in xrange(50):
    # Make a random choice of COPD patients, matching the Acos group size
    choice = np.random.choice(a=len(index_COPD), size=len(index_Acos))
    index_COPD_chosen = index_COPD[choice]

    # Concatenate the indexes for Acos and COPD patients
    indexes = np.array(index_Acos.tolist()+index_COPD_chosen.tolist())
    # Shuffle the indexes
    np.random.shuffle(indexes)
    indexes = indexes.tolist()

    # inputX and inputY for this round
    inputX_ = inputX[indexes,:]
    inputY_ = inputY[indexes]

    X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[150], learning_rate=0.01, \
                                     lambda1=0.005, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)
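    # Optional logging (sketch; uses only variables defined above in this
    # loop): record the class balance of this bootstrap round.
    print("Round %d: %d Acos + %d sampled COPD samples" % (i, len(index_Acos), len(index_COPD_chosen)))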
    dfsMLP.train(batch_size=500)
    print("Train finished for round: " + str(i))
    weights.append(dfsMLP.selected_ws[0])

np.save("./weights/Q_weights_COPDAcos", weights)
-------------------------------------------------------------------------------- /run_model/rerun_AsthmaAcos.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize


ourdata = sio.loadmat("../data/B_AsthmaAcos_mean_scaled_7159.mat")
inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

index_Acos = np.where(inputY==0)[0]
index_Asthma = np.where(inputY==1)[0]

weights = []
for i in xrange(50):
    # Make a random choice of asthma patients, matching the Acos group size
    choice = np.random.choice(a=len(index_Asthma), size=len(index_Acos))
    index_Asthma_chosen = index_Asthma[choice]

    # Concatenate the indexes for Asthma and Acos patients
    indexes = np.array(index_Acos.tolist()+index_Asthma_chosen.tolist())
    # Shuffle the indexes
    np.random.shuffle(indexes)
    indexes = indexes.tolist()

    # inputX and inputY for this round
    inputX_ = inputX[indexes,:]
    inputY_ = inputY[indexes]

    X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[150], learning_rate=0.01, \
                                     lambda1=0.005, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=30, optimizer='Adam', print_step=10)
    dfsMLP.train(batch_size=500)
    print("Train finished for round: " + str(i))
    weights.append(dfsMLP.selected_ws[0])

np.save("./weights/weights_AsthmaAcos_rerun", weights)
-------------------------------------------------------------------------------- /run_model/rerun_AsthmaCOPD.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

ourdata = sio.loadmat("./data/B_AsthmaCOPD_mean_scaled_7159.mat")

inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

weights = []

for i in xrange(50):
    # Resplit the data
    X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=i)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test,
y_train, y_test, n_input=1, hidden_dims=[5], learning_rate=0.012, \ 28 | lambda1=0.002, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \ 29 | weight_init='uniform',epochs=20, optimizer='Adam', print_step=10) 30 | dfsMLP.train(batch_size=2000) 31 | print("Train finised for random state:" + str(i)) 32 | weights.append(dfsMLP.selected_ws[0]) 33 | 34 | # The generated weights will be in the weights folder 35 | np.save("./weights/weights_AsthmaCOPD_rerun", weights) -------------------------------------------------------------------------------- /run_model/rerun_COPDAcos.py: -------------------------------------------------------------------------------- 1 | if __name__ == '__main__' and __package__ is None: 2 | from os import sys, path 3 | sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) 4 | 5 | from sklearn import datasets 6 | from sklearn.cross_validation import train_test_split 7 | from scipy import io as sio 8 | from tensorflow.python.framework import ops 9 | from supporting_files.dfs2 import DeepFeatureSelectionNew 10 | import numpy as np 11 | from sklearn.datasets import make_classification 12 | from sklearn.preprocessing import normalize 13 | 14 | 15 | ourdata = sio.loadmat("../data/B_COPDAcos_mean_scaled_7159.mat") 16 | inputX = ourdata['X'] 17 | inputY = ourdata['Y'][0,:] 18 | columnNames = ourdata['columnNames'] 19 | 20 | index_Acos = np.where(inputY==0)[0] 21 | index_COPD = np.where(inputY==1)[0] 22 | 23 | weights = [] 24 | for i in xrange(50): 25 | # made random choice of asthma patients 26 | choice = np.random.choice(a=len(index_COPD), size=len(index_Acos)) 27 | index_COPD_chosen = index_COPD[choice] 28 | 29 | # Concatenate the indexes for Asthma and Acos patients 30 | indexes = np.array(index_Acos.tolist()+index_COPD_chosen.tolist()) 31 | # Shuffle the indexes 32 | np.random.shuffle(indexes) 33 | indexes = indexes.tolist() 34 | 35 | # inputX and inputY for this round 36 | inputX_ = inputX[indexes,:] 37 | inputY_ = inputY[indexes] 38 | 39 | X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2) 40 | 41 | # Change number of epochs to control the training time 42 | dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[10], learning_rate=0.01, \ 43 | lambda1=0.01, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \ 44 | weight_init='uniform',epochs=50, optimizer='Adam', print_step=10) 45 | dfsMLP.train(batch_size=500) 46 | print("Train finised for random state:" + str(i)) 47 | weights.append(dfsMLP.selected_ws[0]) 48 | 49 | np.save("./weights/weights_COPDAcos_rerun", weights) -------------------------------------------------------------------------------- /supporting_files/__init__.py: -------------------------------------------------------------------------------- 1 | # Xupeng Tong 2 | # 3 | # This is the supporting files for Deep Feature Selection, 4 | # including all the NN components that are implemented, activation function, 5 | # mini-batch functions, initializations etc. 
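#
# A minimal usage sketch (assuming you run from the repository root, as the
# run_model scripts do):
#
#     from supporting_files.dfs2 import DeepFeatureSelectionNew
#     dfs = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test,
#                                   hidden_dims=[50], epochs=20)
#     dfs.train(batch_size=2000)
#     input_weights = dfs.selected_ws[0]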
6 | 7 | __author__ = "Xupeng Tong" 8 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 9 | __email__ = "tongxupeng.cpu@gmail.com" -------------------------------------------------------------------------------- /supporting_files/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/__init__.pyc -------------------------------------------------------------------------------- /supporting_files/dfs2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Xupeng Tong" 4 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 5 | __email__ = "tongxupeng.cpu@gmail.com" 6 | 7 | import tensorflow as tf 8 | from nncomponents import * 9 | from helpers import * 10 | from sda import StackedDenoisingAutoencoder 11 | import numpy as np 12 | 13 | class DeepFeatureSelectionNew: 14 | """ Perform Deep Feature Selection 15 | 16 | Read more in the original paper: 17 | Li Y, Chen C Y, Wasserman W W. Deep feature selection: Theory and application to identify enhancers and promoters[C] 18 | //International Conference on Research in Computational Molecular Biology. Springer International Publishing, 2015: 205-217. 19 | 20 | This implementation has been modified. 21 | 22 | Parameters 23 | ---------- 24 | X_train: numpy array 25 | The training data 26 | 27 | X_test: numpy array 28 | The testing data 29 | 30 | weight_init: string, default : "uniform", with options "sda" (StackedDenoisingAutoencoder) and "uniform" 31 | Initializes the weights of the neural network 32 | 33 | n_input: int, default : 1, optional 34 | Number of one-to-one input layers; values larger than 1 have not proved useful, but may be worth trying 35 | 36 | hidden_dims: list, default : [1000] 37 | A list of hidden-node counts, one per layer; len(hidden_dims) should equal the number of hidden layers 38 | 39 | activation: string, default : 'sigmoid', with options "sigmoid", "tanh" and "relu" 40 | The activation function applied to all layers 41 | 42 | epochs: int, default : 1000 43 | How many epochs to run; the whole X_train set is expected to be passed over once per epoch 44 | 45 | lambda1: float32, default : 0.001, normally a very small number, otherwise the training will get stuck 46 | Decides the sparseness of the input layer and prevents overfitting 47 | 48 | lambda2: float32, default : 1.0, range from 0.0 to 1.0 49 | With lambda1, it defines the elastic net regularization of the input layer; when lambda2 == 1, it is 50 | equivalent to L1 regularization 51 | 52 | alpha1: float32, default : 0.001, normally a very small number, otherwise the training will get stuck 53 | Decides the L2 regularization of the hidden layers and prevents overfitting 54 | 55 | alpha2: float32, default : 0.0, range from 0.0 to 1.0 56 | With alpha1, it defines the elastic net regularization of the hidden layers; when alpha2 == 0, it is 57 | equivalent to L2 regularization 58 | 59 | learning_rate: float32, default : 0.1 60 | Learning rate of the gradient descent 61 | 62 | optimizer: string, default : "Adam", with options "Adam", "FTRL" and "SGD" 63 | The optimizer for gradient descent; normally the Adam optimizer gives the fastest convergence, while 64 | FTRL is claimed to produce more sparsity, though this has not been verified yet 65 | 66 | print_step: int, default : 1000 67 | Epoch interval for printing progress 68 | 69 | 70
| Attributes 71 | ---------- 72 | cost: 73 | Cost to be minimized 74 | accuracy: 75 | Accuracy by the softmax layer 76 | selected_ws: 77 | The weights learned from the input layer, if n_input == 1, 78 | use selected_ws[0] for the input weights 79 | """ 80 | 81 | def __init__(self, X_train, X_test, y_train, y_test, weight_init='sda', n_input = 1, hidden_dims=[1000], activation='sigmoid',epochs=1000, lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0, learning_rate=0.1, optimizer='Adam', print_step=1000): 82 | """ 83 | Initialize the DFS class 84 | """ 85 | 86 | # Get the dimension of the input X 87 | n_sample, n_feat = X_train.shape 88 | n_classes = len(np.unique(y_train)) 89 | 90 | self.epochs = epochs 91 | self.n_input = n_input 92 | self.print_step = print_step 93 | 94 | # Store up original value 95 | self.X_train = X_train 96 | self.y_train = one_hot(y_train) 97 | self.X_test = X_test 98 | self.y_test = one_hot(y_test) 99 | 100 | # Two variables with undetermined length is created 101 | self.var_X = tf.placeholder(dtype=tf.float32, shape=[None, n_feat], name='x') 102 | self.var_Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y') 103 | 104 | input_hidden = 0 105 | self.L1_input, self.L2_input = 0, 0 106 | # If there is no input layer 107 | if n_input != 0: 108 | # Create several one to one layers 109 | self.input_layers = [] 110 | input_1to1 = self.var_X 111 | 112 | # regularization terms on coefficients of input layer 113 | L1_input, L2_input = [], [] 114 | 115 | for i in xrange(n_input): 116 | self.input_layers.append(One2OneInputLayer(input_1to1)) 117 | input_1to1 = self.input_layers[-1].output 118 | L1_input.append(tf.reduce_sum(tf.abs(self.input_layers[i].w))) 119 | L2_input.append(tf.nn.l2_loss(self.input_layers[i].w)) 120 | 121 | input_hidden = self.input_layers[-1].output 122 | 123 | # Add it up 124 | self.L1_input = tf.add_n(L1_input) 125 | self.L2_input = tf.add_n(L2_input) 126 | 127 | else: 128 | input_hidden = self.var_X 129 | 130 | # Create list of hidden layers 131 | self.hidden_layers = [] 132 | # Initialize the network weights 133 | weights, biases = init_layer_weight(hidden_dims, X_train, weight_init) 134 | 135 | # Create regularization terms on weights of hidden layers 136 | L1s, L2_sqrs = [], [] 137 | # Create hidden layers 138 | for init_w, init_b in zip(weights, biases): 139 | self.hidden_layers.append(DenseLayer(input_hidden, init_w, init_b, activation=activation)) 140 | input_hidden = self.hidden_layers[-1].output 141 | L1s.append(tf.reduce_sum(tf.abs(self.hidden_layers[-1].w))) 142 | L2_sqrs.append(tf.nn.l2_loss(self.hidden_layers[-1].w)) 143 | 144 | # Final classification layer, variable Y is passed 145 | self.softmax_layer = SoftmaxLayer(self.hidden_layers[-1].output, n_classes, self.var_Y) 146 | 147 | L1s.append(tf.reduce_sum(tf.abs(self.softmax_layer.w))) 148 | L2_sqrs.append(tf.nn.l2_loss(self.softmax_layer.w)) 149 | 150 | self.L1 = tf.add_n(L1s) 151 | self.L2_sqr = tf.add_n(L2_sqrs) 152 | 153 | # Cost with two regularization terms 154 | self.cost = self.softmax_layer.cost \ 155 | + lambda1*(1.0-lambda2)*0.5*self.L2_input + lambda1*lambda2*self.L1_input \ 156 | + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2*self.L1 157 | 158 | 159 | self.optimizer = optimize(self.cost, learning_rate, optimizer) 160 | 161 | self.accuracy = self.softmax_layer.accuracy 162 | 163 | self.y = self.softmax_layer.y 164 | 165 | def train(self, batch_size=100): 166 | """ Train the data with specified batch size, note the if the batch size exceed 167 | the 
number of samples in the training set, all the training data will be used 168 | as one batch 169 | 170 | Parameters 171 | ---------- 172 | 173 | batch_size: int, default : 100 174 | Defined the number of sample per batch 175 | """ 176 | sess = tf.Session() 177 | self.sess = sess 178 | sess.run(tf.initialize_all_variables()) 179 | batch_generator = GenBatch(self.X_train, self.y_train, batch_size) 180 | n_batch = batch_generator.n_batch 181 | 182 | self.losses, self.train_Accs, self.test_Accs = [], [], [] 183 | for i in xrange(self.epochs): 184 | # x_batch, y_batch = get_batch(self.X_train, self.y_train, batch_size) 185 | batch_generator.resetIndex() 186 | for j in xrange(n_batch+1): 187 | x_batch, y_batch = batch_generator.get_batch() 188 | sess.run(self.optimizer, feed_dict={self.var_X: x_batch, self.var_Y: y_batch}) 189 | 190 | self.train_Accs.append(sess.run(self.accuracy, \ 191 | feed_dict={self.var_X: self.X_train, self.var_Y: self.y_train})) 192 | self.test_Accs.append(sess.run(self.accuracy, \ 193 | feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test})) 194 | self.losses.append(sess.run(self.cost, \ 195 | feed_dict={self.var_X: x_batch, self.var_Y: y_batch})) 196 | 197 | if i % self.print_step == 0: 198 | print('epoch {0}: global loss = {1}'.format(i, self.losses[-1])) 199 | print("Train accuracy:", self.train_Accs[-1]) 200 | print("Test accuracy:", self.test_Accs[-1]) 201 | 202 | self.selected_ws = [sess.run(self.input_layers[i].w) for i in xrange(self.n_input)] 203 | # print("Input layer w: ", self.selected_ws) 204 | print("Final train accuracy:", self.train_Accs[-1]) 205 | print("Final test accuracy:", self.test_Accs[-1]) 206 | 207 | def refine_init_weight(self, threshold=0.001): 208 | """ Set input layer weights whose value is smaller than some threshold to zero and 209 | recalculate the accuracy rate 210 | 211 | Parameters 212 | ---------- 213 | 214 | threshold: float32, default : 0.001 215 | Threshold value 216 | """ 217 | 218 | refined_ws = [np.copy(w) for w in self.selected_ws] 219 | for i, refined_w in enumerate(refined_ws): 220 | refined_w[refined_w < threshold] = 0 221 | self.sess.run(self.input_layers[i].w.assign(refined_w)) 222 | print("Test accuracy refined:",self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test})) -------------------------------------------------------------------------------- /supporting_files/dfs2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/dfs2.pyc -------------------------------------------------------------------------------- /supporting_files/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | __author__ = "Xupeng Tong" 6 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 7 | __email__ = "tongxupeng.cpu@gmail.com" 8 | 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | def activate(layer, name): 13 | """ Activate one layer with specified activation function 14 | 15 | Parameters 16 | ---------- 17 | layer: Tensor 18 | The layer to be activated 19 | name: string, with options "sigmoid", "softmax", "tanh", "relu" and "linear" 20 | The name of the activation function 21 | """ 22 | 23 | if name == 'sigmoid': 24 | return tf.nn.sigmoid(layer) 25 | elif name == 'softmax': 26 | return 
--------------------------------------------------------------------------------
/supporting_files/dfs2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/dfs2.pyc
--------------------------------------------------------------------------------
/supporting_files/helpers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from __future__ import print_function

__author__ = "Xupeng Tong"
__copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron"
__email__ = "tongxupeng.cpu@gmail.com"

import tensorflow as tf
import numpy as np

def activate(layer, name):
    """ Activate one layer with the specified activation function

    Parameters
    ----------
    layer: Tensor
        The layer to be activated
    name: string, one of "sigmoid", "softmax", "tanh", "relu" and "linear"
        The name of the activation function
    """

    if name == 'sigmoid':
        return tf.nn.sigmoid(layer)
    elif name == 'softmax':
        return tf.nn.softmax(layer)
    elif name == 'tanh':
        return tf.nn.tanh(layer)
    elif name == 'relu':
        return tf.nn.relu(layer)
    elif name == 'linear':
        return layer

def optimize(cost, learning_rate, optimizer):
    """ Optimize the cost

    Parameters
    ----------
    cost: Tensor
        The cost to be minimized
    learning_rate: float32
        Learning rate for gradient descent
    optimizer: string, one of "FTRL", "Adam", "SGD"
        The name of the optimization method;
        the Adam optimizer generally gives us the best result
    """

    optimizer = {'FTRL': tf.train.FtrlOptimizer, 'Adam': tf.train.AdamOptimizer, \
                 'SGD': tf.train.GradientDescentOptimizer}[optimizer]

    return optimizer(learning_rate=learning_rate).minimize(cost)

def one_hot(y):
    """ Generate the one-hot representation of y

    Parameters
    ----------
    y: numpy array of integer labels in {0, ..., n_classes - 1}
    """
    n_classes = len(np.unique(y))
    one_hot_Y = np.zeros((len(y), n_classes))
    for i, j in enumerate(y):
        one_hot_Y[i][j] = 1

    return one_hot_Y

def init_layer_weight(dims, X, name):
    """ Initialize the weights for the hidden layers and return the result

    Parameters
    ----------
    dims: list
        Each element stands for the number of nodes in one layer
    X: numpy array
        The training data, used for pretraining when name == 'sda'
    name: string, one of "sda" and "uniform"
        The name of the initialization method
    """

    weights, biases = [], []
    if name == 'sda':
        from sda import StackedDenoisingAutoencoder
        sda = StackedDenoisingAutoencoder(dims=dims)
        sda._fit(X)
        weights, biases = sda.weights, sda.biases
    elif name == 'uniform':
        n_in = X.shape[1]
        for d in dims:
            r = 4 * np.sqrt(6.0 / (n_in + d))
            weights.append(tf.random_uniform([n_in, d], minval=-r, maxval=r))
            biases.append(tf.zeros([d, ]))
            n_in = d

    return weights, biases

def get_random_batch(X, Y, size):
    """
    Alternative method of getting a random batch each time
    """
    assert len(X) == len(Y)
    a = np.random.choice(len(X), size, replace=False)
    return X[a], Y[a]

class GenBatch():
    """ The batch generator for training

    Parameters
    ----------
    X: numpy array
    y: numpy array
    batch_size: int
    """
    def __init__(self, X, y, batch_size):
        self.X = X
        self.Y = y
        self.batch_size = batch_size
        self.n_batch = len(X) // batch_size
        self.index = 0

    def get_batch(self):
        """
        Get the next batch; the final batch holds the leftover samples
        """
        # Both bounds must be scaled by batch_size (the original code used
        # the raw batch index as the lower bound, so every batch after the
        # first returned the wrong rows)
        batch_range = xrange(self.index * self.batch_size, (self.index + 1) * self.batch_size)
        if self.index == self.n_batch:
            batch_range = xrange(self.index * self.batch_size, len(self.X))
        self.index += 1

        return self.X[batch_range], self.Y[batch_range]

    def resetIndex(self):
        self.index = 0
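# ---- Usage sketch (illustrative, not part of the original module) ----
# Demonstrates one_hot and GenBatch on random data; all sizes are made up.
if __name__ == '__main__':
    X = np.random.rand(250, 8)
    Y = one_hot(np.random.randint(0, 3, 250))   # (250, 3) one-hot matrix
    gen = GenBatch(X, Y, batch_size=100)        # n_batch == 2 full batches
    gen.resetIndex()
    for _ in xrange(gen.n_batch + 1):           # the +1 picks up the last partial batch
        x_b, y_b = gen.get_batch()
        print(x_b.shape, y_b.shape)             # (100, 8) twice, then (50, 8)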
"tongxupeng.cpu@gmail.com" 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | from helpers import * 10 | 11 | class One2OneInputLayer(object): 12 | """ One to One input layer 13 | 14 | Parameters 15 | ---------- 16 | 17 | input: Tensor 18 | The output from the last layer 19 | weight_init: 20 | initial value for weights 21 | """ 22 | # One to One Mapping! 23 | def __init__(self, input, weight_init=None): 24 | n_in = input.get_shape()[1].value 25 | 26 | self.input = input 27 | 28 | # Initiate the weight for the input layer 29 | r = 4*np.sqrt(3.0/n_in) 30 | 31 | if weight_init is None: 32 | self.w = tf.Variable(tf.random_uniform([n_in,],-r, r), name='w') 33 | else: 34 | self.w = tf.Variable(weight_init, name='w') 35 | 36 | self.output = self.w * self.input 37 | 38 | class DenseLayer(object): 39 | """ Canonical dense layer 40 | 41 | Parameters 42 | ---------- 43 | 44 | input: Tensor 45 | The output from the last layer 46 | init_w: numpy array 47 | initial value for weights 48 | init_b: numpy array 49 | initial value for b 50 | """ 51 | def __init__(self, input, init_w, init_b, activation='sigmoid'): 52 | 53 | n_in = input.get_shape()[1].value 54 | self.input = input 55 | 56 | # Initiate the weight for the input layer 57 | 58 | w = tf.Variable(init_w, name='w') 59 | b = tf.Variable(init_b, name='b') 60 | 61 | output = tf.add(tf.matmul(input, w), b) 62 | output = activate(output, activation) 63 | 64 | self.w = w 65 | self.b = b 66 | self.output = output 67 | self.params = [w] 68 | 69 | class SoftmaxLayer(object): 70 | """ Softmax layer for classification 71 | 72 | Parameters 73 | ---------- 74 | 75 | input: Tensor 76 | The output from the last layer 77 | n_out: int 78 | Number of labels 79 | y: numpy array 80 | True label for the data 81 | """ 82 | def __init__(self, input, n_out, y): 83 | n_in = input.get_shape()[1].value 84 | self.input = input 85 | 86 | # Initiate the weight and biases for this layer 87 | r = 4*np.sqrt(6.0/(n_in + n_out)) 88 | w = tf.Variable(tf.random_uniform([n_in, n_out], minval=-r, maxval=r)) 89 | b = tf.Variable(tf.zeros([n_out]), name='b') 90 | 91 | pred = tf.add(tf.matmul(input, w), b) 92 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) 93 | 94 | # Evaluate model 95 | correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 96 | self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 97 | 98 | self.y = y 99 | self.w = w 100 | self.b = b 101 | self.cost = cost 102 | self.params= [w] -------------------------------------------------------------------------------- /supporting_files/nncomponents.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/nncomponents.pyc -------------------------------------------------------------------------------- /supporting_files/sda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | __author__ = "Xupeng Tong" 5 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 6 | __email__ = "tongxupeng.cpu@gmail.com" 7 | 8 | import numpy as np 9 | from helpers import get_random_batch 10 | from helpers import activate 11 | import tensorflow as tf 12 | 13 | class StackedDenoisingAutoencoder: 14 | """ A stacked deep autoencoder with denoising capability 15 | 16 | Parameters 17 | ---------- 18 | 19 | dims: list 
        Number of hidden units in each autoencoder layer
    epochs: list
        Number of training epochs for each layer
    activations: list of strings
        One activation name per layer; see helpers.activate for the options
    noise: string or None
        'gaussian' for additive Gaussian noise, 'mask-<frac>' to zero out a
        random fraction of each sample, None for no corruption
    """

    def __init__(self, dims=[100, 100, 100], epochs=[100, 100, 100], activations=['sigmoid'] * 3,
                 noise=None, loss='rmse', lr=0.001, batch_size=100, print_step=50):
        self.print_step = print_step
        self.batch_size = batch_size
        self.lr = lr
        self.loss = loss
        self.activations = activations
        self.noise = noise
        self.epochs = epochs
        self.dims = dims
        self.depth = len(dims)
        self.weights, self.biases = [], []
        assert len(dims) == len(epochs)

    def _fit(self, x):
        # Greedy layer-wise training: each layer learns to reconstruct the
        # (possibly corrupted) output of the previous one
        for i in range(self.depth):
            print('Layer {0}'.format(i + 1))
            x = self._run(data_x=self._add_noise(x), activation=self.activations[i], data_x_=x,
                          hidden_dim=self.dims[i], epochs=self.epochs[i], loss=self.loss,
                          batch_size=self.batch_size, lr=self.lr, print_step=self.print_step)

    def _add_noise(self, x):
        if self.noise is None:
            return x
        if self.noise == 'gaussian':
            n = np.random.normal(0, 0.1, (len(x), len(x[0])))
            return x + n
        # Masking noise is specified as 'mask-<frac>', e.g. 'mask-0.3'
        # (the original tested self.noise == 'mask', which could never
        # carry the fraction)
        if self.noise.startswith('mask'):
            frac = float(self.noise.split('-')[1])
            temp = np.copy(x)
            for i in temp:
                n = np.random.choice(len(i), int(round(frac * len(i))), replace=False)
                i[n] = 0
            return temp

    def _transform(self, data):
        sess = tf.Session()
        x = tf.constant(data, dtype=tf.float32)
        for w, b, a in zip(self.weights, self.biases, self.activations):
            weight = tf.constant(w, dtype=tf.float32)
            bias = tf.constant(b, dtype=tf.float32)
            layer = tf.matmul(x, weight) + bias
            x = activate(layer, a)
        return x.eval(session=sess)

    def get_transformed_data(self, x):
        self._fit(x)
        return self._transform(x)

    def _run(self, data_x, data_x_, hidden_dim, activation, loss, lr, print_step, epochs, batch_size=100):
        input_dim = len(data_x[0])
        print('input dim: {0}, hidden dim: {1}'.format(input_dim, hidden_dim))
        sess = tf.Session()
        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
        x_ = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_')
        encode = {'weights': tf.Variable(tf.truncated_normal([input_dim, hidden_dim], dtype=tf.float32)),
                  'biases': tf.Variable(tf.truncated_normal([hidden_dim], dtype=tf.float32))}
        # Tied weights: the decoder reuses the transposed encoder weights
        decode = {'biases': tf.Variable(tf.truncated_normal([input_dim], dtype=tf.float32)),
                  'weights': tf.transpose(encode['weights'])}

        encoded = activate(tf.matmul(x, encode['weights']) + encode['biases'], activation)
        decoded = tf.matmul(encoded, decode['weights']) + decode['biases']

        # Reconstruction loss
        if loss == 'rmse':
            loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x_, decoded))))
        elif loss == 'cross-entropy':
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(decoded, x_))
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)

        sess.run(tf.initialize_all_variables())
        for i in range(epochs):
            b_x, b_x_ = get_random_batch(data_x, data_x_, batch_size)
            sess.run(train_op, feed_dict={x: b_x, x_: b_x_})
            if (i + 1) % print_step == 0:
                l = sess.run(loss, feed_dict={x: data_x, x_: data_x_})
                print('epoch {0}: global loss = {1}'.format(i, l))

        self.weights.append(sess.run(encode['weights']))
        self.biases.append(sess.run(encode['biases']))
        # Return the clean data's encoding as the input to the next layer
        return sess.run(encoded, feed_dict={x: data_x_})
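# ---- Usage sketch (illustrative, not part of the original module) ----
# Pretrains a two-layer SDA on random data and takes the encoded features;
# the dimensions and noise choice here are made up.
if __name__ == '__main__':
    X = np.random.rand(500, 64)
    sda = StackedDenoisingAutoencoder(dims=[32, 16], epochs=[100, 100],
                                      activations=['sigmoid'] * 2, noise='gaussian')
    X_encoded = sda.get_transformed_data(X)   # fit layer-wise, then transform X
    print(X_encoded.shape)                    # (500, 16)
    # sda.weights / sda.biases now hold the pretrained parameters, as
    # consumed by init_layer_weight(..., name='sda') in helpers.py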
--------------------------------------------------------------------------------
/supporting_files/sda.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/sda.pyc
--------------------------------------------------------------------------------
/weights/Q_indexes_xgboost_All_AsthmaCOPD.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/Q_indexes_xgboost_All_AsthmaCOPD.npy
--------------------------------------------------------------------------------
/weights/Q_weights_AsthmaCOPD.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/Q_weights_AsthmaCOPD.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All_AsAc.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All_AsAc.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All_AsC.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All_AsC.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All_CAc.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All_CAc.npy
--------------------------------------------------------------------------------
/weights/weights-0-10-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-0-10-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights-10-20-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-10-20-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights-20-30-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-20-30-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights_AsthmaAcos_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights_AsthmaAcos_rerun.npy
--------------------------------------------------------------------------------
/weights/weights_AsthmaCOPD_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights_AsthmaCOPD_rerun.npy
--------------------------------------------------------------------------------
/weights/weights_COPDAcos_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights_COPDAcos_rerun.npy
--------------------------------------------------------------------------------
/xgboost_result.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/xgboost_result.mat
--------------------------------------------------------------------------------