├── .ipynb_checkpoints
│   ├── ATest!-checkpoint.ipynb
│   ├── CV_BMatrix-checkpoint.ipynb
│   ├── Ensemble Model-checkpoint.ipynb
│   ├── TFlearnVersion-checkpoint.ipynb
│   ├── Untitled-checkpoint.ipynb
│   ├── Untitled1-checkpoint.ipynb
│   ├── XGBoost-checkpoint.ipynb
│   └── testHaarcascades-checkpoint.ipynb
├── ATest!.ipynb
├── CV_BMatrix.ipynb
├── Ensemble Model.ipynb
├── MakeNewData.ipynb
├── Note-Part1.ipynb
├── Note-Part2 (Ensemble).ipynb
├── Preprocess - Should be ran after labeled csv file has been generated.ipynb
├── README.md
├── TFlearnVersion.ipynb
├── XGBoost.ipynb
├── randomLasso.py
├── run_model
│   ├── Q_run_AsthmaAcos_NoSmokeAge.py
│   ├── Q_run_AsthmaCOPD_NoSmokeAge.py
│   ├── Q_run_COPDAcos_NoSmokeAge.py
│   ├── rerun_AsthmaAcos.py
│   ├── rerun_AsthmaCOPD.py
│   └── rerun_COPDAcos.py
├── supporting_files
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── dfs2.py
│   ├── dfs2.pyc
│   ├── helpers.py
│   ├── helpers.pyc
│   ├── nncomponents.py
│   ├── nncomponents.pyc
│   ├── sda.py
│   └── sda.pyc
├── weights
│   ├── Q_indexes_xgboost_All_AsthmaCOPD.npy
│   ├── Q_weights_AsthmaCOPD.npy
│   ├── indexes_xgboost.npy
│   ├── indexes_xgboost_rerun.npy
│   ├── indexes_xgboost_rerun_All.npy
│   ├── indexes_xgboost_rerun_All_AsAc.npy
│   ├── indexes_xgboost_rerun_All_AsC.npy
│   ├── indexes_xgboost_rerun_All_CAc.npy
│   ├── weights-0-10-NEW-mean.npy
│   ├── weights-10-20-NEW-mean.npy
│   ├── weights-20-30-NEW-mean.npy
│   ├── weights-NEW-mean.npy
│   ├── weights_AsthmaAcos_rerun.npy
│   ├── weights_AsthmaCOPD_rerun.npy
│   └── weights_COPDAcos_rerun.npy
└── xgboost_result.mat
/.ipynb_checkpoints/CV_BMatrix-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Ensemble Model-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.preprocessing import normalize\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "weights_0_10 = np.load(\"weights-0-10-NEW-mean.npy\")\n", 15 | "weights_10_20 = np.load(\"weights-10-20-NEW-mean.npy\")\n", 16 | "weights_20_30 = np.load(\"weights-20-30-NEW-mean.npy\")\n", 17 | "indexes_xgboost = np.load(\"indexes_xgboost.npy\")\n", 18 | "\n", 19 | "weights = np.concatenate((weights_0_10, weights_10_20, weights_20_30))\n", 20 | "\n", 21 | "np.save(\"weights-NEW-mean\", weights)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from scipy import io as sio\n", 33 | "\n", 34 | "ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/B_mean_2labels.mat\")\n", 35 | "\n", 36 | "inputX = ourdata['X']\n", 37 | "inputX = normalize(inputX, axis=0)\n", 38 | "inputY = ourdata['Y'][0,:]\n", 39 | "columnNames = ourdata['columnNames']" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 12, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "weights = abs(weights)\n", 51 | "averagedWeight = normalize(weights).sum(axis=0)\n", 52 | "indexes_average_dfs = np.argsort(averagedWeight)[::-1]\n", 53 | "\n", 54 | "def unionDFSfeatures(n):\n", 55 | " indexes_union = []\n", 56 | " for i in xrange(30):\n", 57 | " indexes_union.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 58 |
" \n", 59 | " union = reduce(np.union1d, indexes_union).tolist()\n", 60 | " print(\"Number of union features:\", len(union))\n", 61 | " return inputX[:, union], union\n", 62 | "\n", 63 | "def intersectDFSfeatures(n):\n", 64 | " indexes_intersect = []\n", 65 | " for i in xrange(30):\n", 66 | " indexes_intersect.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 67 | " \n", 68 | " intersected = reduce(np.intersect1d, indexes_intersect).tolist()\n", 69 | " print(\"Number of intersected features:\",len(intersected))\n", 70 | " return inputX[:, intersected]\n", 71 | "\n", 72 | "def topXGBoostfeatures(a,b):\n", 73 | " return inputX[:, indexes_xgboost.tolist()[a:b]], indexes_xgboost.tolist()[a:b]\n", 74 | "\n", 75 | "def topAveDFSfeatures(a,b):\n", 76 | " return inputX[:, indexes_average_dfs.tolist()[a:b]], indexes_average_dfs.tolist()[a:b]\n", 77 | "\n", 78 | "def pickOneDFSfeatures(a,b,n):\n", 79 | " indexx = np.argsort(weights[n])[::-1]\n", 80 | " return inputX[:, indexx.tolist()[a:b]]\n", 81 | "\n", 82 | "def topDFSTemp(a,b):\n", 83 | " temp = np.argsort(abs(dfsMLP.selected_ws[0]))[::-1]\n", 84 | " return inputX[:, temp.tolist()[a:b]]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 36, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "('Number of intersected features:', 9)\n", 99 | "('Intersect:', 0.97940872141117929)\n", 100 | "('Number of union features:', 27)\n" 101 | ] 102 | }, 103 | { 104 | "ename": "ValueError", 105 | "evalue": "Found arrays with inconsistent numbers of samples: [ 2 10684]", 106 | "output_type": "error", 107 | "traceback": [ 108 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 109 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 110 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mkeke\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munionDFSfeatures\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m \u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msvm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeke\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Union:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 111 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)\u001b[0m\n\u001b[1;32m 1420\u001b[0m \u001b[0mArray\u001b[0m \u001b[0mof\u001b[0m \u001b[0mscores\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mestimator\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mrun\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcross\u001b[0m 
\u001b[0mvalidation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1421\u001b[0m \"\"\"\n\u001b[0;32m-> 1422\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1424\u001b[0m \u001b[0mcv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_cv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclassifier\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 112 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mindexable\u001b[0;34m(*iterables)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0mcheck_consistent_length\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 113 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mcheck_consistent_length\u001b[0;34m(*arrays)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m raise ValueError(\"Found arrays with inconsistent numbers of samples: \"\n\u001b[0;32m--> 176\u001b[0;31m \"%s\" % str(uniques))\n\u001b[0m\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 114 | "\u001b[0;31mValueError\u001b[0m: Found arrays with inconsistent numbers of samples: [ 2 10684]" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "from sklearn.svm import LinearSVC\n", 120 | "from sklearn.metrics import accuracy_score\n", 121 | "from sklearn.linear_model import LogisticRegression\n", 122 | "from sklearn.cross_validation import cross_val_score\n", 123 | "\n", 124 | "svm = LinearSVC()\n", 125 | "\n", 126 | "keke = intersectDFSfeatures(200)\n", 127 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 128 | "print(\"Intersect:\", np.mean(scores))\n", 129 | "\n", 130 | "keke = unionDFSfeatures(10)\n", 131 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 132 | "print(\"Union:\", np.mean(scores))\n", 133 | "\n", 134 | "keke = topAveDFSfeatures(1,27)\n", 135 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 136 | "print(\"Ave:\", np.mean(scores))\n", 137 | "\n", 138 | "keke = topXGBoostfeatures(0,27)\n", 139 | "scores = 
cross_val_score(svm, keke, inputY, cv=5)\n", 140 | "print(\"XGBoost:\", np.mean(scores))\n", 141 | "\n", 142 | "print(\"Pick one TOP DFS features from 30\")\n", 143 | "for i in xrange(0,30):\n", 144 | " keke = pickOneDFSfeatures(0,27,i)\n", 145 | " scores = cross_val_score(svm, keke, inputY, cv=5)\n", 146 | " print(np.mean(scores))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 65, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "" 160 | ], 161 | "text/plain": [ 162 | "" 163 | ] 164 | }, 165 | "execution_count": 65, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "import plotly.plotly as py\n", 172 | "import plotly.graph_objs as go\n", 173 | "import plotly\n", 174 | "\n", 175 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 176 | "\n", 177 | "data = [\n", 178 | " go.Heatmap(\n", 179 | " z=np.abs(weights)\n", 180 | " )\n", 181 | "]\n", 182 | "\n", 183 | "py.iplot(data, filename='30 Weights')" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 91, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "[u'Phe9_2534',\n", 197 | " u'Phe9_491',\n", 198 | " u'Phe9_4912',\n", 199 | " u'Phe9_492',\n", 200 | " u'Phe9_4928',\n", 201 | " u'Phe9_493',\n", 202 | " u'Phe9_4930',\n", 203 | " u'Phe9_4931',\n", 204 | " u'Phe9_4932',\n", 205 | " u'Phe9_4938',\n", 206 | " u'Phe9_4939',\n", 207 | " u'Phe9_494',\n", 208 | " u'Phe9_4940',\n", 209 | " u'Phe9_496',\n", 210 | " u'Phe9_5343',\n", 211 | " u'Phe9_V146',\n", 212 | " u'Phe10_E236',\n", 213 | " u'Phe10_I23',\n", 214 | " u'Phe10_J44',\n", 215 | " u'Phe10_J449',\n", 216 | " u'Phe10_J45',\n", 217 | " u'Phe10_J452',\n", 218 | " u'Phe10_J453',\n", 219 | " u'Phe10_J454',\n", 220 | " u'Phe10_J459',\n", 221 | " u'Phe10_N08',\n", 222 | " u'Phe10_S060']" 223 | ] 224 | }, 225 | "execution_count": 91, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "indexes_union = []\n", 232 | "for i in xrange(30):\n", 233 | " indexes_union.append(np.argsort(weights[i])[::-1][:10].tolist())\n", 234 | "union = reduce(np.union1d, indexes_union).tolist()\n", 235 | "\n", 236 | "\n", 237 | "[i[0] for i in columnNames.reshape(7205,)[union]]" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 78, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "def getSelectedFeatureNames(which, topN):\n", 249 | " if which == \"Ave\":\n", 250 | " indexes = indexes_average_dfs\n", 251 | " elif which == \"Union\":\n", 252 | " indexes = \"\"\n", 253 | " elif which == \"Intersect\"\n", 254 | " \n", 255 | " featureNames = [i[0] for i in columnNames.reshape(7205,)[indexes[:topN]]]\n", 256 | " \n", 257 | " return featureNames" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 99, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "('Number of union features:', 1)\n", 272 | "('Union:', 0.94487121332880197)\n", 273 | "('Number of union features:', 10)\n", 274 | "('Union:', 0.94346676284389586)\n", 275 | "('Number of union features:', 12)\n", 276 | "('Union:', 0.9591910107244368)\n", 277 | "('Number of union features:', 17)\n", 278 | "('Union:', 
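[Annotation] The `ValueError` stored above is an API mismatch, not a data problem: `intersectDFSfeatures` returns only a matrix, while `unionDFSfeatures`, `topAveDFSfeatures`, and `topXGBoostfeatures` each return a `(matrix, indices)` tuple. Passing the tuple straight to `cross_val_score` makes sklearn coerce it into a length-2 array, hence "inconsistent numbers of samples: [ 2 10684]". Unpacking fixes the failing calls (a sketch against the notebook's own helpers):

```python
keke, _ = unionDFSfeatures(10)          # keep only the feature matrix
scores = cross_val_score(svm, keke, inputY, cv=5)

keke, _ = topAveDFSfeatures(1, 27)      # these helpers also return tuples
keke, _ = topXGBoostfeatures(0, 27)
```

Separately, `getSelectedFeatureNames` in the cell above is syntactically incomplete: the `elif which == "Intersect"` line lacks a colon and a body, and the `"Union"` branch assigns an empty string, so the function cannot run as written. A hedged completion, assuming `indexes_average_dfs`, `indexes_xgboost`, and `columnNames` from the earlier cells; the union and intersect orderings are never defined in the notebook, so they stay explicit placeholders:

```python
def getSelectedFeatureNames(which, topN):
    # 'Union'/'Intersect' rankings are not defined anywhere in the notebook;
    # they must be supplied before use.
    rankings = {"Ave": indexes_average_dfs,
                "XGBoost": indexes_xgboost,
                "Union": None, "Intersect": None}
    indexes = rankings[which]
    if indexes is None:
        raise ValueError("no ranking defined for %r" % which)
    return [i[0] for i in columnNames.reshape(7205,)[indexes[:topN]]]
```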
0.97145272318043541)\n", 279 | "('Number of union features:', 21)\n", 280 | "('Union:', 0.97182712357021495)\n", 281 | "('Number of union features:', 23)\n", 282 | "('Union:', 0.99204408956348877)\n", 283 | "('Number of union features:', 26)\n", 284 | "('Union:', 0.99166968909173525)\n", 285 | "('Number of union features:', 26)\n", 286 | "('Union:', 0.99166968909173525)\n", 287 | "('Number of union features:', 26)\n", 288 | "('Union:', 0.99166968909173525)\n", 289 | "('Number of union features:', 27)\n", 290 | "('Union:', 0.99166960150243777)\n", 291 | "('Number of union features:', 28)\n", 292 | "('Union:', 0.99073384134732279)\n", 293 | "('Number of union features:', 31)\n", 294 | "('Union:', 0.98418163686619731)\n", 295 | "('Number of union features:', 31)\n", 296 | "('Union:', 0.98418163686619731)\n", 297 | "('Number of union features:', 31)\n", 298 | "('Union:', 0.98418163686619731)\n", 299 | "('Number of union features:', 34)\n", 300 | "('Union:', 0.98446240429717535)\n", 301 | "('Number of union features:', 36)\n", 302 | "('Union:', 0.98474339076287709)\n", 303 | "('Number of union features:', 40)\n", 304 | "('Union:', 0.98596013738369381)\n", 305 | "('Number of union features:', 45)\n", 306 | "('Union:', 0.98558578076806957)\n", 307 | "('Number of union features:', 51)\n", 308 | "('Union:', 0.98876816262448164)\n", 309 | "('Number of union features:', 55)\n", 310 | "('Union:', 0.98895538476792955)\n", 311 | "('Number of union features:', 61)\n", 312 | "('Union:', 0.98876816262448164)\n", 313 | "('Number of union features:', 67)\n", 314 | "('Union:', 0.98839398118745181)\n", 315 | "('Number of union features:', 73)\n", 316 | "('Union:', 0.9883001730500558)\n", 317 | "('Number of union features:', 77)\n", 318 | "('Union:', 0.98801936180393546)\n", 319 | "('Number of union features:', 86)\n", 320 | "('Union:', 0.98783209588633247)\n", 321 | "('Number of union features:', 89)\n", 322 | "('Union:', 0.98811295098858187)\n", 323 | "('Number of union features:', 102)\n", 324 | "('Union:', 0.98596048790483126)\n", 325 | "('Number of union features:', 108)\n", 326 | "('Union:', 0.98567980810413758)\n", 327 | "('Number of union features:', 120)\n", 328 | "('Union:', 0.98418233778551067)\n", 329 | "('Number of union features:', 129)\n", 330 | "('Union:', 0.98277810621236728)\n", 331 | "('Number of union features:', 139)\n", 332 | "('Union:', 0.98193567247400682)\n", 333 | "('Number of union features:', 151)\n", 334 | "('Union:', 0.98184238987239447)\n", 335 | "('Number of union features:', 162)\n", 336 | "('Union:', 0.98109350142156404)\n", 337 | "('Number of union features:', 173)\n", 338 | "('Union:', 0.98090641072354268)\n", 339 | "('Number of union features:', 182)\n", 340 | "('Union:', 0.9806255118471382)\n", 341 | "('Number of union features:', 192)\n", 342 | "('Union:', 0.97968944506800215)\n", 343 | "('Number of union features:', 202)\n", 344 | "('Union:', 0.97997030012926456)\n", 345 | "('Number of union features:', 214)\n", 346 | "('Union:', 0.97940863386286914)\n", 347 | "('Number of union features:', 224)\n", 348 | "('Union:', 0.9790342334321025)\n", 349 | "('Number of union features:', 234)\n", 350 | "('Union:', 0.97837897798106077)\n", 351 | "('Number of union features:', 242)\n", 352 | "('Union:', 0.97809829809839288)\n", 353 | "('Number of union features:', 255)\n", 354 | "('Union:', 0.97837889035077619)\n", 355 | "('Number of union features:', 265)\n", 356 | "('Union:', 0.97791085700219527)\n", 357 | "('Number of union features:', 269)\n", 358 | "('Union:', 
0.9780979915153587)\n", 359 | "('Number of union features:', 275)\n", 360 | "('Union:', 0.9780979915153587)\n", 361 | "('Number of union features:', 280)\n", 362 | "('Union:', 0.9776299143516356)\n", 363 | "('Number of union features:', 289)\n", 364 | "('Union:', 0.97781744303713047)\n", 365 | "('Number of union features:', 297)\n", 366 | "('Union:', 0.97706864221658452)\n", 367 | "('Number of union features:', 304)\n", 368 | "('Union:', 0.97716231894954109)\n", 369 | "('Number of union features:', 309)\n", 370 | "('Union:', 0.97678783088849008)\n", 371 | "('Number of union features:', 318)\n", 372 | "('Union:', 0.97660078396462391)\n", 373 | "('Number of union features:', 322)\n", 374 | "('Union:', 0.97669424174483088)\n", 375 | "('Number of union features:', 325)\n", 376 | "('Union:', 0.97688159525173113)\n", 377 | "('Number of union features:', 331)\n", 378 | "('Union:', 0.97697509680609307)\n", 379 | "('Number of union features:', 340)\n", 380 | "('Union:', 0.97631975376575397)\n", 381 | "('Number of union features:', 347)\n", 382 | "('Union:', 0.97660069641631364)\n", 383 | "('Number of union features:', 356)\n", 384 | "('Union:', 0.97678804988222689)\n", 385 | "('Number of union features:', 369)\n", 386 | "('Union:', 0.97678796229292963)\n", 387 | "('Number of union features:', 375)\n", 388 | "('Union:', 0.97632001653364586)\n", 389 | "('Number of union features:', 386)\n", 390 | "('Union:', 0.97697522825151961)\n", 391 | "('Number of union features:', 393)\n", 392 | "('Union:', 0.97688150766243387)\n", 393 | "('Number of union features:', 400)\n", 394 | "('Union:', 0.97716231890855398)\n", 395 | "('Number of union features:', 407)\n", 396 | "('Union:', 0.97706877353904975)\n", 397 | "('Number of union features:', 410)\n", 398 | "('Union:', 0.97706872976489456)\n", 399 | "('Number of union features:', 419)\n", 400 | "('Union:', 0.97678796229292963)\n", 401 | "('Number of union features:', 422)\n", 402 | "('Union:', 0.97697509680609307)\n", 403 | "('Number of union features:', 433)\n", 404 | "('Union:', 0.9766944169644125)\n", 405 | "('Number of union features:', 442)\n", 406 | "('Union:', 0.976787787114335)\n", 407 | "('Number of union features:', 446)\n", 408 | "('Union:', 0.97660060882701638)\n", 409 | "('Number of union features:', 451)\n", 410 | "('Union:', 0.97669415419652061)\n", 411 | "('Number of union features:', 457)\n", 412 | "('Union:', 0.97669411042236542)\n", 413 | "('Number of union features:', 460)\n", 414 | "('Union:', 0.97669406660722335)\n", 415 | "('Number of union features:', 467)\n", 416 | "('Union:', 0.97678761201771458)\n", 417 | "('Number of union features:', 475)\n", 418 | "('Union:', 0.9765069320940597)\n", 419 | "('Number of union features:', 482)\n", 420 | "('Union:', 0.97669402287405516)\n", 421 | "('Number of union features:', 487)\n", 422 | "('Union:', 0.97660052127870611)\n", 423 | "('Number of union features:', 496)\n", 424 | "('Union:', 0.97622607711477138)\n", 425 | "('Number of union features:', 506)\n", 426 | "('Union:', 0.97613248793012508)\n", 427 | "('Number of union features:', 517)\n", 428 | "('Union:', 0.9762260770737845)\n", 429 | "('Number of union features:', 530)\n", 430 | "('Union:', 0.97575804372520347)\n", 431 | "('Number of union features:', 539)\n", 432 | "('Union:', 0.97538381851401856)\n", 433 | "('Number of union features:', 550)\n", 434 | "('Union:', 0.97547740761669088)\n", 435 | "('Number of union features:', 559)\n", 436 | "('Union:', 0.97547745143183295)\n", 437 | "('Number of union features:', 572)\n", 438 | "('Union:', 
0.97547740761669088)\n", 439 | "('Number of union features:', 581)\n", 440 | "('Union:', 0.97538377465788939)\n", 441 | "('Number of union features:', 595)\n", 442 | "('Union:', 0.97547740765767776)\n", 443 | "('Number of union features:', 605)\n", 444 | "('Union:', 0.97529014174007478)\n", 445 | "('Number of union features:', 620)\n", 446 | "('Union:', 0.97538377465788939)\n", 447 | "('Number of union features:', 630)\n", 448 | "('Union:', 0.97491591648790299)\n", 449 | "('Number of union features:', 638)\n", 450 | "('Union:', 0.97519681536430747)\n", 451 | "('Number of union features:', 647)\n", 452 | "('Union:', 0.97529031691866952)\n", 453 | "('Number of union features:', 656)\n", 454 | "('Union:', 0.97538390606232883)\n", 455 | "('Number of union features:', 666)\n", 456 | "('Union:', 0.97510318240550597)\n", 457 | "('Number of union features:', 677)\n", 458 | "('Union:', 0.97482241497452793)\n", 459 | "('Number of union features:', 692)\n", 460 | "('Union:', 0.97510300726789834)\n", 461 | "('Number of union features:', 705)\n", 462 | "('Union:', 0.97482232738523056)\n", 463 | "('Number of union features:', 713)\n", 464 | "('Union:', 0.97472873824157136)\n", 465 | "('Number of union features:', 724)\n", 466 | "('Union:', 0.97444788318030895)\n", 467 | "('Number of union features:', 738)\n", 468 | "('Union:', 0.97444792699545124)\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "scores_union = []\n", 474 | "for i in xrange(1,100):\n", 475 | " keke = unionDFSfeatures(i)\n", 476 | " scores_union.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))\n", 477 | " print(\"Union:\", scores_union[-1])" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 29, 483 | "metadata": { 484 | "collapsed": false 485 | }, 486 | "outputs": [ 487 | { 488 | "name": "stdout", 489 | "output_type": "stream", 490 | "text": [ 491 | "('Number of union features:', 23)\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "_, union = unionDFSfeatures(6)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 59, 502 | "metadata": { 503 | "collapsed": false 504 | }, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "1\n", 511 | "2\n" 512 | ] 513 | }, 514 | { 515 | "ename": "IndexError", 516 | "evalue": "list index out of range", 517 | "output_type": "error", 518 | "traceback": [ 519 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 520 | "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)", 521 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0m_\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxg\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtopXGBoostfeatures\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mtemp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mintersect1d\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mave\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mxg\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0;32mif\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m0\u001b[0m \u001b[0;32mand\u001b[0m 
\u001b[0mnum\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m!=\u001b[0m \u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 10\u001b[0m \u001b[0mnum\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0mkeke\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minputX\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0mtemp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtolist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 522 | "\u001b[0;31mIndexError\u001b[0m: list index out of range" 523 | ] 524 | } 525 | ], 526 | "source": [ 527 | "ave, xg = 0, 0\n", 528 | "num = []\n", 529 | "acc = []\n", 530 | "for i in xrange(1,100):\n", 531 | " print i\n", 532 | " _, ave = topAveDFSfeatures(0,i)\n", 533 | " _, xg = topXGBoostfeatures(0,i)\n", 534 | " temp = np.intersect1d(ave,xg)\n", 535 | " if temp.shape[0] > 0:\n", 536 | " num.append(temp.shape[0])\n", 537 | " keke = inputX[:,temp.tolist()]\n", 538 | " acc.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 58, 544 | "metadata": { 545 | "collapsed": false 546 | }, 547 | "outputs": [ 548 | { 549 | "data": { 550 | "text/plain": [ 551 | "[0.94992524665396139,\n", 552 | " 0.94992524665396139,\n", 553 | " 0.94992524665396139,\n", 554 | " 0.94992524665396139,\n", 555 | " 0.94992524665396139,\n", 556 | " 0.94393448928154644,\n", 557 | " 0.95994038105985702,\n", 558 | " 0.95816201190581274,\n", 559 | " 0.95984692323866305,\n", 560 | " 0.95984692323866305,\n", 561 | " 0.96012760312133083,\n", 562 | " 0.96555599322606267,\n", 563 | " 0.96555599322606267,\n", 564 | " 0.96555599322606267,\n", 565 | " 0.96555599322606267,\n", 566 | " 0.96555599322606267,\n", 567 | " 0.96555599322606267,\n", 568 | " 0.96555599322606267,\n", 569 | " 0.96555599322606267,\n", 570 | " 0.96555599322606267,\n", 571 | " 0.98109310729021981,\n", 572 | " 0.98109310729021981,\n", 573 | " 0.98109310729021981,\n", 574 | " 0.98109310729021981,\n", 575 | " 0.98109310729021981,\n", 576 | " 0.98109310729021981,\n", 577 | " 0.98118660884458198,\n", 578 | " 0.9806250301674837,\n", 579 | " 0.98062511775678107,\n", 580 | " 0.97940823973152469,\n", 581 | " 0.97940823973152469,\n", 582 | " 0.97940823973152469,\n", 583 | " 0.97940823973152469,\n", 584 | " 0.97940823973152469,\n", 585 | " 0.97940823973152469,\n", 586 | " 0.97978303425264845,\n", 587 | " 0.97978303425264845,\n", 588 | " 0.97978303425264845,\n", 589 | " 0.97978303425264845,\n", 590 | " 0.97978303425264845,\n", 591 | " 0.97978307810877774,\n", 592 | " 0.97978307810877774,\n", 593 | " 0.97978307810877774,\n", 594 | " 0.97950217923237326,\n", 595 | " 0.97950217923237326,\n", 596 | " 0.97950217923237326,\n", 597 | " 0.97950217923237326,\n", 598 | " 0.97950217923237326,\n", 599 | " 0.97950217923237326,\n", 600 | " 0.97950217923237326,\n", 601 | " 0.97950217923237326,\n", 602 | " 0.97950217923237326,\n", 603 | " 0.97950217923237326,\n", 604 | " 0.97950217923237326,\n", 605 | " 0.97950217923237326,\n", 606 | " 0.97950217923237326,\n", 
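[Annotation] The `IndexError` above is a guard-ordering bug: on the first iteration where the intersection is non-empty, `num` is still empty, so `num[-1]` is evaluated before anything has been appended. Checking for emptiness first (short-circuit `and`) fixes the loop body:

```python
if temp.shape[0] > 0 and (not num or num[-1] != temp.shape[0]):
    num.append(temp.shape[0])
    keke = inputX[:, temp.tolist()]
    acc.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))
```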
607 | " 0.97950217923237326,\n", 608 | " 0.97950217923237326,\n", 609 | " 0.97950217923237326,\n", 610 | " 0.97950217923237326,\n", 611 | " 0.97950217923237326,\n", 612 | " 0.97950217923237326,\n", 613 | " 0.97950217923237326,\n", 614 | " 0.97950217923237326,\n", 615 | " 0.97950217923237326,\n", 616 | " 0.97950217923237326,\n", 617 | " 0.97950217923237326,\n", 618 | " 0.97950217923237326,\n", 619 | " 0.97950217923237326,\n", 620 | " 0.97622585791609962,\n", 621 | " 0.97622585791609962,\n", 622 | " 0.97622585791609962,\n", 623 | " 0.97622585791609962,\n", 624 | " 0.97622585791609962,\n", 625 | " 0.97622585791609962,\n", 626 | " 0.97622585791609962,\n", 627 | " 0.97622585791609962,\n", 628 | " 0.97622585791609962,\n", 629 | " 0.97622585791609962,\n", 630 | " 0.97622585791609962,\n", 631 | " 0.97622585791609962,\n", 632 | " 0.97622585791609962,\n", 633 | " 0.97622585791609962,\n", 634 | " 0.97622585791609962,\n", 635 | " 0.97622585791609962,\n", 636 | " 0.97622585791609962,\n", 637 | " 0.97622585791609962,\n", 638 | " 0.97622585791609962,\n", 639 | " 0.97622585791609962,\n", 640 | " 0.97622585791609962,\n", 641 | " 0.97734914679769658,\n", 642 | " 0.97734914679769658,\n", 643 | " 0.97734914679769658,\n", 644 | " 0.97734914679769658,\n", 645 | " 0.97734914679769658,\n", 646 | " 0.97734914679769658,\n", 647 | " 0.97734914679769658,\n", 648 | " 0.97734914679769658]" 649 | ] 650 | }, 651 | "execution_count": 58, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "acc" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 53, 663 | "metadata": { 664 | "collapsed": false 665 | }, 666 | "outputs": [ 667 | { 668 | "data": { 669 | "text/plain": [ 670 | "0.98109310729021981" 671 | ] 672 | }, 673 | "execution_count": 53, 674 | "metadata": {}, 675 | "output_type": "execute_result" 676 | } 677 | ], 678 | "source": [] 679 | }, 680 | { 681 | "cell_type": "code", 682 | "execution_count": 54, 683 | "metadata": { 684 | "collapsed": false 685 | }, 686 | "outputs": [ 687 | { 688 | "data": { 689 | "text/plain": [ 690 | "(9,)" 691 | ] 692 | }, 693 | "execution_count": 54, 694 | "metadata": {}, 695 | "output_type": "execute_result" 696 | } 697 | ], 698 | "source": [ 699 | "temp.shape" 700 | ] 701 | } 702 | ], 703 | "metadata": { 704 | "kernelspec": { 705 | "display_name": "Python 2", 706 | "language": "python", 707 | "name": "python2" 708 | }, 709 | "language_info": { 710 | "codemirror_mode": { 711 | "name": "ipython", 712 | "version": 2 713 | }, 714 | "file_extension": ".py", 715 | "mimetype": "text/x-python", 716 | "name": "python", 717 | "nbconvert_exporter": "python", 718 | "pygments_lexer": "ipython2", 719 | "version": "2.7.12" 720 | } 721 | }, 722 | "nbformat": 4, 723 | "nbformat_minor": 0 724 | } 725 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/TFlearnVersion-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "import numpy as np\n", 16 | "from sklearn.datasets import make_classification\n", 17 | "from sklearn.preprocessing import normalize\n", 18 | "import tflearn\n", 19 | 
"import tensorflow as tf\n", 20 | "from nncomponents import One2OneInputLayer\n", 21 | "\n", 22 | "ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 23 | "# ourdataB = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 24 | "\n", 25 | "inputX = ourdataB['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdataB['Y'][0,:]\n", 28 | "columnNames = ourdataB['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)\n", 31 | "Y_train, Y_test = tflearn.data_utils.to_categorical(y_train, 2), tflearn.data_utils.to_categorical(y_test, 2)\n", 32 | "\n", 33 | "indexes = sio.loadmat(\"xgboost_result\")['importance_rank']\n", 34 | "\n", 35 | "X_train500, X_test500 = X_train[:, indexes.tolist()[0][:500]], X_test[:, indexes.tolist()[0][:500]]\n", 36 | "X_train100, X_test100 = X_train[:, indexes.tolist()[0][:100]], X_test[:, indexes.tolist()[0][:100]]\n", 37 | "X_train10, X_test10 = X_train[:, indexes.tolist()[0][:10]], X_test[:, indexes.tolist()[0][:10]]\n", 38 | "X_train50, X_test50 = X_train[:, indexes.tolist()[0][:50]], X_test[:, indexes.tolist()[0][:50]]" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 54, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "def dfs(lambda1, n_epoch, size=None):\n", 50 | " with tf.Graph().as_default():\n", 51 | " sess = tf.Session()\n", 52 | " \n", 53 | " if size is None:\n", 54 | " size = 7203\n", 55 | "\n", 56 | " input_data = tflearn.input_data(shape=[None, size])\n", 57 | " input_layer = One2OneInputLayer(input_data)\n", 58 | "\n", 59 | " tflearn.helpers.regularizer.add_weights_regularizer(input_layer.w, loss='L1', \\\n", 60 | " weight_decay=lambda1, add_to_collection=None)\n", 61 | "\n", 62 | " dense = tflearn.fully_connected(input_layer.output, 50, activation='tanh')\n", 63 | " \n", 64 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax')\n", 65 | " \n", 66 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 67 | " model = tflearn.DNN(net)\n", 68 | "\n", 69 | " sess.run(tf.initialize_all_variables())\n", 70 | " \n", 71 | "# variables = tflearn.variables.get_all_trainable_variable()\n", 72 | "# for i in xrange(1,4):\n", 73 | "# sess.run(variables[i].assign(initial_values[i]))\n", 74 | " if size == 500:\n", 75 | " X_train, X_test = X_train500, X_test500\n", 76 | " elif size == 100:\n", 77 | " X_train, X_test = X_train100, X_test100\n", 78 | " elif size == 50:\n", 79 | " X_train, X_test = X_train50, X_test50\n", 80 | " elif size == 10:\n", 81 | " X_train, X_test = X_train10, X_test10\n", 82 | " \n", 83 | " model.fit(X_train, Y_train, n_epoch=n_epoch, show_metric=True, validation_set=(X_test, Y_test), batch_size=50)\n", 84 | "\n", 85 | " selected_w = sess.run(input_layer.w)\n", 86 | "\n", 87 | " return selected_w" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 57, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "Training Step: 3419 | total loss: \u001b[1m\u001b[32m0.10726\u001b[0m\u001b[0m\n", 102 | "\u001b[2K\r", 103 | "| Adam | epoch: 019 | loss: 0.10726 - acc: 0.9731 -- iter: 8500/8547\n" 104 | ] 105 | }, 106 | { 107 | "ename": "KeyboardInterrupt", 108 | "evalue": "", 109 | "output_type": "error", 110 | "traceback": [ 111 | 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 112 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 113 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m50\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;34m/=\u001b[0m \u001b[0;36m10000.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mweights\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m20\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m500\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 114 | "\u001b[0;32m\u001b[0m in \u001b[0;36mdfs\u001b[0;34m(lambda1, n_epoch, size)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_train10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshow_metric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_set\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m50\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mselected_w\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_layer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 115 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/models/dnn.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X_inputs, Y_targets, n_epoch, validation_set, show_metric, batch_size, shuffle, snapshot_epoch, snapshot_step, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0mdaug_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdaug_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0mexcl_trainops\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexcl_trainops\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m run_id=run_id)\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 116 | 
"\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0msnapshot_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0msnapshot_step\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m show_metric)\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0mglobal_loss\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macc_value\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mglobal_acc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 117 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, training_step, snapshot_epoch, snapshot_step, show_metric)\u001b[0m\n\u001b[1;32m 722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 724\u001b[0;31m \u001b[0me\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluate_flow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_dflow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 725\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 118 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mevaluate_flow\u001b[0;34m(session, ops_to_evaluate, dataflow)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 847\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m 
\u001b[0mcurrent_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mfeed_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mr\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_samples\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 119 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/data_flow.pyc\u001b[0m in \u001b[0;36mnext\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 126\u001b[0m \"\"\"\n\u001b[1;32m 127\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata_status\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed_dict_queue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreset_status\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 120 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/Queue.pyc\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_empty\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 121 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/threading.pyc\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m 
\u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 340\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 341\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__debug__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_note\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"%s.wait(): got it\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 122 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "weights = []\n", 128 | "for lambda1 in xrange(0, 50, 5):\n", 129 | " lambda1 /= 10000.\n", 130 | " weights.append(dfs(0, 20, 500))" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 34, 136 | "metadata": { 137 | "collapsed": false 138 | }, 139 | "outputs": [ 140 | { 141 | "ename": "NameError", 142 | "evalue": "name 'model' is not defined", 143 | "output_type": "error", 144 | "traceback": [ 145 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 146 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 147 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 148 | "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined" 149 | ] 150 | } 151 | ], 152 | "source": [ 153 | "type(model)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 37, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "def get_inits():\n", 165 | " with tf.Graph().as_default():\n", 166 | " sess = tf.Session()\n", 167 | "\n", 168 | " input_data = tflearn.input_data(shape=[None, 7203])\n", 169 | " input_layer = One2OneInputLayer(input_data)\n", 170 | "\n", 171 | " dense = tflearn.fully_connected(input_layer.output, 500, activation='tanh', name='dense')\n", 172 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax', name='sofmax')\n", 173 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 174 | " model = tflearn.DNN(net)\n", 175 | " \n", 176 | " print(type(model))\n", 177 | "\n", 178 | " sess.run(tf.initialize_all_variables())\n", 179 | "\n", 180 | "# model.fit(X_train, Y_train, n_epoch=10, show_metric=True, validation_set=(X_test, Y_test))\n", 181 | "\n", 182 | " variables = tflearn.variables.get_all_trainable_variable()\n", 183 | " \n", 184 | " values = []\n", 185 | " for i in xrange(4):\n", 186 | " values.append(sess.run(variables[i]))\n", 187 | "\n", 188 | " return values" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 38, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "\n" 203 | ] 204 | } 205 | ], 206 | "source": [ 207 | "initial_values = get_inits()" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": null, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "initial_values[0]" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 
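[Annotation] The `KeyboardInterrupt` above is just a manual stop mid-training, but the sweep cell has a real bug: `lambda1` is computed (0.0, 0.0005, ..., 0.0045) and then never used, so `dfs(0, 20, 500)` trains all ten models with no L1 penalty at all. A corrected version of the loop:

```python
weights = []
for step in xrange(0, 50, 5):            # Python 2: xrange
    lambda1 = step / 10000.              # 0.0, 0.0005, ..., 0.0045
    weights.append(dfs(lambda1, 20, 500))  # actually pass the penalty
```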
225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "tflearn.input_data(tf.Variable(initial_values[0]))" 230 | ] 231 | }, 232 | { 233 | "cell_type": "code", 234 | "execution_count": null, 235 | "metadata": { 236 | "collapsed": false 237 | }, 238 | "outputs": [], 239 | "source": [ 240 | "dense_vars = tflearn.variables.get_all_variables()\n", 241 | "print(\"Dense1 layer weights:\")\n", 242 | "print(model.get_weights(dense_vars[0]))\n", 243 | "# Or using generic tflearn function:\n", 244 | "print(\"Dense1 layer biases:\")\n", 245 | "with model.session.as_default():\n", 246 | " print(tflearn.variables.get_value(dense_vars[1]))" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": null, 252 | "metadata": { 253 | "collapsed": false 254 | }, 255 | "outputs": [], 256 | "source": [ 257 | "dense_vars" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 32, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~tonyabracadabra/0 or inside your plot.ly account where it is named 'basic-heatmap'\n" 272 | ] 273 | }, 274 | { 275 | "data": { 276 | "text/html": [ 277 | "" 278 | ], 279 | "text/plain": [ 280 | "" 281 | ] 282 | }, 283 | "execution_count": 32, 284 | "metadata": {}, 285 | "output_type": "execute_result" 286 | } 287 | ], 288 | "source": [ 289 | "import plotly.plotly as py\n", 290 | "import plotly.graph_objs as go\n", 291 | "import plotly\n", 292 | "\n", 293 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 294 | "\n", 295 | "data = [\n", 296 | " go.Heatmap(\n", 297 | " z=np.abs(weights)\n", 298 | " )\n", 299 | "]\n", 300 | "\n", 301 | "py.iplot(data, filename='basic-heatmap')" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": null, 307 | "metadata": { 308 | "collapsed": false 309 | }, 310 | "outputs": [], 311 | "source": [ 312 | "type(inputlayer)" 313 | ] 314 | } 315 | ], 316 | "metadata": { 317 | "kernelspec": { 318 | "display_name": "Python 2", 319 | "language": "python", 320 | "name": "python2" 321 | }, 322 | "language_info": { 323 | "codemirror_mode": { 324 | "name": "ipython", 325 | "version": 2 326 | }, 327 | "file_extension": ".py", 328 | "mimetype": "text/x-python", 329 | "name": "python", 330 | "nbconvert_exporter": "python", 331 | "pygments_lexer": "ipython2", 332 | "version": "2.7.11" 333 | } 334 | }, 335 | "nbformat": 4, 336 | "nbformat_minor": 0 337 | } 338 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import os\n", 13 | "\n", 14 | "os.chdir(\"/Users/xupeng.tong/Documents/Data/OriginalData\")\n", 15 | "\n", 16 | "df = pd.read_csv(\"QMatrix_label.csv\")" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 4, 22 | "metadata": { 23 | "collapsed": false 24 | }, 25 | "outputs": [], 26 | "source": [ 27 | "df.drop(['Unnamed: 0'], axis = 1 , inplace= True,errors= 'ignore')\n", 28 | "labels = df[\"PatientID\"]" 29 | ] 30 | }, 31 | { 32 | "cell_type": 
"code", 33 | "execution_count": 7, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "import numpy as np\n", 40 | "\n", 41 | "mapping = {j:i for i,j in enumerate(np.unique(labels))}\n", 42 | "numericLabels = np.array([mapping[i] for i in labels])" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 13, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "6096" 56 | ] 57 | }, 58 | "execution_count": 13, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "len(numericLabels[numericLabels==1])" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 25, 70 | "metadata": { 71 | "collapsed": true 72 | }, 73 | "outputs": [], 74 | "source": [ 75 | "from sklearn.preprocessing import Imputer\n", 76 | "\n", 77 | "imp = Imputer(missing_values='NaN', strategy='median', axis=0)\n", 78 | "imp.fit(dataToBeImputed)\n", 79 | "imputedData = imp.transform(dataToBeImputed)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 17, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "subgroups = df[\"Smoking_Sub_Group\"].values\n", 91 | "\n", 92 | "mappingSubgroup = {j:i for i,j in enumerate(np.unique(subgroups))}\n", 93 | "numericSubgroups = np.array([mappingSubgroup[i] for i in subgroups])" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 19, 99 | "metadata": { 100 | "collapsed": false 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "df[\"Smoking_Sub_Group\"] = numericSubgroups" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 23, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "dataToBeImputed = df.values" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 27, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "import scipy.io as sio\n", 127 | "\n", 128 | "sio.savemat(\"ourdataQ_3labels_unstandardized\", {'X':imputedData,'Y':numericLabels, 'columnNames':df.columns.values})" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 28, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "from sklearn.preprocessing import scale\n", 140 | "\n", 141 | "imputedData = scale(imputedData)" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 30, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "sio.savemat(\"ourdataQ_3labels_standardized\", {'X':imputedData,'Y':numericLabels, 'columnNames':df.columns.values})" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 41, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "index1 = np.argwhere(numericLabels==1).reshape(6096,).tolist()" 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 43, 169 | "metadata": { 170 | "collapsed": true 171 | }, 172 | "outputs": [], 173 | "source": [ 174 | "index2 = np.argwhere(numericLabels==2).reshape(4593,).tolist()" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 47, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "array([1, 1, 1, ..., 2, 2, 2])" 188 | ] 189 | }, 190 | 
"execution_count": 47, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "numericLabels[index1+index2]" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 57, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "no0 = [i for i,j in enumerate(numericLabels) if j != 0]" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 58, 213 | "metadata": { 214 | "collapsed": false 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "sio.savemat(\"Q_2labels_standardized\", {'X':imputedData[no0],'Y':numericLabels[no0], 'columnNames':df.columns.values})" 219 | ] 220 | } 221 | ], 222 | "metadata": { 223 | "kernelspec": { 224 | "display_name": "Python 2", 225 | "language": "python", 226 | "name": "python2" 227 | }, 228 | "language_info": { 229 | "codemirror_mode": { 230 | "name": "ipython", 231 | "version": 2 232 | }, 233 | "file_extension": ".py", 234 | "mimetype": "text/x-python", 235 | "name": "python", 236 | "nbconvert_exporter": "python", 237 | "pygments_lexer": "ipython2", 238 | "version": "2.7.12" 239 | } 240 | }, 241 | "nbformat": 4, 242 | "nbformat_minor": 0 243 | } 244 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/Untitled1-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/XGBoost-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "from dfs2 import DeepFeatureSelectionNew\n", 16 | "import numpy as np\n", 17 | "from sklearn.datasets import make_classification\n", 18 | "from sklearn.preprocessing import normalize\n", 19 | "\n", 20 | "# ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 21 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 22 | "ourdata = sio.loadmat(\"./B_mean_2labels.mat\")\n", 23 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/Q_2labels_unstandardized.mat\")\n", 24 | "\n", 25 | "inputX = ourdata['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdata['Y'][0,:]\n", 28 | "columnNames = ourdata['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "from sklearn.ensemble import RandomForestClassifier\n", 42 | "from sklearn.metrics import accuracy_score\n", 43 | "import xgboost as xgb\n", 44 | "\n", 45 | "# rf = RandomForestClassifier(criterion=\"entropy\", n_estimators = 300, max_depth = 100)\n", 46 | "# rf.fit(X_train, y_train)\n", 47 | "\n", 48 | "# y_pred = rf.predict(X_test)\n", 49 | "\n", 50 | "gbm = xgb.XGBClassifier(max_depth=3, 
n_estimators=400, learning_rate=0.05).fit(X_train, y_train)\n", 51 | "y_pred = gbm.predict(X_test)\n", 52 | "\n", 53 | "# featurescores = gbm.feature_importances_\n", 54 | "\n", 55 | "print(accuracy_score(y_test, y_pred))\n", 56 | "\n", 57 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 58 | "\n", 59 | "np.save(\"indexes_xgboost\",indexes_xgboost)" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 2", 66 | "language": "python", 67 | "name": "python2" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 2 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython2", 79 | "version": "2.7.12" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 0 84 | } 85 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/testHaarcascades-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import numpy as np\n", 12 | "import cv2\n", 13 | "\n", 14 | "HAAR_PATH = \"/Users/xupeng.tong/opencv/data/haarcascades/\"  # trailing slash so the cascade file names below resolve\n", 15 | "IMG_PATH = \"/Users/xupeng.tong/State-Farm-Distracted-Driver-Detection/Images/imgs/train/\"\n", 16 | "\n", 17 | "upperbody_cascade = cv2.CascadeClassifier(HAAR_PATH+'haarcascade_upperbody.xml')\n", 18 | "eye_cascade = cv2.CascadeClassifier(HAAR_PATH+'haarcascade_eye.xml')  # used by the detection cell below\n", 19 | "img = cv2.imread(IMG_PATH+'c0/img_100050.jpg')\n", 20 | "gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 5, 26 | "metadata": { 27 | "collapsed": false 28 | }, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/plain": [ 33 | "'/Users/xupeng.tong/State-Farm-Distracted-Driver-Detection/model'" 34 | ] 35 | }, 36 | "execution_count": 5, 37 | "metadata": {}, 38 | "output_type": "execute_result" 39 | } 40 | ], 41 | "source": [ 42 | "faces = upperbody_cascade.detectMultiScale(gray, 1.3, 5)\n", 43 | "\n", 44 | "for (x,y,w,h) in faces:\n", 45 | " cv2.rectangle(img,(x,y),(x+w,y+h),(255,0,0),2)\n", 46 | " roi_gray = gray[y:y+h, x:x+w]\n", 47 | " roi_color = img[y:y+h, x:x+w]\n", 48 | " eyes = eye_cascade.detectMultiScale(roi_gray)\n", 49 | " \n", 50 | " for (ex,ey,ew,eh) in eyes:\n", 51 | " cv2.rectangle(roi_color,(ex,ey),(ex+ew,ey+eh),(0,255,0),2)\n", 52 | " \n", 53 | " cv2.imshow('testing',img)\n", 54 | " cv2.waitKey(0)\n", 55 | " cv2.destroyAllWindows()\n", 56 | " \n", 57 | "import os\n", 58 | "\n", 59 | "os.getcwd()" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 2", 66 | "language": "python", 67 | "name": "python2" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 2 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython2", 79 | "version": "2.7.12" 80 | } 81 | }, 82 | "nbformat": 4, 83 | "nbformat_minor": 0 84 | } 85 | -------------------------------------------------------------------------------- /CV_BMatrix.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 
| "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "from dfs2 import DeepFeatureSelectionNew\n", 16 | "import numpy as np\n", 17 | "from sklearn.datasets import make_classification\n", 18 | "from sklearn.preprocessing import normalize\n", 19 | "from __future__ import print_function\n", 20 | "\n", 21 | "# ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 22 | "ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 23 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/Q_2labels_standardized.mat\")\n", 24 | "\n", 25 | "inputX = ourdata['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdata['Y'][0,:]\n", 28 | "columnNames = ourdata['columnNames']" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 4, 34 | "metadata": { 35 | "collapsed": false 36 | }, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "epoch 0: global loss = 0.733293890953\n", 43 | "('Train accuracy:', 0.42763543)\n", 44 | "('Test accuracy:', 0.43799719)\n", 45 | "epoch 10: global loss = 0.476914912462\n", 46 | "('Train accuracy:', 0.83935887)\n", 47 | "('Test accuracy:', 0.84277022)\n", 48 | "epoch 20: global loss = 0.129701793194\n", 49 | "('Train accuracy:', 0.96197498)\n", 50 | "('Test accuracy:', 0.95273751)\n", 51 | "epoch 30: global loss = 0.052871208638\n", 52 | "('Train accuracy:', 0.98350298)\n", 53 | "('Test accuracy:', 0.97707069)\n", 54 | "epoch 40: global loss = 0.0323026739061\n", 55 | "('Train accuracy:', 0.98993796)\n", 56 | "('Test accuracy:', 0.98596162)\n", 57 | "('Final train accuracy:', 0.99216098)\n", 58 | "('Final test accuracy:', 0.99064106)\n", 59 | "Train finised for random state:0\n", 60 | "epoch 0: global loss = 0.751039803028\n", 61 | "('Train accuracy:', 0.42939043)\n", 62 | "('Test accuracy:', 0.430978)\n", 63 | "epoch 10: global loss = 0.507127285004\n", 64 | "('Train accuracy:', 0.85257983)\n", 65 | "('Test accuracy:', 0.86008424)\n", 66 | "epoch 20: global loss = 0.141821071506\n", 67 | "('Train accuracy:', 0.95401895)\n", 68 | "('Test accuracy:', 0.95320541)\n", 69 | "epoch 30: global loss = 0.0590038299561\n", 70 | "('Train accuracy:', 0.98256701)\n", 71 | "('Test accuracy:', 0.98408985)\n", 72 | "epoch 40: global loss = 0.0350771062076\n", 73 | "('Train accuracy:', 0.98993796)\n", 74 | "('Test accuracy:', 0.98642957)\n", 75 | "('Final train accuracy:', 0.992863)\n", 76 | "('Final test accuracy:', 0.99110901)\n", 77 | "Train finised for random state:1\n", 78 | "epoch 0: global loss = 0.759872674942\n", 79 | "('Train accuracy:', 0.42857143)\n", 80 | "('Test accuracy:', 0.43425363)\n", 81 | "epoch 10: global loss = 0.506673395634\n", 82 | "('Train accuracy:', 0.85059083)\n", 83 | "('Test accuracy:', 0.84604585)\n", 84 | "epoch 20: global loss = 0.154908597469\n", 85 | "('Train accuracy:', 0.94793493)\n", 86 | "('Test accuracy:', 0.94478238)\n", 87 | "epoch 30: global loss = 0.0784661099315\n", 88 | "('Train accuracy:', 0.97542995)\n", 89 | "('Test accuracy:', 0.97145534)\n", 90 | "epoch 40: global loss = 0.0517223998904\n", 91 | "('Train accuracy:', 0.98432201)\n", 92 | "('Test accuracy:', 0.98455781)\n", 93 | "('Final train accuracy:', 0.98982102)\n", 94 | "('Final test accuracy:', 
0.98736548)\n", 95 | "Train finised for random state:2\n", 96 | "epoch 0: global loss = 0.748433232307\n", 97 | "('Train accuracy:', 0.43009242)\n", 98 | "('Test accuracy:', 0.42817032)\n", 99 | "epoch 10: global loss = 0.509174644947\n", 100 | "('Train accuracy:', 0.83315784)\n", 101 | "('Test accuracy:', 0.83575106)\n", 102 | "epoch 20: global loss = 0.141662657261\n", 103 | "('Train accuracy:', 0.95495498)\n", 104 | "('Test accuracy:', 0.96443611)\n", 105 | "epoch 30: global loss = 0.0590850003064\n", 106 | "('Train accuracy:', 0.98198199)\n", 107 | "('Test accuracy:', 0.98408985)\n", 108 | "epoch 40: global loss = 0.0356163904071\n", 109 | "('Train accuracy:', 0.98888499)\n", 110 | "('Test accuracy:', 0.9897052)\n", 111 | "('Final train accuracy:', 0.99321401)\n", 112 | "('Final test accuracy:', 0.99204493)\n", 113 | "Train finised for random state:3\n", 114 | "epoch 0: global loss = 0.733057200909\n", 115 | "('Train accuracy:', 0.43360242)\n", 116 | "('Test accuracy:', 0.41413197)\n", 117 | "epoch 10: global loss = 0.49149876833\n", 118 | "('Train accuracy:', 0.83163685)\n", 119 | "('Test accuracy:', 0.83153951)\n", 120 | "epoch 20: global loss = 0.134568467736\n", 121 | "('Train accuracy:', 0.95858198)\n", 122 | "('Test accuracy:', 0.94431448)\n", 123 | "epoch 30: global loss = 0.0583451613784\n", 124 | "('Train accuracy:', 0.98291796)\n", 125 | "('Test accuracy:', 0.97707069)\n", 126 | "epoch 40: global loss = 0.0373480655253\n", 127 | "('Train accuracy:', 0.98853397)\n", 128 | "('Test accuracy:', 0.98502576)\n", 129 | "('Final train accuracy:', 0.99145901)\n", 130 | "('Final test accuracy:', 0.99064106)\n", 131 | "Train finised for random state:4\n", 132 | "epoch 0: global loss = 0.736807286739\n", 133 | "('Train accuracy:', 0.43056044)\n", 134 | "('Test accuracy:', 0.42629856)\n", 135 | "epoch 10: global loss = 0.50833773613\n", 136 | "('Train accuracy:', 0.82917982)\n", 137 | "('Test accuracy:', 0.83902669)\n", 138 | "epoch 20: global loss = 0.138045296073\n", 139 | "('Train accuracy:', 0.95612496)\n", 140 | "('Test accuracy:', 0.95975667)\n", 141 | "epoch 30: global loss = 0.0586299747229\n", 142 | "('Train accuracy:', 0.982099)\n", 143 | "('Test accuracy:', 0.98315394)\n", 144 | "epoch 40: global loss = 0.0354525335133\n", 145 | "('Train accuracy:', 0.989353)\n", 146 | "('Test accuracy:', 0.98689753)\n", 147 | "('Final train accuracy:', 0.992863)\n", 148 | "('Final test accuracy:', 0.99017316)\n", 149 | "Train finised for random state:5\n", 150 | "epoch 0: global loss = 0.75368309021\n", 151 | "('Train accuracy:', 0.43009242)\n", 152 | "('Test accuracy:', 0.42817032)\n", 153 | "epoch 10: global loss = 0.509149491787\n", 154 | "('Train accuracy:', 0.84942085)\n", 155 | "('Test accuracy:', 0.84464204)\n", 156 | "epoch 20: global loss = 0.138327404857\n", 157 | "('Train accuracy:', 0.95682698)\n", 158 | "('Test accuracy:', 0.95507723)\n", 159 | "epoch 30: global loss = 0.056102283299\n", 160 | "('Train accuracy:', 0.983971)\n", 161 | "('Test accuracy:', 0.97987831)\n", 162 | "epoch 40: global loss = 0.0324657447636\n", 163 | "('Train accuracy:', 0.99063998)\n", 164 | "('Test accuracy:', 0.98596162)\n", 165 | "('Final train accuracy:', 0.99414998)\n", 166 | "('Final test accuracy:', 0.98923725)\n", 167 | "Train finised for random state:6\n", 168 | "epoch 0: global loss = 0.727672696114\n", 169 | "('Train accuracy:', 0.42611444)\n", 170 | "('Test accuracy:', 0.44408047)\n", 171 | "epoch 10: global loss = 0.47494405508\n", 172 | "('Train accuracy:', 0.84052885)\n", 173 | "('Test 
accuracy:', 0.83809078)\n", 174 | "epoch 20: global loss = 0.126760289073\n", 175 | "('Train accuracy:', 0.96045399)\n", 176 | "('Test accuracy:', 0.95741695)\n", 177 | "epoch 30: global loss = 0.0550770014524\n", 178 | "('Train accuracy:', 0.983854)\n", 179 | "('Test accuracy:', 0.97894245)\n", 180 | "epoch 40: global loss = 0.0347224362195\n", 181 | "('Train accuracy:', 0.98982102)\n", 182 | "('Test accuracy:', 0.98502576)\n", 183 | "('Final train accuracy:', 0.99298)\n", 184 | "('Final test accuracy:', 0.98736548)\n", 185 | "Train finised for random state:7\n", 186 | "epoch 0: global loss = 0.753837943077\n", 187 | "('Train accuracy:', 0.42903942)\n", 188 | "('Test accuracy:', 0.43238184)\n", 189 | "epoch 10: global loss = 0.528392732143\n", 190 | "('Train accuracy:', 0.84532583)\n", 191 | "('Test accuracy:', 0.83996254)\n", 192 | "epoch 20: global loss = 0.140707731247\n", 193 | "('Train accuracy:', 0.95635897)\n", 194 | "('Test accuracy:', 0.95741695)\n", 195 | "epoch 30: global loss = 0.0585461705923\n", 196 | "('Train accuracy:', 0.982099)\n", 197 | "('Test accuracy:', 0.98128217)\n", 198 | "epoch 40: global loss = 0.0387108251452\n", 199 | "('Train accuracy:', 0.98806602)\n", 200 | "('Test accuracy:', 0.98689753)\n", 201 | "('Final train accuracy:', 0.99251199)\n", 202 | "('Final test accuracy:', 0.98923725)\n", 203 | "Train finised for random state:8\n", 204 | "epoch 0: global loss = 0.745738267899\n", 205 | "('Train accuracy:', 0.42810342)\n", 206 | "('Test accuracy:', 0.4361254)\n", 207 | "epoch 10: global loss = 0.502628087997\n", 208 | "('Train accuracy:', 0.83678484)\n", 209 | "('Test accuracy:', 0.8404305)\n", 210 | "epoch 20: global loss = 0.140836164355\n", 211 | "('Train accuracy:', 0.95425296)\n", 212 | "('Test accuracy:', 0.95367336)\n", 213 | "epoch 30: global loss = 0.0612846538424\n", 214 | "('Train accuracy:', 0.98151398)\n", 215 | "('Test accuracy:', 0.98034626)\n", 216 | "epoch 40: global loss = 0.0394038744271\n", 217 | "('Train accuracy:', 0.987481)\n", 218 | "('Test accuracy:', 0.9836219)\n", 219 | "('Final train accuracy:', 0.99157602)\n", 220 | "('Final test accuracy:', 0.99064106)\n", 221 | "Train finised for random state:9\n" 222 | ] 223 | } 224 | ], 225 | "source": [ 226 | "weights_0_10 = []\n", 227 | "for random_state in xrange(10):\n", 228 | " X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)\n", 229 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[50], learning_rate=0.01, \\\n", 230 | " lambda1=0, lambda2=1, alpha1=0, alpha2=0, activation='tanh', \\\n", 231 | " weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)\n", 232 | " dfsMLP.train(batch_size=2000)\n", 233 | " print(\"Train finished for random state:\" + str(random_state))\n", 234 | " weights_0_10.append(dfsMLP.selected_ws[0])" 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "execution_count": 5, 240 | "metadata": { 241 | "collapsed": false 242 | }, 243 | "outputs": [], 244 | "source": [ 245 | "np.save(\"weights-0-10-new\", weights_0_10)\n", 246 | "weights_10_20 = np.load(\"weights-10-20.npy\")  # saved by an earlier session; needed for the concatenation below\n", 247 | "weights_20_30 = np.load(\"weights-20-30.npy\")\n" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 29, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "allweights = np.concatenate((weights_0_10,weights_10_20,weights_20_30))" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | 
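Each pass of the loop above contributes one first-layer weight vector per random state; a common way to collapse such a stack into a single feature ranking is the mean absolute weight. A minimal numpy sketch with a synthetic stand-in for the saved `(n_runs, n_features)` array:

```python
import numpy as np

rng = np.random.RandomState(0)
all_w = rng.randn(30, 7205)              # 30 runs x 7205 features, synthetic

importance = np.abs(all_w).mean(axis=0)  # mean |weight| per feature
ranking = np.argsort(importance)[::-1]   # most important feature first
print(ranking[:10])
```

Taking the absolute value before averaging avoids sign cancellation between runs; averaging first and only then taking the absolute value can underrate features whose weight sign flips across splits.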
"execution_count": 32, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [], 268 | "source": [ 269 | "np.save(\"allweights-0-30\", allweights)" 270 | ] 271 | }, 272 | { 273 | "cell_type": "code", 274 | "execution_count": 9, 275 | "metadata": { 276 | "collapsed": false 277 | }, 278 | "outputs": [], 279 | "source": [ 280 | "weights = np.array(weights)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": 31, 286 | "metadata": { 287 | "collapsed": false 288 | }, 289 | "outputs": [ 290 | { 291 | "name": "stdout", 292 | "output_type": "stream", 293 | "text": [ 294 | "High five! You successfuly sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~tonyabracadabra/0 or inside your plot.ly account where it is named 'basic-heatmap'\n" 295 | ] 296 | }, 297 | { 298 | "data": { 299 | "text/html": [ 300 | "" 301 | ], 302 | "text/plain": [ 303 | "" 304 | ] 305 | }, 306 | "execution_count": 31, 307 | "metadata": {}, 308 | "output_type": "execute_result" 309 | } 310 | ], 311 | "source": [ 312 | "import plotly.plotly as py\n", 313 | "import plotly.graph_objs as go\n", 314 | "import plotly\n", 315 | "\n", 316 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 317 | "\n", 318 | "data = [\n", 319 | " go.Heatmap(\n", 320 | " z=np.abs(allweights)\n", 321 | " )\n", 322 | "]\n", 323 | "\n", 324 | "py.iplot(data, filename='basic-heatmap')" 325 | ] 326 | }, 327 | { 328 | "cell_type": "code", 329 | "execution_count": 17, 330 | "metadata": { 331 | "collapsed": false 332 | }, 333 | "outputs": [], 334 | "source": [ 335 | "averagedWeight = np.abs(weights.sum(axis=0)/10)" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": null, 341 | "metadata": { 342 | "collapsed": true 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "for random_state in xrange(10,20):\n", 347 | " X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)\n", 348 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[50], learning_rate=0.01, \\\n", 349 | " lambda1=0, lambda2=1, alpha1=0.0001, alpha2=0, activation='tanh', \\\n", 350 | " weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)\n", 351 | " dfsMLP.train(batch_size=2000)\n", 352 | " print(\"Train finised for random state:\" + str(random_state))\n", 353 | " weights.append(dfsMLP.selected_ws[0])" 354 | ] 355 | } 356 | ], 357 | "metadata": { 358 | "kernelspec": { 359 | "display_name": "Python 2", 360 | "language": "python", 361 | "name": "python2" 362 | }, 363 | "language_info": { 364 | "codemirror_mode": { 365 | "name": "ipython", 366 | "version": 2 367 | }, 368 | "file_extension": ".py", 369 | "mimetype": "text/x-python", 370 | "name": "python", 371 | "nbconvert_exporter": "python", 372 | "pygments_lexer": "ipython2", 373 | "version": "2.7.12" 374 | } 375 | }, 376 | "nbformat": 4, 377 | "nbformat_minor": 0 378 | } 379 | -------------------------------------------------------------------------------- /Ensemble Model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn.preprocessing import normalize\n", 12 | "import numpy as np\n", 13 | "\n", 14 | "weights_0_10 = np.load(\"weights-0-10-NEW-mean.npy\")\n", 15 | "weights_10_20 = 
np.load(\"weights-10-20-NEW-mean.npy\")\n", 16 | "weights_20_30 = np.load(\"weights-20-30-NEW-mean.npy\")\n", 17 | "indexes_xgboost = np.load(\"indexes_xgboost.npy\")\n", 18 | "\n", 19 | "weights = np.concatenate((weights_0_10, weights_10_20, weights_20_30))\n", 20 | "\n", 21 | "np.save(\"weights-NEW-mean\", weights)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": { 28 | "collapsed": false 29 | }, 30 | "outputs": [], 31 | "source": [ 32 | "from scipy import io as sio\n", 33 | "\n", 34 | "ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/B_mean_2labels.mat\")\n", 35 | "\n", 36 | "inputX = ourdata['X']\n", 37 | "inputX = normalize(inputX, axis=0)\n", 38 | "inputY = ourdata['Y'][0,:]\n", 39 | "columnNames = ourdata['columnNames']" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 12, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "weights = abs(weights)\n", 51 | "averagedWeight = normalize(weights).sum(axis=0)\n", 52 | "indexes_average_dfs = np.argsort(averagedWeight)[::-1]\n", 53 | "\n", 54 | "def unionDFSfeatures(n):\n", 55 | " indexes_union = []\n", 56 | " for i in xrange(30):\n", 57 | " indexes_union.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 58 | " \n", 59 | " union = reduce(np.union1d, indexes_union).tolist()\n", 60 | " print(\"Number of union features:\", len(union))\n", 61 | " return inputX[:, union], union\n", 62 | "\n", 63 | "def intersectDFSfeatures(n):\n", 64 | " indexes_intersect = []\n", 65 | " for i in xrange(30):\n", 66 | " indexes_intersect.append(np.argsort(weights[i])[::-1][:n].tolist())\n", 67 | " \n", 68 | " intersected = reduce(np.intersect1d, indexes_intersect).tolist()\n", 69 | " print(\"Number of intersected features:\",len(intersected))\n", 70 | " return inputX[:, intersected]\n", 71 | "\n", 72 | "def topXGBoostfeatures(a,b):\n", 73 | " return inputX[:, indexes_xgboost.tolist()[a:b]], indexes_xgboost.tolist()[a:b]\n", 74 | "\n", 75 | "def topAveDFSfeatures(a,b):\n", 76 | " return inputX[:, indexes_average_dfs.tolist()[a:b]], indexes_average_dfs.tolist()[a:b]\n", 77 | "\n", 78 | "def pickOneDFSfeatures(a,b,n):\n", 79 | " indexx = np.argsort(weights[n])[::-1]\n", 80 | " return inputX[:, indexx.tolist()[a:b]]\n", 81 | "\n", 82 | "def topDFSTemp(a,b):\n", 83 | " temp = np.argsort(abs(dfsMLP.selected_ws[0]))[::-1]\n", 84 | " return inputX[:, temp.tolist()[a:b]]" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 36, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "name": "stdout", 96 | "output_type": "stream", 97 | "text": [ 98 | "('Number of intersected features:', 9)\n", 99 | "('Intersect:', 0.97940872141117929)\n", 100 | "('Number of union features:', 27)\n" 101 | ] 102 | }, 103 | { 104 | "ename": "ValueError", 105 | "evalue": "Found arrays with inconsistent numbers of samples: [ 2 10684]", 106 | "output_type": "error", 107 | "traceback": [ 108 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 109 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 110 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[0mkeke\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0munionDFSfeatures\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 13\u001b[0;31m 
\u001b[0mscores\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcross_val_score\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msvm\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeke\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputY\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcv\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 14\u001b[0m \u001b[0;32mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Union:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscores\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 111 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/cross_validation.pyc\u001b[0m in \u001b[0;36mcross_val_score\u001b[0;34m(estimator, X, y, scoring, cv, n_jobs, verbose, fit_params, pre_dispatch)\u001b[0m\n\u001b[1;32m 1420\u001b[0m \u001b[0mArray\u001b[0m \u001b[0mof\u001b[0m \u001b[0mscores\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mestimator\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0meach\u001b[0m \u001b[0mrun\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mcross\u001b[0m \u001b[0mvalidation\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1421\u001b[0m \"\"\"\n\u001b[0;32m-> 1422\u001b[0;31m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mindexable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1423\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1424\u001b[0m \u001b[0mcv\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_cv\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcv\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0my\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mclassifier\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mis_classifier\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mestimator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 112 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mindexable\u001b[0;34m(*iterables)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 200\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 201\u001b[0;31m \u001b[0mcheck_consistent_length\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 202\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 203\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 113 | "\u001b[0;32m/Users/xupeng.tong/anaconda/lib/python2.7/site-packages/sklearn/utils/validation.pyc\u001b[0m in \u001b[0;36mcheck_consistent_length\u001b[0;34m(*arrays)\u001b[0m\n\u001b[1;32m 174\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0muniques\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m 
\u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 175\u001b[0m raise ValueError(\"Found arrays with inconsistent numbers of samples: \"\n\u001b[0;32m--> 176\u001b[0;31m \"%s\" % str(uniques))\n\u001b[0m\u001b[1;32m 177\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 178\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 114 | "\u001b[0;31mValueError\u001b[0m: Found arrays with inconsistent numbers of samples: [ 2 10684]" 115 | ] 116 | } 117 | ], 118 | "source": [ 119 | "from sklearn.svm import LinearSVC\n", 120 | "from sklearn.metrics import accuracy_score\n", 121 | "from sklearn.linear_model import LogisticRegression\n", 122 | "from sklearn.cross_validation import cross_val_score\n", 123 | "\n", 124 | "svm = LinearSVC()\n", 125 | "\n", 126 | "keke = intersectDFSfeatures(200)\n", 127 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 128 | "print(\"Intersect:\", np.mean(scores))\n", 129 | "\n", 130 | "keke = unionDFSfeatures(10)\n", 131 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 132 | "print(\"Union:\", np.mean(scores))\n", 133 | "\n", 134 | "keke = topAveDFSfeatures(1,27)\n", 135 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 136 | "print(\"Ave:\", np.mean(scores))\n", 137 | "\n", 138 | "keke = topXGBoostfeatures(0,27)\n", 139 | "scores = cross_val_score(svm, keke, inputY, cv=5)\n", 140 | "print(\"XGBoost:\", np.mean(scores))\n", 141 | "\n", 142 | "print(\"Pick one TOP DFS features from 30\")\n", 143 | "for i in xrange(0,30):\n", 144 | " keke = pickOneDFSfeatures(0,27,i)\n", 145 | " scores = cross_val_score(svm, keke, inputY, cv=5)\n", 146 | " print(np.mean(scores))" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": 65, 152 | "metadata": { 153 | "collapsed": false 154 | }, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/html": [ 159 | "" 160 | ], 161 | "text/plain": [ 162 | "" 163 | ] 164 | }, 165 | "execution_count": 65, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "import plotly.plotly as py\n", 172 | "import plotly.graph_objs as go\n", 173 | "import plotly\n", 174 | "\n", 175 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 176 | "\n", 177 | "data = [\n", 178 | " go.Heatmap(\n", 179 | " z=np.abs(weights)\n", 180 | " )\n", 181 | "]\n", 182 | "\n", 183 | "py.iplot(data, filename='30 Weights')" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 91, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [ 193 | { 194 | "data": { 195 | "text/plain": [ 196 | "[u'Phe9_2534',\n", 197 | " u'Phe9_491',\n", 198 | " u'Phe9_4912',\n", 199 | " u'Phe9_492',\n", 200 | " u'Phe9_4928',\n", 201 | " u'Phe9_493',\n", 202 | " u'Phe9_4930',\n", 203 | " u'Phe9_4931',\n", 204 | " u'Phe9_4932',\n", 205 | " u'Phe9_4938',\n", 206 | " u'Phe9_4939',\n", 207 | " u'Phe9_494',\n", 208 | " u'Phe9_4940',\n", 209 | " u'Phe9_496',\n", 210 | " u'Phe9_5343',\n", 211 | " u'Phe9_V146',\n", 212 | " u'Phe10_E236',\n", 213 | " u'Phe10_I23',\n", 214 | " u'Phe10_J44',\n", 215 | " u'Phe10_J449',\n", 216 | " u'Phe10_J45',\n", 217 | " u'Phe10_J452',\n", 218 | " u'Phe10_J453',\n", 219 | " u'Phe10_J454',\n", 220 | " u'Phe10_J459',\n", 221 | " u'Phe10_N08',\n", 222 | " u'Phe10_S060']" 223 | ] 224 | }, 225 | "execution_count": 91, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "indexes_union = []\n", 232 | "for i in xrange(30):\n", 233 | " 
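The `ValueError` recorded above is raised because `unionDFSfeatures`, `topAveDFSfeatures`, and `topXGBoostfeatures` return a `(matrix, index_list)` tuple, and the two-element tuple itself is handed to `cross_val_score` as `X`. Unpacking the tuple avoids it; a sketch, assuming the helper definitions, `svm`, and `inputY` from the cells above have been run:

```python
# not: keke = unionDFSfeatures(10)  -- that binds the whole (matrix, indexes) tuple
X_union, union_idx = unionDFSfeatures(10)
print("Union:", np.mean(cross_val_score(svm, X_union, inputY, cv=5)))

X_ave, ave_idx = topAveDFSfeatures(1, 27)
print("Ave:", np.mean(cross_val_score(svm, X_ave, inputY, cv=5)))

X_xgb, xgb_idx = topXGBoostfeatures(0, 27)
print("XGBoost:", np.mean(cross_val_score(svm, X_xgb, inputY, cv=5)))
```

`intersectDFSfeatures` returns only the matrix, which is why the Intersect score above printed successfully before the traceback.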
indexes_union.append(np.argsort(weights[i])[::-1][:10].tolist())\n", 234 | "union = reduce(np.union1d, indexes_union).tolist()\n", 235 | "\n", 236 | "\n", 237 | "[i[0] for i in columnNames.reshape(7205,)[union]]" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 78, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "def getSelectedFeatureNames(which, topN):\n", 249 | " if which == \"Ave\":\n", 250 | " indexes = indexes_average_dfs\n", 251 | " elif which == \"Union\":\n", 252 | " indexes = np.array(union)  # the union index list computed in the cell above\n", 253 | " elif which == \"Intersect\":\n", 254 | " indexes = np.array(intersected)  # assumes the intersected index list has been kept around\n", 255 | " featureNames = [i[0] for i in columnNames.reshape(7205,)[indexes[:topN]]]\n", 256 | " \n", 257 | " return featureNames" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 99, 263 | "metadata": { 264 | "collapsed": false 265 | }, 266 | "outputs": [ 267 | { 268 | "name": "stdout", 269 | "output_type": "stream", 270 | "text": [ 271 | "('Number of union features:', 1)\n", 272 | "('Union:', 0.94487121332880197)\n", 273 | "('Number of union features:', 10)\n", 274 | "('Union:', 0.94346676284389586)\n", 275 | "('Number of union features:', 12)\n", 276 | "('Union:', 0.9591910107244368)\n", 277 | "('Number of union features:', 17)\n", 278 | "('Union:', 0.97145272318043541)\n", 279 | "('Number of union features:', 21)\n", 280 | "('Union:', 0.97182712357021495)\n", 281 | "('Number of union features:', 23)\n", 282 | "('Union:', 0.99204408956348877)\n", 283 | "('Number of union features:', 26)\n", 284 | "('Union:', 0.99166968909173525)\n", 285 | "('Number of union features:', 26)\n", 286 | "('Union:', 0.99166968909173525)\n", 287 | "('Number of union features:', 26)\n", 288 | "('Union:', 0.99166968909173525)\n", 289 | "('Number of union features:', 27)\n", 290 | "('Union:', 0.99166960150243777)\n", 291 | "('Number of union features:', 28)\n", 292 | "('Union:', 0.99073384134732279)\n", 293 | "('Number of union features:', 31)\n", 294 | "('Union:', 0.98418163686619731)\n", 295 | "('Number of union features:', 31)\n", 296 | "('Union:', 0.98418163686619731)\n", 297 | "('Number of union features:', 31)\n", 298 | "('Union:', 0.98418163686619731)\n", 299 | "('Number of union features:', 34)\n", 300 | "('Union:', 0.98446240429717535)\n", 301 | "('Number of union features:', 36)\n", 302 | "('Union:', 0.98474339076287709)\n", 303 | "('Number of union features:', 40)\n", 304 | "('Union:', 0.98596013738369381)\n", 305 | "('Number of union features:', 45)\n", 306 | "('Union:', 0.98558578076806957)\n", 307 | "('Number of union features:', 51)\n", 308 | "('Union:', 0.98876816262448164)\n", 309 | "('Number of union features:', 55)\n", 310 | "('Union:', 0.98895538476792955)\n", 311 | "('Number of union features:', 61)\n", 312 | "('Union:', 0.98876816262448164)\n", 313 | "('Number of union features:', 67)\n", 314 | "('Union:', 0.98839398118745181)\n", 315 | "('Number of union features:', 73)\n", 316 | "('Union:', 0.9883001730500558)\n", 317 | "('Number of union features:', 77)\n", 318 | "('Union:', 0.98801936180393546)\n", 319 | "('Number of union features:', 86)\n", 320 | "('Union:', 0.98783209588633247)\n", 321 | "('Number of union features:', 89)\n", 322 | "('Union:', 0.98811295098858187)\n", 323 | "('Number of union features:', 102)\n", 324 | "('Union:', 0.98596048790483126)\n", 325 | "('Number of union features:', 108)\n", 326 | "('Union:', 0.98567980810413758)\n", 327 | "('Number of union features:', 120)\n", 328 | "('Union:', 0.98418233778551067)\n", 329 | 
"('Number of union features:', 129)\n", 330 | "('Union:', 0.98277810621236728)\n", 331 | "('Number of union features:', 139)\n", 332 | "('Union:', 0.98193567247400682)\n", 333 | "('Number of union features:', 151)\n", 334 | "('Union:', 0.98184238987239447)\n", 335 | "('Number of union features:', 162)\n", 336 | "('Union:', 0.98109350142156404)\n", 337 | "('Number of union features:', 173)\n", 338 | "('Union:', 0.98090641072354268)\n", 339 | "('Number of union features:', 182)\n", 340 | "('Union:', 0.9806255118471382)\n", 341 | "('Number of union features:', 192)\n", 342 | "('Union:', 0.97968944506800215)\n", 343 | "('Number of union features:', 202)\n", 344 | "('Union:', 0.97997030012926456)\n", 345 | "('Number of union features:', 214)\n", 346 | "('Union:', 0.97940863386286914)\n", 347 | "('Number of union features:', 224)\n", 348 | "('Union:', 0.9790342334321025)\n", 349 | "('Number of union features:', 234)\n", 350 | "('Union:', 0.97837897798106077)\n", 351 | "('Number of union features:', 242)\n", 352 | "('Union:', 0.97809829809839288)\n", 353 | "('Number of union features:', 255)\n", 354 | "('Union:', 0.97837889035077619)\n", 355 | "('Number of union features:', 265)\n", 356 | "('Union:', 0.97791085700219527)\n", 357 | "('Number of union features:', 269)\n", 358 | "('Union:', 0.9780979915153587)\n", 359 | "('Number of union features:', 275)\n", 360 | "('Union:', 0.9780979915153587)\n", 361 | "('Number of union features:', 280)\n", 362 | "('Union:', 0.9776299143516356)\n", 363 | "('Number of union features:', 289)\n", 364 | "('Union:', 0.97781744303713047)\n", 365 | "('Number of union features:', 297)\n", 366 | "('Union:', 0.97706864221658452)\n", 367 | "('Number of union features:', 304)\n", 368 | "('Union:', 0.97716231894954109)\n", 369 | "('Number of union features:', 309)\n", 370 | "('Union:', 0.97678783088849008)\n", 371 | "('Number of union features:', 318)\n", 372 | "('Union:', 0.97660078396462391)\n", 373 | "('Number of union features:', 322)\n", 374 | "('Union:', 0.97669424174483088)\n", 375 | "('Number of union features:', 325)\n", 376 | "('Union:', 0.97688159525173113)\n", 377 | "('Number of union features:', 331)\n", 378 | "('Union:', 0.97697509680609307)\n", 379 | "('Number of union features:', 340)\n", 380 | "('Union:', 0.97631975376575397)\n", 381 | "('Number of union features:', 347)\n", 382 | "('Union:', 0.97660069641631364)\n", 383 | "('Number of union features:', 356)\n", 384 | "('Union:', 0.97678804988222689)\n", 385 | "('Number of union features:', 369)\n", 386 | "('Union:', 0.97678796229292963)\n", 387 | "('Number of union features:', 375)\n", 388 | "('Union:', 0.97632001653364586)\n", 389 | "('Number of union features:', 386)\n", 390 | "('Union:', 0.97697522825151961)\n", 391 | "('Number of union features:', 393)\n", 392 | "('Union:', 0.97688150766243387)\n", 393 | "('Number of union features:', 400)\n", 394 | "('Union:', 0.97716231890855398)\n", 395 | "('Number of union features:', 407)\n", 396 | "('Union:', 0.97706877353904975)\n", 397 | "('Number of union features:', 410)\n", 398 | "('Union:', 0.97706872976489456)\n", 399 | "('Number of union features:', 419)\n", 400 | "('Union:', 0.97678796229292963)\n", 401 | "('Number of union features:', 422)\n", 402 | "('Union:', 0.97697509680609307)\n", 403 | "('Number of union features:', 433)\n", 404 | "('Union:', 0.9766944169644125)\n", 405 | "('Number of union features:', 442)\n", 406 | "('Union:', 0.976787787114335)\n", 407 | "('Number of union features:', 446)\n", 408 | "('Union:', 0.97660060882701638)\n", 409 | 
"('Number of union features:', 451)\n", 410 | "('Union:', 0.97669415419652061)\n", 411 | "('Number of union features:', 457)\n", 412 | "('Union:', 0.97669411042236542)\n", 413 | "('Number of union features:', 460)\n", 414 | "('Union:', 0.97669406660722335)\n", 415 | "('Number of union features:', 467)\n", 416 | "('Union:', 0.97678761201771458)\n", 417 | "('Number of union features:', 475)\n", 418 | "('Union:', 0.9765069320940597)\n", 419 | "('Number of union features:', 482)\n", 420 | "('Union:', 0.97669402287405516)\n", 421 | "('Number of union features:', 487)\n", 422 | "('Union:', 0.97660052127870611)\n", 423 | "('Number of union features:', 496)\n", 424 | "('Union:', 0.97622607711477138)\n", 425 | "('Number of union features:', 506)\n", 426 | "('Union:', 0.97613248793012508)\n", 427 | "('Number of union features:', 517)\n", 428 | "('Union:', 0.9762260770737845)\n", 429 | "('Number of union features:', 530)\n", 430 | "('Union:', 0.97575804372520347)\n", 431 | "('Number of union features:', 539)\n", 432 | "('Union:', 0.97538381851401856)\n", 433 | "('Number of union features:', 550)\n", 434 | "('Union:', 0.97547740761669088)\n", 435 | "('Number of union features:', 559)\n", 436 | "('Union:', 0.97547745143183295)\n", 437 | "('Number of union features:', 572)\n", 438 | "('Union:', 0.97547740761669088)\n", 439 | "('Number of union features:', 581)\n", 440 | "('Union:', 0.97538377465788939)\n", 441 | "('Number of union features:', 595)\n", 442 | "('Union:', 0.97547740765767776)\n", 443 | "('Number of union features:', 605)\n", 444 | "('Union:', 0.97529014174007478)\n", 445 | "('Number of union features:', 620)\n", 446 | "('Union:', 0.97538377465788939)\n", 447 | "('Number of union features:', 630)\n", 448 | "('Union:', 0.97491591648790299)\n", 449 | "('Number of union features:', 638)\n", 450 | "('Union:', 0.97519681536430747)\n", 451 | "('Number of union features:', 647)\n", 452 | "('Union:', 0.97529031691866952)\n", 453 | "('Number of union features:', 656)\n", 454 | "('Union:', 0.97538390606232883)\n", 455 | "('Number of union features:', 666)\n", 456 | "('Union:', 0.97510318240550597)\n", 457 | "('Number of union features:', 677)\n", 458 | "('Union:', 0.97482241497452793)\n", 459 | "('Number of union features:', 692)\n", 460 | "('Union:', 0.97510300726789834)\n", 461 | "('Number of union features:', 705)\n", 462 | "('Union:', 0.97482232738523056)\n", 463 | "('Number of union features:', 713)\n", 464 | "('Union:', 0.97472873824157136)\n", 465 | "('Number of union features:', 724)\n", 466 | "('Union:', 0.97444788318030895)\n", 467 | "('Number of union features:', 738)\n", 468 | "('Union:', 0.97444792699545124)\n" 469 | ] 470 | } 471 | ], 472 | "source": [ 473 | "scores_union = []\n", 474 | "for i in xrange(1,100):\n", 475 | " keke = unionDFSfeatures(i)\n", 476 | " scores_union.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))\n", 477 | " print(\"Union:\", scores_union[-1])" 478 | ] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "execution_count": 29, 483 | "metadata": { 484 | "collapsed": false 485 | }, 486 | "outputs": [ 487 | { 488 | "name": "stdout", 489 | "output_type": "stream", 490 | "text": [ 491 | "('Number of union features:', 23)\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "_, union = unionDFSfeatures(6)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 66, 502 | "metadata": { 503 | "collapsed": false 504 | }, 505 | "outputs": [ 506 | { 507 | "name": "stdout", 508 | "output_type": "stream", 509 | "text": [ 510 | "10\n", 511 | "20\n", 
512 | "30\n", 513 | "40\n", 514 | "50\n", 515 | "60\n", 516 | "70\n", 517 | "80\n", 518 | "90\n", 519 | "100\n", 520 | "110\n", 521 | "120\n", 522 | "130\n", 523 | "140\n", 524 | "150\n", 525 | "160\n", 526 | "170\n", 527 | "180\n", 528 | "190\n", 529 | "200\n", 530 | "210\n", 531 | "220\n", 532 | "230\n", 533 | "240\n", 534 | "250\n", 535 | "260\n", 536 | "270\n", 537 | "280\n", 538 | "290\n", 539 | "300\n", 540 | "310\n", 541 | "320\n", 542 | "330\n", 543 | "340\n", 544 | "350\n", 545 | "360\n", 546 | "370\n", 547 | "380\n", 548 | "390\n", 549 | "400\n", 550 | "410\n", 551 | "420\n", 552 | "430\n", 553 | "440\n", 554 | "450\n", 555 | "460\n", 556 | "470\n", 557 | "480\n", 558 | "490\n" 559 | ] 560 | } 561 | ], 562 | "source": [ 563 | "ave, xg = 0, 0\n", 564 | "num = []\n", 565 | "acc = []\n", 566 | "for i in xrange(1,500):\n", 567 | " if i % 10 == 0:\n", 568 | " print i\n", 569 | " _, ave = topAveDFSfeatures(0,i)\n", 570 | " _, xg = topXGBoostfeatures(0,i)\n", 571 | " temp = np.intersect1d(ave,xg)\n", 572 | " if temp.shape[0] > 0:\n", 573 | " if len(num) == 0 or num[-1] != temp.shape[0]:\n", 574 | " num.append(temp.shape[0])\n", 575 | " keke = inputX[:,temp.tolist()]\n", 576 | " acc.append(np.mean(cross_val_score(svm, keke, inputY, cv=5)))" 577 | ] 578 | }, 579 | { 580 | "cell_type": "code", 581 | "execution_count": 100, 582 | "metadata": { 583 | "collapsed": false 584 | }, 585 | "outputs": [ 586 | { 587 | "name": "stdout", 588 | "output_type": "stream", 589 | "text": [ 590 | "('Number of union features:', 102)\n", 591 | "(17,)\n" 592 | ] 593 | }, 594 | { 595 | "data": { 596 | "text/plain": [ 597 | "0.98034465678587579" 598 | ] 599 | }, 600 | "execution_count": 100, 601 | "metadata": {}, 602 | "output_type": "execute_result" 603 | } 604 | ], 605 | "source": [ 606 | "_, ave = topAveDFSfeatures(0,102)\n", 607 | "_, xg = topXGBoostfeatures(0,102)\n", 608 | "_, union = unionDFSfeatures(27)\n", 609 | "temp = np.intersect1d(np.intersect1d(ave,union),xg)\n", 610 | "print temp.shape\n", 611 | "\n", 612 | "keke = inputX[:,temp.tolist()]\n", 613 | "np.mean(cross_val_score(svm, keke, inputY, cv=5))" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 78, 619 | "metadata": { 620 | "collapsed": false 621 | }, 622 | "outputs": [ 623 | { 624 | "data": { 625 | "text/plain": [ 626 | "[1385,\n", 627 | " 1373,\n", 628 | " 1378,\n", 629 | " 1377,\n", 630 | " 4885,\n", 631 | " 1365,\n", 632 | " 4886,\n", 633 | " 1376,\n", 634 | " 4883,\n", 635 | " 4888,\n", 636 | " 4884,\n", 637 | " 1367,\n", 638 | " 1379,\n", 639 | " 4882,\n", 640 | " 1370,\n", 641 | " 1372,\n", 642 | " 1375,\n", 643 | " 4880,\n", 644 | " 1374,\n", 645 | " 1380,\n", 646 | " 4877,\n", 647 | " 4879,\n", 648 | " 4881,\n", 649 | " 1369,\n", 650 | " 2,\n", 651 | " 7025,\n", 652 | " 4887,\n", 653 | " 7024,\n", 654 | " 0,\n", 655 | " 651,\n", 656 | " 649,\n", 657 | " 4874,\n", 658 | " 6431,\n", 659 | " 4889,\n", 660 | " 3004,\n", 661 | " 2511,\n", 662 | " 1340,\n", 663 | " 4891,\n", 664 | " 3908,\n", 665 | " 3859,\n", 666 | " 4835,\n", 667 | " 4660,\n", 668 | " 4830,\n", 669 | " 1336,\n", 670 | " 4876,\n", 671 | " 3,\n", 672 | " 6197,\n", 673 | " 4926,\n", 674 | " 6281,\n", 675 | " 1726,\n", 676 | " 3907,\n", 677 | " 6245,\n", 678 | " 2510,\n", 679 | " 6196,\n", 680 | " 1368,\n", 681 | " 1371,\n", 682 | " 6469,\n", 683 | " 4822,\n", 684 | " 4454,\n", 685 | " 6411,\n", 686 | " 5203,\n", 687 | " 6183,\n", 688 | " 6287,\n", 689 | " 1,\n", 690 | " 4665,\n", 691 | " 1773,\n", 692 | " 4725,\n", 693 | " 1153,\n", 694 | " 
7195,\n", 695 | " 5628,\n", 696 | " 4578,\n", 697 | " 6468,\n", 698 | " 1727,\n", 699 | " 4571,\n", 700 | " 784,\n", 701 | " 1500,\n", 702 | " 4572,\n", 703 | " 6283,\n", 704 | " 4823,\n", 705 | " 3272,\n", 706 | " 3455,\n", 707 | " 4577,\n", 708 | " 1359,\n", 709 | " 1228,\n", 710 | " 750,\n", 711 | " 5200,\n", 712 | " 5627,\n", 713 | " 2244,\n", 714 | " 1098,\n", 715 | " 5692,\n", 716 | " 4645,\n", 717 | " 1501,\n", 718 | " 4722,\n", 719 | " 5695,\n", 720 | " 786,\n", 721 | " 3962,\n", 722 | " 5813,\n", 723 | " 1154,\n", 724 | " 1672,\n", 725 | " 4798]" 726 | ] 727 | }, 728 | "execution_count": 78, 729 | "metadata": {}, 730 | "output_type": "execute_result" 731 | } 732 | ], 733 | "source": [ 734 | "ave" 735 | ] 736 | }, 737 | { 738 | "cell_type": "code", 739 | "execution_count": 106, 740 | "metadata": { 741 | "collapsed": false 742 | }, 743 | "outputs": [], 744 | "source": [ 745 | "[str(i[0]) for i in columnNames[0][temp]]\n", 746 | "\n", 747 | "import pandas as pd\n", 748 | "\n", 749 | "dictionary = pd.read_csv(\"/Users/xupeng.tong/Documents/Data/OriginalData/BinaryTraitMatrix_V2_F50K_DD_20160523.csv\")" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 116, 755 | "metadata": { 756 | "collapsed": false 757 | }, 758 | "outputs": [ 759 | { 760 | "name": "stdout", 761 | "output_type": "stream", 762 | "text": [ 763 | "('Number of union features:', 23)\n" 764 | ] 765 | }, 766 | { 767 | "data": { 768 | "text/plain": [ 769 | "[432,\n", 770 | " 1365,\n", 771 | " 1367,\n", 772 | " 1370,\n", 773 | " 1373,\n", 774 | " 1374,\n", 775 | " 1375,\n", 776 | " 1376,\n", 777 | " 1377,\n", 778 | " 1378,\n", 779 | " 1379,\n", 780 | " 1380,\n", 781 | " 1385,\n", 782 | " 1501,\n", 783 | " 3859,\n", 784 | " 4571,\n", 785 | " 4880,\n", 786 | " 4882,\n", 787 | " 4883,\n", 788 | " 4884,\n", 789 | " 4885,\n", 790 | " 4886,\n", 791 | " 4888]" 792 | ] 793 | }, 794 | "execution_count": 116, 795 | "metadata": {}, 796 | "output_type": "execute_result" 797 | } 798 | ], 799 | "source": [ 800 | "_, union = unionDFSfeatures(6)\n", 801 | "\n", 802 | "union" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 110, 808 | "metadata": { 809 | "collapsed": true 810 | }, 811 | "outputs": [], 812 | "source": [ 813 | "codemapping = {i:j for i, j in zip(dictionary[\"FIELD_NAME\"].values, dictionary[\"FIELD_DESCRIPTION\"].values)}" 814 | ] 815 | }, 816 | { 817 | "cell_type": "code", 818 | "execution_count": 122, 819 | "metadata": { 820 | "collapsed": false 821 | }, 822 | "outputs": [ 823 | { 824 | "name": "stdout", 825 | "output_type": "stream", 826 | "text": [ 827 | "Age at time of last encounter with the health system\n", 828 | "Derived categorical smoking status\n", 829 | "ICD9 3D: Nondependent abuse of drugs\n", 830 | "ICD9 4D: Tobacco use disorder\n", 831 | "ICD9 3D: Chronic bronchitis\n", 832 | "ICD9 4D: Obstructive Chronic Bronchitis\n", 833 | "ICD9 3D: Asthma\n", 834 | "ICD9 4D: Chronic Obstructive Asthma\n", 835 | "ICD9 4D: Asthma, Unspecified\n", 836 | "ICD9 3D: Bronchiectasis\n", 837 | "ICD9 3D: Chronic airway obstruction, not elsewhere classified\n", 838 | "ICD10 3D: Emphysema\n", 839 | "ICD10 4D: Emphysema, unspecified\n", 840 | "ICD10 3D: Asthma\n", 841 | "ICD10 4D: Other and unspecified asthma\n", 842 | "ICD10 4D: Dyspnea\n", 843 | "ICD10 3D: Problems related to lifestyle\n" 844 | ] 845 | } 846 | ], 847 | "source": [ 848 | "the23 = [codemapping[str(i[0])] for i in columnNames[0][temp]]\n", 849 | "\n", 850 | "for i in the23:\n", 851 | " print i" 852 | ] 853 | }, 854 | 
{ 855 | "cell_type": "code", 856 | "execution_count": null, 857 | "metadata": { 858 | "collapsed": true 859 | }, 860 | "outputs": [], 861 | "source": [] 862 | } 863 | ], 864 | "metadata": { 865 | "kernelspec": { 866 | "display_name": "Python 2", 867 | "language": "python", 868 | "name": "python2" 869 | }, 870 | "language_info": { 871 | "codemirror_mode": { 872 | "name": "ipython", 873 | "version": 2 874 | }, 875 | "file_extension": ".py", 876 | "mimetype": "text/x-python", 877 | "name": "python", 878 | "nbconvert_exporter": "python", 879 | "pygments_lexer": "ipython2", 880 | "version": "2.7.12" 881 | } 882 | }, 883 | "nbformat": 4, 884 | "nbformat_minor": 0 885 | } 886 | -------------------------------------------------------------------------------- /MakeNewData.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 31, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from scipy import io as sio\n", 12 | "\n", 13 | "# Load the data\n", 14 | "ourdata = sio.loadmat(\"./data/B_3labels_mean_scaled.mat\")\n", 15 | "columnNames = ourdata['columnNames']" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": { 22 | "collapsed": true 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "# ICD-9 codes mapping\n", 27 | "import pandas as pd\n", 28 | "\n", 29 | "dictionary = pd.read_csv(\"./data/BinaryTraitMatrix_V2.2_F60K_DD_20160722.csv\")\n", 30 | "codemapping = {i:j for i, j in zip(dictionary[\"FIELD_NAME\"].values, dictionary[\"FIELD_DESCRIPTION\"].values)}\n", 31 | "\n", 32 | "def getFeatureNames(indexes):\n", 33 | " return [codemapping[str(i).rstrip()] for i in columnNames[indexes]]" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "## A list of features should be eliminated" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 28, 46 | "metadata": { 47 | "collapsed": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "a = [i[0] for i in codemapping.items() if 'Emphysema' in i[1]]\n", 52 | "b = [i[0] for i in codemapping.items() if 'emphysema' in i[1]]\n", 53 | "c = [i[0] for i in codemapping.items() if 'asthma' in i[1]]\n", 54 | "d = [i[0] for i in codemapping.items() if 'Asthma' in i[1]]\n", 55 | "e = [i[0] for i in codemapping.items() if 'Chronic bronchitis' in i[1]]\n", 56 | "f = [i[0] for i in codemapping.items() if 'chronic bronchitis' in i[1]]\n", 57 | "g = [i[0] for i in codemapping.items() if 'Chronic Obstructive' in i[1]]\n", 58 | "h = [i[0] for i in codemapping.items() if 'chronic obstructive' in i[1]]\n", 59 | "j = [i[0] for i in codemapping.items() if 'smoking' in i[1]]\n", 60 | "k = [i[0] for i in codemapping.items() if 'Gender' == i[1]]\n", 61 | "u = [i[0] for i in codemapping.items() if 'Age at time of last encounter with the health system' == i[1]]\n", 62 | "s = [i[0] for i in codemapping.items() if 'Indicator if patient is alive or deceased' == i[1]]\n", 63 | "# w = [i for i in codemapping.items() if 'bronc' in i[1]]\n", 64 | "i = [i[0] for i in codemapping.items() if 'Tobacco' in i[1]]\n", 65 | "\n", 66 | "# Should add more" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 26, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "icd9_eliminated = a+b+c+d+e+f+g+h+i+j+k+u+s" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 22, 83 | "metadata": { 
84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "index_eliminated = [i for i, j in enumerate(columnNames) if str(j.rstrip()) in icd9_eliminated]\n", 89 | "index_keep = [i for i in xrange(len(columnNames)) if i not in index_eliminated]" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "## New column names is generated with index_keep" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 8, 102 | "metadata": { 103 | "collapsed": false 104 | }, 105 | "outputs": [], 106 | "source": [ 107 | "newColumnNames = ourdata['columnNames'][index_keep]" 108 | ] 109 | }, 110 | { 111 | "cell_type": "markdown", 112 | "metadata": {}, 113 | "source": [ 114 | "## Create New data that does not contain the features eliminated above" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": 87, 120 | "metadata": { 121 | "collapsed": false 122 | }, 123 | "outputs": [], 124 | "source": [ 125 | "# Change the file names and generate new files\n", 126 | "\n", 127 | "# Load old data with original columns\n", 128 | "ourdata = sio.loadmat(\"./data/B_AsthmaAcos_mean_scaled.mat\")\n", 129 | "\n", 130 | "ourdata['X'] = ourdata['X'][:,index_keep]\n", 131 | "ourdata['columnNames'] = newColumnNames\n", 132 | "\n", 133 | "# Save new Data with new columns (name end with number of new columns)\n", 134 | "sio.savemat(\"./data/B_AsthmaAcos_mean_scaled_\"+str(len(newColumnNames))\".mat\", ourdata)" 135 | ] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 2", 141 | "language": "python", 142 | "name": "python2" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 2 148 | }, 149 | "file_extension": ".py", 150 | "mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython2", 154 | "version": "2.7.12" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 0 159 | } 160 | -------------------------------------------------------------------------------- /Note-Part1.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "collapsed": false 7 | }, 8 | "source": [ 9 | "# Read The Data" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 1, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from supporting_files.dfs2 import DeepFeatureSelectionNew\n", 21 | "from sklearn.cross_validation import train_test_split\n", 22 | "from sklearn import datasets\n", 23 | "from scipy import io as sio\n", 24 | "from tensorflow.python.framework import ops\n", 25 | "import numpy as np\n", 26 | "from sklearn.datasets import make_classification\n", 27 | "from sklearn.preprocessing import normalize\n", 28 | "\n", 29 | "ourdata = sio.loadmat(\"./data/B_AsthmaCOPD_mean_scaled_7159.mat\")\n", 30 | "\n", 31 | "inputX = ourdata['X']\n", 32 | "inputY = ourdata['Y'][0,:]\n", 33 | "\n", 34 | "columnNames = ourdata['columnNames']" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "# Run the Deep Feature Selection\n", 42 | "## Changing lambda1 slightly" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": false, 50 | "scrolled": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "# Reset the graph\n", 55 | 
"ops.reset_default_graph()\n", 56 | "\n", 57 | "weights_tuning_lamda1 = []\n", 58 | "for lambda1 in xrange(0, 10, 1):\n", 59 | " # Should be modified for different datasets, similar things should be done for alpha1\n", 60 | " lambda1 /= 10000.\n", 61 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[50], learning_rate=0.01, \\\n", 62 | " lambda1=0.0001, lambda2=1, alpha1=0.00001, alpha2=0, activation='tanh', \\\n", 63 | " weight_init='uniform',epochs=20, optimizer='Adam', print_step=1)\n", 64 | " dfsMLP.train(batch_size=2000)\n", 65 | " print(\"Train finised for lambda1:\" + str(lambda1))\n", 66 | " weights_tuning_lamda1.append(dfsMLP.selected_ws[0])" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "# Run different random states in order to select features given selected set of parameters chosen above " 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "weights_randomstates = []\n", 85 | "\n", 86 | "for random_state in xrange(20):\n", 87 | " # Resplit the data\n", 88 | " X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)\n", 89 | " \n", 90 | " # Change number of epochs to control the training time\n", 91 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[30], learning_rate=0.01, \\\n", 92 | " lambda1=0.0001, lambda2=1, alpha1=0.0001, alpha2=0, activation='tanh', \\\n", 93 | " weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)\n", 94 | " dfsMLP.train(batch_size=2000)\n", 95 | " print(\"Train finised for random state:\" + str(random_state))\n", 96 | " weights_randomstates.append(dfsMLP.selected_ws[0])\n", 97 | "\n", 98 | "# The generated weights will be in the weights folder\n", 99 | "np.save(\"./weights/weights_randomstates\", weights_randomstates)" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "metadata": {}, 105 | "source": [ 106 | "# The below code is for single model testing / parameter discovering" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false, 114 | "scrolled": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=2)\n", 119 | "\n", 120 | "dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[5], learning_rate=0.012, \\\n", 121 | " lambda1=0.002, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \\\n", 122 | " weight_init='uniform',epochs=200, optimizer='Adam', print_step=1)\n", 123 | "dfsMLP.train(batch_size=2000)\n", 124 | "\n", 125 | "# More layers might cause overfitting problems, but certainly change the alpha1 and lambda1 accordingly would \n", 126 | "# set the problem" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": null, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [], 136 | "source": [ 137 | "ourdata = sio.loadmat(\"./data/B_COPDAcos_mean_scaled_7169.mat\")\n", 138 | "inputX = ourdata['X']\n", 139 | "inputY = ourdata['Y'][0,:]\n", 140 | "columnNames = ourdata['columnNames']\n", 141 | "\n", 142 | "index_Acos = np.where(inputY==0)[0]\n", 143 | "index_COPD = np.where(inputY==1)[0]" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 
149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "ourdata = sio.loadmat(\"./data/B_COPDAcos_mean_scaled_7169.mat\")\n", 155 | "inputX = ourdata['X']\n", 156 | "inputY = ourdata['Y'][0,:]\n", 157 | "columnNames = ourdata['columnNames']\n", 158 | "\n", 159 | "index_Acos = np.where(inputY==0)[0]\n", 160 | "index_COPD = np.where(inputY==1)[0]" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "weights = []\n", 172 | "for i in xrange(1):\n", 173 | " # made random choice of asthma patients\n", 174 | " choice = np.random.choice(a=len(index_COPD), size=len(index_Acos))\n", 175 | " index_COPD_chosen = index_Asthma[choice]\n", 176 | "\n", 177 | " # Concatenate the indexes for Asthma and Acos patients\n", 178 | " indexes = np.array(index_Acos.tolist()+index_COPD_chosen.tolist())\n", 179 | " # Shuffle the indexes\n", 180 | " np.random.shuffle(indexes)\n", 181 | " indexes = indexes.tolist()\n", 182 | "\n", 183 | " # inputX and inputY for this round\n", 184 | " inputX_ = inputX[indexes,:]\n", 185 | " inputY_ = inputY[indexes]\n", 186 | " \n", 187 | " X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)\n", 188 | " \n", 189 | " # Change number of epochs to control the training time\n", 190 | " dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[10], learning_rate=0.01, \\\n", 191 | " lambda1=0.01, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \\\n", 192 | " weight_init='uniform',epochs=30, optimizer='Adam', print_step=1)\n", 193 | " dfsMLP.train(batch_size=500)\n", 194 | " print(\"Train finised for random state:\" + str(random_state))\n", 195 | " weights.append(dfsMLP.selected_ws[0])" 196 | ] 197 | }, 198 | { 199 | "cell_type": "markdown", 200 | "metadata": {}, 201 | "source": [ 202 | "# Run XGBoost Model" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": null, 208 | "metadata": { 209 | "collapsed": false 210 | }, 211 | "outputs": [], 212 | "source": [ 213 | "import scipy.io as sio\n", 214 | "from sklearn.ensemble import RandomForestClassifier\n", 215 | "from sklearn.metrics import accuracy_score\n", 216 | "import xgboost as xgb\n", 217 | "import numpy as np\n", 218 | "\n", 219 | "# COPD Acos\n", 220 | "ourdata = sio.loadmat(\"./data/B_COPDAcos_mean_scaled_7159.mat\")\n", 221 | "inputX = ourdata['X']\n", 222 | "inputY = ourdata['Y'][0,:]\n", 223 | "\n", 224 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(inputX, inputY)\n", 225 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 226 | "\n", 227 | "np.save(\"./weights/indexes_xgboost_rerun_All_CAc\",indexes_xgboost)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": true 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "ourdata = sio.loadmat(\"./data/B_AsthmaCOPD_mean_scaled_7159.mat\")\n", 239 | "inputX = ourdata['X']\n", 240 | "inputY = ourdata['Y'][0,:]\n", 241 | "\n", 242 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(inputX, inputY)\n", 243 | "# y_pred = gbm.predict(X_test)\n", 244 | "\n", 245 | "# featurescores = gbm.feature_importances_\n", 246 | "\n", 247 | "# print(accuracy_score(y_test, y_pred))\n", 248 | "\n", 249 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 250 | 
"np.save(\"./weights/indexes_xgboost_rerun_All_AsC\",indexes_xgboost)" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": true 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "# Asthma Acos\n", 262 | "ourdata = sio.loadmat(\"./data/B_AsthmaAcos_mean_scaled_7159.mat\")\n", 263 | "inputX = ourdata['X']\n", 264 | "inputY = ourdata['Y'][0,:]\n", 265 | "\n", 266 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(inputX, inputY)\n", 267 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 268 | "\n", 269 | "np.save(\"./weights/indexes_xgboost_rerun_All_AsAc\",indexes_xgboost)" 270 | ] 271 | } 272 | ], 273 | "metadata": { 274 | "kernelspec": { 275 | "display_name": "Python 2", 276 | "language": "python", 277 | "name": "python2" 278 | }, 279 | "language_info": { 280 | "codemirror_mode": { 281 | "name": "ipython", 282 | "version": 2 283 | }, 284 | "file_extension": ".py", 285 | "mimetype": "text/x-python", 286 | "name": "python", 287 | "nbconvert_exporter": "python", 288 | "pygments_lexer": "ipython2", 289 | "version": "2.7.12" 290 | } 291 | }, 292 | "nbformat": 4, 293 | "nbformat_minor": 0 294 | } 295 | -------------------------------------------------------------------------------- /Preprocess - Should be ran after labeled csv file has been generated.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from scipy import io as sio\n", 12 | "import pandas as pd\n", 13 | "import numpy as np\n", 14 | "\n", 15 | "# Process Q and B data separately\n", 16 | "\n", 17 | "filePath = \"./data/QMatrix_label.csv\"\n", 18 | "if 'B' in filePath:\n", 19 | " dataType = 'B'\n", 20 | "else:\n", 21 | " dataType = 'Q'\n", 22 | "\n", 23 | "impute_strategy = 'mean'\n", 24 | " \n", 25 | "df = pd.read_csv(filePath)" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": false 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# See what does the data looks like\n", 37 | "df" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "# Read the labels\n", 49 | "labels = df.ix[:,1].values" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "# Convert alphabet subgroups to numbers\n", 61 | "mappingSub = {j:i for i,j in enumerate(np.unique(df['Smoking_Sub_Group']))}\n", 62 | "numericSub = np.array([mappingSub[i] for i in df['Smoking_Sub_Group']])\n", 63 | "df['Smoking_Sub_Group'] = numericSub" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "# Convert string labels to numbers\n", 75 | "mappingLabels = {j:i for i,j in enumerate(np.unique(labels))}\n", 76 | "numericLabels = np.array([mappingLabels[i] for i in labels])" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": { 83 | "collapsed": false 84 | }, 85 | "outputs": [], 86 | "source": [ 87 | "# Drop first two columns\n", 88 | "df.drop(df.columns[[0,1]], axis=1, inplace=True)" 89 | ] 90 | }, 91 | { 92 | 
"cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "# Data to be imputed\n", 100 | "dataToBeImputed = df.values\n", 101 | "\n", 102 | "# Get column names\n", 103 | "columnNames = df.columns.values.astype('U')\n", 104 | "# columnNames = [i for i in columnNames]" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "## Get the info from the data, for each feature and each label, calculate their NA rate and store the non-NA values for further analysis, like box-plot" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "info = {}\n", 123 | "for column in xrange(len(columnNames)):\n", 124 | " if column % 100 == 0:\n", 125 | " print column\n", 126 | " info[columnNames[column]] = {}\n", 127 | " for label in xrange(3):\n", 128 | " info[columnNames[column]][label] = {}\n", 129 | " indexes = list(np.where(numericLabels==label)[0])\n", 130 | " temp = df.ix[indexes,column]\n", 131 | " info[columnNames[column]][label]['NA rate'] = temp.isnull().values.sum()*1.0/len(indexes)\n", 132 | " info[columnNames[column]][label]['non-NA data'] = temp[temp.notnull().values].values\n", 133 | "\n", 134 | "info['labelMap'] = {0:'Acos',1:'Asthma',2:'COPD'}" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "np.save('./data/Info_' + dataType + '.npy', info)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "metadata": {}, 151 | "source": [ 152 | "## Data Imputation with simply mean/median, advanced methods will be attached as well" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "metadata": { 159 | "collapsed": false 160 | }, 161 | "outputs": [], 162 | "source": [ 163 | "from sklearn.preprocessing import Imputer\n", 164 | "\n", 165 | "imp = Imputer(missing_values='NaN', strategy=impute_strategy, axis=0)\n", 166 | "imp.fit(dataToBeImputed)\n", 167 | "imputedData = imp.transform(dataToBeImputed)" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "## Scale the data from 0 to 1" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": { 181 | "collapsed": false 182 | }, 183 | "outputs": [], 184 | "source": [ 185 | "from sklearn.preprocessing import MinMaxScaler\n", 186 | "\n", 187 | "mm = MinMaxScaler(feature_range=(0, 1))\n", 188 | "scaledData = mm.fit_transform(imputedData)" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": null, 194 | "metadata": { 195 | "collapsed": false 196 | }, 197 | "outputs": [], 198 | "source": [ 199 | "# Read the patient list generated from the last file\n", 200 | "\n", 201 | "patientList = np.load(\"./data/patientList\"+dataType+\".npy\")" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "metadata": { 208 | "collapsed": false 209 | }, 210 | "outputs": [], 211 | "source": [ 212 | "# Save the file\n", 213 | "sio.savemat(\"./data/\" + dataType + \"_3labels_mean_scaled.mat\", \\\n", 214 | " {'X':scaledData,'Y':numericLabels,'patients':patientList, 'columnNames':columnNames})" 215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": { 221 | 
"collapsed": false 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "labels2_list = ['AsthmaCOPD','AcosCOPD','AcosAsthma']\n", 226 | "for i in xrange(len(labels2_list)):\n", 227 | " # Create 2-classes patients list\n", 228 | " indexes = np.where(numericLabels!=i)\n", 229 | " Y = numericLabels[indexes]\n", 230 | " if i == 0:\n", 231 | " Y = Y-1\n", 232 | " elif i == 1:\n", 233 | " Y = np.array([j if j == 0 else 1 for j in Y])\n", 234 | " X, p = scaledData[indexes,:], patientList[indexes]\n", 235 | " \n", 236 | " sio.savemat(\"./data/\" + dataType + \"_\" + labels2_list[i] + \"_mean_scaled.mat\", \\\n", 237 | " {'X':X, 'Y':Y, 'patients':p, 'columnNames':columnNames})" 238 | ] 239 | } 240 | ], 241 | "metadata": { 242 | "kernelspec": { 243 | "display_name": "Python 2", 244 | "language": "python", 245 | "name": "python2" 246 | }, 247 | "language_info": { 248 | "codemirror_mode": { 249 | "name": "ipython", 250 | "version": 2 251 | }, 252 | "file_extension": ".py", 253 | "mimetype": "text/x-python", 254 | "name": "python", 255 | "nbconvert_exporter": "python", 256 | "pygments_lexer": "ipython2", 257 | "version": "2.7.12" 258 | } 259 | }, 260 | "nbformat": 4, 261 | "nbformat_minor": 0 262 | } 263 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DeepFeatureSelection--Tensorflow 2 | Deep Feature Selection Framework implemented in Tensorflow based on the paper from 3 | http://link.springer.com/chapter/10.1007%2F978-3-319-16706-0_20 4 | -------------------------------------------------------------------------------- /TFlearnVersion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "import numpy as np\n", 16 | "from sklearn.datasets import make_classification\n", 17 | "from sklearn.preprocessing import normalize\n", 18 | "import tflearn\n", 19 | "import tensorflow as tf\n", 20 | "from nncomponents import One2OneInputLayer\n", 21 | "\n", 22 | "ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 23 | "# ourdataB = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 24 | "\n", 25 | "inputX = ourdataB['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdataB['Y'][0,:]\n", 28 | "columnNames = ourdataB['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)\n", 31 | "Y_train, Y_test = tflearn.data_utils.to_categorical(y_train, 2), tflearn.data_utils.to_categorical(y_test, 2)\n", 32 | "\n", 33 | "indexes = sio.loadmat(\"xgboost_result\")['importance_rank']\n", 34 | "\n", 35 | "X_train500, X_test500 = X_train[:, indexes.tolist()[0][:500]], X_test[:, indexes.tolist()[0][:500]]\n", 36 | "X_train100, X_test100 = X_train[:, indexes.tolist()[0][:100]], X_test[:, indexes.tolist()[0][:100]]\n", 37 | "X_train10, X_test10 = X_train[:, indexes.tolist()[0][:10]], X_test[:, indexes.tolist()[0][:10]]\n", 38 | "X_train50, X_test50 = X_train[:, indexes.tolist()[0][:50]], X_test[:, indexes.tolist()[0][:50]]" 39 | ] 40 | 
}, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 64, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "def dfs(lambda1, n_epoch, size=None):\n", 50 | " with tf.Graph().as_default():\n", 51 | " sess = tf.Session()\n", 52 | " \n", 53 | " if size is None:\n", 54 | " size = 7203\n", 55 | "\n", 56 | " input_data = tflearn.input_data(shape=[None, size])\n", 57 | " input_layer = One2OneInputLayer(input_data)\n", 58 | "\n", 59 | " tflearn.helpers.regularizer.add_weights_regularizer(input_layer.w, loss='L1', \\\n", 60 | " weight_decay=lambda1, add_to_collection=None)\n", 61 | "\n", 62 | " dense = tflearn.fully_connected(input_layer.output, 50, activation='tanh')\n", 63 | " \n", 64 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax')\n", 65 | " \n", 66 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 67 | " model = tflearn.DNN(net)\n", 68 | "\n", 69 | " sess.run(tf.initialize_all_variables())\n", 70 | " \n", 71 | "# variables = tflearn.variables.get_all_trainable_variable()\n", 72 | "# for i in xrange(1,4):\n", 73 | "# sess.run(variables[i].assign(initial_values[i]))\n", 74 | " if size == 500:\n", 75 | " X_train, X_test = X_train500, X_test500\n", 76 | " elif size == 100:\n", 77 | " X_train, X_test = X_train100, X_test100\n", 78 | " elif size == 50:\n", 79 | " X_train, X_test = X_train50, X_test50\n", 80 | " elif size == 10:\n", 81 | " X_train, X_test = X_train10, X_test10\n", 82 | " \n", 83 | " model.fit(X_train, Y_train, n_epoch=n_epoch, show_metric=True, validation_set=(X_test, Y_test), batch_size=100)\n", 84 | "\n", 85 | " selected_w = sess.run(input_layer.w)\n", 86 | "\n", 87 | " return selected_w" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 67, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "Training Step: 2407 | total loss: \u001b[1m\u001b[32m0.21137\u001b[0m\u001b[0m\n", 102 | "\u001b[2K\r", 103 | "| Adam | epoch: 027 | loss: 0.21137 - acc: 0.9473 -- iter: 8500/8547\n" 104 | ] 105 | }, 106 | { 107 | "ename": "KeyboardInterrupt", 108 | "evalue": "", 109 | "output_type": "error", 110 | "traceback": [ 111 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 112 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 113 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mxrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m500\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mlambda1\u001b[0m \u001b[0;34m/=\u001b[0m \u001b[0;36m10000.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0mweights\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdfs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m30\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 114 | "\u001b[0;32m\u001b[0m in \u001b[0;36mdfs\u001b[0;34m(lambda1, n_epoch, size)\u001b[0m\n\u001b[1;32m 32\u001b[0m \u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mX_test\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mX_train10\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test10\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 33\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 34\u001b[0;31m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_train\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mn_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mn_epoch\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshow_metric\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalidation_set\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 35\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 36\u001b[0m \u001b[0mselected_w\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msess\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_layer\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mw\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 115 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/models/dnn.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, X_inputs, Y_targets, n_epoch, validation_set, show_metric, batch_size, shuffle, snapshot_epoch, snapshot_step, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 187\u001b[0m \u001b[0mdaug_dict\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdaug_dict\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 188\u001b[0m \u001b[0mexcl_trainops\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mexcl_trainops\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 189\u001b[0;31m run_id=run_id)\n\u001b[0m\u001b[1;32m 190\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 191\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 116 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, feed_dicts, n_epoch, val_feed_dicts, show_metric, snapshot_step, snapshot_epoch, shuffle_all, dprep_dict, daug_dict, excl_trainops, run_id)\u001b[0m\n\u001b[1;32m 282\u001b[0m \u001b[0msnapshot_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 283\u001b[0m \u001b[0msnapshot_step\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 284\u001b[0;31m show_metric)\n\u001b[0m\u001b[1;32m 285\u001b[0m \u001b[0mglobal_loss\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloss_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 286\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtrain_op\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macc_value\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mglobal_acc\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 117 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36m_train\u001b[0;34m(self, training_step, snapshot_epoch, snapshot_step, show_metric)\u001b[0m\n\u001b[1;32m 
722\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 723\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 724\u001b[0;31m \u001b[0me\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mevaluate_flow\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msession\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0meval_ops\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtest_dflow\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 725\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval_loss\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 726\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mshow_metric\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmetric\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 118 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/helpers/trainer.pyc\u001b[0m in \u001b[0;36mevaluate_flow\u001b[0;34m(session, ops_to_evaluate, dataflow)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 847\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m+=\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m*\u001b[0m \u001b[0mcurrent_batch_size\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 848\u001b[0;31m \u001b[0mfeed_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 849\u001b[0m \u001b[0mres\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mr\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mdataflow\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mn_samples\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 850\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mres\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 119 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/site-packages/tflearn-0.2.1-py2.7.egg/tflearn/data_flow.pyc\u001b[0m in \u001b[0;36mnext\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 126\u001b[0m \"\"\"\n\u001b[1;32m 127\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdata_status\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mupdate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 128\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeed_dict_queue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtimeout\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 129\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 130\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mstart\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mreset_status\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 120 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/Queue.pyc\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_empty\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 121 | "\u001b[0;32m/Volumes/TONY/anaconda/lib/python2.7/threading.pyc\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 338\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 339\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 340\u001b[0;31m \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 341\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0m__debug__\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 342\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_note\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"%s.wait(): got it\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 122 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "weights = []\n", 128 | "for lambda1 in xrange(0, 500, 5):\n", 129 | " print(lambda1)\n", 130 | " lambda1 /= 10000.\n", 131 | " weights.append(dfs(0, 30, 100))" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 34, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [ 141 | { 142 | "ename": "NameError", 143 | "evalue": "name 'model' is not defined", 144 | "output_type": "error", 145 | "traceback": [ 146 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 147 | 
"\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 148 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 149 | "\u001b[0;31mNameError\u001b[0m: name 'model' is not defined" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "type(model)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 37, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "def get_inits():\n", 166 | " with tf.Graph().as_default():\n", 167 | " sess = tf.Session()\n", 168 | "\n", 169 | " input_data = tflearn.input_data(shape=[None, 7203])\n", 170 | " input_layer = One2OneInputLayer(input_data)\n", 171 | "\n", 172 | " dense = tflearn.fully_connected(input_layer.output, 500, activation='tanh', name='dense')\n", 173 | " sofmax = tflearn.fully_connected(dense, 2, activation='softmax', name='sofmax')\n", 174 | " net = tflearn.regression(sofmax, optimizer='Adam', loss='categorical_crossentropy')\n", 175 | " model = tflearn.DNN(net)\n", 176 | " \n", 177 | " print(type(model))\n", 178 | "\n", 179 | " sess.run(tf.initialize_all_variables())\n", 180 | "\n", 181 | "# model.fit(X_train, Y_train, n_epoch=10, show_metric=True, validation_set=(X_test, Y_test))\n", 182 | "\n", 183 | " variables = tflearn.variables.get_all_trainable_variable()\n", 184 | " \n", 185 | " values = []\n", 186 | " for i in xrange(4):\n", 187 | " values.append(sess.run(variables[i]))\n", 188 | "\n", 189 | " return values" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 38, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [ 199 | { 200 | "name": "stdout", 201 | "output_type": "stream", 202 | "text": [ 203 | "\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "initial_values = get_inits()" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": false 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "initial_values[0]" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | "metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "tflearn.input_data(tf.Variable(initial_values[0]))" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "collapsed": false 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "dense_vars = tflearn.variables.get_all_variables()\n", 242 | "print(\"Dense1 layer weights:\")\n", 243 | "print(model.get_weights(dense_vars[0]))\n", 244 | "# Or using generic tflearn function:\n", 245 | "print(\"Dense1 layer biases:\")\n", 246 | "with model.session.as_default():\n", 247 | " print(tflearn.variables.get_value(dense_vars[1]))" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": null, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "dense_varsa" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 68, 264 | "metadata": { 265 | "collapsed": false 266 | }, 267 | "outputs": [ 268 | { 269 | "name": "stdout", 270 | "output_type": "stream", 271 | "text": [ 272 | "High five! You successfuly sent some data to your account on plotly. 
View your plot in your browser at https://plot.ly/~tonyabracadabra/0 or inside your plot.ly account where it is named 'basic-heatmap'\n" 273 | ] 274 | }, 275 | { 276 | "data": { 277 | "text/html": [ 278 | "" 279 | ], 280 | "text/plain": [ 281 | "" 282 | ] 283 | }, 284 | "execution_count": 68, 285 | "metadata": {}, 286 | "output_type": "execute_result" 287 | } 288 | ], 289 | "source": [ 290 | "import plotly.plotly as py\n", 291 | "import plotly.graph_objs as go\n", 292 | "import plotly\n", 293 | "\n", 294 | "plotly.tools.set_credentials_file(username='tonyabracadabra', api_key='6gs9i5iec7')\n", 295 | "\n", 296 | "data = [\n", 297 | " go.Heatmap(\n", 298 | " z=np.abs(weights)\n", 299 | " )\n", 300 | "]\n", 301 | "\n", 302 | "py.iplot(data, filename='basic-heatmap')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 1, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [ 312 | { 313 | "ename": "NameError", 314 | "evalue": "name 'weights' is not defined", 315 | "output_type": "error", 316 | "traceback": [ 317 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 318 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 319 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mweights\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 320 | "\u001b[0;31mNameError\u001b[0m: name 'weights' is not defined" 321 | ] 322 | } 323 | ], 324 | "source": [ 325 | "weights" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": null, 331 | "metadata": { 332 | "collapsed": true 333 | }, 334 | "outputs": [], 335 | "source": [ 336 | "import pydendroheatmap as pdh\n", 337 | "import scipy.cluster.hierarchy as sch\n", 338 | "\n", 339 | "\n", 340 | "heatmap_array = pickle.load(open('some_data_file.pickle'))#a numpy.ndarray or numpy.matrix, for this example, let's say mxn array\n", 341 | "top_dendrogram = pickle.load(open('another_data_file.pickle'))#a (n-1) x 4 array\n", 342 | "side_dendrogram = pickle.load(open('a_third_data_file.pickle'))#a (m-1) x 4 array\n", 343 | "\n", 344 | "heatmap = pdh.DendroHeatMap(heat_map_data=heatmap_array, left_dendrogram=side_dendrogram, top_dendrogram=top_dendrogram)\n", 345 | "heatmap.title = 'This is an example'\n", 346 | "heatmap.show()\n", 347 | "\n", 348 | "heatmap.colormap = heatmap.yellowBlackBlue\n", 349 | "\n", 350 | "heatmap.show()\n", 351 | "\n", 352 | "heatmap.row_labels = ['some', 'row','labels'] #must have the same number of rows in heat_map_data\n", 353 | "\n", 354 | "heatmap.reset_plot()\n", 355 | "heatmap.show()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 72, 361 | "metadata": { 362 | "collapsed": false 363 | }, 364 | "outputs": [ 365 | { 366 | "data": { 367 | "text/plain": [ 368 | "R object with classes: ('list',) mapped to:\n", 369 | "\n", 370 | "[IntVector, IntVector, RNULLType, RNULLType]\n", 371 | " rowInd: \n", 372 | " R object with classes: ('integer',) mapped to:\n", 373 | "\n", 374 | "[ 16, 9, 59, ..., 53, 52, 39]\n", 375 | " colInd: \n", 376 | " R object with classes: ('integer',) mapped to:\n", 377 | "\n", 378 | "[ 94, 68, 93, ..., 31, 55, 61]\n", 379 | " Rowv: \n", 380 | " rpy2.rinterface.NULL\n", 381 | " Colv: \n", 382 | " rpy2.rinterface.NULL" 383 | ] 384 | }, 385 | "execution_count": 72, 386 | "metadata": {}, 387 | "output_type": "execute_result" 388 | } 389 | ], 390 | "source": [ 391 | "import scipy.io as sio\n", 392 | "from 
rpy2.robjects import r\n", 393 | "import rpy2.robjects.numpy2ri\n", 394 | "\n", 395 | "rpy2.robjects.numpy2ri.activate()\n", 396 | "\n", 397 | "data = np.random.random((10,10))\n", 398 | "r.heatmap(np.array(weights)) " 399 | ] 400 | } 401 | ], 402 | "metadata": { 403 | "kernelspec": { 404 | "display_name": "Python 2", 405 | "language": "python", 406 | "name": "python2" 407 | }, 408 | "language_info": { 409 | "codemirror_mode": { 410 | "name": "ipython", 411 | "version": 2 412 | }, 413 | "file_extension": ".py", 414 | "mimetype": "text/x-python", 415 | "name": "python", 416 | "nbconvert_exporter": "python", 417 | "pygments_lexer": "ipython2", 418 | "version": "2.7.12" 419 | } 420 | }, 421 | "nbformat": 4, 422 | "nbformat_minor": 0 423 | } 424 | -------------------------------------------------------------------------------- /XGBoost.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from sklearn import datasets\n", 12 | "from sklearn.cross_validation import train_test_split\n", 13 | "from scipy import io as sio\n", 14 | "from tensorflow.python.framework import ops\n", 15 | "from dfs2 import DeepFeatureSelectionNew\n", 16 | "import numpy as np\n", 17 | "from sklearn.datasets import make_classification\n", 18 | "from sklearn.preprocessing import normalize\n", 19 | "\n", 20 | "# ourdataB = sio.loadmat(\"/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat\")\n", 21 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat\")\n", 22 | "ourdata = sio.loadmat(\"./B_mean_2labels.mat\")\n", 23 | "# ourdata = sio.loadmat(\"/Users/xupeng.tong/Documents/Data/OriginalData/Q_2labels_unstandardized.mat\")\n", 24 | "\n", 25 | "inputX = ourdata['X']\n", 26 | "inputX = normalize(inputX, axis=0)\n", 27 | "inputY = ourdata['Y'][0,:]\n", 28 | "columnNames = ourdata['columnNames']\n", 29 | "\n", 30 | "X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": { 37 | "collapsed": true 38 | }, 39 | "outputs": [], 40 | "source": [ 41 | "from sklearn.ensemble import RandomForestClassifier\n", 42 | "from sklearn.metrics import accuracy_score\n", 43 | "import xgboost as xgb\n", 44 | "\n", 45 | "# rf = RandomForestClassifier(criterion=\"entropy\", n_estimators = 300, max_depth = 100)\n", 46 | "# rf.fit(X_train, y_train)\n", 47 | "\n", 48 | "# y_pred = rf.predict(X_test)\n", 49 | "\n", 50 | "gbm = xgb.XGBClassifier(max_depth=3, n_estimators=400, learning_rate=0.05).fit(X_train, y_train)\n", 51 | "y_pred = gbm.predict(X_test)\n", 52 | "\n", 53 | "# featurescores = gbm.feature_importances_\n", 54 | "\n", 55 | "print(accuracy_score(y_test, y_pred))\n", 56 | "\n", 57 | "indexes_xgboost = np.argsort(gbm.feature_importances_)[::-1]\n", 58 | "\n", 59 | "np.save(\"indexes_xgboost\",indexes_xgboost)" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "kernelspec": { 65 | "display_name": "Python 2", 66 | "language": "python", 67 | "name": "python2" 68 | }, 69 | "language_info": { 70 | "codemirror_mode": { 71 | "name": "ipython", 72 | "version": 2 73 | }, 74 | "file_extension": ".py", 75 | "mimetype": "text/x-python", 76 | "name": "python", 77 | "nbconvert_exporter": "python", 78 | "pygments_lexer": "ipython2", 79 | "version": "2.7.12" 80 | } 81 | 
},
"nbformat": 4,
"nbformat_minor": 0
}
-------------------------------------------------------------------------------- /randomLasso.py: --------------------------------------------------------------------------------
from sklearn.linear_model import RandomizedLasso
from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

# ourdataB = sio.loadmat("/Volumes/TONY/Regeneron/Data/OriginalData/newDataB_2labels.mat")
ourdataB = sio.loadmat("/Users/xupeng.tong/Documents/Data/OriginalData/newDataB_2labels.mat")
# ourdataB = sio.loadmat("/home/REGENERON/xupeng.tong/newDataB_2labels.mat")

inputX = ourdataB['X']
inputX = normalize(inputX, axis=0)
inputY = ourdataB['Y'][0,:]
columnNames = ourdataB['columnNames']

X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=42)

randomized_lasso = RandomizedLasso()
randomized_lasso.fit(X_train, y_train)

featureMask = randomized_lasso.get_support()

X_train_lasso = X_train[:,featureMask]
X_test_lasso = X_test[:,featureMask]

# Peek at (up to) the first 100 selected feature names
print(columnNames[0][featureMask][:100])

sio.savemat('RandomLasso-result', {'X_train_lasso':X_train_lasso, \
    'X_test_lasso':X_test_lasso, 'featureMask':featureMask})
-------------------------------------------------------------------------------- /run_model/Q_run_AsthmaAcos_NoSmokeAge.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

# Assumed data file, following the naming convention of the sibling Q_run scripts
ourdata = sio.loadmat("./data/Q_AsthmaAcos_NoAgeSmoke.mat")
inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

index_Acos = np.where(inputY==0)[0]
index_Asthma = np.where(inputY==1)[0]

weights = []
for i in xrange(50):
    # Make a random choice of asthma patients, matching the Acos group size
    choice = np.random.choice(a=len(index_Asthma), size=len(index_Acos))
    index_Asthma_chosen = index_Asthma[choice]

    # Concatenate the indexes for Asthma and Acos patients
    indexes = np.array(index_Acos.tolist()+index_Asthma_chosen.tolist())
    # Shuffle the indexes
    np.random.shuffle(indexes)
    indexes = indexes.tolist()

    # inputX and inputY for this round
    inputX_ = inputX[indexes,:]
    inputY_ = inputY[indexes]

    X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[150], learning_rate=0.01, \
                                     lambda1=0.005, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)
    dfsMLP.train(batch_size=500)
    print("Train finished for round: " + str(i))
    weights.append(dfsMLP.selected_ws[0])

np.save("./weights/Q_weights_AsthmaAcos", weights)
-------------------------------------------------------------------------------- /run_model/Q_run_AsthmaCOPD_NoSmokeAge.py:
--------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

ourdata = sio.loadmat("./data/Q_AsthmaCOPD_NoAgeSmoke.mat")

inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

weights = []

for random_state in xrange(50):
    # Resplit the data
    X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=random_state)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[5], learning_rate=0.01, \
                                     lambda1=0.001, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=100, optimizer='Adam', print_step=10)
    dfsMLP.train(batch_size=2000)
    print("Train finished for random state: " + str(random_state))
    weights.append(dfsMLP.selected_ws[0])

# The generated weights will be saved in the weights folder
np.save("./weights/Q_weights_AsthmaCOPD", weights)
-------------------------------------------------------------------------------- /run_model/Q_run_COPDAcos_NoSmokeAge.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

ourdata = sio.loadmat("./data/Q_COPDAcos_NoAgeSmoke.mat")
inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

index_Acos = np.where(inputY==0)[0]
index_COPD = np.where(inputY==1)[0]

weights = []
for i in xrange(50):
    # Make a random choice of COPD patients, matching the Acos group size
    choice = np.random.choice(a=len(index_COPD), size=len(index_Acos))
    index_COPD_chosen = index_COPD[choice]

    # Concatenate the indexes for Acos and COPD patients
    indexes = np.array(index_Acos.tolist()+index_COPD_chosen.tolist())
    # Shuffle the indexes
    np.random.shuffle(indexes)
    indexes = indexes.tolist()

    # inputX and inputY for this round
    inputX_ = inputX[indexes,:]
    inputY_ = inputY[indexes]

    X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[150], learning_rate=0.01, \
                                     lambda1=0.005, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=50, optimizer='Adam', print_step=10)
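    # Optional logging (sketch; uses only variables defined above in this
    # loop): record the class balance of this bootstrap round.
    print("Round %d: %d Acos + %d sampled COPD samples" % (i, len(index_Acos), len(index_COPD_chosen)))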
    dfsMLP.train(batch_size=500)
    print("Train finished for round: " + str(i))
    weights.append(dfsMLP.selected_ws[0])

np.save("./weights/Q_weights_COPDAcos", weights)
-------------------------------------------------------------------------------- /run_model/rerun_AsthmaAcos.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize


ourdata = sio.loadmat("../data/B_AsthmaAcos_mean_scaled_7159.mat")
inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

index_Acos = np.where(inputY==0)[0]
index_Asthma = np.where(inputY==1)[0]

weights = []
for i in xrange(50):
    # Make a random choice of asthma patients, matching the Acos group size
    choice = np.random.choice(a=len(index_Asthma), size=len(index_Acos))
    index_Asthma_chosen = index_Asthma[choice]

    # Concatenate the indexes for Asthma and Acos patients
    indexes = np.array(index_Acos.tolist()+index_Asthma_chosen.tolist())
    # Shuffle the indexes
    np.random.shuffle(indexes)
    indexes = indexes.tolist()

    # inputX and inputY for this round
    inputX_ = inputX[indexes,:]
    inputY_ = inputY[indexes]

    X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[150], learning_rate=0.01, \
                                     lambda1=0.005, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \
                                     weight_init='uniform',epochs=30, optimizer='Adam', print_step=10)
    dfsMLP.train(batch_size=500)
    print("Train finished for round: " + str(i))
    weights.append(dfsMLP.selected_ws[0])

np.save("./weights/weights_AsthmaAcos_rerun", weights)
-------------------------------------------------------------------------------- /run_model/rerun_AsthmaCOPD.py: --------------------------------------------------------------------------------
if __name__ == '__main__' and __package__ is None:
    from os import sys, path
    sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))

from sklearn import datasets
from sklearn.cross_validation import train_test_split
from scipy import io as sio
from tensorflow.python.framework import ops
from supporting_files.dfs2 import DeepFeatureSelectionNew
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import normalize

ourdata = sio.loadmat("./data/B_AsthmaCOPD_mean_scaled_7159.mat")

inputX = ourdata['X']
inputY = ourdata['Y'][0,:]
columnNames = ourdata['columnNames']

weights = []

for i in xrange(50):
    # Resplit the data
    X_train, X_test, y_train, y_test = train_test_split(inputX, inputY, test_size=0.2, random_state=i)

    # Change the number of epochs to control the training time
    dfsMLP = DeepFeatureSelectionNew(X_train, X_test,
y_train, y_test, n_input=1, hidden_dims=[5], learning_rate=0.012, \ 28 | lambda1=0.002, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \ 29 | weight_init='uniform',epochs=20, optimizer='Adam', print_step=10) 30 | dfsMLP.train(batch_size=2000) 31 | print("Train finised for random state:" + str(i)) 32 | weights.append(dfsMLP.selected_ws[0]) 33 | 34 | # The generated weights will be in the weights folder 35 | np.save("./weights/weights_AsthmaCOPD_rerun", weights) -------------------------------------------------------------------------------- /run_model/rerun_COPDAcos.py: -------------------------------------------------------------------------------- 1 | if __name__ == '__main__' and __package__ is None: 2 | from os import sys, path 3 | sys.path.append(path.dirname(path.dirname(path.abspath(__file__)))) 4 | 5 | from sklearn import datasets 6 | from sklearn.cross_validation import train_test_split 7 | from scipy import io as sio 8 | from tensorflow.python.framework import ops 9 | from supporting_files.dfs2 import DeepFeatureSelectionNew 10 | import numpy as np 11 | from sklearn.datasets import make_classification 12 | from sklearn.preprocessing import normalize 13 | 14 | 15 | ourdata = sio.loadmat("../data/B_COPDAcos_mean_scaled_7159.mat") 16 | inputX = ourdata['X'] 17 | inputY = ourdata['Y'][0,:] 18 | columnNames = ourdata['columnNames'] 19 | 20 | index_Acos = np.where(inputY==0)[0] 21 | index_COPD = np.where(inputY==1)[0] 22 | 23 | weights = [] 24 | for i in xrange(50): 25 | # made random choice of asthma patients 26 | choice = np.random.choice(a=len(index_COPD), size=len(index_Acos)) 27 | index_COPD_chosen = index_COPD[choice] 28 | 29 | # Concatenate the indexes for Asthma and Acos patients 30 | indexes = np.array(index_Acos.tolist()+index_COPD_chosen.tolist()) 31 | # Shuffle the indexes 32 | np.random.shuffle(indexes) 33 | indexes = indexes.tolist() 34 | 35 | # inputX and inputY for this round 36 | inputX_ = inputX[indexes,:] 37 | inputY_ = inputY[indexes] 38 | 39 | X_train, X_test, y_train, y_test = train_test_split(inputX_, inputY_, test_size=0.2) 40 | 41 | # Change number of epochs to control the training time 42 | dfsMLP = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test, n_input=1, hidden_dims=[10], learning_rate=0.01, \ 43 | lambda1=0.01, lambda2=1, alpha1=0.001, alpha2=0, activation='tanh', \ 44 | weight_init='uniform',epochs=50, optimizer='Adam', print_step=10) 45 | dfsMLP.train(batch_size=500) 46 | print("Train finised for random state:" + str(i)) 47 | weights.append(dfsMLP.selected_ws[0]) 48 | 49 | np.save("./weights/weights_COPDAcos_rerun", weights) -------------------------------------------------------------------------------- /supporting_files/__init__.py: -------------------------------------------------------------------------------- 1 | # Xupeng Tong 2 | # 3 | # This is the supporting files for Deep Feature Selection, 4 | # including all the NN components that are implemented, activation function, 5 | # mini-batch functions, initializations etc. 
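#
# A minimal usage sketch (assuming you run from the repository root, as the
# run_model scripts do):
#
#     from supporting_files.dfs2 import DeepFeatureSelectionNew
#     dfs = DeepFeatureSelectionNew(X_train, X_test, y_train, y_test,
#                                   hidden_dims=[50], epochs=20)
#     dfs.train(batch_size=2000)
#     input_weights = dfs.selected_ws[0]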
6 | 7 | __author__ = "Xupeng Tong" 8 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 9 | __email__ = "tongxupeng.cpu@gmail.com" -------------------------------------------------------------------------------- /supporting_files/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/__init__.pyc -------------------------------------------------------------------------------- /supporting_files/dfs2.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | __author__ = "Xupeng Tong" 4 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 5 | __email__ = "tongxupeng.cpu@gmail.com" 6 | 7 | import tensorflow as tf 8 | from nncomponents import * 9 | from helpers import * 10 | from sda import StackedDenoisingAutoencoder 11 | import numpy as np 12 | 13 | class DeepFeatureSelectionNew: 14 | """ Perform Deep Feature Selection 15 | 16 | Read more in the original paper: 17 | Li Y, Chen C Y, Wasserman W W. Deep feature selection: Theory and application to identify enhancers and promoters[C] 18 | //International Conference on Research in Computational Molecular Biology. Springer International Publishing, 2015: 205-217. 19 | 20 | This implementation has been modified. 21 | 22 | Parameters 23 | ---------- 24 | X_train: numpy array 25 | The training data 26 | 27 | X_test: numpy array 28 | The testing data 29 | 30 | weight_init: string, default : "uniform", with options "sda" (StackedDenoisingAutoencoder) and "uniform" 31 | Initializes the weights of the neural network 32 | 33 | n_input: int, default : 1, optional 34 | Number of one-to-one input layers; values larger than 1 have not proved useful, but may be worth trying 35 | 36 | hidden_dims: list, default : [1000] 37 | A list of hidden-node counts, one per layer; len(hidden_dims) should equal the number of hidden layers 38 | 39 | activation: string, default : 'sigmoid', with options "sigmoid", "tanh" and "relu" 40 | The activation function applied to all layers 41 | 42 | epochs: int, default : 1000 43 | How many epochs to run; the whole X_train set is expected to be passed over once per epoch 44 | 45 | lambda1: float32, default : 0.001, normally a very small number, otherwise the training will get stuck 46 | Decides the sparseness of the input layer and prevents overfitting 47 | 48 | lambda2: float32, default : 1.0, range from 0.0 to 1.0 49 | With lambda1, it defines the elastic net regularization of the input layer; when lambda2 == 1, it is 50 | equivalent to L1 regularization 51 | 52 | alpha1: float32, default : 0.001, normally a very small number, otherwise the training will get stuck 53 | Decides the L2 regularization of the hidden layers and prevents overfitting 54 | 55 | alpha2: float32, default : 0.0, range from 0.0 to 1.0 56 | With alpha1, it defines the elastic net regularization of the hidden layers; when alpha2 == 0, it is 57 | equivalent to L2 regularization 58 | 59 | learning_rate: float32, default : 0.1 60 | Learning rate of the gradient descent 61 | 62 | optimizer: string, default : "Adam", with options "Adam", "FTRL" and "SGD" 63 | The optimizer for gradient descent; normally the Adam optimizer gives the fastest convergence, while 64 | FTRL is claimed to produce more sparsity, though this has not been verified yet 65 | 66 | print_step: int, default : 1000 67 | Epoch interval for printing progress 68 | 69 | 70
| Attributes 71 | ---------- 72 | cost: 73 | Cost to be minimized 74 | accuracy: 75 | Accuracy by the softmax layer 76 | selected_ws: 77 | The weights learned from the input layer, if n_input == 1, 78 | use selected_ws[0] for the input weights 79 | """ 80 | 81 | def __init__(self, X_train, X_test, y_train, y_test, weight_init='sda', n_input = 1, hidden_dims=[1000], activation='sigmoid',epochs=1000, lambda1=0.001, lambda2=1.0, alpha1=0.001, alpha2=0.0, learning_rate=0.1, optimizer='Adam', print_step=1000): 82 | """ 83 | Initialize the DFS class 84 | """ 85 | 86 | # Get the dimension of the input X 87 | n_sample, n_feat = X_train.shape 88 | n_classes = len(np.unique(y_train)) 89 | 90 | self.epochs = epochs 91 | self.n_input = n_input 92 | self.print_step = print_step 93 | 94 | # Store up original value 95 | self.X_train = X_train 96 | self.y_train = one_hot(y_train) 97 | self.X_test = X_test 98 | self.y_test = one_hot(y_test) 99 | 100 | # Two variables with undetermined length is created 101 | self.var_X = tf.placeholder(dtype=tf.float32, shape=[None, n_feat], name='x') 102 | self.var_Y = tf.placeholder(dtype=tf.float32, shape=[None, n_classes], name='y') 103 | 104 | input_hidden = 0 105 | self.L1_input, self.L2_input = 0, 0 106 | # If there is no input layer 107 | if n_input != 0: 108 | # Create several one to one layers 109 | self.input_layers = [] 110 | input_1to1 = self.var_X 111 | 112 | # regularization terms on coefficients of input layer 113 | L1_input, L2_input = [], [] 114 | 115 | for i in xrange(n_input): 116 | self.input_layers.append(One2OneInputLayer(input_1to1)) 117 | input_1to1 = self.input_layers[-1].output 118 | L1_input.append(tf.reduce_sum(tf.abs(self.input_layers[i].w))) 119 | L2_input.append(tf.nn.l2_loss(self.input_layers[i].w)) 120 | 121 | input_hidden = self.input_layers[-1].output 122 | 123 | # Add it up 124 | self.L1_input = tf.add_n(L1_input) 125 | self.L2_input = tf.add_n(L2_input) 126 | 127 | else: 128 | input_hidden = self.var_X 129 | 130 | # Create list of hidden layers 131 | self.hidden_layers = [] 132 | # Initialize the network weights 133 | weights, biases = init_layer_weight(hidden_dims, X_train, weight_init) 134 | 135 | # Create regularization terms on weights of hidden layers 136 | L1s, L2_sqrs = [], [] 137 | # Create hidden layers 138 | for init_w, init_b in zip(weights, biases): 139 | self.hidden_layers.append(DenseLayer(input_hidden, init_w, init_b, activation=activation)) 140 | input_hidden = self.hidden_layers[-1].output 141 | L1s.append(tf.reduce_sum(tf.abs(self.hidden_layers[-1].w))) 142 | L2_sqrs.append(tf.nn.l2_loss(self.hidden_layers[-1].w)) 143 | 144 | # Final classification layer, variable Y is passed 145 | self.softmax_layer = SoftmaxLayer(self.hidden_layers[-1].output, n_classes, self.var_Y) 146 | 147 | L1s.append(tf.reduce_sum(tf.abs(self.softmax_layer.w))) 148 | L2_sqrs.append(tf.nn.l2_loss(self.softmax_layer.w)) 149 | 150 | self.L1 = tf.add_n(L1s) 151 | self.L2_sqr = tf.add_n(L2_sqrs) 152 | 153 | # Cost with two regularization terms 154 | self.cost = self.softmax_layer.cost \ 155 | + lambda1*(1.0-lambda2)*0.5*self.L2_input + lambda1*lambda2*self.L1_input \ 156 | + alpha1*(1.0-alpha2)*0.5 * self.L2_sqr + alpha1*alpha2*self.L1 157 | 158 | 159 | self.optimizer = optimize(self.cost, learning_rate, optimizer) 160 | 161 | self.accuracy = self.softmax_layer.accuracy 162 | 163 | self.y = self.softmax_layer.y 164 | 165 | def train(self, batch_size=100): 166 | """ Train the data with specified batch size, note the if the batch size exceed 167 | the 
number of samples in the training set, all the training data will be used 168 | as one batch 169 | 170 | Parameters 171 | ---------- 172 | 173 | batch_size: int, default : 100 174 | Defined the number of sample per batch 175 | """ 176 | sess = tf.Session() 177 | self.sess = sess 178 | sess.run(tf.initialize_all_variables()) 179 | batch_generator = GenBatch(self.X_train, self.y_train, batch_size) 180 | n_batch = batch_generator.n_batch 181 | 182 | self.losses, self.train_Accs, self.test_Accs = [], [], [] 183 | for i in xrange(self.epochs): 184 | # x_batch, y_batch = get_batch(self.X_train, self.y_train, batch_size) 185 | batch_generator.resetIndex() 186 | for j in xrange(n_batch+1): 187 | x_batch, y_batch = batch_generator.get_batch() 188 | sess.run(self.optimizer, feed_dict={self.var_X: x_batch, self.var_Y: y_batch}) 189 | 190 | self.train_Accs.append(sess.run(self.accuracy, \ 191 | feed_dict={self.var_X: self.X_train, self.var_Y: self.y_train})) 192 | self.test_Accs.append(sess.run(self.accuracy, \ 193 | feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test})) 194 | self.losses.append(sess.run(self.cost, \ 195 | feed_dict={self.var_X: x_batch, self.var_Y: y_batch})) 196 | 197 | if i % self.print_step == 0: 198 | print('epoch {0}: global loss = {1}'.format(i, self.losses[-1])) 199 | print("Train accuracy:", self.train_Accs[-1]) 200 | print("Test accuracy:", self.test_Accs[-1]) 201 | 202 | self.selected_ws = [sess.run(self.input_layers[i].w) for i in xrange(self.n_input)] 203 | # print("Input layer w: ", self.selected_ws) 204 | print("Final train accuracy:", self.train_Accs[-1]) 205 | print("Final test accuracy:", self.test_Accs[-1]) 206 | 207 | def refine_init_weight(self, threshold=0.001): 208 | """ Set input layer weights whose value is smaller than some threshold to zero and 209 | recalculate the accuracy rate 210 | 211 | Parameters 212 | ---------- 213 | 214 | threshold: float32, default : 0.001 215 | Threshold value 216 | """ 217 | 218 | refined_ws = [np.copy(w) for w in self.selected_ws] 219 | for i, refined_w in enumerate(refined_ws): 220 | refined_w[refined_w < threshold] = 0 221 | self.sess.run(self.input_layers[i].w.assign(refined_w)) 222 | print("Test accuracy refined:",self.sess.run(self.accuracy, feed_dict={self.var_X: self.X_test, self.var_Y: self.y_test})) -------------------------------------------------------------------------------- /supporting_files/dfs2.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/dfs2.pyc -------------------------------------------------------------------------------- /supporting_files/helpers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | __author__ = "Xupeng Tong" 6 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 7 | __email__ = "tongxupeng.cpu@gmail.com" 8 | 9 | import tensorflow as tf 10 | import numpy as np 11 | 12 | def activate(layer, name): 13 | """ Activate one layer with specified activation function 14 | 15 | Parameters 16 | ---------- 17 | layer: Tensor 18 | The layer to be activated 19 | name: string, with options "sigmoid", "softmax", "tanh", "relu" and "linear" 20 | The name of the activation function 21 | """ 22 | 23 | if name == 'sigmoid': 24 | return tf.nn.sigmoid(layer) 25 | elif name == 'softmax': 26 | return 
--------------------------------------------------------------------------------
/supporting_files/dfs2.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/dfs2.pyc
--------------------------------------------------------------------------------
/supporting_files/helpers.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

from __future__ import print_function

__author__ = "Xupeng Tong"
__copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron"
__email__ = "tongxupeng.cpu@gmail.com"

import tensorflow as tf
import numpy as np

def activate(layer, name):
    """ Activate one layer with the specified activation function

    Parameters
    ----------
    layer: Tensor
        The layer to be activated
    name: string, one of "sigmoid", "softmax", "tanh", "relu" and "linear"
        The name of the activation function
    """

    if name == 'sigmoid':
        return tf.nn.sigmoid(layer)
    elif name == 'softmax':
        return tf.nn.softmax(layer)
    elif name == 'tanh':
        return tf.nn.tanh(layer)
    elif name == 'relu':
        return tf.nn.relu(layer)
    elif name == 'linear':
        return layer

def optimize(cost, learning_rate, optimizer):
    """ Optimize the cost

    Parameters
    ----------
    cost: Tensor
        The cost to be minimized
    learning_rate: float32
        Learning rate for gradient descent
    optimizer: string, one of "FTRL", "Adam", "SGD"
        The name of the optimization method;
        the Adam optimizer generally gives us the best result
    """

    optimizer = {'FTRL': tf.train.FtrlOptimizer, 'Adam': tf.train.AdamOptimizer, \
                 'SGD': tf.train.GradientDescentOptimizer}[optimizer]

    return optimizer(learning_rate=learning_rate).minimize(cost)

def one_hot(y):
    """ Generate the one-hot representation of y

    Parameters
    ----------
    y: numpy array of integer labels in {0, ..., n_classes - 1}
    """
    n_classes = len(np.unique(y))
    one_hot_Y = np.zeros((len(y), n_classes))
    for i, j in enumerate(y):
        one_hot_Y[i][j] = 1

    return one_hot_Y

def init_layer_weight(dims, X, name):
    """ Initialize the weights for the hidden layers and return the result

    Parameters
    ----------
    dims: list
        Each element stands for the number of nodes in one layer
    X: numpy array
        The training data, used for pretraining when name == 'sda'
    name: string, one of "sda" and "uniform"
        The name of the initialization method
    """

    weights, biases = [], []
    if name == 'sda':
        from sda import StackedDenoisingAutoencoder
        sda = StackedDenoisingAutoencoder(dims=dims)
        sda._fit(X)
        weights, biases = sda.weights, sda.biases
    elif name == 'uniform':
        n_in = X.shape[1]
        for d in dims:
            r = 4 * np.sqrt(6.0 / (n_in + d))
            weights.append(tf.random_uniform([n_in, d], minval=-r, maxval=r))
            biases.append(tf.zeros([d, ]))
            n_in = d

    return weights, biases

def get_random_batch(X, Y, size):
    """
    Alternative method of getting a random batch each time
    """
    assert len(X) == len(Y)
    a = np.random.choice(len(X), size, replace=False)
    return X[a], Y[a]

class GenBatch():
    """ The batch generator for training

    Parameters
    ----------
    X: numpy array
    y: numpy array
    batch_size: int
    """
    def __init__(self, X, y, batch_size):
        self.X = X
        self.Y = y
        self.batch_size = batch_size
        self.n_batch = len(X) // batch_size
        self.index = 0

    def get_batch(self):
        """
        Get the next batch; the final batch holds the leftover samples
        """
        # Both bounds must be scaled by batch_size (the original code used
        # the raw batch index as the lower bound, so every batch after the
        # first returned the wrong rows)
        batch_range = xrange(self.index * self.batch_size, (self.index + 1) * self.batch_size)
        if self.index == self.n_batch:
            batch_range = xrange(self.index * self.batch_size, len(self.X))
        self.index += 1

        return self.X[batch_range], self.Y[batch_range]

    def resetIndex(self):
        self.index = 0
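# ---- Usage sketch (illustrative, not part of the original module) ----
# Demonstrates one_hot and GenBatch on random data; all sizes are made up.
if __name__ == '__main__':
    X = np.random.rand(250, 8)
    Y = one_hot(np.random.randint(0, 3, 250))   # (250, 3) one-hot matrix
    gen = GenBatch(X, Y, batch_size=100)        # n_batch == 2 full batches
    gen.resetIndex()
    for _ in xrange(gen.n_batch + 1):           # the +1 picks up the last partial batch
        x_b, y_b = gen.get_batch()
        print(x_b.shape, y_b.shape)             # (100, 8) twice, then (50, 8)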
"tongxupeng.cpu@gmail.com" 6 | 7 | import tensorflow as tf 8 | import numpy as np 9 | from helpers import * 10 | 11 | class One2OneInputLayer(object): 12 | """ One to One input layer 13 | 14 | Parameters 15 | ---------- 16 | 17 | input: Tensor 18 | The output from the last layer 19 | weight_init: 20 | initial value for weights 21 | """ 22 | # One to One Mapping! 23 | def __init__(self, input, weight_init=None): 24 | n_in = input.get_shape()[1].value 25 | 26 | self.input = input 27 | 28 | # Initiate the weight for the input layer 29 | r = 4*np.sqrt(3.0/n_in) 30 | 31 | if weight_init is None: 32 | self.w = tf.Variable(tf.random_uniform([n_in,],-r, r), name='w') 33 | else: 34 | self.w = tf.Variable(weight_init, name='w') 35 | 36 | self.output = self.w * self.input 37 | 38 | class DenseLayer(object): 39 | """ Canonical dense layer 40 | 41 | Parameters 42 | ---------- 43 | 44 | input: Tensor 45 | The output from the last layer 46 | init_w: numpy array 47 | initial value for weights 48 | init_b: numpy array 49 | initial value for b 50 | """ 51 | def __init__(self, input, init_w, init_b, activation='sigmoid'): 52 | 53 | n_in = input.get_shape()[1].value 54 | self.input = input 55 | 56 | # Initiate the weight for the input layer 57 | 58 | w = tf.Variable(init_w, name='w') 59 | b = tf.Variable(init_b, name='b') 60 | 61 | output = tf.add(tf.matmul(input, w), b) 62 | output = activate(output, activation) 63 | 64 | self.w = w 65 | self.b = b 66 | self.output = output 67 | self.params = [w] 68 | 69 | class SoftmaxLayer(object): 70 | """ Softmax layer for classification 71 | 72 | Parameters 73 | ---------- 74 | 75 | input: Tensor 76 | The output from the last layer 77 | n_out: int 78 | Number of labels 79 | y: numpy array 80 | True label for the data 81 | """ 82 | def __init__(self, input, n_out, y): 83 | n_in = input.get_shape()[1].value 84 | self.input = input 85 | 86 | # Initiate the weight and biases for this layer 87 | r = 4*np.sqrt(6.0/(n_in + n_out)) 88 | w = tf.Variable(tf.random_uniform([n_in, n_out], minval=-r, maxval=r)) 89 | b = tf.Variable(tf.zeros([n_out]), name='b') 90 | 91 | pred = tf.add(tf.matmul(input, w), b) 92 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(pred, y)) 93 | 94 | # Evaluate model 95 | correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) 96 | self.accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) 97 | 98 | self.y = y 99 | self.w = w 100 | self.b = b 101 | self.cost = cost 102 | self.params= [w] -------------------------------------------------------------------------------- /supporting_files/nncomponents.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/nncomponents.pyc -------------------------------------------------------------------------------- /supporting_files/sda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | __author__ = "Xupeng Tong" 5 | __copyright__ = "Copyright 2016, Deep Feature Selection at Regeneron" 6 | __email__ = "tongxupeng.cpu@gmail.com" 7 | 8 | import numpy as np 9 | from helpers import get_random_batch 10 | from helpers import activate 11 | import tensorflow as tf 12 | 13 | class StackedDenoisingAutoencoder: 14 | """ A stacked deep autoencoder with denoising capability 15 | 16 | Parameters 17 | ---------- 18 | 19 | dims: list 
        Number of hidden units in each autoencoder layer
    epochs: list
        Number of training epochs for each layer
    activations: list of strings
        One activation name per layer; see helpers.activate for the options
    noise: string or None
        'gaussian' for additive Gaussian noise, 'mask-<frac>' to zero out a
        random fraction of each sample, None for no corruption
    """

    def __init__(self, dims=[100, 100, 100], epochs=[100, 100, 100], activations=['sigmoid'] * 3,
                 noise=None, loss='rmse', lr=0.001, batch_size=100, print_step=50):
        self.print_step = print_step
        self.batch_size = batch_size
        self.lr = lr
        self.loss = loss
        self.activations = activations
        self.noise = noise
        self.epochs = epochs
        self.dims = dims
        self.depth = len(dims)
        self.weights, self.biases = [], []
        assert len(dims) == len(epochs)

    def _fit(self, x):
        # Greedy layer-wise training: each layer learns to reconstruct the
        # (possibly corrupted) output of the previous one
        for i in range(self.depth):
            print('Layer {0}'.format(i + 1))
            x = self._run(data_x=self._add_noise(x), activation=self.activations[i], data_x_=x,
                          hidden_dim=self.dims[i], epochs=self.epochs[i], loss=self.loss,
                          batch_size=self.batch_size, lr=self.lr, print_step=self.print_step)

    def _add_noise(self, x):
        if self.noise is None:
            return x
        if self.noise == 'gaussian':
            n = np.random.normal(0, 0.1, (len(x), len(x[0])))
            return x + n
        # Masking noise is specified as 'mask-<frac>', e.g. 'mask-0.3'
        # (the original tested self.noise == 'mask', which could never
        # carry the fraction)
        if self.noise.startswith('mask'):
            frac = float(self.noise.split('-')[1])
            temp = np.copy(x)
            for i in temp:
                n = np.random.choice(len(i), int(round(frac * len(i))), replace=False)
                i[n] = 0
            return temp

    def _transform(self, data):
        sess = tf.Session()
        x = tf.constant(data, dtype=tf.float32)
        for w, b, a in zip(self.weights, self.biases, self.activations):
            weight = tf.constant(w, dtype=tf.float32)
            bias = tf.constant(b, dtype=tf.float32)
            layer = tf.matmul(x, weight) + bias
            x = activate(layer, a)
        return x.eval(session=sess)

    def get_transformed_data(self, x):
        self._fit(x)
        return self._transform(x)

    def _run(self, data_x, data_x_, hidden_dim, activation, loss, lr, print_step, epochs, batch_size=100):
        input_dim = len(data_x[0])
        print('input dim: {0}, hidden dim: {1}'.format(input_dim, hidden_dim))
        sess = tf.Session()
        x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
        x_ = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_')
        encode = {'weights': tf.Variable(tf.truncated_normal([input_dim, hidden_dim], dtype=tf.float32)),
                  'biases': tf.Variable(tf.truncated_normal([hidden_dim], dtype=tf.float32))}
        # Tied weights: the decoder reuses the transposed encoder weights
        decode = {'biases': tf.Variable(tf.truncated_normal([input_dim], dtype=tf.float32)),
                  'weights': tf.transpose(encode['weights'])}

        encoded = activate(tf.matmul(x, encode['weights']) + encode['biases'], activation)
        decoded = tf.matmul(encoded, decode['weights']) + decode['biases']

        # Reconstruction loss
        if loss == 'rmse':
            loss = tf.sqrt(tf.reduce_mean(tf.square(tf.sub(x_, decoded))))
        elif loss == 'cross-entropy':
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(decoded, x_))
        train_op = tf.train.AdamOptimizer(lr).minimize(loss)

        sess.run(tf.initialize_all_variables())
        for i in range(epochs):
            b_x, b_x_ = get_random_batch(data_x, data_x_, batch_size)
            sess.run(train_op, feed_dict={x: b_x, x_: b_x_})
            if (i + 1) % print_step == 0:
                l = sess.run(loss, feed_dict={x: data_x, x_: data_x_})
                print('epoch {0}: global loss = {1}'.format(i, l))

        self.weights.append(sess.run(encode['weights']))
        self.biases.append(sess.run(encode['biases']))
        # Return the clean data's encoding as the input to the next layer
        return sess.run(encoded, feed_dict={x: data_x_})
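# ---- Usage sketch (illustrative, not part of the original module) ----
# Pretrains a two-layer SDA on random data and takes the encoded features;
# the dimensions and noise choice here are made up.
if __name__ == '__main__':
    X = np.random.rand(500, 64)
    sda = StackedDenoisingAutoencoder(dims=[32, 16], epochs=[100, 100],
                                      activations=['sigmoid'] * 2, noise='gaussian')
    X_encoded = sda.get_transformed_data(X)   # fit layer-wise, then transform X
    print(X_encoded.shape)                    # (500, 16)
    # sda.weights / sda.biases now hold the pretrained parameters, as
    # consumed by init_layer_weight(..., name='sda') in helpers.py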
--------------------------------------------------------------------------------
/supporting_files/sda.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/supporting_files/sda.pyc
--------------------------------------------------------------------------------
/weights/Q_indexes_xgboost_All_AsthmaCOPD.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/Q_indexes_xgboost_All_AsthmaCOPD.npy
--------------------------------------------------------------------------------
/weights/Q_weights_AsthmaCOPD.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/Q_weights_AsthmaCOPD.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All_AsAc.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All_AsAc.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All_AsC.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All_AsC.npy
--------------------------------------------------------------------------------
/weights/indexes_xgboost_rerun_All_CAc.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/indexes_xgboost_rerun_All_CAc.npy
--------------------------------------------------------------------------------
/weights/weights-0-10-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-0-10-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights-10-20-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-10-20-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights-20-30-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-20-30-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights-NEW-mean.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights-NEW-mean.npy
--------------------------------------------------------------------------------
/weights/weights_AsthmaAcos_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights_AsthmaAcos_rerun.npy
--------------------------------------------------------------------------------
/weights/weights_AsthmaCOPD_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights_AsthmaCOPD_rerun.npy
--------------------------------------------------------------------------------
/weights/weights_COPDAcos_rerun.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/weights/weights_COPDAcos_rerun.npy
--------------------------------------------------------------------------------
/xgboost_result.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tonyabracadabra/DeepFeatureSelection--Tensorflow/ff94ee3973b98aca024ccf50d5ddbf5b9dd9d119/xgboost_result.mat
--------------------------------------------------------------------------------