├── README.md
└── exp.py

/README.md:
--------------------------------------------------------------------------------

# FsNet: Feature Selection Network on High-dimensional Biological Data

Feature Selection Network (FsNet) is a scalable concrete neural network architecture for wide data, i.e., data with a high-dimensional feature space and only a small number of samples.
Specifically, FsNet consists of a selector layer that uses a concrete random variable for discrete feature selection, and a supervised deep neural network regularized with a reconstruction loss.
Because the large number of parameters in the selector and reconstruction layers can easily cause overfitting under a limited number of samples, two tiny networks are used to predict the large virtual weight matrices of these layers.

For more details, see the accompanying arXiv preprint ["FsNet: Feature Selection Network on High-dimensional Biological Data"](https://arxiv.org/abs/2001.08322), and please use the citation below.

```
@article{singh2020fsnet,
  title={FsNet: Feature Selection Network on High-dimensional Biological Data},
  author={Dinesh Singh and Héctor Climente-González and Mathis Petrovich and Eiryo Kawakami and Makoto Yamada},
  year={2020},
  eprint={2001.08322},
  archivePrefix={arXiv},
  primaryClass={cs.LG}
}
```

--------------------------------------------------------------------------------
/exp.py:
--------------------------------------------------------------------------------

from __future__ import print_function

import math
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Layer, Dense, Dropout, Input, LeakyReLU
from keras.layers.core import Activation
from keras.optimizers import RMSprop
from keras.initializers import Constant, glorot_normal
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
import numpy as np
import scipy.io as spio
import random
import matplotlib.pyplot as plt
import sys
import os
import pandas as pd

# Command-line arguments: data directory, dataset (.mat) file name,
# base hidden-layer size, and number of features to select.
dp = sys.argv[1]
ds = sys.argv[2]
h_size = int(sys.argv[3])          # smallest hidden layer; other layers use 2*h_size and 4*h_size
nfeat = int(sys.argv[4])           # number of features selected by the selector layer
rd = "eta100/"                     # output directory for training histories and selected indices
os.makedirs(rd, exist_ok=True)
datafile = dp + ds

num_exp = 20                       # number of random train/test splits
num_epochs = 6400
bins = 10                          # histogram bins for the per-feature summary u
batch_size = 8
start_temp = 10.0
min_temp = 0.01
lossWeights = {"recon": 100, "classacc": 1}
losses = {"recon": "mean_squared_error", "classacc": "categorical_crossentropy"}
opt = RMSprop(lr=0.001, decay=0.001 / num_epochs)

cacc = np.zeros(num_epochs)
acc = np.zeros(num_epochs)
closs = np.zeros(num_epochs)
loss = np.zeros(num_epochs)
cmi = 0
mi = 0


def calc_MI(X, Y, bins):
    # Mutual information between two feature vectors via 2D histograms.
    c_XY = np.histogram2d(X, Y, bins)[0]
    c_X = np.histogram(X, bins)[0]
    c_Y = np.histogram(Y, bins)[0]
    H_X = shan_entropy(c_X)
    H_Y = shan_entropy(c_Y)
    H_XY = shan_entropy(c_XY)
    mi1 = H_X + H_Y - H_XY
    return mi1


def shan_entropy(c):
    # Shannon entropy (in bits) of a histogram.
    c_normalized = c / float(np.sum(c))
    c_normalized = c_normalized[np.nonzero(c_normalized)]
    H = -sum(c_normalized * np.log2(c_normalized))
    return H


def MI(S):
    # Average pairwise mutual information over the columns of S.
    bins = 10
    n = S.shape[1]
    mis = 0
    count = 0
    for ix in np.arange(n):
        for jx in np.arange(ix + 1, n):
            mis = mis + calc_MI(S[:, ix], S[:, jx], bins)
            count = count + 1
    mis = mis / count
    return mis

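# ---------------------------------------------------------------------------
# Selector and reconstruction layers.
#
# tinyLayerE is the concrete selector layer: for each of its nfeat output
# units it draws a relaxed one-hot (Gumbel-softmax / concrete) sample over
# the input features, with the temperature annealed from start_temp down to
# min_temp, so training starts with soft feature mixtures and ends with
# near-discrete selections. Instead of learning the full (nfeat x d) selector
# matrix directly, a tiny weight matrix maps the per-feature histogram
# summary u to the selector logits, keeping the parameter count small for
# high-dimensional inputs. tinyLayerD predicts the large reconstruction
# weight matrix from the same summary u in an analogous way.
# ---------------------------------------------------------------------------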
class tinyLayerE(Layer):
    def __init__(self, output_dim, u, bins, start_temp=10.0, min_temp=0.1, alpha=0.99999, **kwargs):
        self.output_dim = output_dim
        self.u = K.constant(u)
        self.nbins = bins
        self.start_temp = start_temp
        self.min_temp = K.constant(min_temp)
        self.alpha = K.constant(alpha)
        super(tinyLayerE, self).__init__(**kwargs)

    def build(self, input_shape):
        # Annealed concrete temperature (not trainable).
        self.temp = self.add_weight(name='temp', shape=[], initializer=Constant(self.start_temp), trainable=False)
        # Tiny weight matrix mapping the per-feature summary u to selector logits.
        self.tinyW = self.add_weight(name='tinyW', shape=(self.nbins, self.output_dim), initializer='uniform', trainable=True)
        super(tinyLayerE, self).build(input_shape)

    def call(self, X, training=None):
        # Selector logits predicted from the feature summaries u.
        al = K.softmax(K.dot(self.u, self.tinyW))
        al = K.transpose(al)
        logits = K.log(10 * K.maximum(K.minimum(al, 0.9999999), K.epsilon()))
        # Gumbel-softmax (concrete) sampling with annealed temperature.
        uniform = K.random_uniform(logits.shape, K.epsilon(), 1.0)
        gumbel = -K.log(-K.log(uniform))
        temp = K.update(self.temp, K.maximum(self.min_temp, self.temp * self.alpha))
        noisy_logits = (logits + gumbel) / temp
        samples = K.softmax(noisy_logits)
        self.logits = samples
        # Greedy discrete selection used outside training: repeatedly take the
        # largest remaining probability and block its row and column so that
        # each output unit picks a distinct feature.
        dl = np.zeros(self.logits.shape)
        p = K.get_value(self.logits)
        for i in range(dl.shape[0]):
            ind = np.argmax(p, axis=None)
            x = ind // dl.shape[1]
            y = ind % dl.shape[1]
            dl[x][y] = 1
            p[x] = -np.ones(dl.shape[1])
            p[:, y] = -np.ones(dl.shape[0])
        discrete_logits = K.one_hot(K.argmax(K.variable(dl)), dl.shape[1])
        # Relaxed selection during training, hard one-hot selection otherwise.
        self.selections = K.in_train_phase(samples, discrete_logits, training)
        Y = K.dot(X, K.transpose(self.selections))
        return Y

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)


class tinyLayerD(Layer):
    def __init__(self, output_dim, u, bins, **kwargs):
        self.output_dim = output_dim
        self.u = K.constant(u)
        self.nbins = bins
        super(tinyLayerD, self).__init__(**kwargs)

    def build(self, input_shape):
        # Tiny weight matrix predicting the large reconstruction weights from u.
        self.tinyW = self.add_weight(name='tinyW', shape=(self.nbins, input_shape[1]), initializer='uniform', trainable=True)
        super(tinyLayerD, self).build(input_shape)

    def call(self, x):
        weights = K.transpose(K.tanh(K.dot(self.u, self.tinyW)))
        return K.dot(x, weights)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)


# Load the data: X is (n_samples x n_features), Y holds the class labels.
data = spio.loadmat(datafile)
X = data['X']
Y = data['Y']
Y = to_categorical(Y)

# Drop constant features and normalize each remaining feature to N(0, 1).
X = np.delete(X, np.where(np.std(X, axis=0) == 0), axis=1)
for i in range(X.shape[1]):
    if np.max(X[:, i]) != 0:
        X[:, i] = X[:, i] / np.max(np.absolute(X[:, i]))
    mu_Xi = np.mean(X[:, i])
    std_Xi = np.std(X[:, i])
    X[:, i] = X[:, i] - mu_Xi
    if std_Xi != 0:
        X[:, i] = X[:, i] / std_Xi

# Repeat the experiment over num_exp random 50/50 train/test splits.
for ii in range(0, num_exp):
    idx = random.sample(range(0, X.shape[0]), round(X.shape[0] * 0.5))
    x_train = X[idx, :]
    y_train = Y[idx, :]
    x_test = np.delete(X, idx, 0)
    y_test = np.delete(Y, idx, 0)
    x_train = np.reshape(x_train, (len(x_train), -1))
    x_test = np.reshape(x_test, (len(x_test), -1))

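    # Per-feature summary u used by the tiny networks: for each input feature,
    # a bins-dimensional vector whose j-th entry is the histogram count of
    # bin j times the centre of that bin, computed on the training split.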
    u_train = np.zeros([x_train.shape[1], bins], dtype=float)
    for i in range(0, x_train.shape[1]):
        hist = np.histogram(x_train[:, i], bins)
        for j in range(0, bins):
            u_train[i, j] = hist[0][j] * 0.5 * (hist[1][j] + hist[1][j + 1])

    # Temperature decay factor so that temp reaches min_temp after num_epochs.
    steps_per_epoch = (len(x_train) + batch_size - 1) // batch_size
    alpha = math.exp(math.log(min_temp / start_temp) / (num_epochs * steps_per_epoch))

    ################################
    # FsNet
    ################################

    inp1 = Input(shape=(x_train.shape[1],))
    x = tinyLayerE(nfeat, u_train, bins, start_temp, min_temp, alpha, name='tinyLayerE')(inp1)
    x = Dense(h_size * 4)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.2)(x)
    x = Dense(h_size * 2)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.2)(x)
    x = Dense(h_size)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.2)(x)
    # Decoder branch for the reconstruction loss.
    x1 = Dense(h_size * 2)(x)
    x1 = LeakyReLU(0.2)(x1)
    x1 = Dropout(0.2)(x1)
    x1 = Dense(h_size * 4)(x1)
    x1 = LeakyReLU(0.2)(x1)
    x1 = Dropout(0.2)(x1)
    x1 = tinyLayerD(x_train.shape[1], u_train, bins, name='recon')(x1)
    # Classification branch.
    x2 = Dense(y_train.shape[1])(x)
    x2 = Activation("softmax", name="classacc")(x2)
    model = Model(inputs=inp1, outputs=[x1, x2])
    model.compile(optimizer=opt, loss=losses, loss_weights=lossWeights, metrics=["accuracy", "mse"])
    # Train with the same batch size assumed by the temperature schedule above.
    history = model.fit(x_train, {"recon": x_train, "classacc": y_train},
                        validation_data=(x_test, {"recon": x_test, "classacc": y_test}),
                        batch_size=batch_size, epochs=num_epochs, verbose=1)

    # Recover the selected feature indices: greedily pick the largest remaining
    # selection probability, blocking its row and column so features are distinct.
    probabilities = K.get_value(K.softmax(model.get_layer('tinyLayerE').logits))
    dl = np.zeros(model.get_layer('tinyLayerE').logits.shape)
    p = K.get_value(model.get_layer('tinyLayerE').logits)
    for j in range(dl.shape[0]):
        ind = np.argmax(p, axis=None)
        x = ind // dl.shape[1]
        y = ind % dl.shape[1]
        dl[x][y] = 1
        p[x] = -np.ones(dl.shape[1])
        p[:, y] = -np.ones(dl.shape[0])

    indices = K.get_value(K.argmax(dl))

    # Save the training history and the selected feature indices for this split.
    hist_df = pd.DataFrame(history.history)
    hist_csv_file = rd + ds + "_" + str(nfeat) + "_" + str(ii) + "_history.csv"
    with open(hist_csv_file, mode='w') as f:
        hist_df.to_csv(f)
    spio.savemat(rd + ds + "_" + str(nfeat) + "_" + str(ii) + '_indices.mat', {'indices': indices})

--------------------------------------------------------------------------------