├── README.md
└── exp.py

/README.md:
--------------------------------------------------------------------------------

# FsNet: Feature Selection Network on High-dimensional Biological Data

Feature Selection Network (FsNet) is a scalable concrete neural network architecture for wide data, i.e., data with a high-dimensional feature space and only a small number of samples.
Specifically, FsNet consists of a selector layer that uses a concrete random variable for discrete feature selection, and a supervised deep neural network regularized with a reconstruction loss.
Because the large number of parameters in the selector and reconstruction layers can easily cause overfitting under a limited number of samples, two tiny networks are used to predict the large virtual weight matrices of these layers.

For more details, see the accompanying arXiv preprint ["FsNet: Feature Selection Network on High-dimensional Biological Data"](https://arxiv.org/abs/2001.08322), and please use the citation below.

```
@article{singh2020fsnet,
  title={FsNet: Feature Selection Network on High-dimensional Biological Data},
  author={Dinesh Singh and Héctor Climente-González and Mathis Petrovich and Eiryo Kawakami and Makoto Yamada},
  year={2020},
  eprint={2001.08322},
  archivePrefix={arXiv},
  primaryClass={cs.LG}
}
```

--------------------------------------------------------------------------------
/exp.py:
--------------------------------------------------------------------------------

from __future__ import print_function

import math
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Layer, Dense, Dropout, Input, LeakyReLU
from keras.layers.core import Activation
from keras.optimizers import RMSprop
from keras.initializers import Constant, glorot_normal
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping
import numpy as np
import scipy.io as spio
import random
import matplotlib.pyplot as plt
import sys
import os
import pandas as pd

# Command-line arguments: data directory, dataset (.mat) file name,
# base hidden-layer size, and number of features to select.
dp = sys.argv[1]
ds = sys.argv[2]
h_size = int(sys.argv[3])          # smallest hidden layer; other layers use 2*h_size and 4*h_size
nfeat = int(sys.argv[4])           # number of features selected by the selector layer
rd = "eta100/"                     # output directory for training histories and selected indices
os.makedirs(rd, exist_ok=True)
datafile = dp + ds

num_exp = 20                       # number of random train/test splits
num_epochs = 6400
bins = 10                          # histogram bins for the per-feature summary u
batch_size = 8
start_temp = 10.0
min_temp = 0.01
lossWeights = {"recon": 100, "classacc": 1}
losses = {"recon": "mean_squared_error", "classacc": "categorical_crossentropy"}
opt = RMSprop(lr=0.001, decay=0.001 / num_epochs)

cacc = np.zeros(num_epochs)
acc = np.zeros(num_epochs)
closs = np.zeros(num_epochs)
loss = np.zeros(num_epochs)
cmi = 0
mi = 0


def calc_MI(X, Y, bins):
    # Mutual information between two feature vectors via 2D histograms.
    c_XY = np.histogram2d(X, Y, bins)[0]
    c_X = np.histogram(X, bins)[0]
    c_Y = np.histogram(Y, bins)[0]
    H_X = shan_entropy(c_X)
    H_Y = shan_entropy(c_Y)
    H_XY = shan_entropy(c_XY)
    mi1 = H_X + H_Y - H_XY
    return mi1


def shan_entropy(c):
    # Shannon entropy (in bits) of a histogram.
    c_normalized = c / float(np.sum(c))
    c_normalized = c_normalized[np.nonzero(c_normalized)]
    H = -sum(c_normalized * np.log2(c_normalized))
    return H


def MI(S):
    # Average pairwise mutual information over the columns of S.
    bins = 10
    n = S.shape[1]
    mis = 0
    count = 0
    for ix in np.arange(n):
        for jx in np.arange(ix + 1, n):
            mis = mis + calc_MI(S[:, ix], S[:, jx], bins)
            count = count + 1
    mis = mis / count
    return mis

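# ---------------------------------------------------------------------------
# Selector and reconstruction layers.
#
# tinyLayerE is the concrete selector layer: for each of its nfeat output
# units it draws a relaxed one-hot (Gumbel-softmax / concrete) sample over
# the input features, with the temperature annealed from start_temp down to
# min_temp, so training starts with soft feature mixtures and ends with
# near-discrete selections. Instead of learning the full (nfeat x d) selector
# matrix directly, a tiny weight matrix maps the per-feature histogram
# summary u to the selector logits, keeping the parameter count small for
# high-dimensional inputs. tinyLayerD predicts the large reconstruction
# weight matrix from the same summary u in an analogous way.
# ---------------------------------------------------------------------------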
class tinyLayerE(Layer):
    def __init__(self, output_dim, u, bins, start_temp=10.0, min_temp=0.1, alpha=0.99999, **kwargs):
        self.output_dim = output_dim
        self.u = K.constant(u)
        self.nbins = bins
        self.start_temp = start_temp
        self.min_temp = K.constant(min_temp)
        self.alpha = K.constant(alpha)
        super(tinyLayerE, self).__init__(**kwargs)

    def build(self, input_shape):
        # Annealed concrete temperature (not trainable).
        self.temp = self.add_weight(name='temp', shape=[], initializer=Constant(self.start_temp), trainable=False)
        # Tiny weight matrix mapping the per-feature summary u to selector logits.
        self.tinyW = self.add_weight(name='tinyW', shape=(self.nbins, self.output_dim), initializer='uniform', trainable=True)
        super(tinyLayerE, self).build(input_shape)

    def call(self, X, training=None):
        # Selector logits predicted from the feature summaries u.
        al = K.softmax(K.dot(self.u, self.tinyW))
        al = K.transpose(al)
        logits = K.log(10 * K.maximum(K.minimum(al, 0.9999999), K.epsilon()))
        # Gumbel-softmax (concrete) sampling with annealed temperature.
        uniform = K.random_uniform(logits.shape, K.epsilon(), 1.0)
        gumbel = -K.log(-K.log(uniform))
        temp = K.update(self.temp, K.maximum(self.min_temp, self.temp * self.alpha))
        noisy_logits = (logits + gumbel) / temp
        samples = K.softmax(noisy_logits)
        self.logits = samples
        # Greedy discrete selection used outside training: repeatedly take the
        # largest remaining probability and block its row and column so that
        # each output unit picks a distinct feature.
        dl = np.zeros(self.logits.shape)
        p = K.get_value(self.logits)
        for i in range(dl.shape[0]):
            ind = np.argmax(p, axis=None)
            x = ind // dl.shape[1]
            y = ind % dl.shape[1]
            dl[x][y] = 1
            p[x] = -np.ones(dl.shape[1])
            p[:, y] = -np.ones(dl.shape[0])
        discrete_logits = K.one_hot(K.argmax(K.variable(dl)), dl.shape[1])
        # Relaxed selection during training, hard one-hot selection otherwise.
        self.selections = K.in_train_phase(samples, discrete_logits, training)
        Y = K.dot(X, K.transpose(self.selections))
        return Y

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)


class tinyLayerD(Layer):
    def __init__(self, output_dim, u, bins, **kwargs):
        self.output_dim = output_dim
        self.u = K.constant(u)
        self.nbins = bins
        super(tinyLayerD, self).__init__(**kwargs)

    def build(self, input_shape):
        # Tiny weight matrix predicting the large reconstruction weights from u.
        self.tinyW = self.add_weight(name='tinyW', shape=(self.nbins, input_shape[1]), initializer='uniform', trainable=True)
        super(tinyLayerD, self).build(input_shape)

    def call(self, x):
        weights = K.transpose(K.tanh(K.dot(self.u, self.tinyW)))
        return K.dot(x, weights)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.output_dim)


# Load the data: X is (n_samples x n_features), Y holds the class labels.
data = spio.loadmat(datafile)
X = data['X']
Y = data['Y']
Y = to_categorical(Y)

# Drop constant features and normalize each remaining feature to N(0, 1).
X = np.delete(X, np.where(np.std(X, axis=0) == 0), axis=1)
for i in range(X.shape[1]):
    if np.max(X[:, i]) != 0:
        X[:, i] = X[:, i] / np.max(np.absolute(X[:, i]))
    mu_Xi = np.mean(X[:, i])
    std_Xi = np.std(X[:, i])
    X[:, i] = X[:, i] - mu_Xi
    if std_Xi != 0:
        X[:, i] = X[:, i] / std_Xi

# Repeat the experiment over num_exp random 50/50 train/test splits.
for ii in range(0, num_exp):
    idx = random.sample(range(0, X.shape[0]), round(X.shape[0] * 0.5))
    x_train = X[idx, :]
    y_train = Y[idx, :]
    x_test = np.delete(X, idx, 0)
    y_test = np.delete(Y, idx, 0)
    x_train = np.reshape(x_train, (len(x_train), -1))
    x_test = np.reshape(x_test, (len(x_test), -1))

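    # Per-feature summary u used by the tiny networks: for each input feature,
    # a bins-dimensional vector whose j-th entry is the histogram count of
    # bin j times the centre of that bin, computed on the training split.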
    u_train = np.zeros([x_train.shape[1], bins], dtype=float)
    for i in range(0, x_train.shape[1]):
        hist = np.histogram(x_train[:, i], bins)
        for j in range(0, bins):
            u_train[i, j] = hist[0][j] * 0.5 * (hist[1][j] + hist[1][j + 1])

    # Temperature decay factor so that temp reaches min_temp after num_epochs.
    steps_per_epoch = (len(x_train) + batch_size - 1) // batch_size
    alpha = math.exp(math.log(min_temp / start_temp) / (num_epochs * steps_per_epoch))

    ################################
    # FsNet
    ################################

    inp1 = Input(shape=(x_train.shape[1],))
    x = tinyLayerE(nfeat, u_train, bins, start_temp, min_temp, alpha, name='tinyLayerE')(inp1)
    x = Dense(h_size * 4)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.2)(x)
    x = Dense(h_size * 2)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.2)(x)
    x = Dense(h_size)(x)
    x = LeakyReLU(0.2)(x)
    x = Dropout(0.2)(x)
    # Decoder branch for the reconstruction loss.
    x1 = Dense(h_size * 2)(x)
    x1 = LeakyReLU(0.2)(x1)
    x1 = Dropout(0.2)(x1)
    x1 = Dense(h_size * 4)(x1)
    x1 = LeakyReLU(0.2)(x1)
    x1 = Dropout(0.2)(x1)
    x1 = tinyLayerD(x_train.shape[1], u_train, bins, name='recon')(x1)
    # Classification branch.
    x2 = Dense(y_train.shape[1])(x)
    x2 = Activation("softmax", name="classacc")(x2)
    model = Model(inputs=inp1, outputs=[x1, x2])
    model.compile(optimizer=opt, loss=losses, loss_weights=lossWeights, metrics=["accuracy", "mse"])
    # Train with the same batch size assumed by the temperature schedule above.
    history = model.fit(x_train, {"recon": x_train, "classacc": y_train},
                        validation_data=(x_test, {"recon": x_test, "classacc": y_test}),
                        batch_size=batch_size, epochs=num_epochs, verbose=1)

    # Recover the selected feature indices: greedily pick the largest remaining
    # selection probability, blocking its row and column so features are distinct.
    probabilities = K.get_value(K.softmax(model.get_layer('tinyLayerE').logits))
    dl = np.zeros(model.get_layer('tinyLayerE').logits.shape)
    p = K.get_value(model.get_layer('tinyLayerE').logits)
    for j in range(dl.shape[0]):
        ind = np.argmax(p, axis=None)
        x = ind // dl.shape[1]
        y = ind % dl.shape[1]
        dl[x][y] = 1
        p[x] = -np.ones(dl.shape[1])
        p[:, y] = -np.ones(dl.shape[0])

    indices = K.get_value(K.argmax(dl))

    # Save the training history and the selected feature indices for this split.
    hist_df = pd.DataFrame(history.history)
    hist_csv_file = rd + ds + "_" + str(nfeat) + "_" + str(ii) + "_history.csv"
    with open(hist_csv_file, mode='w') as f:
        hist_df.to_csv(f)
    spio.savemat(rd + ds + "_" + str(nfeat) + "_" + str(ii) + '_indices.mat', {'indices': indices})

--------------------------------------------------------------------------------