├── CNN_util.cu ├── LICENSE ├── MNIST_drive.cu ├── MNIST_labeling.cu ├── MNIST_labeling_2.cu ├── README.md ├── ROI_drive.cu ├── cifar10_reader.hpp ├── convolution_kernel.cu ├── data_check.cu ├── data_reader.cu ├── event_based_learning.cu ├── filter_util.cu ├── header.h ├── img_util.cu ├── inference_options.cu ├── learning_options.cu ├── main.cu ├── network_config_generator.cpp ├── read_neuron_list.cu ├── spiking_cnn_main.cu ├── spiking_learning_drive.cu ├── spiking_learning_main.cu ├── synapse_drive_cnn_v2.cu └── write_neuron_list.cu /CNN_util.cu: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | 3 | __global__ void read_array(float **d_instance_matrix_array, float **d_convolution_result_array){ 4 | 5 | printf("Reading array on GPU: "); 6 | printf("%f \n", d_instance_matrix_array[1][1]); 7 | printf("%f \n", d_convolution_result_array[0][0]); 8 | } 9 | 10 | int CNN_util(CNN_struct *settings, float **d_instance_matrix_array, float **d_convolution_result_array, float **h_instance_matrix_array, float **h_convolution_result_array, int function_select){ 11 | 12 | 13 | if (function_select==0){//initialize arrays 14 | //first initialize instance_matrix, this is the input to convolution kernel 15 | int instance_array_size = CNN_total_layer_num; 16 | 17 | for (int i=0;ilayer[i].neuron_num; 19 | float *temp = new float[matrix_size]; 20 | for(int j=0;jlayer[i+1].neuron_num; 33 | float *temp = new float[matrix_size]; 34 | for(int j=0;j>>(d_instance_matrix_array, d_convolution_result_array); 47 | 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Cognitive Anteater Robotics Laboratory @ University of Calfornia, Irvine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
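> Note on `CNN_util.cu` above: the loop bounds and several statements between angle brackets were lost when this listing was exported, so the `function_select==0` branch is incomplete here. The snippet below is only a minimal, hedged sketch of the general pattern that branch appears to implement — one zero-initialised device buffer per layer plus a device-resident pointer table that a kernel such as `read_array` can index. `num_layers`, `layer_sizes`, and `main` are placeholder names for this sketch, not identifiers from the repository (the real code sizes buffers from `settings->layer[i].neuron_num` and `CNN_total_layer_num`).

```cuda
// Hedged sketch of a per-layer "array of device buffers" (float**) setup.
// Placeholder names: num_layers, layer_sizes, h_ptrs, d_ptrs.
#include <cuda_runtime.h>
#include <cstdio>

__global__ void read_array(float **d_matrix_array) {
    // Touch one element of the first per-layer buffer to confirm the pointers are valid.
    printf("layer 0, element 0: %f\n", d_matrix_array[0][0]);
}

int main() {
    const int num_layers = 3;                            // stand-in for CNN_total_layer_num
    const int layer_sizes[num_layers] = {784, 256, 10};  // stand-in for per-layer neuron counts

    // Host-side array holding one device pointer per layer.
    float *h_ptrs[num_layers];
    for (int i = 0; i < num_layers; ++i) {
        float *zeros = new float[layer_sizes[i]]();      // zero-initialised host staging buffer
        cudaMalloc(&h_ptrs[i], layer_sizes[i] * sizeof(float));
        cudaMemcpy(h_ptrs[i], zeros, layer_sizes[i] * sizeof(float), cudaMemcpyHostToDevice);
        delete[] zeros;
    }

    // The pointer table itself must also live in device memory before a kernel can index it.
    float **d_ptrs;
    cudaMalloc(&d_ptrs, num_layers * sizeof(float *));
    cudaMemcpy(d_ptrs, h_ptrs, num_layers * sizeof(float *), cudaMemcpyHostToDevice);

    read_array<<<1, 1>>>(d_ptrs);
    cudaDeviceSynchronize();

    for (int i = 0; i < num_layers; ++i) cudaFree(h_ptrs[i]);
    cudaFree(d_ptrs);
    return 0;
}
```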
-------------------------------------------------------------------------------- /MNIST_drive.cu: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | #define MNIST_img_width 28 14 | #define MNIST_img_len 28 15 | 16 | __global__ void sc2_update (Input_neuron *Input_neuronlist, float *MNIST_stimulus_freq, int one_depth_size, int start, int end, int target, bool shifting, bool get_only_one){ 17 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 18 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 19 | if(index>=end||index=end||index=end||index0) Input_neuronlist[index].state[1] += 2; 83 | } 84 | 85 | // printf("No.: %d, spike interval is: %f, RAW_freq is: %f \n",index, Input_neuronlist[index].state[1], MNIST_stimulus_freq[signal_index]); 86 | } 87 | } 88 | 89 | __global__ void reset_stimulus (Input_neuron *Input_neuronlist, int network_size, int start, int end){ 90 | 91 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 92 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 93 | if(index>=end||index=end||index=end||indexMNIST_stimulus_freq[start_index+i]) min=MNIST_stimulus_freq[start_index+i]; 149 | } 150 | for (i=0; i=input_image_channel) channel_flag = 0; 179 | total_sum[channel_flag] += image[tot_i]; 180 | } 181 | double denominator = (training_set_number*pixel_number/input_image_channel); 182 | printf("====Normalization Per Channel Used======: "); 183 | 184 | for(int i=0; i=input_image_channel) channel_flag = 0; 203 | sum[channel_flag] += image[start_index+i]; 204 | } 205 | for(int channel_i=0; channel_i=input_image_channel) channel_flag = 0; 212 | MNIST_stimulus_freq[start_index+i]=(image[start_index+i]*(total_mean[channel_flag]/mean[channel_flag])); 213 | MNIST_stimulus_freq[start_index+i] = (max_frequency-min_frequency)*MNIST_stimulus_freq[start_index+i]+min_frequency; 214 | //channel_flag ++; 215 | 216 | } 217 | // for(i=0;i10) printf(" %f|", MNIST_stimulus_freq[start_index+i]); 220 | // } 221 | //printf("\n \n"); 222 | } 223 | } 224 | 225 | void image_to_in_phase_proptioanl_normalized_0_1(float *MNIST_stimulus_freq, float *image, int training_set_number, float max_frequency, float min_frequency, int pixel_number){ 226 | float total_sum = 0.0, total_mean; 227 | float global_min = 1000000; 228 | float global_max = -1000000; 229 | for(int tot_i=0;tot_iglobal_max) global_max = image[tot_i]; 231 | if(image[tot_i]2) image[i]=2; 260 | if(image[i]<-2) image[i]=-2; 261 | 262 | MNIST_stimulus_freq[i] = (max_frequency-min_frequency)/2+((max_frequency-min_frequency)/4)*image[i]; 263 | // printf(" %f|", MNIST_stimulus_freq[i]); 264 | } 265 | 266 | // for (int y=0; y<28; ++y) { 267 | // for (int x=0; x<28; ++x) { 268 | // //std::cout << ((one_mnist_img[y*28+x] == 0.0)? 
' ' : '*'); 269 | // std::cout << std::to_string((MNIST_stimulus_freq[y*28+x])) << ' '; 270 | // } 271 | // std::cout << std::endl; 272 | // } 273 | // cout<<"#############inphasepro#############"<=input_image_channel) channel_flag = 0; 293 | MNIST_stimulus_freq[start_index+i]=(image[start_index+i]-mean[channel_flag])/std[channel_flag]; 294 | MNIST_stimulus_freq[start_index+i] = (max_frequency-min_frequency)*MNIST_stimulus_freq[start_index+i]+min_frequency; 295 | //channel_flag ++; 296 | } 297 | // for(i=0;i10) printf(" %f|", MNIST_stimulus_freq[start_index+i]); 300 | // } 301 | //printf("\n \n"); 302 | } 303 | 304 | } 305 | 306 | void image_to_in_phase_proptioanl(float *MNIST_stimulus_freq, float *image, int training_set_number, float max_frequency, float min_frequency, int pixel_number){ 307 | printf("Not using input normalization___!!"); 308 | for(int i=0;i>>(Input_neuronlist, MNIST_stimulus_freq_device, network_size, start, end); 350 | cudaDeviceSynchronize(); 351 | //cudaFree(signal_device); 352 | cudaFree(MNIST_stimulus_freq_device); 353 | } 354 | 355 | if(function_select == 1){//change raw img data to frequency_signal 356 | int pixel_number = input_image_w*input_image_l*input_image_channel; 357 | 358 | float *old_img = MNIST_stimulus_freq; 359 | //image_to_in_phase_proptioanl_normalized_cifar(MNIST_stimulus_freq, old_img, training_set_number, max_frequency, min_frequency, pixel_number); 360 | image_to_in_phase_proptioanl(MNIST_stimulus_freq, old_img, training_set_number, max_frequency, min_frequency, pixel_number); 361 | //image_to_in_phase_proptioanl_normalized(MNIST_stimulus_freq, old_img, training_set_number, max_frequency, min_frequency, pixel_number); 362 | } 363 | 364 | if(function_select == 2){//reset all input frequency 365 | int SIZE_PER_SIDE = sqrt(network_size)+1; 366 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 367 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 368 | int signal_size = input_image_w*input_image_l*input_image_channel; 369 | reset_stimulus <<>>(Input_neuronlist, network_size, start, end); 370 | cudaDeviceSynchronize(); 371 | } 372 | 373 | } 374 | 375 | 376 | 377 | 378 | void MNIST_drive(Neuron *NeuronList, Input_neuron *Input_neuronlist, float *MNIST_stimulus_freq, int network_size, int training_set_number, int start, int end, float max_frequency, float min_frequency, int function_select, int target){ 379 | if(function_select == 1){//for sc2 sequence 380 | int SIZE_PER_SIDE = sqrt(network_size)+1; 381 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 382 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 383 | // cout<<"current target: "<>>(Input_neuronlist, MNIST_stimulus_freq, one_depth_size, start, end, target, True, False); 386 | sc2_update<<>>(Input_neuronlist, MNIST_stimulus_freq, one_depth_size, start, end, target, False, False); 387 | cudaDeviceSynchronize(); 388 | } 389 | 390 | 391 | if(function_select == 2){//reset all input frequency 392 | //printf("resetting all input neurons\n"); 393 | int SIZE_PER_SIDE = sqrt(network_size)+1; 394 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 395 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 396 | int signal_size = input_image_w*input_image_l*input_image_channel; 397 | // cout<<"start: "<>>(Input_neuronlist, network_size, start, end); 399 | cudaDeviceSynchronize(); 400 | } 401 | 402 | if(function_select == 3){//for time sequence, update one frequency 403 | int SIZE_PER_SIDE = sqrt(network_size)+1; 404 | dim3 
dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 405 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 406 | int signal_size = input_image_w*input_image_l*input_image_channel; 407 | time_seq_update_v2<<>>(Input_neuronlist, network_size, start, end, target); 408 | cudaDeviceSynchronize(); 409 | } 410 | 411 | if(function_select == 4){//for debug: print out input neuron 412 | int input_neuron_num = input_image_w*input_image_l*input_image_channel; 413 | Input_neuron *Input_neuronlist_host = new Input_neuron[input_neuron_num]; 414 | cudaMemcpy(Input_neuronlist_host,Input_neuronlist,input_neuron_num*sizeof(Input_neuron),cudaMemcpyDeviceToHost); 415 | cout< 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | 15 | void find_max(){ 16 | 17 | } 18 | 19 | void read_by_type(float *mnist_img, int *mnist_label, float *output_array, int type, int total_num, int *result_size){ 20 | int count = 0; 21 | int total_pixel = 28*28; 22 | 23 | for(int i=0;i 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | #define MNIST_img_width 28 14 | #define MNIST_img_len 28 15 | 16 | __global__ void init_v (float* output_v, int output_neuron_size){ 17 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 18 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 19 | if(index>output_neuron_size){ 20 | return; 21 | } 22 | output_v[index] = 0; 23 | } 24 | 25 | 26 | __global__ void calculate_v (Neuron *NeuronList, float *img_raw, float *output_v, int output_neuron_size){ 27 | //printf("gpu(update_synapse_counter)"); 28 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 29 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 30 | if(index>output_neuron_size){ 31 | return; 32 | } 33 | int i = 0; 34 | while(NeuronList[index].connected_in[i] > 0.1){ 35 | output_v[index] = output_v[index] + NeuronList[index].connected_weight[i]*img_raw[i]*255; 36 | 37 | i++; 38 | } 39 | //printf("index:%d_added value is: %f\n", index, output_v[index]); 40 | 41 | 42 | } 43 | void MNIST_labeling_2(Neuron *NeuronList, float *img_raw, float *output_v, int output_neuron_size){ 44 | 45 | int SIZE_PER_SIDE = sqrt(output_neuron_size)+1; 46 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 47 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 48 | 49 | float *MNIST_stimulus_freq_device; 50 | 51 | int signal_size = MNIST_img_width*MNIST_img_len; 52 | 53 | cudaMalloc((void **)&MNIST_stimulus_freq_device, signal_size*sizeof(float)); 54 | cudaMemcpy(MNIST_stimulus_freq_device, img_raw, signal_size*sizeof(float), cudaMemcpyHostToDevice); 55 | 56 | int run_time = 2; 57 | init_v<<>>(output_v, output_neuron_size); 58 | 59 | for(int i=0;i>>(NeuronList, MNIST_stimulus_freq_device, output_v, output_neuron_size); 61 | } 62 | 63 | 64 | cudaDeviceSynchronize(); 65 | //cudaFree(signal_device); 66 | cudaFree(MNIST_stimulus_freq_device); 67 | 68 | 69 | } 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParallelSpikeSim (PSS) 2 | 3 | ParallelSpikeSim (PSS) is a GPU accelerated spiking neural network simulator. 
4 | 5 | PSS is first introduced in [paper](https://ieeexplore.ieee.org/abstract/document/8714846) (Author: Xueyuan She, Yun Long and Saibal Mukhopadhyay). Since the introduction, some more functions are added to PSS, including: 6 | 7 | - Spiking convolutional neural network 8 | - Frequency-dependent STDP 9 | - Process in memory (ReRAM) hardware simulation 10 | - Heterogeneous Spiking Neural Network 11 | 12 | If you use PSS in your work, please cite this [paper](https://ieeexplore.ieee.org/abstract/document/8714846). 13 | 14 | ## Support of Heterogeneous Spiking Neural Network 15 | Heterogeneous Spiking Neural Network (H-SNN) as described in [paper](https://www.frontiersin.org/articles/10.3389/fnins.2020.615756/full?&utm_source=Email_to_authors_&utm_medium=Email&utm_content=T1_11.5e1_author&utm_campaign=Email_publication&field=&journalName=Frontiers_in_Neuroscience&id=615756) (Author: Xueyuan She, Saurabh Dash, Daehyun Kim and Saibal Mukhopadhyay), is now supported. To run a learning example, choose option 7 upon start up. For inference, use option 8. 16 | 17 | ## Compile 18 | Include options: boost_1_66_0, opencv4, CImg-2.9.2_pre072920 19 | 20 | library options (-l): opencv_highgui, opencv_imgcodecs, cudadevert, cublas, curand, boost_system, boost_filesystem, cudnn, opencv_imgproc, opencv_core 21 | 22 | The latest release was tested on Ubuntu 18 23 | 24 | ## Prerequisites 25 | - CUDA Toolkit 10.0 26 | - A GPU with compute capability 5.0 or higher -------------------------------------------------------------------------------- /ROI_drive.cu: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | //#include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | #define img_width 64 15 | #define img_len 64 16 | 17 | //This function is used to assign image signal values to input neuron 18 | 19 | __global__ void switch_off_input (Neuron *NeuronList, int network_size, int start, int end){ 20 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 21 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 22 | if(index>end||indexend||indexend||index>>(NeuronList, signal_device, network_size, start_index, end_index); 69 | cudaDeviceSynchronize(); 70 | //cudaFree(signal_device); 71 | } 72 | else if(function_select==1){//switch on 73 | switch_on_input<<>>(NeuronList, network_size, start_index, end_index); 74 | 75 | } 76 | else if(function_select==2){//switch off 77 | switch_off_input<<>>(NeuronList, network_size, start_index, end_index); 78 | } 79 | //delete[] image_signal; 80 | cudaFree(signal_device); 81 | } 82 | -------------------------------------------------------------------------------- /cifar10_reader.hpp: -------------------------------------------------------------------------------- 1 | //======================================================================= 2 | // Copyright (c) 2017 Baptiste Wicht 3 | // Distributed under the terms of the MIT License. 4 | // (See accompanying file LICENSE or copy at 5 | // http://opensource.org/licenses/MIT) 6 | //======================================================================= 7 | 8 | /*! 9 | * \file 10 | * \brief Contains functions to read the CIFAR-10 dataset 11 | */ 12 | 13 | #ifndef CIFAR10_READER_HPP 14 | #define CIFAR10_READER_HPP 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace cifar { 24 | 25 | /*! 
26 | * \brief Represents a complete CIFAR10 dataset 27 | * \tparam Container The container to use 28 | * \tparam Image The type of image 29 | * \tparam Label The type of label 30 | */ 31 | template