├── CNN_util.cu ├── LICENSE ├── MNIST_drive.cu ├── MNIST_labeling.cu ├── MNIST_labeling_2.cu ├── README.md ├── ROI_drive.cu ├── cifar10_reader.hpp ├── convolution_kernel.cu ├── data_check.cu ├── data_reader.cu ├── event_based_learning.cu ├── filter_util.cu ├── header.h ├── img_util.cu ├── inference_options.cu ├── learning_options.cu ├── main.cu ├── network_config_generator.cpp ├── read_neuron_list.cu ├── spiking_cnn_main.cu ├── spiking_learning_drive.cu ├── spiking_learning_main.cu ├── synapse_drive_cnn_v2.cu └── write_neuron_list.cu /CNN_util.cu: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | 3 | __global__ void read_array(float **d_instance_matrix_array, float **d_convolution_result_array){ 4 | 5 | printf("Reading array on GPU: "); 6 | printf("%f \n", d_instance_matrix_array[1][1]); 7 | printf("%f \n", d_convolution_result_array[0][0]); 8 | } 9 | 10 | int CNN_util(CNN_struct *settings, float **d_instance_matrix_array, float **d_convolution_result_array, float **h_instance_matrix_array, float **h_convolution_result_array, int function_select){ 11 | 12 | 13 | if (function_select==0){//initialize arrays 14 | //first initialize instance_matrix, this is the input to convolution kernel 15 | int instance_array_size = CNN_total_layer_num; 16 | 17 | for (int i=0;ilayer[i].neuron_num; 19 | float *temp = new float[matrix_size]; 20 | for(int j=0;jlayer[i+1].neuron_num; 33 | float *temp = new float[matrix_size]; 34 | for(int j=0;j>>(d_instance_matrix_array, d_convolution_result_array); 47 | 48 | 49 | return 0; 50 | } 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Cognitive Anteater Robotics Laboratory @ University of Calfornia, Irvine 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
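> Note on `CNN_util.cu` above: the loop bounds and several statements between angle brackets were lost when this listing was exported, so the `function_select==0` branch is incomplete here. The snippet below is only a minimal, hedged sketch of the general pattern that branch appears to implement — one zero-initialised device buffer per layer plus a device-resident pointer table that a kernel such as `read_array` can index. `num_layers`, `layer_sizes`, and `main` are placeholder names for this sketch, not identifiers from the repository (the real code sizes buffers from `settings->layer[i].neuron_num` and `CNN_total_layer_num`).

```cuda
// Hedged sketch of a per-layer "array of device buffers" (float**) setup.
// Placeholder names: num_layers, layer_sizes, h_ptrs, d_ptrs.
#include <cuda_runtime.h>
#include <cstdio>

__global__ void read_array(float **d_matrix_array) {
    // Touch one element of the first per-layer buffer to confirm the pointers are valid.
    printf("layer 0, element 0: %f\n", d_matrix_array[0][0]);
}

int main() {
    const int num_layers = 3;                            // stand-in for CNN_total_layer_num
    const int layer_sizes[num_layers] = {784, 256, 10};  // stand-in for per-layer neuron counts

    // Host-side array holding one device pointer per layer.
    float *h_ptrs[num_layers];
    for (int i = 0; i < num_layers; ++i) {
        float *zeros = new float[layer_sizes[i]]();      // zero-initialised host staging buffer
        cudaMalloc(&h_ptrs[i], layer_sizes[i] * sizeof(float));
        cudaMemcpy(h_ptrs[i], zeros, layer_sizes[i] * sizeof(float), cudaMemcpyHostToDevice);
        delete[] zeros;
    }

    // The pointer table itself must also live in device memory before a kernel can index it.
    float **d_ptrs;
    cudaMalloc(&d_ptrs, num_layers * sizeof(float *));
    cudaMemcpy(d_ptrs, h_ptrs, num_layers * sizeof(float *), cudaMemcpyHostToDevice);

    read_array<<<1, 1>>>(d_ptrs);
    cudaDeviceSynchronize();

    for (int i = 0; i < num_layers; ++i) cudaFree(h_ptrs[i]);
    cudaFree(d_ptrs);
    return 0;
}
```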
-------------------------------------------------------------------------------- /MNIST_drive.cu: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | #define MNIST_img_width 28 14 | #define MNIST_img_len 28 15 | 16 | __global__ void sc2_update (Input_neuron *Input_neuronlist, float *MNIST_stimulus_freq, int one_depth_size, int start, int end, int target, bool shifting, bool get_only_one){ 17 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 18 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 19 | if(index>=end||index=end||index=end||index0) Input_neuronlist[index].state[1] += 2; 83 | } 84 | 85 | // printf("No.: %d, spike interval is: %f, RAW_freq is: %f \n",index, Input_neuronlist[index].state[1], MNIST_stimulus_freq[signal_index]); 86 | } 87 | } 88 | 89 | __global__ void reset_stimulus (Input_neuron *Input_neuronlist, int network_size, int start, int end){ 90 | 91 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 92 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 93 | if(index>=end||index=end||index=end||indexMNIST_stimulus_freq[start_index+i]) min=MNIST_stimulus_freq[start_index+i]; 149 | } 150 | for (i=0; i=input_image_channel) channel_flag = 0; 179 | total_sum[channel_flag] += image[tot_i]; 180 | } 181 | double denominator = (training_set_number*pixel_number/input_image_channel); 182 | printf("====Normalization Per Channel Used======: "); 183 | 184 | for(int i=0; i=input_image_channel) channel_flag = 0; 203 | sum[channel_flag] += image[start_index+i]; 204 | } 205 | for(int channel_i=0; channel_i=input_image_channel) channel_flag = 0; 212 | MNIST_stimulus_freq[start_index+i]=(image[start_index+i]*(total_mean[channel_flag]/mean[channel_flag])); 213 | MNIST_stimulus_freq[start_index+i] = (max_frequency-min_frequency)*MNIST_stimulus_freq[start_index+i]+min_frequency; 214 | //channel_flag ++; 215 | 216 | } 217 | // for(i=0;i10) printf(" %f|", MNIST_stimulus_freq[start_index+i]); 220 | // } 221 | //printf("\n \n"); 222 | } 223 | } 224 | 225 | void image_to_in_phase_proptioanl_normalized_0_1(float *MNIST_stimulus_freq, float *image, int training_set_number, float max_frequency, float min_frequency, int pixel_number){ 226 | float total_sum = 0.0, total_mean; 227 | float global_min = 1000000; 228 | float global_max = -1000000; 229 | for(int tot_i=0;tot_iglobal_max) global_max = image[tot_i]; 231 | if(image[tot_i]2) image[i]=2; 260 | if(image[i]<-2) image[i]=-2; 261 | 262 | MNIST_stimulus_freq[i] = (max_frequency-min_frequency)/2+((max_frequency-min_frequency)/4)*image[i]; 263 | // printf(" %f|", MNIST_stimulus_freq[i]); 264 | } 265 | 266 | // for (int y=0; y<28; ++y) { 267 | // for (int x=0; x<28; ++x) { 268 | // //std::cout << ((one_mnist_img[y*28+x] == 0.0)? 
' ' : '*'); 269 | // std::cout << std::to_string((MNIST_stimulus_freq[y*28+x])) << ' '; 270 | // } 271 | // std::cout << std::endl; 272 | // } 273 | // cout<<"#############inphasepro#############"<=input_image_channel) channel_flag = 0; 293 | MNIST_stimulus_freq[start_index+i]=(image[start_index+i]-mean[channel_flag])/std[channel_flag]; 294 | MNIST_stimulus_freq[start_index+i] = (max_frequency-min_frequency)*MNIST_stimulus_freq[start_index+i]+min_frequency; 295 | //channel_flag ++; 296 | } 297 | // for(i=0;i10) printf(" %f|", MNIST_stimulus_freq[start_index+i]); 300 | // } 301 | //printf("\n \n"); 302 | } 303 | 304 | } 305 | 306 | void image_to_in_phase_proptioanl(float *MNIST_stimulus_freq, float *image, int training_set_number, float max_frequency, float min_frequency, int pixel_number){ 307 | printf("Not using input normalization___!!"); 308 | for(int i=0;i>>(Input_neuronlist, MNIST_stimulus_freq_device, network_size, start, end); 350 | cudaDeviceSynchronize(); 351 | //cudaFree(signal_device); 352 | cudaFree(MNIST_stimulus_freq_device); 353 | } 354 | 355 | if(function_select == 1){//change raw img data to frequency_signal 356 | int pixel_number = input_image_w*input_image_l*input_image_channel; 357 | 358 | float *old_img = MNIST_stimulus_freq; 359 | //image_to_in_phase_proptioanl_normalized_cifar(MNIST_stimulus_freq, old_img, training_set_number, max_frequency, min_frequency, pixel_number); 360 | image_to_in_phase_proptioanl(MNIST_stimulus_freq, old_img, training_set_number, max_frequency, min_frequency, pixel_number); 361 | //image_to_in_phase_proptioanl_normalized(MNIST_stimulus_freq, old_img, training_set_number, max_frequency, min_frequency, pixel_number); 362 | } 363 | 364 | if(function_select == 2){//reset all input frequency 365 | int SIZE_PER_SIDE = sqrt(network_size)+1; 366 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 367 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 368 | int signal_size = input_image_w*input_image_l*input_image_channel; 369 | reset_stimulus <<>>(Input_neuronlist, network_size, start, end); 370 | cudaDeviceSynchronize(); 371 | } 372 | 373 | } 374 | 375 | 376 | 377 | 378 | void MNIST_drive(Neuron *NeuronList, Input_neuron *Input_neuronlist, float *MNIST_stimulus_freq, int network_size, int training_set_number, int start, int end, float max_frequency, float min_frequency, int function_select, int target){ 379 | if(function_select == 1){//for sc2 sequence 380 | int SIZE_PER_SIDE = sqrt(network_size)+1; 381 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 382 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 383 | // cout<<"current target: "<>>(Input_neuronlist, MNIST_stimulus_freq, one_depth_size, start, end, target, True, False); 386 | sc2_update<<>>(Input_neuronlist, MNIST_stimulus_freq, one_depth_size, start, end, target, False, False); 387 | cudaDeviceSynchronize(); 388 | } 389 | 390 | 391 | if(function_select == 2){//reset all input frequency 392 | //printf("resetting all input neurons\n"); 393 | int SIZE_PER_SIDE = sqrt(network_size)+1; 394 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 395 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 396 | int signal_size = input_image_w*input_image_l*input_image_channel; 397 | // cout<<"start: "<>>(Input_neuronlist, network_size, start, end); 399 | cudaDeviceSynchronize(); 400 | } 401 | 402 | if(function_select == 3){//for time sequence, update one frequency 403 | int SIZE_PER_SIDE = sqrt(network_size)+1; 404 | dim3 
dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 405 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 406 | int signal_size = input_image_w*input_image_l*input_image_channel; 407 | time_seq_update_v2<<>>(Input_neuronlist, network_size, start, end, target); 408 | cudaDeviceSynchronize(); 409 | } 410 | 411 | if(function_select == 4){//for debug: print out input neuron 412 | int input_neuron_num = input_image_w*input_image_l*input_image_channel; 413 | Input_neuron *Input_neuronlist_host = new Input_neuron[input_neuron_num]; 414 | cudaMemcpy(Input_neuronlist_host,Input_neuronlist,input_neuron_num*sizeof(Input_neuron),cudaMemcpyDeviceToHost); 415 | cout< 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | 15 | void find_max(){ 16 | 17 | } 18 | 19 | void read_by_type(float *mnist_img, int *mnist_label, float *output_array, int type, int total_num, int *result_size){ 20 | int count = 0; 21 | int total_pixel = 28*28; 22 | 23 | for(int i=0;i 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | #define MNIST_img_width 28 14 | #define MNIST_img_len 28 15 | 16 | __global__ void init_v (float* output_v, int output_neuron_size){ 17 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 18 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 19 | if(index>output_neuron_size){ 20 | return; 21 | } 22 | output_v[index] = 0; 23 | } 24 | 25 | 26 | __global__ void calculate_v (Neuron *NeuronList, float *img_raw, float *output_v, int output_neuron_size){ 27 | //printf("gpu(update_synapse_counter)"); 28 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 29 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 30 | if(index>output_neuron_size){ 31 | return; 32 | } 33 | int i = 0; 34 | while(NeuronList[index].connected_in[i] > 0.1){ 35 | output_v[index] = output_v[index] + NeuronList[index].connected_weight[i]*img_raw[i]*255; 36 | 37 | i++; 38 | } 39 | //printf("index:%d_added value is: %f\n", index, output_v[index]); 40 | 41 | 42 | } 43 | void MNIST_labeling_2(Neuron *NeuronList, float *img_raw, float *output_v, int output_neuron_size){ 44 | 45 | int SIZE_PER_SIDE = sqrt(output_neuron_size)+1; 46 | dim3 dimBlock( ThreadsPerBlock, ThreadsPerBlock ); 47 | dim3 dimGrid( (SIZE_PER_SIDE/dimBlock.x+1), (SIZE_PER_SIDE/dimBlock.y+1)); 48 | 49 | float *MNIST_stimulus_freq_device; 50 | 51 | int signal_size = MNIST_img_width*MNIST_img_len; 52 | 53 | cudaMalloc((void **)&MNIST_stimulus_freq_device, signal_size*sizeof(float)); 54 | cudaMemcpy(MNIST_stimulus_freq_device, img_raw, signal_size*sizeof(float), cudaMemcpyHostToDevice); 55 | 56 | int run_time = 2; 57 | init_v<<>>(output_v, output_neuron_size); 58 | 59 | for(int i=0;i>>(NeuronList, MNIST_stimulus_freq_device, output_v, output_neuron_size); 61 | } 62 | 63 | 64 | cudaDeviceSynchronize(); 65 | //cudaFree(signal_device); 66 | cudaFree(MNIST_stimulus_freq_device); 67 | 68 | 69 | } 70 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParallelSpikeSim (PSS) 2 | 3 | ParallelSpikeSim (PSS) is a GPU accelerated spiking neural network simulator. 
4 | 5 | PSS is first introduced in [paper](https://ieeexplore.ieee.org/abstract/document/8714846) (Author: Xueyuan She, Yun Long and Saibal Mukhopadhyay). Since the introduction, some more functions are added to PSS, including: 6 | 7 | - Spiking convolutional neural network 8 | - Frequency-dependent STDP 9 | - Process in memory (ReRAM) hardware simulation 10 | - Heterogeneous Spiking Neural Network 11 | 12 | If you use PSS in your work, please cite this [paper](https://ieeexplore.ieee.org/abstract/document/8714846). 13 | 14 | ## Support of Heterogeneous Spiking Neural Network 15 | Heterogeneous Spiking Neural Network (H-SNN) as described in [paper](https://www.frontiersin.org/articles/10.3389/fnins.2020.615756/full?&utm_source=Email_to_authors_&utm_medium=Email&utm_content=T1_11.5e1_author&utm_campaign=Email_publication&field=&journalName=Frontiers_in_Neuroscience&id=615756) (Author: Xueyuan She, Saurabh Dash, Daehyun Kim and Saibal Mukhopadhyay), is now supported. To run a learning example, choose option 7 upon start up. For inference, use option 8. 16 | 17 | ## Compile 18 | Include options: boost_1_66_0, opencv4, CImg-2.9.2_pre072920 19 | 20 | library options (-l): opencv_highgui, opencv_imgcodecs, cudadevert, cublas, curand, boost_system, boost_filesystem, cudnn, opencv_imgproc, opencv_core 21 | 22 | The latest release was tested on Ubuntu 18 23 | 24 | ## Prerequisites 25 | - CUDA Toolkit 10.0 26 | - A GPU with compute capability 5.0 or higher -------------------------------------------------------------------------------- /ROI_drive.cu: -------------------------------------------------------------------------------- 1 | #include "header.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | //#include 9 | #include 10 | #include 11 | 12 | using namespace std; 13 | 14 | #define img_width 64 15 | #define img_len 64 16 | 17 | //This function is used to assign image signal values to input neuron 18 | 19 | __global__ void switch_off_input (Neuron *NeuronList, int network_size, int start, int end){ 20 | int blockId = blockIdx.x + blockIdx.y * gridDim.x; 21 | int index = blockId * (blockDim.x * blockDim.y) + (threadIdx.y * blockDim.x) + threadIdx.x; 22 | if(index>end||indexend||indexend||index>>(NeuronList, signal_device, network_size, start_index, end_index); 69 | cudaDeviceSynchronize(); 70 | //cudaFree(signal_device); 71 | } 72 | else if(function_select==1){//switch on 73 | switch_on_input<<>>(NeuronList, network_size, start_index, end_index); 74 | 75 | } 76 | else if(function_select==2){//switch off 77 | switch_off_input<<>>(NeuronList, network_size, start_index, end_index); 78 | } 79 | //delete[] image_signal; 80 | cudaFree(signal_device); 81 | } 82 | -------------------------------------------------------------------------------- /cifar10_reader.hpp: -------------------------------------------------------------------------------- 1 | //======================================================================= 2 | // Copyright (c) 2017 Baptiste Wicht 3 | // Distributed under the terms of the MIT License. 4 | // (See accompanying file LICENSE or copy at 5 | // http://opensource.org/licenses/MIT) 6 | //======================================================================= 7 | 8 | /*! 9 | * \file 10 | * \brief Contains functions to read the CIFAR-10 dataset 11 | */ 12 | 13 | #ifndef CIFAR10_READER_HPP 14 | #define CIFAR10_READER_HPP 15 | 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | namespace cifar { 24 | 25 | /*! 
26 | * \brief Represents a complete CIFAR10 dataset 27 | * \tparam Container The container to use 28 | * \tparam Image The type of image 29 | * \tparam Label The type of label 30 | */ 31 | template