├── Deploying Image Super Resolution Deep Learning Models in FPGA.pdf ├── RDN_SR.cpp ├── README.md ├── calib.py ├── freeze_graph.py └── keras_2_tf.py /Deploying Image Super Resolution Deep Learning Models in FPGA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gkrislara/Image-super-resolution-FPGA/58cd87e98c4d60ea99dc6bc4c308415064b750d4/Deploying Image Super Resolution Deep Learning Models in FPGA.pdf -------------------------------------------------------------------------------- /RDN_SR.cpp: -------------------------------------------------------------------------------- 1 | /*This is an application created to test the functionality of 2 | Deep Learning Image Super Resolution Model - Residual Dense Network in XILINX ZYNQ ULTRASCALE+ ZCU102 MPSoC 3 | This code is considered opensource and accepts contributions and constructive criticism 4 | Developed in Healthcare Technology Innovation Centre - Indian Institute of Technology, Madras 5 | 6 | Further Upgradations 7 | Unified Memory model for all functions 8 | Portablilty to other applications eg :gstreamer 9 | */ 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | 36 | using namespace std; 37 | using namespace std::chrono; //time 38 | 39 | /* Specify Kernel name, input and output nodes as produced in compilation and 40 | load the kernel libraries to ZCU102 before running this application 41 | */ 42 | 43 | #define DPU_KERNEL1 "RDN_44x44_C6D20G64G064x2" 44 | #define DPU_KERNEL2 "RDN_84x84_C6D20G64G064x2" 45 | #define DPU_KERNEL3 "RDN_124x124_C6D20G64G064x2" 46 | 47 | #define INPUT_NODE "F_m1_convolution" //tf v1.12.0 48 | #define OUTPUT_NODE "SR_convolution" //tf v1.12.0 49 | 50 | // Profiling time taken for a function 51 | 52 | #define SHOWTIME 53 | #ifdef SHOWTIME 54 | #define _T(func) \ 55 | { \ 56 | auto _start = system_clock::now(); \ 57 | func; \ 58 | auto _end = system_clock::now(); \ 59 | auto duration = (duration_cast(_end - _start)).count(); \ 60 | string tmp = #func; \ 61 | tmp = tmp.substr(0, tmp.find('(')); \ 62 | cout << "[TimeTest]" << left << setw(30) << tmp; \ 63 | cout << left << setw(10) << duration << "us" << endl; \ 64 | } 65 | #else 66 | #define _T(func) func; 67 | #endif 68 | 69 | void ListImages(std::string const &path, std::vector &images) { 70 | images.clear(); 71 | struct dirent *entry; 72 | 73 | /*Check if path is a valid directory path. */ 74 | struct stat s; 75 | lstat(path.c_str(), &s); 76 | 77 | if (!S_ISDIR(s.st_mode)) { 78 | images.push_back(path); 79 | return; 80 | } 81 | 82 | DIR *dir = opendir(path.c_str()); 83 | if (dir == nullptr) { 84 | fprintf(stderr, "Error: Open %s path failed.\n", path.c_str()); 85 | exit(1); 86 | } 87 | 88 | while ((entry = readdir(dir)) != nullptr) { 89 | if (entry->d_type == DT_REG || entry->d_type == DT_UNKNOWN) { 90 | std::string name = entry->d_name; 91 | std::string ext = name.substr(name.find_last_of(".") + 1); 92 | if ((ext == "JPEG") || (ext == "jpeg") || (ext == "JPG") || (ext == "jpg") || 93 | (ext == "PNG") || (ext == "png")) { 94 | images.push_back(path + "/" + name); 95 | } 96 | } 97 | } 98 | 99 | closedir(dir); 100 | } 101 | 102 | // setimage to DPU buffer 103 | 104 | int dpuSetInputImage(DPUTask *task, const char* nodeName, const cv::Mat &image,int idx=0) 105 | { 106 | int value; 107 | int8_t *inputAddr; 108 | unsigned char *resized_data; 109 | cv::Mat newImage; 110 | float scaleFix; 111 | int height, width, channel; 112 | 113 | height = dpuGetInputTensorHeight(task, nodeName, idx); 114 | width = dpuGetInputTensorWidth(task, nodeName, idx); 115 | channel = dpuGetInputTensorChannel(task, nodeName, idx); 116 | 117 | 118 | if (height == image.rows && width == image.cols) { 119 | newImage = image; 120 | } 121 | else{ 122 | std::cout<<"Required image size "<(idx_h, idx_w)[idx_c]* scaleFix; 135 | } 136 | } 137 | } 138 | return scaleFix; 139 | } 140 | 141 | /*normalises input patch to 0-1, 142 | dpusetinputimage the data to buffer, 143 | DPU processes the data and puts back to global buffer, 144 | getoutputtensor gets the data from global buffer to application memory 145 | rescale the pixel values to 0-255 146 | */ 147 | cv::Mat runPatch(cv::Mat patch,DPUTask *taskConv) 148 | { 149 | int height=dpuGetOutputTensorHeight(taskConv,OUTPUT_NODE,0); 150 | int width=dpuGetOutputTensorWidth(taskConv,OUTPUT_NODE,0); 151 | int channel=dpuGetOutputTensorChannel(taskConv,OUTPUT_NODE,0); 152 | int total_size=height*width*channel; 153 | cv::Mat reimg= cv::Mat(height,width,CV_32FC3,cv::Scalar(0.0,0.0,0.0)); 154 | 155 | int8_t outdata[total_size]={0}; 156 | float scaleFix=0.0; 157 | 158 | cv::Mat image= patch; 159 | cv::cvtColor(image, image, cv::COLOR_BGR2RGB);//cvtcolor bgr-rgb 160 | cv::normalize(image,image,0,1,cv::NORM_MINMAX,CV_32F);///norm - must be present 32 -const 161 | scaleFix=dpuSetInputImage(taskConv,INPUT_NODE,image);//setimage--Flag issue 162 | 163 | dpuRunTask(taskConv); 164 | 165 | dpuGetOutputTensorInHWCInt8(taskConv,OUTPUT_NODE,outdata,total_size);//get output 166 | 167 | //tensor to cv::mat 168 | 169 | for (int idx_h=0; idx_h(idx_h, idx_w)[2-idx_c]=outdata[idx_h*width*channel+idx_w*channel+idx_c]; 173 | } 174 | } 175 | } 176 | cv::normalize(reimg,reimg,0,255,cv::NORM_MINMAX,-1); 177 | 178 | return reimg; 179 | 180 | } 181 | 182 | 183 | static int oheight; 184 | static int owidth; 185 | 186 | /*given an image matrix, this function splits the matrix into sub matrices of specified 2d size*/ 187 | 188 | std::vector patchify(cv::Mat img,int opatch_size,int padding_size=2) 189 | { 190 | int patch_size=opatch_size; 191 | std::vector cpatches; 192 | cv::Mat patch; 193 | cv::Mat image= img; 194 | int width=image.cols; 195 | int height= image.rows; 196 | int w_rem= width % patch_size; 197 | int h_rem = height % patch_size; 198 | int w_extend = patch_size-w_rem; 199 | int h_extend = patch_size-h_rem; 200 | cv::Mat ext_image; 201 | cv::copyMakeBorder(image,ext_image,0,h_extend,0,w_extend,cv::BORDER_REPLICATE); 202 | cv::copyMakeBorder(ext_image,ext_image,padding_size,padding_size,padding_size,padding_size,cv::BORDER_REPLICATE); 203 | 204 | oheight=ext_image.rows; 205 | owidth=ext_image.cols; 206 | int w_left,w_width,h_top,h_height; 207 | for(int i=padding_size;i unpad(std::vector patches,int pad){ 225 | cv::Mat patch; 226 | std::vector uppatches; 227 | for(auto it=patches.begin();it!=patches.end();++it) 228 | { 229 | patch=*it; 230 | uppatches.push_back(patch(cv::Rect(2*pad,2*pad,patch.cols-2*pad,patch.rows-2*pad))); 231 | } 232 | return uppatches; 233 | } 234 | 235 | /*stiches the matrices back to specified size*/ 236 | 237 | cv::Mat depatchify(std::vector cpatches, int op_width, int op_height,int padding_size=4) 238 | { 239 | cv::Mat image(2*oheight,2*owidth,CV_32FC3,cv::Scalar(0,0,0)); 240 | std::vector patches=unpad(cpatches,padding_size); 241 | int patch_size=patches[0].cols; 242 | int patches_per_col = 2*oheight/patch_size; 243 | 244 | int col=-1,row=0; 245 | for(int i=0;i patches,patchesx2; 264 | cv::Mat patchx2; 265 | 266 | cv::Mat image= cv::imread(imgpath); 267 | int imwidth=image.cols; 268 | int imheight=image.rows; 269 | int errsize; 270 | std::cout<<"\n\nPROCESS STARTED\nProcessing "< images; 326 | ListImages(ipath, images); 327 | for (std::string &filepath : images) 328 | { 329 | _T(runRDN_SR(filepath, taskConv,ipsize)); 330 | } 331 | dpuDestroyTask(taskConv); 332 | dpuDestroyKernel(kernelConv); 333 | 334 | } 335 | 336 | 337 | int main(int argc, char** argv) 338 | { 339 | if (argc != 2) { 340 | cout << "Usage of RDN: ./RDN_SR file_name" << endl; 341 | cout << "\tfile_name: path to your image file" << endl; 342 | return -1; 343 | } 344 | dpuOpen(); 345 | RDN_SR(DPU_KERNEL1,argv[1]); 346 | RDN_SR(DPU_KERNEL2,argv[1]); 347 | RDN_SR(DPU_KERNEL3,argv[1]); 348 | dpuClose(); 349 | 350 | return 0; 351 | } 352 | 353 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Edge Inference of Image Super Resolution Deep Learning Models 2 | Deployment of Deep learning Image Super-Resolution Models in Xilinx Zynq MPSoC ZCU102 3 | 4 | ## Instructions 5 | 6 | Follow the .pdf file in the repository for complete information 7 | 8 | ## Prerequisites 9 | - Knowledge on FPGA and Integrating IPs in Vivado 10 | - Knowledge on Computer Architecture and Embedded Systems 11 | - Linux Operating system - Petalinux/Yocto 12 | - Basics of Deep learning 13 | - Programming Languages: Cpp and Python 14 | - Frameworks/libraries: Opencv,Numpy,Tensorflow 15 | 16 | ## Tools 17 | - Vitis v2019.2 18 | - Vitis AI SDK v1.0 19 | - Vivado v2019.2 20 | - Petalinux v2019.2 21 | 22 | ## Credits 23 | - [Francesco Cardinale for Image Super Resolution Models ](https://github.com/idealo/image-super-resolution) 24 | - [Morgan giraud for freeze_graph.py](https://github.com/morgangiraud) 25 | - [fengwang for subpixel layer](https://github.com/fengwang/subpixel_conv2d) 26 | 27 | ## Citation 28 | This is my Under Graduate Project done at Healthcare Technology Innovation Centre IIT Madras. Please Cite if it helps your work 29 | ``` 30 | @misc{gokulakrishnan2020ISRFPGA, 31 | title={Deploying Deep learning Image Super-Resolution Models in Xilinx Zynq MPSoC ZCU102}, 32 | author={Gokula Krishnan Ravi}, 33 | year={2020}, 34 | howpublished={\url{https://github.com/gkrislara/Image-super-resolution-FPGA}}, 35 | } 36 | ``` 37 | -------------------------------------------------------------------------------- /calib.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import os 3 | import numpy as np 4 | import glob 5 | import random 6 | 7 | 8 | calib_image_dir = "path/to/image/directory" 9 | 10 | calib_batch_size = 50 11 | 12 | image_files = [f for f in glob.glob(calib_image_dir+'*.png')] 13 | 14 | 15 | random_index=[] 16 | for i in range(100): 17 | random_index.append(random.randrange(0,len(image_files))) 18 | random_index=np.asarray(random_index) 19 | 20 | def calib_input(iter): 21 | images=[] 22 | for index in range(0, calib_batch_size): 23 | curimg = random_index[iter * calib_batch_size + index] 24 | filename = image_files[curimg] 25 | im = Image.open(filename) 26 | image = np.asarray(im) 27 | image=image/255.0 28 | images.append(image.tolist()) 29 | return {"LR": images} 30 | -------------------------------------------------------------------------------- /freeze_graph.py: -------------------------------------------------------------------------------- 1 | import os, argparse 2 | 3 | import tensorflow as tf 4 | 5 | # The original freeze_graph function 6 | # from tensorflow.python.tools.freeze_graph import freeze_graph 7 | 8 | dir = os.path.dirname(os.path.realpath(__file__)) 9 | 10 | def freeze_graph(model_dir, output_node_names): 11 | """Extract the sub graph defined by the output nodes and convert 12 | all its variables into constant 13 | Args: 14 | model_dir: the root folder containing the checkpoint state file 15 | output_node_names: a string, containing all the output node's names, 16 | comma separated 17 | """ 18 | if not tf.gfile.Exists(model_dir): 19 | raise AssertionError( 20 | "Export directory doesn't exists. Please specify an export " 21 | "directory: %s" % model_dir) 22 | 23 | if not output_node_names: 24 | print("You need to supply the name of a node to --output_node_names.") 25 | return -1 26 | 27 | # We retrieve our checkpoint fullpath 28 | checkpoint = tf.train.get_checkpoint_state(model_dir) 29 | input_checkpoint = checkpoint.model_checkpoint_path 30 | 31 | # We precise the file fullname of our freezed graph 32 | absolute_model_dir = "/".join(input_checkpoint.split('/')[:-1]) 33 | output_graph = absolute_model_dir + "/frozen_model.pb" 34 | 35 | # We clear devices to allow TensorFlow to control on which device it will load operations 36 | clear_devices = True 37 | 38 | # We start a session using a temporary fresh Graph 39 | with tf.Session(graph=tf.Graph()) as sess: 40 | # We import the meta graph in the current default Graph 41 | saver = tf.train.import_meta_graph(input_checkpoint + '.meta', clear_devices=clear_devices) 42 | 43 | # We restore the weights 44 | saver.restore(sess, input_checkpoint) 45 | 46 | # We use a built-in TF helper to export variables to constants 47 | output_graph_def = tf.graph_util.convert_variables_to_constants( 48 | sess, # The session is used to retrieve the weights 49 | tf.get_default_graph().as_graph_def(), # The graph_def is used to retrieve the nodes 50 | output_node_names.split(",") # The output node names are used to select the usefull nodes 51 | ) 52 | 53 | # Finally we serialize and dump the output graph to the filesystem 54 | with tf.gfile.GFile(output_graph, "wb") as f: 55 | f.write(output_graph_def.SerializeToString()) 56 | print("%d ops in the final graph." % len(output_graph_def.node)) 57 | 58 | return output_graph_def 59 | 60 | if __name__ == '__main__': 61 | parser = argparse.ArgumentParser() 62 | parser.add_argument("--model_dir", type=str, default="", help="Model folder to export") 63 | parser.add_argument("--output_node_names", type=str, default="", help="The name of the output nodes, comma separated.") 64 | args = parser.parse_args() 65 | 66 | freeze_graph(args.model_dir, args.output_node_names) -------------------------------------------------------------------------------- /keras_2_tf.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import sys 4 | 5 | import keras 6 | from keras import backend 7 | from keras.models import model_from_json, load_model 8 | import tensorflow as tf 9 | 10 | # Silence TensorFlow messages 11 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' 12 | 13 | 14 | def keras2tf(keras_json,keras_hdf5,tfckpt,tf_graph): 15 | 16 | # set learning phase for no training 17 | backend.set_learning_phase(0) 18 | 19 | if (keras_json != ''): 20 | json_file = open(keras_json, 'r') 21 | loaded_model_json = json_file.read() 22 | json_file.close() 23 | loaded_model = model_from_json(loaded_model_json) 24 | loaded_model.load_weights(keras_hdf5) 25 | 26 | else: 27 | loaded_model = load_model(keras_hdf5) 28 | 29 | 30 | print ('Keras model information:') 31 | print (' Input names :',loaded_model.inputs) 32 | print (' Output names:',loaded_model.outputs) 33 | print('-------------------------------------') 34 | 35 | # set up tensorflow saver object 36 | saver = tf.train.Saver() 37 | 38 | # fetch the tensorflow session using the Keras backend 39 | tf_session = backend.get_session() 40 | 41 | # get the tensorflow session graph 42 | input_graph_def = tf_session.graph.as_graph_def() 43 | 44 | 45 | # get the TensorFlow graph path, flilename and file extension 46 | tfgraph_path = os.path.dirname(tf_graph) 47 | tfgraph_filename = os.path.basename(tf_graph) 48 | _, ext = os.path.splitext(tfgraph_filename) 49 | 50 | if ext == '.pbtxt': 51 | asText = True 52 | else: 53 | asText = False 54 | 55 | # write out tensorflow checkpoint & inference graph for use with freeze_graph script 56 | save_path = saver.save(tf_session, tfckpt) 57 | tf.train.write_graph(input_graph_def, tfgraph_path, tfgraph_filename, as_text=asText) 58 | 59 | print ('TensorFlow information:') 60 | print (' Checkpoint saved as:',tfckpt) 61 | print (' Graph saved as :',os.path.join(tfgraph_path,tfgraph_filename)) 62 | print('-------------------------------------') 63 | 64 | return 65 | 66 | 67 | def main(): 68 | 69 | ap = argparse.ArgumentParser() 70 | 71 | ap.add_argument('-kj', '--keras_json', 72 | type=str, 73 | default='', 74 | help='path of Keras JSON. Default is empty string to indicate no JSON file') 75 | ap.add_argument('-kh', '--keras_hdf5', 76 | type=str, 77 | default='./model.hdf5', 78 | help='path of Keras HDF5. Default is ./model.hdf5') 79 | ap.add_argument('-tfc', '--tfckpt', 80 | type=str, 81 | default='./tfchkpt.ckpt', 82 | help='path of TensorFlow checkpoint. Default is ./tfchkpt.ckpt') 83 | ap.add_argument('-tfg', '--tf_graph', 84 | type=str, 85 | default='./tf_graph.pb', 86 | help='path of TensorFlow graph. Default is ./tf_graph.pb') 87 | args = ap.parse_args() 88 | 89 | 90 | print('\n------------------------------------') 91 | print('Keras version :',keras.__version__) 92 | print('TensorFlow version :',tf.__version__) 93 | print('Python version :',(sys.version)) 94 | print('-------------------------------------') 95 | print('keras_2_tf command line arguments:') 96 | print(' --keras_json:', args.keras_json) 97 | print(' --keras_hdf5:', args.keras_hdf5) 98 | print(' --tfckpt :', args.tfckpt) 99 | print(' --tf_graph :', args.tf_graph) 100 | print('-------------------------------------') 101 | 102 | keras2tf(args.keras_json,args.keras_hdf5,args.tfckpt,args.tf_graph) 103 | 104 | 105 | 106 | if __name__ == '__main__': 107 | main() 108 | 109 | 110 | --------------------------------------------------------------------------------