├── pics
│   ├── pictures.txt
│   ├── v4_.png
│   ├── v4_a.png
│   ├── v4 mk2.png
│   ├── 4_24000val.gif
│   ├── runet_v4_a.png
│   ├── 4_24000val_gt.gif
│   ├── runet_v2.0_std.png
│   └── runet_v2.5_std.png
├── runet_v1.jpg
├── config.json
├── parse_argument.py
├── README.md
├── models
│   ├── conv_lstm.py
│   ├── R_Unet_ver_2_7.py
│   ├── R_Unet_ver_2_5.py
│   ├── R_Unet_v1.py
│   ├── R_Unet_ver_2.py
│   ├── R_Unet_ver_4.py
│   ├── R_Unet_ver_3.py
│   ├── R_Unet_ver_M2.py
│   └── R_Unet_ver_M.py
├── train.py
├── utils.py
└── train_v2.py
/pics/pictures.txt:
--------------------------------------------------------------------------------
1 | This folder stores uploaded pictures and GIFs.
2 | 
--------------------------------------------------------------------------------
/pics/v4_.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/v4_.png
--------------------------------------------------------------------------------
/pics/v4_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/v4_a.png
--------------------------------------------------------------------------------
/runet_v1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/runet_v1.jpg
--------------------------------------------------------------------------------
/pics/v4 mk2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/v4 mk2.png
--------------------------------------------------------------------------------
/pics/4_24000val.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/4_24000val.gif
--------------------------------------------------------------------------------
/pics/runet_v4_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/runet_v4_a.png
--------------------------------------------------------------------------------
/pics/4_24000val_gt.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/4_24000val_gt.gif
--------------------------------------------------------------------------------
/pics/runet_v2.0_std.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/runet_v2.0_std.png
--------------------------------------------------------------------------------
/pics/runet_v2.5_std.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Michael-MuChienHsu/R_Unet/HEAD/pics/runet_v2.5_std.png
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "videopath" : "video path",
3 |     "output_path": "output path",
4 |     "version" : "version to specify",
5 |     "epoch": "500",
6 |     "step" : "5",
7 |     "input_frame": "5",
8 |     "predict_frame": "5",
9 |     "skip_frame": "2",
10 |     "learning_rate": "float number",
11 |     "normalize": "False",
12 |     "size_idx": "resize to k by k square pic",
13 |     "gray_scale": "True",
14 |     "loss_function": "l1",
15 |     "load" : "True",
16 |     "gpu": "0",
17 |     "load_model_name": "model_path"
18 | }
19 | 
--------------------------------------------------------------------------------
/parse_argument.py:
--------------------------------------------------------------------------------
1 | import json
2 | import argparse
3 | 
4 | class argrements():
5 |     def __init__(self):
6 |         self.videopath = ''
7 |         self.step = ''
8 |         self.epoch_num = ''
9 |         self.output_path = ''
10 |         self.gray_scale = ''
11 |         self.sz_idx = ''
12 |         self.lr = ''
13 |         self.version = ''
14 |         self.loss_func = ''
15 |         self.input_frame = ''
16 |         self.predict_frame = ''
17 |         self.load = ''
18 |         self.load_model_name = ''
19 |         self.parseJSON()
20 | 
21 |     def parseJSON(self):
22 |         parser = argparse.ArgumentParser()
23 |         # the positional argument is the config file name without ".json"
24 |         parser.add_argument('JSON', type= str, help='config file name, without the .json extension')
25 |         args = parser.parse_args()
26 | 
27 |         with open(args.JSON+".json") as json_file:
28 |             try:
29 |                 config = json.load(json_file)
30 |                 self.videopath = config["videopath"]
31 |                 self.output_path = config["output_path"]
32 |                 self.step = config["step"]
33 |                 self.sz_idx = config["size_idx"]
34 |                 self.loss_func = config["loss_function"]
35 |                 self.epoch_num = config["epoch"]
36 |                 self.version = config["version"]
37 |                 self.lr = config["learning_rate"]
38 |                 self.gray_scale = config["gray_scale"]
39 |                 self.input_frame = config["input_frame"]
40 |                 self.predict_frame = config["predict_frame"]
41 |                 # train.py also reads these checkpoint options
42 |                 self.load = config["load"]
43 |                 self.load_model_name = config["load_model_name"]
44 |                 # JSON stores booleans as strings; convert them here
45 |                 self.gray_scale = (self.gray_scale == 'True')
46 |                 self.load = (self.load == 'True')
47 |             except Exception as e:
48 |                 print(str(e))
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # R_Unet
2 | This project applies a recurrent method on top of U-Net to perform pixel-level video frame prediction.<br/>
3 | Part of our results is published at [IEEE GCCE 2020](https://ieeexplore.ieee.org/document/9292008) ([pdf](https://www.ams.giti.waseda.ac.jp/data/pdf-files/2020_GCCE_hsu.pdf)).<br/>
4 | 
5 | # Brief introduction
6 | Taking advantage of LSTM and the U-Net encoder-decoder, we aim to predict the next n frame(s).<br/>
7 | We currently use a two-layer LSTM network (v1) or a convolutional LSTM (v2) as the recurrent network, applied to the latent features of the U-Net (see the sketch below).<br/>
8 | In our latest v4 model, we use a convolutional LSTM at each level of the U-Net and take the shortcut used in v2 out.<br/>
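As a rough, runnable sketch of the v1 recurrence (shapes are illustrative; the exact code is in models/R_Unet_v1.py): the U-Net bottleneck is viewed as a stack of 16x16 maps and stepped through an `nn.LSTM`, carrying the hidden state across frames.
```python
import torch
import torch.nn as nn

# Illustrative only: a 1024-channel 16x16 bottleneck per frame, as for a
# 256x256 input. Dim 0 of each tensor acts as the LSTM sequence axis.
rnn = nn.LSTM(input_size=16, hidden_size=16)
latent_buffer = [torch.randn(1024, 16, 16) for _ in range(5)]  # one per frame

hidden = None
for feat in latent_buffer:
    out, hidden = rnn(feat, hidden)  # carry (h, c) across frames
# `out` is then reshaped to (1, 1024, 16, 16) and passed to the decoder
```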
9 | 
10 | In parallel, we are now using the v4_mask model to train with mask + image as input and mask + image prediction as output.<br/>
11 | This model has the same structure as v4, but changes the output layer so that it also outputs a mask tensor.<br/>
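A minimal sketch of that output-layer change, assuming a single extra mask channel (`n_mask` is a hypothetical name, not taken from the repo):
```python
import torch.nn as nn

n_mask = 1
image_head = nn.Conv2d(64, 3, kernel_size=1)           # v4: RGB frame prediction
mask_head  = nn.Conv2d(64, 3 + n_mask, kernel_size=1)  # v4_mask: frame + mask channels
```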
12 | 
13 | # Usage
14 | * configuration: config.json (see the example after this list)<br/>
15 | * configuration parsing: parse_argument.py<br/>
16 | * training files: train.py (v1) / train_v2.py (v2 and later)<br/>
17 | * V1 model: R_Unet_v1.py
18 | * V2 model: R_Unet_ver_2.py
19 | * V4 model: R_Unet_ver_4.py
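In the commands below, `config` is the name of the JSON file without its `.json` extension (parse_argument.py appends it when opening the file). A filled-in config.json might look like the following; all paths and values here are illustrative:
```json
{
    "videopath" : "./data/videos/",
    "output_path": "./output/",
    "version" : "4",
    "epoch": "500",
    "step" : "5",
    "input_frame": "5",
    "predict_frame": "5",
    "skip_frame": "2",
    "learning_rate": "0.0001",
    "normalize": "False",
    "size_idx": "256",
    "gray_scale": "True",
    "loss_function": "l1",
    "load" : "False",
    "gpu": "0",
    "load_model_name": "./output/checkpoint.pt"
}
```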
20 | ```
21 | to train v1 model: python3 train.py config
22 | to train other model: python3 train_v2.py config
23 | ```
24 | 
25 | # Our Model Architecture
26 | Currently we are working on a better model using convolutional LSTM, named runet_v2.<br/>
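One detail of the v2 design worth illustrating before the diagrams (mirroring models/R_Unet_ver_2.py, with illustrative shapes): the bottleneck ConvLSTM output is reshaped to each level's spatial size and concatenated into every decoder stage.
```python
import torch

lstm_out = torch.randn(1, 512, 16, 16)  # bottleneck ConvLSTM output
x4 = torch.randn(1, 384, 32, 32)        # a (reduced) encoder skip feature
h = lstm_out.view(1, -1, x4.shape[2], x4.shape[3])  # -> (1, 128, 32, 32)
x4 = torch.cat((x4, h), dim=1)          # 512 channels fed to the decoder
```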
27 | * model v1:
28 | ![alt_text](https://github.com/vagr8/R_Unet/blob/master/runet_v1.jpg)

29 | 30 | * model v2:
31 | ![alt_text](https://github.com/vagr8/R_Unet/blob/master/pics/runet_v2.0_std.png)

32 | 33 | * model v4:
34 | ![alt_text](https://github.com/vagr8/R_Unet/blob/master/pics/v4_.png) 35 |
36 |
37 | 
38 | 
39 | # Some results
40 | Prediction:
41 | ![alt_text](https://github.com/vagr8/R_Unet/blob/master/pics/4_24000val.gif)
42 | Ground truth:
43 | ![alt_text](https://github.com/vagr8/R_Unet/blob/master/pics/4_24000val_gt.gif)
44 | <br/>
45 | Mask prediction:
46 | <br/>
47 | Prediction:
48 | ![alt_text](https://github.com/vagr8/R_Unet/blob/master/pics/mask%206000.gif)
49 | Ground truth:
50 | ![alt_text](https://github.com/vagr8/R_Unet/blob/master/pics/mask%206000%20gt.gif)
51 | <br/>
52 | 
53 | 
54 | # References
55 | [1] Stochastic Adversarial Video Prediction, CVPR 2018<br/>
56 | [2] High Fidelity Video Prediction with 57 | Large Stochastic Recurrent Neural Networks, NeurIPS 2019
58 | [3] [convLSTM](https://github.com/automan000/Convolutional_LSTM_PyTorch) - The convolution lstm framework used

59 | Hsu Mu Chien, Watanabe Lab, Department of Fundamental Science and Engineering, Waseda University. 60 | -------------------------------------------------------------------------------- /models/conv_lstm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | 7 | 8 | class ConvLSTMCell(nn.Module): 9 | def __init__(self, input_channels, hidden_channels, kernel_size, gpu_num): 10 | super(ConvLSTMCell, self).__init__() 11 | 12 | assert hidden_channels % 2 == 0 13 | use_cuda = torch.cuda.is_available() 14 | 15 | #gpu_num = 1 16 | 17 | if use_cuda: torch.cuda.set_device(gpu_num) 18 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_num) 19 | self.device = torch.device('cuda:'+str(gpu_num) if use_cuda else 'cpu') 20 | #self.device = torch.device("cuda" if use_cuda else "cpu") 21 | 22 | self.input_channels = input_channels 23 | self.hidden_channels = hidden_channels 24 | self.kernel_size = kernel_size 25 | self.num_features = 4 26 | 27 | self.padding = int((kernel_size - 1) / 2) 28 | 29 | self.Wxi = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True) 30 | self.Whi = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False) 31 | self.Wxf = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True) 32 | self.Whf = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False) 33 | self.Wxc = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True) 34 | self.Whc = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False) 35 | self.Wxo = nn.Conv2d(self.input_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=True) 36 | self.Who = nn.Conv2d(self.hidden_channels, self.hidden_channels, self.kernel_size, 1, self.padding, bias=False) 37 | 38 | self.Wci = None 39 | self.Wcf = None 40 | self.Wco = None 41 | # standard convolutional LSTM gates; the peephole weights Wci/Wcf/Wco are created lazily in init_hidden 42 | def forward(self, x, h, c): 43 | ci = torch.sigmoid(self.Wxi(x) + self.Whi(h) + c * self.Wci) 44 | cf = torch.sigmoid(self.Wxf(x) + self.Whf(h) + c * self.Wcf) 45 | cc = cf * c + ci * torch.tanh(self.Wxc(x) + self.Whc(h)) 46 | co = torch.sigmoid(self.Wxo(x) + self.Who(h) + cc * self.Wco) 47 | ch = co * torch.tanh(cc) 48 | return ch, cc 49 | 50 | ## initialize h and c internal state only, keep weight mask the same 51 | def init_hidden(self, batch_size, hidden, shape): 52 | if self.Wci is None: 53 | self.Wci = Variable(torch.zeros(1, hidden, shape[0], shape[1])).to(self.device) 54 | self.Wcf = Variable(torch.zeros(1, hidden, shape[0], shape[1])).to(self.device) 55 | self.Wco = Variable(torch.zeros(1, hidden, shape[0], shape[1])).to(self.device) 56 | else: 57 | assert shape[0] == self.Wci.size()[2], 'Input Height Mismatched!' 58 | assert shape[1] == self.Wci.size()[3], 'Input Width Mismatched!' 59 | return (Variable(torch.zeros(batch_size, hidden, shape[0], shape[1])).to(self.device), 60 | Variable(torch.zeros(batch_size, hidden, shape[0], shape[1])).to(self.device)) 61 | 62 | 63 | class ConvLSTM(nn.Module): 64 | # input_channels corresponds to the first input feature map 65 | # hidden state is a list of succeeding lstm layers. 
66 | def __init__(self, input_channels, hidden_channels, kernel_size, step = 1, effective_step=[1], gpu_num=0): 67 | super(ConvLSTM, self).__init__() 68 | self.input_channels = [input_channels] + hidden_channels 69 | self.hidden_channels = hidden_channels 70 | self.kernel_size = kernel_size 71 | self.num_layers = len(hidden_channels) 72 | self.step = step 73 | self.effective_step = effective_step 74 | self._all_layers = [] 75 | 76 | #self.internal_state = [] 77 | 78 | #gpu_num = 1 79 | if torch.cuda.is_available(): torch.cuda.set_device(gpu_num) 80 | os.environ["CUDA_VISIBLE_DEVICES"] = str(gpu_num) 81 | self.device = torch.device('cuda:'+str(gpu_num) if torch.cuda.is_available() else 'cpu') 82 | 83 | 84 | for i in range(self.num_layers): 85 | name = 'cell{}'.format(i) 86 | cell = ConvLSTMCell(self.input_channels[i], self.hidden_channels[i], self.kernel_size, gpu_num).to(self.device) 87 | setattr(self, name, cell) 88 | self._all_layers.append(cell) 89 | 90 | def forward(self, input, init_token = False): 91 | 92 | outputs = [] 93 | if init_token == True: 94 | self.internal_state = [] 95 | 96 | for step in range(1): 97 | x = input 98 | 99 | for i in range(self.num_layers): 100 | name = 'cell{}'.format(i) 101 | 102 | # at the first step of each video, pass init_token = True to initialize the internal states 103 | if step == 0 and init_token == True : 104 | bsize, _, height, width = x.size() 105 | (h, c) = getattr(self, name).init_hidden(batch_size=bsize, hidden=self.hidden_channels[i], 106 | shape=(height, width)) 107 | self.internal_state.append((h, c)) 108 | 109 | # do forward 110 | (h, c) = self.internal_state[i] 111 | x, new_c = getattr(self, name)(x, h, c) 112 | self.internal_state[i] = (x, new_c) 113 | 114 | # only record last output 115 | outputs.append(x) 116 | 117 | return outputs 118 | 119 | 120 | if __name__ == '__main__': 121 | # gradient check 122 | convlstm = ConvLSTM(input_channels=1024, hidden_channels=[1024, 512, 512], kernel_size=3, step=3, 123 | effective_step=[2]) 124 | loss_fn = torch.nn.MSELoss() 125 | 126 | input = Variable(torch.randn(1, 1024, 16, 16)) 127 | target = Variable(torch.randn(1, 512, 16, 16)).double() 128 | 129 | output = convlstm(input, init_token = True) # init_token must be True on the first call 130 | output = output[0][0].double() 131 | print( np.asarray( output[0].detach(), dtype=float ).shape ) 132 | res = torch.autograd.gradcheck(loss_fn, (output, target), eps=1e-6, raise_exception=True) 133 | print(res) 134 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torch.autograd import Variable 5 | import R_Unet as net 6 | import numpy as np 7 | import parse_argument 8 | from utils import * 9 | import os 10 | import csv 11 | import datetime 12 | import time 13 | import psutil 14 | import gc 15 | #from torchviz import make_dot, make_dot_from_trace 16 | # possible size_index: 2^n, n >= 4, n is int 17 | 18 | # set arguments 19 | args = parse_argument.argrements() 20 | video_path, learn_rate, step, gray_scale_bol = args.videopath, float(args.lr), int(args.step), bool(args.gray_scale) 21 | output_path = args.output_path 22 | epoch_num = int(args.epoch_num) 23 | size_idx = int(args.sz_idx) 24 | loss_function = str(args.loss_func) 25 | input_frame = int( args.input_frame ) 26 | predict_frame_num = int(args.predict_frame) 27 | laod_model_name = args.load_model_name 28 | Load = args.load 29 | 30 | save_img = True 31 | 32 | assert ( input_frame <= step ) 33 | assert 
(os.path.isdir( output_path )) # check output path exist 34 | 35 | # get lists of frame paths 36 | cwd = os.getcwd() 37 | os.chdir(cwd+video_path[1:]) 38 | dir_list = next(os.walk('.'))[1] 39 | video_dir_list = [] 40 | for i in dir_list: 41 | i = video_path + str(i) + '/' 42 | video_dir_list.append(i) 43 | os.chdir(cwd) 44 | 45 | ## ste gpu, set data, check gpu, define network, 46 | gpus = [0] 47 | start_date = str(datetime.datetime.now())[0:10] 48 | cuda_gpu = torch.cuda.is_available() 49 | 50 | ## if gpu exist, use cuda 51 | if( cuda_gpu ): 52 | network = torch.nn.DataParallel(net.unet(Gary_Scale = gray_scale_bol, size_index=size_idx), device_ids=gpus).cuda() 53 | else: 54 | network = net.unet(Gary_Scale = gray_scale_bol, size_index=size_idx) 55 | 56 | ## GC memory 57 | gc.enable() 58 | 59 | # set training parameters 60 | optimizer = optim.Adam( network.parameters(), lr = learn_rate ) 61 | if loss_function != 'l1': 62 | critiria = nn.MSELoss() 63 | else: 64 | critiria = nn.SmoothL1Loss() 65 | 66 | loss_list = [] ## records loss through each step in training 67 | batch_size = len(video_dir_list) 68 | 69 | # load previous model 70 | if Load == True: 71 | network, optimizer, start_epoch = load_checkpoint( network, optimizer, laod_model_name ) 72 | else: 73 | start_epoch = 0 74 | 75 | # print training info 76 | pytorch_total_params = sum(p.numel() for p in network.parameters()) 77 | print("==========================") 78 | print("number of parameters:", pytorch_total_params) 79 | print("leaening rate:", learn_rate) 80 | print("frame size:", size_idx, 'x', size_idx) 81 | print("input", step, "frames") 82 | print("predict", predict_frame_num, "frames") 83 | print("number of epochs", (start_epoch + epoch_num) ) 84 | print ("output path", output_path) 85 | print("optimizer", optimizer) 86 | print("==========================") 87 | 88 | 89 | for epochs in range(start_epoch, start_epoch + epoch_num): 90 | ## randomly choose tarining video sequence for each epoch 91 | train_seq = np.random.permutation(batch_size) 92 | for batch in range(0, batch_size): 93 | frame_paths = get_file_path(video_dir_list[ train_seq[batch] ]) 94 | new_frame_paths = [ frame_paths[i] for i in range(0, len(frame_paths), 5) ] 95 | step_size = step + predict_frame_num 96 | avalible_len = len(new_frame_paths) 97 | print ('current batch:', video_dir_list[ train_seq[batch] ] ) 98 | # reset buffer for each video 99 | buffer = [] 100 | 101 | if avalible_len < step_size or avalible_len == step_size: 102 | print( 'not enough image ' ) 103 | pass 104 | else: 105 | for steps in range(0, step_size): 106 | if (steps == 0): 107 | free_mem = True 108 | else: 109 | free_mem = False 110 | #print("epoch", epochs, "steps", steps) 111 | # Clear the gradients, since PyTorch accumulates them 112 | start_time = time.time() 113 | optimizer.zero_grad() 114 | 115 | # load picture, step = pic num 116 | test, target = load_pic( steps, new_frame_paths, gray_scale=gray_scale_bol, size_index = size_idx) 117 | 118 | if cuda_gpu: 119 | test = test.cuda() 120 | target = target.cuda() 121 | ''' 122 | img = tensor_to_pic(test, normalize=False, gray_scale=False, size_index=size_idx) 123 | cv.imshow('My Image', img) 124 | cv.waitKey(0) 125 | exit() 126 | ''' 127 | # Reshape and Forward propagation 128 | #test = unet_model.reshape(test) 129 | #pass in buffer with length = steps-1, concatenate latent feature to buffer in network 130 | if steps < step: 131 | output, l_feature = network.forward(test, buffer, free_mem) 132 | else: 133 | print('doing prediction') 134 
| output, l_feature = network.forward(previous_output, buffer, free_mem) 135 | 136 | previous_output = output 137 | 138 | #make_dot( output.mean(), params = dict(network.named_parameters() ) ) 139 | #exit() 140 | # update buffer for storing latent feature 141 | buffer = buf_update( l_feature, buffer, 6 ) 142 | 143 | # Calculate loss 144 | #loss = critiria( Variable(output.long()), Variable(target.long())) 145 | loss = critiria( output, target) 146 | 147 | # record loss in to csv 148 | loss_value = float( loss.item() ) 149 | string = 'epoch_' + str(epochs) + '_batch_' + str(batch) + '_step_' + str(steps) 150 | loss_list.append( [ string, loss_value ]) 151 | 152 | # save img 153 | if save_img == True or float(loss_value) > 400: 154 | if ( (epochs + 1) % 20 == 0) or ( epochs == 0 ) or ( (epochs+1) == ( start_epoch + epoch_num) ): 155 | if steps % 1 == 0: 156 | output_img = tensor_to_pic(output, normalize=False, gray_scale=gray_scale_bol, size_index = size_idx) 157 | output_img_name = output_path + str(start_date) + '_E' + str(epochs) + '_B'+ str(batch) + '_S'+ str(steps) +'_2output.jpg' 158 | cv.imwrite(str(output_img_name), output_img) 159 | 160 | # Backward propagation 161 | loss.backward(retain_graph = True) 162 | 163 | end_time = time.time() 164 | elapse_time = round((end_time - start_time), 2) 165 | 166 | # Update the gradients 167 | optimizer.step() 168 | 169 | # print memory used 170 | process = psutil.Process(os.getpid()) 171 | 172 | print('epoch', epochs, 'batch', batch, 'step', steps, "loss:", loss, 'time used', elapse_time, 'sec') 173 | print('used memory', round((int(process.memory_info().rss)/(1024*1024)), 2), 'MB' ) 174 | print("-------------------------------------") 175 | 176 | if cuda_gpu: 177 | test = test.cpu() 178 | target = target.cpu() 179 | 180 | gc.collect() 181 | 182 | #check_tensors() 183 | 184 | if cuda_gpu: 185 | torch.cuda.empty_cache() 186 | 187 | if cuda_gpu: 188 | torch.cuda.empty_cache() 189 | # log loss after each epoch 190 | write_csv_file( output_path + start_date +'_loss_record.csv', loss_list ) 191 | 192 | # save model 193 | if ( ( ( (epochs+1) % 50 ) == 0 ) or ((epochs+1) == ( start_epoch + epoch_num)) or ( (epochs+1) == 1 ) ): 194 | path = output_path + start_date + 'epoch_' + str(epochs) +'_R_'+ str(step) + '_P_' + str(predict_frame_num) + '_size_idx_' + str(size_idx) + '_R_Unet.pt' 195 | state = { 'epoch': epochs+1, 'state_dict': network.state_dict(), 'optimizer':optimizer.state_dict() } 196 | 197 | torch.save( state, path) 198 | print('save model to:', path) 199 | 200 | 201 | if cuda_gpu: 202 | torch.cuda.empty_cache() 203 | -------------------------------------------------------------------------------- /models/R_Unet_ver_2_7.py: -------------------------------------------------------------------------------- 1 | ## Recurrent U-net, with LSTM 2 | ## default step = 6 3 | ## Future plan: multi-layer LSTM, conv LSTM, currently contains 2 layer LSTM 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | from torch.autograd import Variable 10 | import numpy as np 11 | import os 12 | import gc 13 | from conv_lstm import ConvLSTM 14 | 15 | # Down convolution layer 16 | class Down_Layer(nn.Sequential): 17 | def __init__(self, ch_in, ch_out): 18 | super(Down_Layer, self).__init__() 19 | self.layer = self.define_layer( ch_in, ch_out ) 20 | 21 | def define_layer(self, ch_in, ch_out): 22 | use_bias = True 23 | 24 | model = [] 25 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, 
bias=use_bias), 26 | nn.Conv2d( ch_out, ch_out, kernel_size=(1,3), bias=use_bias), 27 | nn.ReLU(True), 28 | nn.Conv2d( ch_out, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 29 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 30 | nn.ReLU(True) ] 31 | 32 | return nn.Sequential(*model) 33 | 34 | def forward(self, x): 35 | return self.layer(x) 36 | 37 | # Up convolution layer 38 | # input x and res_x 39 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 40 | class Up_Layer(nn.Sequential): 41 | def __init__(self, ch_in, ch_out): 42 | super(Up_Layer, self).__init__() 43 | self.ch_in = ch_in 44 | self.ch_out = ch_out 45 | self.layer = self.define_layer( ) 46 | 47 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 48 | # add 0 padding on right and down to keep shape the same 49 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 50 | self.degradation = nn.Conv2d( self.ch_in, self.ch_out, kernel_size=2 ) 51 | 52 | def define_layer(self): 53 | use_bias = True 54 | pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 55 | 56 | model = [] 57 | model += [ nn.Conv2d( self.ch_in, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 58 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 59 | nn.ReLU(True), 60 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 61 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 62 | nn.ReLU(True) ] 63 | 64 | return nn.Sequential(*model) 65 | 66 | def forward(self, x, resx): 67 | output = self.degradation( self.pad( self.upsample(x) ) ) 68 | output = torch.cat((output, resx), dim = 1) 69 | 70 | output = self.layer(output) 71 | return output 72 | 73 | class Up_Layer0(nn.Sequential): 74 | def __init__(self, ch_in, ch_out): 75 | super(Up_Layer0, self).__init__() 76 | self.ch_in = ch_in 77 | self.ch_out = ch_out 78 | self.layer1 = self.define_layer() 79 | self.layer2 = self.define_layer() 80 | 81 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 82 | # add 0 padding on right and down to keep shape the same 83 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 84 | self.degradation = nn.Conv2d( self.ch_out, self.ch_out, kernel_size=2 ) 85 | 86 | def define_layer(self): 87 | use_bias = True 88 | pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 89 | 90 | model = [] 91 | model += [ nn.Conv2d( self.ch_in, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 92 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 93 | nn.ReLU(True), 94 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 95 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 96 | nn.ReLU(True) ] 97 | 98 | return nn.Sequential(*model) 99 | 100 | def forward(self, x, resx): 101 | # 1st conv box, up sample 102 | output = self.layer1( x ) 103 | output = self.degradation( self.pad( self.upsample(output) ) ) 104 | 105 | # concate output and res_x, 2nd conv_box 106 | output = torch.cat((output, resx), dim = 1) 107 | output = self.layer2(output) 108 | return output 109 | 110 | class unet(nn.Module): 111 | def __init__(self, tot_frame_num = 100, step_ = 6, predict_ = 3 ,Gary_Scale = False, size_index = 256): 112 | print("gray scale:", Gary_Scale) 113 | super( unet, self ).__init__() 114 | if size_index != 256: 115 | self.resize_fraction = window_size = 256/size_index 116 | else: 117 | self.resize_fraction = 1 118 | 119 | cuda_gpu = torch.cuda.is_available() 120 | 121 | self.latent_feature = 0 122 | 
self.lstm_buf = [] 123 | self.step = step_ 124 | self.pred = predict_ 125 | self.free_mem_counter = 0 126 | self.max_pool = nn.MaxPool2d(2) 127 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 128 | 129 | self.convlstm = ConvLSTM(input_channels=512, hidden_channels=[512, 512, 512], kernel_size=3, step=3, 130 | effective_step=[2]) 131 | 132 | if Gary_Scale == True: 133 | self.down1 = Down_Layer(1, 62) 134 | else: 135 | self.down1 = Down_Layer( 3, 62 ) 136 | 137 | self.down2 = Down_Layer( 62, 120 ) 138 | self.down3 = Down_Layer( 120, 224 ) 139 | self.down4 = Down_Layer( 224, 384 ) 140 | self.down5 = Down_Layer( 384, 512 ) 141 | 142 | self.up1 = Up_Layer0(1024, 512) 143 | #self.up1 = Up_Layer(1024, 512) 144 | self.up2 = Up_Layer(512, 256) 145 | self.up3 = Up_Layer(256, 128) 146 | self.up4 = Up_Layer(128, 64) 147 | if Gary_Scale == True: 148 | self.up5 = nn.Conv2d( 64, 1, kernel_size = 1 ) 149 | else: 150 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 151 | 152 | def forward(self, x, free_token, test_model = False): 153 | self.free_token = free_token 154 | if ( self.free_token == True ): 155 | self.free_memory() 156 | 157 | # pop oldest buffer 158 | if( len(self.lstm_buf) >= self.step): 159 | self.lstm_buf = self.lstm_buf[1:] 160 | 161 | # down convolution 162 | x1 = self.down1(x) 163 | x2 = self.max_pool(x1) 164 | 165 | x2 = self.down2(x2) 166 | x3 = self.max_pool(x2) 167 | 168 | x3 = self.down3(x3) 169 | x4 = self.max_pool(x3) 170 | 171 | x4 = self.down4(x4) 172 | x5 = self.max_pool(x4) 173 | 174 | x5 = self.down5(x5) 175 | 176 | latent_feature = x5.view(1, -1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 177 | # add latest buffer 178 | # self.lstm_buf.append(latent_feature ) 179 | if( test_model == True ): 180 | return latent_feature 181 | 182 | lstm_output = Variable(self.convlstm(latent_feature)[0]) 183 | 184 | if 'lstm_output' in locals(): 185 | x5 = torch.cat((x5, lstm_output), dim = 1) 186 | 187 | h = lstm_output.view(1, -1, x4.shape[2], x4.shape[3]) 188 | #x4 = self.one_conv4(x4) 189 | x4 = torch.cat((x4, h), dim = 1) 190 | x = self.up1( x5, x4 ) 191 | 192 | h = lstm_output.view(1, -1, x3.shape[2], x3.shape[3]) 193 | #x3 = self.one_conv5(x3) 194 | x3 = torch.cat((x3, h), dim = 1) 195 | x = self.up2( x, x3 ) 196 | 197 | h = lstm_output.view(1, -1, x2.shape[2], x2.shape[3]) 198 | #x2 = self.one_conv6(x2) 199 | x2 = torch.cat((x2, h), dim = 1) 200 | x = self.up3( x, x2 ) 201 | 202 | h = lstm_output.view(1, -1, x1.shape[2], x1.shape[3]) 203 | #x1 = self.one_conv7(x1) 204 | x1 = torch.cat((x1, h), dim = 1) 205 | x = self.up4( x, x1 ) 206 | 207 | x = F.relu(self.up5( x )) 208 | 209 | return x 210 | 211 | def free_memory(self): 212 | 213 | self.free_mem_counter = 0 214 | -------------------------------------------------------------------------------- /models/R_Unet_ver_2_5.py: -------------------------------------------------------------------------------- 1 | ## Recurrent U-net, with LSTM 2 | ## default step = 6 3 | ## Future plan: multi-layer LSTM, conv LSTM, currently contains 2 layer LSTM 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | from torch.autograd import Variable 10 | import numpy as np 11 | import os 12 | import gc 13 | from conv_lstm import ConvLSTM 14 | 15 | # Down convolution layer 16 | class Down_Layer(nn.Sequential): 17 | def __init__(self, ch_in, ch_out): 18 | super(Down_Layer, self).__init__() 19 | self.layer = self.define_layer( ch_in, ch_out ) 20 | 21 | def 
define_layer(self, ch_in, ch_out): 22 | use_bias = True 23 | 24 | model = [] 25 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 26 | nn.Conv2d( ch_out, ch_out, kernel_size=(1,3), bias=use_bias), 27 | nn.ReLU(True), 28 | nn.Conv2d( ch_out, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 29 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 30 | nn.ReLU(True) ] 31 | 32 | return nn.Sequential(*model) 33 | 34 | def forward(self, x): 35 | return self.layer(x) 36 | 37 | # Up convolution layer 38 | # input x and res_x 39 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 40 | class Up_Layer(nn.Sequential): 41 | def __init__(self, ch_in, ch_out): 42 | super(Up_Layer, self).__init__() 43 | self.ch_in = ch_in 44 | self.ch_out = ch_out 45 | self.layer = self.define_layer( ) 46 | 47 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 48 | # add 0 padding on right and down to keep shape the same 49 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 50 | self.degradation = nn.Conv2d( self.ch_in, self.ch_out, kernel_size=2 ) 51 | 52 | def define_layer(self): 53 | use_bias = True 54 | pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 55 | 56 | model = [] 57 | model += [ nn.Conv2d( self.ch_in, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 58 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 59 | nn.ReLU(True), 60 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 61 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 62 | nn.ReLU(True) ] 63 | 64 | return nn.Sequential(*model) 65 | 66 | def forward(self, x, resx): 67 | output = self.degradation( self.pad( self.upsample(x) ) ) 68 | output = torch.cat((output, resx), dim = 1) 69 | 70 | output = self.layer(output) 71 | return output 72 | 73 | class Up_Layer0(nn.Sequential): 74 | def __init__(self, ch_in, ch_out): 75 | super(Up_Layer0, self).__init__() 76 | self.ch_in = ch_in 77 | self.ch_out = ch_out 78 | self.layer1 = self.define_layer() 79 | self.layer2 = self.define_layer() 80 | 81 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 82 | # add 0 padding on right and down to keep shape the same 83 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 84 | self.degradation = nn.Conv2d( self.ch_in, self.ch_out, kernel_size=2 ) 85 | 86 | def define_layer(self): 87 | use_bias = True 88 | pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 89 | 90 | model = [] 91 | model += [ nn.Conv2d( self.ch_in, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 92 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 93 | nn.ReLU(True), 94 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 95 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=(1, 3), bias=use_bias), 96 | nn.ReLU(True) ] 97 | 98 | return nn.Sequential(*model) 99 | 100 | def forward(self, x, resx): 101 | # 1st conv box, up sample 102 | output = self.layer1( x ) 103 | output = self.degradation( self.pad( self.upsample(x) ) ) 104 | 105 | # concate output and res_x, 2nd conv_box 106 | output = torch.cat((output, resx), dim = 1) 107 | output = self.layer1(output) 108 | return output 109 | 110 | class unet(nn.Module): 111 | def __init__(self, tot_frame_num = 100, step_ = 6, predict_ = 3 ,Gary_Scale = False, size_index = 256): 112 | print("gray scale:", Gary_Scale) 113 | super( unet, self ).__init__() 114 | if size_index != 256: 115 | self.resize_fraction = window_size = 256/size_index 
116 | else: 117 | self.resize_fraction = 1 118 | 119 | cuda_gpu = torch.cuda.is_available() 120 | 121 | self.latent_feature = 0 122 | self.lstm_buf = [] 123 | self.step = step_ 124 | self.pred = predict_ 125 | self.free_mem_counter = 0 126 | self.max_pool = nn.MaxPool2d(2) 127 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 128 | 129 | self.convlstm = ConvLSTM(input_channels=512, hidden_channels=[512, 512, 512], kernel_size=3, step=3, 130 | effective_step=[2]) 131 | 132 | if Gary_Scale == True: 133 | self.down1 = Down_Layer(1, 62) 134 | else: 135 | self.down1 = Down_Layer( 3, 62 ) 136 | 137 | self.down2 = Down_Layer( 62, 120 ) 138 | self.down3 = Down_Layer( 120, 224 ) 139 | self.down4 = Down_Layer( 224, 384 ) 140 | self.down5 = Down_Layer( 384, 512 ) 141 | 142 | #self.up1 = Up_Layer0(1024, 512) 143 | self.up1 = Up_Layer(1024, 512) 144 | self.up2 = Up_Layer(512, 256) 145 | self.up3 = Up_Layer(256, 128) 146 | self.up4 = Up_Layer(128, 64) 147 | if Gary_Scale == True: 148 | self.up5 = nn.Conv2d( 64, 1, kernel_size = 1 ) 149 | else: 150 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 151 | 152 | def forward(self, x, free_token = False, test_model = False): 153 | ''' 154 | self.free_token = free_token 155 | if ( self.free_token == True ): 156 | self.free_memory() 157 | ''' 158 | # pop oldest buffer 159 | if( len(self.lstm_buf) >= self.step): 160 | self.lstm_buf = self.lstm_buf[1:] 161 | 162 | # down convolution 163 | x1 = self.down1(x) 164 | x2 = self.max_pool(x1) 165 | 166 | x2 = self.down2(x2) 167 | x3 = self.max_pool(x2) 168 | 169 | x3 = self.down3(x3) 170 | x4 = self.max_pool(x3) 171 | 172 | x4 = self.down4(x4) 173 | x5 = self.max_pool(x4) 174 | 175 | x5 = self.down5(x5) 176 | 177 | latent_feature = x5.view(1, -1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 178 | # add latest buffer 179 | # self.lstm_buf.append(latent_feature ) 180 | if( test_model == True ): 181 | return latent_feature 182 | 183 | lstm_output = Variable(self.convlstm(latent_feature)[0]) 184 | 185 | if 'lstm_output' in locals(): 186 | x5 = torch.cat((x5, lstm_output), dim = 1) 187 | 188 | h = lstm_output.view(1, -1, x4.shape[2], x4.shape[3]) 189 | #x4 = self.one_conv4(x4) 190 | x4 = torch.cat((x4, h), dim = 1) 191 | x = self.up1( x5, x4 ) 192 | 193 | h = lstm_output.view(1, -1, x3.shape[2], x3.shape[3]) 194 | #x3 = self.one_conv5(x3) 195 | x3 = torch.cat((x3, h), dim = 1) 196 | x = self.up2( x, x3 ) 197 | 198 | h = lstm_output.view(1, -1, x2.shape[2], x2.shape[3]) 199 | #x2 = self.one_conv6(x2) 200 | x2 = torch.cat((x2, h), dim = 1) 201 | x = self.up3( x, x2 ) 202 | 203 | h = lstm_output.view(1, -1, x1.shape[2], x1.shape[3]) 204 | #x1 = self.one_conv7(x1) 205 | x1 = torch.cat((x1, h), dim = 1) 206 | x = self.up4( x, x1 ) 207 | 208 | x = F.relu(self.up5( x )) 209 | ''' 210 | else: 211 | x5 = self.one_conv3( x5 ) 212 | 213 | # up convolution 214 | x = self.up1( x5, x4 ) 215 | x = self.up2( x, x3 ) 216 | x = self.up3( x, x2 ) 217 | x = self.up4( x, x1 ) 218 | x = F.relu(self.up5( x )) 219 | ''' 220 | return x 221 | 222 | def free_memory(self): 223 | 224 | self.free_mem_counter = 0 225 | -------------------------------------------------------------------------------- /models/R_Unet_v1.py: -------------------------------------------------------------------------------- 1 | ## Recurrent U-net, with LSTM 2 | ## default step = 6 3 | ## Future plan: multi-layer LSTM, conv LSTM, currently contains 2 layer LSTM 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import 
torch.optim as optim 9 | import numpy as np 10 | import os 11 | import gc 12 | 13 | # Down convolution layer 14 | class Down_Layer(nn.Sequential): 15 | def __init__(self, ch_in, ch_out): 16 | super(Down_Layer, self).__init__() 17 | self.layer = self.define_layer( ch_in, ch_out ) 18 | 19 | def define_layer(self, ch_in, ch_out): 20 | use_bias = True 21 | 22 | model = [] 23 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=3, padding=1, bias=use_bias), 24 | nn.ReLU(True), 25 | nn.Conv2d( ch_out, ch_out, kernel_size=3, padding=1, bias=use_bias), 26 | nn.ReLU(True) ] 27 | 28 | return nn.Sequential(*model) 29 | 30 | def forward(self, x): 31 | return self.layer(x) 32 | 33 | # Up convolution layer 34 | # input x and res_x 35 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 36 | class Up_Layer(nn.Sequential): 37 | def __init__(self, ch_in, ch_out): 38 | super(Up_Layer, self).__init__() 39 | self.ch_in = ch_in 40 | self.ch_out = ch_out 41 | self.layer = self.define_layer( ) 42 | 43 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 44 | # add 0 padding on right and down to keep shape the same 45 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 46 | self.degradation = nn.Conv2d( self.ch_in, self.ch_out, kernel_size=2 ) 47 | 48 | def define_layer(self): 49 | use_bias = True 50 | pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 51 | 52 | model = [] 53 | model += [ nn.Conv2d( self.ch_in, self.ch_out, kernel_size=3, padding=1, bias=use_bias), 54 | nn.ReLU(True), 55 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=3, padding=1, bias=use_bias), 56 | nn.ReLU(True) ] 57 | 58 | return nn.Sequential(*model) 59 | 60 | def forward(self, x, resx): 61 | output = self.degradation( self.pad( self.upsample(x) ) ) 62 | output = torch.cat((output, resx), dim = 1) 63 | output = self.layer(output) 64 | return output 65 | 66 | class recurrent_network(nn.Sequential): 67 | def __init__(self, fraction_index = 1): 68 | cuda_gpu = torch.cuda.is_available() 69 | self.resize_fraction = fraction_index 70 | super(recurrent_network, self).__init__() 71 | self.rnn = nn.LSTM(int(16/fraction_index), int(16/fraction_index) ) 72 | if cuda_gpu: 73 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 74 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 75 | else: 76 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 77 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 78 | 79 | def forward(self, x): 80 | for i in x: 81 | # Step through the sequence one element at a time. 82 | # after each step, hidden contains the hidden state. 
83 | out, (self.hidden1, self.hidden2) = self.rnn(i, (self.hidden1, self.hidden2) ) 84 | 85 | return out 86 | 87 | class recurrent_network_layer(nn.Sequential): 88 | def __init__(self, fraction_index = 1): 89 | super(recurrent_network_layer, self).__init__() 90 | cuda_gpu = torch.cuda.is_available() 91 | self.rnn = nn.LSTM(int(16/fraction_index), int(16/fraction_index) ) 92 | self.resize_fraction = fraction_index 93 | self.free_mem_counter = 0 94 | if cuda_gpu: 95 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 96 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 97 | else: 98 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 99 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 100 | self.output_buffer = [] 101 | 102 | def forward(self, x): 103 | self.init_buffer() 104 | for i in x: 105 | # Step through the sequence one element at a time. 106 | # after each step, hidden contains the hidden state. 107 | out, (self.hidden1, self.hidden2) = self.rnn(i, (self.hidden1, self.hidden2) ) 108 | self.output_buffer.append(out) 109 | out.clone() 110 | del out 111 | 112 | return self.output_buffer 113 | 114 | def init_buffer(self): 115 | if len(self.output_buffer) > 0: 116 | self.output_buffer = [] 117 | 118 | class unet(nn.Module): 119 | def __init__(self, tot_frame_num = 100, length = 6, Gary_Scale = False, size_index = 256): 120 | print("gray scale:", Gary_Scale) 121 | super( unet, self ).__init__() 122 | if size_index != 256: 123 | self.resize_fraction = window_size = 256/size_index 124 | else: 125 | self.resize_fraction = 1 126 | 127 | cuda_gpu = torch.cuda.is_available() 128 | 129 | 130 | self.latent_feature = 0 131 | 132 | self.step = length 133 | self.free_mem_counter = 0 134 | self.max_pool = nn.MaxPool2d(2) 135 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 136 | self.one_conv1 = nn.Conv2d( 1024, 512, kernel_size=1, bias=True) 137 | self.one_conv2 = nn.Conv2d( 1024, 512, kernel_size=1, bias=True) 138 | 139 | self.rnn = recurrent_network_layer( fraction_index = self.resize_fraction ) 140 | self.rnn2 = recurrent_network( fraction_index = self.resize_fraction ) 141 | 142 | if Gary_Scale == True: 143 | self.down1 = Down_Layer(1, 64) 144 | else: 145 | self.down1 = Down_Layer( 3, 64 ) 146 | 147 | self.down2 = Down_Layer( 64, 128 ) 148 | self.down3 = Down_Layer( 128, 256 ) 149 | self.down4 = Down_Layer( 256, 512 ) 150 | self.down5 = Down_Layer( 512, 1024 ) 151 | 152 | self.up1 = Up_Layer(1024, 512) 153 | self.up2 = Up_Layer(512, 256) 154 | self.up3 = Up_Layer(256, 128) 155 | self.up4 = Up_Layer(128, 64) 156 | if Gary_Scale == True: 157 | self.up5 = nn.Conv2d( 64, 1, kernel_size = 1 ) 158 | else: 159 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 160 | 161 | def forward(self, x, buffer): 162 | self.lstm_buf = buffer.copy() 163 | self.free_mem_counter = self.free_mem_counter + 1 164 | #self.lstm_buf = [] 165 | 166 | # down convolution 167 | x1 = self.down1(x) 168 | x2 = self.max_pool(x1) 169 | 170 | x2 = self.down2(x2) 171 | x3 = self.max_pool(x2) 172 | 173 | x3 = self.down3(x3) 174 | x4 = self.max_pool(x3) 175 | 176 | x4 = self.down4(x4) 177 | x5 = self.max_pool(x4) 178 | 179 | x5 = self.down5(x5) 180 | 181 | latent_feature = x5.view(-1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 182 | 183 | self.lstm_buf.append(latent_feature ) 184 | # print( 'lstm buffer len', len( self.lstm_buf ) ) 185 
| # LSTM unit 186 | if len( self.lstm_buf ) > 1 : 187 | lstm_output = self.rnn(self.lstm_buf) 188 | lstm_output = self.rnn2( lstm_output ) 189 | lstm_output = lstm_output.view(1, 1024, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 190 | 191 | # use x5 to perform lstm 192 | if 'lstm_output' in locals(): 193 | x6 = self.one_conv1(lstm_output) 194 | x5 = self.one_conv2(lstm_output) 195 | x5 = torch.cat((x5, x6), dim = 1) 196 | 197 | # up convolution 198 | x = self.up1( x5, x4 ) 199 | x = self.up2( x, x3 ) 200 | x = self.up3( x, x2 ) 201 | x = self.up4( x, x1 ) 202 | x = F.relu(self.up5( x )) 203 | 204 | ## release var 205 | self.lstm_buf = [] 206 | 207 | del x1, x2, x3, x4, x5 208 | if 'x6' in locals(): 209 | del x6 210 | gc.collect() 211 | 212 | if self.free_mem_counter == 10 : 213 | self.free_memory() 214 | 215 | return x, latent_feature 216 | 217 | def free_memory(self): 218 | self.rnn.hidden1 = self.rnn.hidden1.detach() 219 | self.rnn.hidden2 = self.rnn.hidden2.detach() 220 | self.rnn2.hidden1 = self.rnn2.hidden1.detach() 221 | self.rnn2.hidden2 = self.rnn2.hidden2.detach() 222 | self.free_mem_counter = 0 223 | 224 | -------------------------------------------------------------------------------- /models/R_Unet_ver_2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import os 8 | import gc 9 | from conv_lstm import ConvLSTM 10 | 11 | # Down convolution layer 12 | class Down_Layer(nn.Sequential): 13 | def __init__(self, ch_in, ch_out): 14 | super(Down_Layer, self).__init__() 15 | self.layer = self.define_layer( ch_in, ch_out ) 16 | 17 | def define_layer(self, ch_in, ch_out): 18 | use_bias = True 19 | 20 | model = [] 21 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=3, padding=1, bias=use_bias), 22 | nn.ReLU(True), 23 | nn.Conv2d( ch_out, ch_out, kernel_size=3, padding=1, bias=use_bias), 24 | nn.ReLU(True) ] 25 | 26 | return nn.Sequential(*model) 27 | 28 | def forward(self, x): 29 | return self.layer(x) 30 | 31 | # Up convolution layer 32 | # input x and res_x 33 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 34 | class Up_Layer(nn.Sequential): 35 | def __init__(self, ch_in, ch_out): 36 | super(Up_Layer, self).__init__() 37 | self.ch_in = ch_in 38 | self.ch_out = ch_out 39 | self.layer = self.define_layer( ) 40 | 41 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 42 | # add 0 padding on right and down to keep shape the same 43 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 44 | self.degradation = nn.Conv2d( self.ch_in, self.ch_out, kernel_size=2 ) 45 | 46 | def define_layer(self): 47 | use_bias = True 48 | pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 49 | 50 | model = [] 51 | model += [ nn.Conv2d( self.ch_in, self.ch_out, kernel_size=3, padding=1, bias=use_bias), 52 | nn.ReLU(True), 53 | nn.Conv2d( self.ch_out, self.ch_out, kernel_size=3, padding=1, bias=use_bias), 54 | nn.ReLU(True) ] 55 | 56 | return nn.Sequential(*model) 57 | 58 | def forward(self, x, resx): 59 | output = self.degradation( self.pad( self.upsample(x) ) ) 60 | output = torch.cat((output, resx), dim = 1) 61 | output = self.layer(output) 62 | return output 63 | 64 | class recurrent_network(nn.Sequential): 65 | def __init__(self, fraction_index = 1): 66 | cuda_gpu = torch.cuda.is_available() 67 | self.resize_fraction = fraction_index 68 | 
super(recurrent_network, self).__init__() 69 | self.rnn = nn.LSTM(int(16/fraction_index), int(16/fraction_index) ) 70 | if cuda_gpu: 71 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 72 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 73 | else: 74 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 75 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 76 | 77 | def forward(self, x): 78 | for i in x: 79 | # Step through the sequence one element at a time. 80 | # after each step, hidden contains the hidden state. 81 | out, (self.hidden1, self.hidden2) = self.rnn(i, (self.hidden1, self.hidden2) ) 82 | 83 | return out 84 | 85 | class recurrent_network_layer(nn.Sequential): 86 | def __init__(self, fraction_index = 1): 87 | super(recurrent_network_layer, self).__init__() 88 | cuda_gpu = torch.cuda.is_available() 89 | self.rnn = nn.LSTM(int(16/fraction_index), int(16/fraction_index) ) 90 | self.resize_fraction = fraction_index 91 | self.free_mem_counter = 0 92 | if cuda_gpu: 93 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 94 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)).cuda() 95 | else: 96 | self.hidden1 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 97 | self.hidden2 = torch.zeros(1, int(16/self.resize_fraction), int(16/self.resize_fraction)) 98 | self.output_buffer = [] 99 | 100 | def forward(self, x): 101 | self.init_buffer() 102 | for i in x: 103 | # Step through the sequence one element at a time. 104 | # after each step, hidden contains the hidden state. 
105 | out, (self.hidden1, self.hidden2) = self.rnn(i, (self.hidden1, self.hidden2) ) 106 | self.output_buffer.append(out) 107 | out.clone() 108 | del out 109 | 110 | return self.output_buffer 111 | 112 | def init_buffer(self): 113 | if len(self.output_buffer) > 0: 114 | self.output_buffer = [] 115 | 116 | class unet(nn.Module): 117 | def __init__(self, tot_frame_num = 100, step_ = 6, predict_ = 3 ,Gary_Scale = False, size_index = 256): 118 | print("gray scale:", Gary_Scale) 119 | super( unet, self ).__init__() 120 | if size_index != 256: 121 | self.resize_fraction = window_size = 256/size_index 122 | else: 123 | self.resize_fraction = 1 124 | 125 | cuda_gpu = torch.cuda.is_available() 126 | 127 | self.latent_feature = 0 128 | self.lstm_buf = [] 129 | self.step = step_ 130 | self.pred = predict_ 131 | self.free_mem_counter = 0 132 | self.max_pool = nn.MaxPool2d(2) 133 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 134 | self.one_conv1 = nn.Conv2d( 1024, 512, kernel_size=1, bias=True) 135 | self.one_conv2 = nn.Conv2d( 1024, 512, kernel_size=1, bias=True) 136 | self.one_conv3 = nn.Conv2d( 512, 1024, kernel_size=1, bias=True) 137 | 138 | self.convlstm = ConvLSTM(input_channels=512, hidden_channels=[512, 512, 512], kernel_size=3, step=3, 139 | effective_step=[2]) 140 | 141 | 142 | self.one_conv4 = nn.Conv2d( 512, 384, kernel_size=1, bias=True) 143 | self.one_conv5 = nn.Conv2d( 256, 224, kernel_size=1, bias=True) 144 | self.one_conv6 = nn.Conv2d( 128, 120, kernel_size=1, bias=True) 145 | self.one_conv7 = nn.Conv2d( 64, 62, kernel_size=1, bias=True) 146 | 147 | self.rnn = recurrent_network_layer( fraction_index = 2 ) 148 | self.rnn2 = recurrent_network( fraction_index = 2 ) 149 | 150 | if Gary_Scale == True: 151 | self.down1 = Down_Layer(1, 64) 152 | else: 153 | self.down1 = Down_Layer( 3, 64 ) 154 | 155 | self.down2 = Down_Layer( 64, 128 ) 156 | self.down3 = Down_Layer( 128, 256 ) 157 | self.down4 = Down_Layer( 256, 512 ) 158 | self.down5 = Down_Layer( 512, 512 ) 159 | 160 | self.up1 = Up_Layer(1024, 512) 161 | self.up2 = Up_Layer(512, 256) 162 | self.up3 = Up_Layer(256, 128) 163 | self.up4 = Up_Layer(128, 64) 164 | if Gary_Scale == True: 165 | self.up5 = nn.Conv2d( 64, 1, kernel_size = 1 ) 166 | else: 167 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 168 | 169 | def forward(self, x, free_token, test_model = False): 170 | self.free_token = free_token 171 | if ( self.free_token == True ): 172 | self.free_memory() 173 | 174 | # pop oldest buffer 175 | if( len(self.lstm_buf) >= self.step): 176 | self.lstm_buf = self.lstm_buf[1:] 177 | 178 | # down convolution 179 | x1 = self.down1(x) 180 | x2 = self.max_pool(x1) 181 | 182 | x2 = self.down2(x2) 183 | x3 = self.max_pool(x2) 184 | 185 | x3 = self.down3(x3) 186 | x4 = self.max_pool(x3) 187 | 188 | x4 = self.down4(x4) 189 | x5 = self.max_pool(x4) 190 | 191 | x5 = self.down5(x5) 192 | 193 | latent_feature = x5.view(1, -1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 194 | # add latest buffer 195 | # self.lstm_buf.append(latent_feature ) 196 | if( test_model == True ): 197 | return latent_feature 198 | 199 | lstm_output = Variable(self.convlstm(latent_feature)[0]) 200 | 201 | if 'lstm_output' in locals(): 202 | x5 = torch.cat((x5, lstm_output), dim = 1) 203 | h = lstm_output.view(1, -1, x4.shape[2], x4.shape[3]) 204 | x4 = self.one_conv4(x4) 205 | x4 = torch.cat((x4, h), dim = 1) 206 | x = self.up1( x5, x4 ) 207 | 208 | h = lstm_output.view(1, -1, x3.shape[2], x3.shape[3]) 209 | x3 = self.one_conv5(x3) 210 | x3 = 
torch.cat((x3, h), dim = 1) 211 | x = self.up2( x, x3 ) 212 | 213 | h = lstm_output.view(1, -1, x2.shape[2], x2.shape[3]) 214 | x2 = self.one_conv6(x2) 215 | x2 = torch.cat((x2, h), dim = 1) 216 | x = self.up3( x, x2 ) 217 | 218 | h = lstm_output.view(1, -1, x1.shape[2], x1.shape[3]) 219 | x1 = self.one_conv7(x1) 220 | x1 = torch.cat((x1, h), dim = 1) 221 | x = self.up4( x, x1 ) 222 | x = F.relu(self.up5( x )) 223 | 224 | return x 225 | 226 | def free_memory(self): 227 | ''' 228 | self.rnn.hidden1 = self.rnn.hidden1.detach() 229 | self.rnn.hidden2 = self.rnn.hidden2.detach() 230 | self.rnn2.hidden1 = self.rnn2.hidden1.detach() 231 | self.rnn2.hidden2 = self.rnn2.hidden2.detach() 232 | #self.convlstm.hidden_channels = self.convlstm.hidden_channels.detach() 233 | ''' 234 | self.free_mem_counter = 0 235 | -------------------------------------------------------------------------------- /models/R_Unet_ver_4.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import os 8 | import gc 9 | from conv_lstm import ConvLSTM 10 | 11 | # Convolution unit 12 | class conv_unit(nn.Sequential): 13 | def __init__(self, ch_in, ch_out): 14 | super(conv_unit, self).__init__() 15 | self.layer1 = self.define_layer1( ch_in, ch_out ) 16 | self.layer3 = self.define_layer1( ch_out, ch_out ) 17 | self.lamda1 = 0 18 | self.lamda2 = 0 19 | 20 | def define_layer1(self, ch_in, ch_out): 21 | use_bias = True 22 | 23 | model = [] 24 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 25 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 26 | nn.ReLU(True) ] 27 | 28 | return nn.Sequential(*model) 29 | 30 | def define_layer2(self, ch_in, ch_out): 31 | use_bias = True 32 | 33 | model = [] 34 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 35 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 36 | nn.ReLU(True) ] 37 | 38 | return nn.Sequential(*model) 39 | 40 | def forward(self, x): 41 | x1 = self.layer1(x) 42 | output = self.layer3(x1) 43 | 44 | return output 45 | 46 | # Up convolution layer 47 | # input x and res_x 48 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 49 | class Up_Layer(nn.Sequential): 50 | def __init__(self, ch_in, ch_out): 51 | super(Up_Layer, self).__init__() 52 | #1st conv 53 | self.layer1 = self.define_layer1(ch_in, ch_out) 54 | #self.layer2 = self.define_layer2(ch_in, ch_out) 55 | #2nd conv 56 | self.layer3 = self.define_layer1(ch_out, ch_out) 57 | #self.layer4 = self.define_layer2(ch_out, ch_out) 58 | 59 | self.lamda1 = 0 60 | self.lamda2 = 0 61 | 62 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 63 | # add 0 padding on right and down to keep shape the same 64 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 65 | self.degradation = nn.Conv2d( ch_in, ch_out, kernel_size=2 ) 66 | 67 | def define_layer1(self, ch_in, ch_out): 68 | use_bias = True 69 | 70 | model = [] 71 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 72 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 73 | nn.ReLU(True)] 74 | 75 | return nn.Sequential(*model) 76 | 77 | def define_layer2(self, ch_in, ch_out): 78 | use_bias = True 79 | 80 | model = [] 81 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 82 | 
nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 83 | nn.ReLU(True)] 84 | 85 | return nn.Sequential(*model) 86 | 87 | def forward(self, x, resx): 88 | output = self.degradation( self.pad( self.upsample(x) ) ) 89 | output = torch.cat((output, resx), dim = 1) 90 | 91 | output = self.layer1(output) # 3conv 92 | 93 | output = self.layer3(output) 94 | 95 | return output 96 | 97 | class Up_Layer0(nn.Sequential): 98 | def __init__(self, ch_in, ch_out): 99 | super(Up_Layer0, self).__init__() 100 | #1st conv 101 | self.layer1 = self.define_layer1(ch_in, ch_out) 102 | #self.layer2 = self.define_layer2(ch_in, ch_out) 103 | #2nd conv 104 | self.layer3 = self.define_layer1(ch_out, ch_out) 105 | #self.layer4 = self.define_layer2(ch_out, ch_out) 106 | #3rd conv 107 | self.layer5 = self.define_layer1(ch_in, ch_out) 108 | #self.layer6 = self.define_layer2(ch_in, ch_out) 109 | #4th conv 110 | self.layer7 = self.define_layer1(ch_out, ch_out) 111 | #self.layer8 = self.define_layer2(ch_out, ch_out) 112 | 113 | self.lamda1 = 0 114 | self.lamda2 = 0 115 | self.lamda3 = 0 116 | self.lamda4 = 0 117 | 118 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 119 | # add 0 padding on right and down to keep shape the same 120 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 121 | self.degradation = nn.Conv2d( ch_out, ch_out, kernel_size=2 ) 122 | 123 | def define_layer1(self, ch_in, ch_out): 124 | use_bias = True 125 | 126 | model = [] 127 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 128 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 129 | nn.ReLU(True)] 130 | 131 | return nn.Sequential(*model) 132 | 133 | def define_layer2(self, ch_in, ch_out): 134 | use_bias = True 135 | 136 | model = [] 137 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 138 | 139 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 140 | nn.ReLU(True)] 141 | 142 | return nn.Sequential(*model) 143 | 144 | def forward(self, x, resx): 145 | output = self.layer1(x) # 3conv 146 | output = self.layer3(output) 147 | 148 | output = self.degradation( self.pad( self.upsample(output) ) ) 149 | output = torch.cat((output, resx), dim = 1) 150 | 151 | output = self.layer5(output) # 3conv 152 | 153 | output = self.layer7(output) 154 | 155 | return output 156 | 157 | 158 | class unet(nn.Module): 159 | def __init__(self, tot_frame_num = 100, step_ = 6, predict_ = 3 ,Gary_Scale = False, size_index = 256, gpu_num = 0): 160 | print("gray scale:", Gary_Scale) 161 | super( unet, self ).__init__() 162 | if size_index != 256: 163 | self.resize_fraction = window_size = 256/size_index 164 | else: 165 | self.resize_fraction = 1 166 | 167 | cuda_gpu = torch.cuda.is_available() 168 | 169 | self.latent_feature = 0 170 | self.lstm_buf = [] 171 | self.step = step_ 172 | self.pred = predict_ 173 | self.free_mem_counter = 0 174 | self.max_pool = nn.MaxPool2d(2) 175 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 176 | 177 | self.convlstm1 = ConvLSTM(input_channels=512, hidden_channels=[512, 512, 512], kernel_size=3, step=3, 178 | effective_step=[2], gpu_num = gpu_num ) 179 | 180 | self.convlstm2 = ConvLSTM(input_channels=384, hidden_channels=[384, 256, 128], kernel_size=3, step=3, 181 | effective_step=[2], gpu_num = gpu_num ) 182 | 183 | self.convlstm3 = ConvLSTM(input_channels=224, hidden_channels=[224, 128, 32], kernel_size=3, step=3, 184 | effective_step=[2], gpu_num = gpu_num ) 185 | 186 | self.convlstm4 = ConvLSTM(input_channels=120, 
hidden_channels=[120, 64, 8], kernel_size=3, step=3, 187 | effective_step=[2], gpu_num = gpu_num ) 188 | 189 | self.convlstm5 = ConvLSTM(input_channels=62, hidden_channels=[62, 32, 2], kernel_size=3, step=3, 190 | effective_step=[2], gpu_num = gpu_num ) 191 | 192 | if Gary_Scale == True: 193 | self.down1 = conv_unit(1, 62) 194 | else: 195 | self.down1 = conv_unit( 3, 62 ) 196 | 197 | self.down2 = conv_unit(62, 120) 198 | self.down3 = conv_unit( 120, 224 ) 199 | self.down4 = conv_unit( 224, 384 ) 200 | self.down5 = conv_unit( 384, 512 ) 201 | 202 | self.up1 = Up_Layer0(1024, 512) 203 | self.up2 = Up_Layer(512, 256) 204 | self.up3 = Up_Layer(256, 128) 205 | self.up4 = Up_Layer(128, 64) 206 | 207 | if Gary_Scale == True: 208 | self.up5 = nn.Conv2d( 64, 1, kernel_size = 1 ) 209 | else: 210 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 211 | 212 | def forward(self, x, init_token, test_model = False): 213 | # pop oldest buffer 214 | if( len(self.lstm_buf) >= self.step): 215 | self.lstm_buf = self.lstm_buf[1:] 216 | 217 | # down convolution 218 | x1 = self.down1(x) 219 | x2 = self.max_pool(x1) 220 | 221 | x2 = self.down2(x2) 222 | x3 = self.max_pool(x2) 223 | 224 | x3 = self.down3(x3) 225 | x4 = self.max_pool(x3) 226 | 227 | x4 = self.down4(x4) 228 | x5 = self.max_pool(x4) 229 | 230 | x5 = self.down5(x5) 231 | 232 | latent_feature1 = x5.view(1, -1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 233 | lstm_output1 = Variable(self.convlstm1(latent_feature1, init_token)[0]) 234 | 235 | lstm_output2 = Variable(self.convlstm2(x4, init_token )[0]) 236 | lstm_output3 = Variable(self.convlstm3(x3, init_token )[0]) 237 | lstm_output4 = Variable(self.convlstm4(x2, init_token )[0]) 238 | lstm_output5 = Variable(self.convlstm5(x1, init_token )[0]) 239 | 240 | 241 | x5 = torch.cat((x5, lstm_output1), dim = 1) 242 | 243 | x4 = torch.cat((x4, lstm_output2), dim = 1) 244 | x = self.up1( x5, x4 ) 245 | 246 | x3 = torch.cat((x3, lstm_output3), dim = 1) 247 | x = self.up2( x, x3 ) 248 | 249 | x2 = torch.cat((x2, lstm_output4), dim = 1) 250 | x = self.up3( x, x2 ) 251 | 252 | x1 = torch.cat((x1, lstm_output5), dim = 1) 253 | x = self.up4( x, x1 ) 254 | 255 | x = F.relu(self.up5( x )) 256 | 257 | return x 258 | 259 | def free_memory(self): 260 | 261 | #self.convlstm.hidden_channels = self.convlstm.hidden_channels.detach() 262 | 263 | self.free_mem_counter = 0 264 | -------------------------------------------------------------------------------- /models/R_Unet_ver_3.py: -------------------------------------------------------------------------------- 1 | ## Recurrent U-net, with LSTM 2 | ## default step = 6 3 | ## Future plan: multi-layer LSTM, conv LSTM, currently contains 2 layer LSTM 4 | 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import torch.optim as optim 9 | from torch.autograd import Variable 10 | import numpy as np 11 | import os 12 | import gc 13 | from conv_lstm import ConvLSTM 14 | 15 | # Convolution unit 16 | class conv_unit(nn.Sequential): 17 | def __init__(self, ch_in, ch_out): 18 | super(conv_unit, self).__init__() 19 | self.layer1 = self.define_layer1( ch_in, ch_out ) 20 | self.layer2 = self.define_layer2( ch_in, ch_out ) 21 | 22 | self.layer3 = self.define_layer1( ch_out, ch_out ) 23 | self.layer4 = self.define_layer2( ch_out, ch_out ) 24 | 25 | self.lamda1 = np.random.rand() 26 | self.lamda2 = np.random.rand() 27 | 28 | def define_layer1(self, ch_in, ch_out): 29 | use_bias = True 30 | 31 | model = [] 32 | model += [ nn.Conv2d( ch_in, ch_out, 
kernel_size=(3, 1), padding=1, bias=use_bias), 33 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 34 | nn.ReLU(True) ] 35 | 36 | return nn.Sequential(*model) 37 | 38 | def define_layer2(self, ch_in, ch_out): 39 | use_bias = True 40 | 41 | model = [] 42 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 43 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 44 | nn.ReLU(True) ] 45 | 46 | return nn.Sequential(*model) 47 | 48 | def forward(self, x): 49 | x1 = self.layer1(x) 50 | x2 = self.layer2(x) 51 | output = x1*(1-self.lamda1) + x2*(self.lamda1) 52 | 53 | x1 = self.layer3(output) 54 | x2 = self.layer4(output) 55 | output = x1*(1-self.lamda2) + x2*(self.lamda2) 56 | 57 | return output 58 | 59 | # Up convolution layer 60 | # input x and res_x 61 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 62 | class Up_Layer(nn.Sequential): 63 | def __init__(self, ch_in, ch_out): 64 | super(Up_Layer, self).__init__() 65 | #1st conv 66 | self.layer1 = self.define_layer1(ch_in, ch_out) 67 | self.layer2 = self.define_layer2(ch_in, ch_out) 68 | #2nd conv 69 | self.layer3 = self.define_layer1(ch_out, ch_out) 70 | self.layer4 = self.define_layer2(ch_out, ch_out) 71 | 72 | self.lamda1 = np.random.rand() 73 | self.lamda2 = np.random.rand() 74 | 75 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 76 | # add 0 padding on right and down to keep shape the same 77 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 78 | self.degradation = nn.Conv2d( ch_in, ch_out, kernel_size=2 ) 79 | 80 | def define_layer1(self, ch_in, ch_out): 81 | use_bias = True 82 | 83 | model = [] 84 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 85 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 86 | nn.ReLU(True)] 87 | 88 | return nn.Sequential(*model) 89 | 90 | def define_layer2(self, ch_in, ch_out): 91 | use_bias = True 92 | 93 | model = [] 94 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 95 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 96 | nn.ReLU(True)] 97 | 98 | return nn.Sequential(*model) 99 | 100 | def forward(self, x, resx): 101 | output = self.degradation( self.pad( self.upsample(x) ) ) 102 | output = torch.cat((output, resx), dim = 1) 103 | 104 | output1 = self.layer1(output) # 3conv 105 | output2 = self.layer2(output) # 5conv 106 | output = (1- self.lamda1)*output1 + (self.lamda1)*output2 107 | 108 | output1 = self.layer3(output) 109 | output2 = self.layer4(output) 110 | output = (1- self.lamda2)*output1 + (self.lamda2)*output2 111 | 112 | return output 113 | 114 | 115 | # Up convolution layer 116 | # input x and res_x 117 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 118 | class Up_Layer0(nn.Sequential): 119 | def __init__(self, ch_in, ch_out): 120 | super(Up_Layer0, self).__init__() 121 | #1st conv 122 | self.layer1 = self.define_layer1(ch_in, ch_out) 123 | self.layer2 = self.define_layer2(ch_in, ch_out) 124 | #2nd conv 125 | self.layer3 = self.define_layer1(ch_out, ch_out) 126 | self.layer4 = self.define_layer2(ch_out, ch_out) 127 | 128 | self.lamda1 = np.random.rand() 129 | self.lamda2 = np.random.rand() 130 | 131 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 132 | # add 0 padding on right and down to keep shape the same 133 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 134 | self.degradation = nn.Conv2d( ch_in, ch_out, kernel_size=2 ) 135 | 136 | def 
define_layer1(self, ch_in, ch_out): 137 | use_bias = True 138 | 139 | model = [] 140 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 141 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 142 | nn.ReLU(True)] 143 | 144 | return nn.Sequential(*model) 145 | 146 | def define_layer2(self, ch_in, ch_out): 147 | use_bias = True 148 | 149 | model = [] 150 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 151 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 152 | nn.ReLU(True)] 153 | 154 | return nn.Sequential(*model) 155 | 156 | def forward(self, x, resx): 157 | output = self.degradation( self.pad( self.upsample(x) ) ) 158 | output = torch.cat((output, resx), dim = 1) 159 | 160 | output1 = self.layer1(output) # 3conv 161 | output2 = self.layer2(output) # 5conv 162 | output = (1- self.lamda1)*output1 + (self.lamda1)*output2 163 | 164 | output1 = self.layer3(output) 165 | output2 = self.layer4(output) 166 | output = (1- self.lamda2)*output1 + (self.lamda2)*output2 167 | 168 | return output 169 | 170 | 171 | class unet(nn.Module): 172 | def __init__(self, tot_frame_num = 100, step_ = 6, predict_ = 3 ,Gary_Scale = False, size_index = 256): 173 | print("gray scale:", Gary_Scale) 174 | super( unet, self ).__init__() 175 | if size_index != 256: 176 | self.resize_fraction = window_size = 256/size_index 177 | else: 178 | self.resize_fraction = 1 179 | 180 | cuda_gpu = torch.cuda.is_available() 181 | 182 | self.latent_feature = 0 183 | self.lstm_buf = [] 184 | self.step = step_ 185 | self.pred = predict_ 186 | self.free_mem_counter = 0 187 | self.max_pool = nn.MaxPool2d(2) 188 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 189 | 190 | self.convlstm = ConvLSTM(input_channels=512, hidden_channels=[512, 512, 512], kernel_size=3, step=3, 191 | effective_step=[2]) 192 | 193 | if Gary_Scale == True: 194 | self.down1 = conv_unit(1, 62) 195 | else: 196 | self.down1 = conv_unit( 3, 62 ) 197 | 198 | self.down2 = conv_unit(62, 120) 199 | self.down3 = conv_unit( 120, 224 ) 200 | self.down4 = conv_unit( 224, 384 ) 201 | self.down5 = conv_unit( 384, 512 ) 202 | 203 | self.up1 = Up_Layer(1024, 512) 204 | self.up2 = Up_Layer(512, 256) 205 | self.up3 = Up_Layer(256, 128) 206 | self.up4 = Up_Layer(128, 64) 207 | 208 | if Gary_Scale == True: 209 | self.up5 = nn.Conv2d( 64, 1, kernel_size = 1 ) 210 | else: 211 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 212 | 213 | def forward(self, x, free_token, test_model = False): 214 | self.free_token = free_token 215 | if ( self.free_token == True ): 216 | self.free_memory() 217 | 218 | # pop oldest buffer 219 | if( len(self.lstm_buf) >= self.step): 220 | self.lstm_buf = self.lstm_buf[1:] 221 | 222 | # down convolution 223 | x1 = self.down1(x) 224 | x2 = self.max_pool(x1) 225 | 226 | x2 = self.down2(x2) 227 | x3 = self.max_pool(x2) 228 | 229 | x3 = self.down3(x3) 230 | x4 = self.max_pool(x3) 231 | 232 | x4 = self.down4(x4) 233 | x5 = self.max_pool(x4) 234 | 235 | x5 = self.down5(x5) 236 | 237 | latent_feature = x5.view(1, -1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 238 | # add latest buffer 239 | # self.lstm_buf.append(latent_feature ) 240 | if( test_model == True ): 241 | return latent_feature 242 | 243 | lstm_output = Variable(self.convlstm(latent_feature)[0]) 244 | 245 | 246 | if 'lstm_output' in locals(): 247 | x5 = torch.cat((x5, lstm_output), dim = 1) 248 | h = lstm_output.view(1, -1, x4.shape[2], x4.shape[3]) 249 | #x4 = self.one_conv4(x4) 
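# The single bottleneck ConvLSTM output is reused at every decoder scale below by
# reinterpreting its 512 x 16 x 16 values (when size_index = 256) with .view():
# 128 ch @ 32x32 for x4, 32 ch @ 64x64 for x3, 8 ch @ 128x128 for x2 and
# 2 ch @ 256x256 for x1, so each concatenated skip carries exactly the
# 512/256/128/64 channels that up1..up4 expect for their resx input.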
250 | x4 = torch.cat((x4, h), dim = 1) 251 | x = self.up1( x5, x4 ) 252 | 253 | h = lstm_output.view(1, -1, x3.shape[2], x3.shape[3]) 254 | #x3 = self.one_conv5(x3) 255 | x3 = torch.cat((x3, h), dim = 1) 256 | x = self.up2( x, x3 ) 257 | 258 | h = lstm_output.view(1, -1, x2.shape[2], x2.shape[3]) 259 | #x2 = self.one_conv6(x2) 260 | x2 = torch.cat((x2, h), dim = 1) 261 | x = self.up3( x, x2 ) 262 | 263 | h = lstm_output.view(1, -1, x1.shape[2], x1.shape[3]) 264 | #x1 = self.one_conv7(x1) 265 | x1 = torch.cat((x1, h), dim = 1) 266 | x = self.up4( x, x1 ) 267 | x = F.relu(self.up5( x )) 268 | 269 | return x 270 | 271 | def free_memory(self): 272 | 273 | #self.convlstm.hidden_channels = self.convlstm.hidden_channels.detach() 274 | 275 | self.free_mem_counter = 0 276 | -------------------------------------------------------------------------------- /models/R_Unet_ver_M2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import os 8 | import gc 9 | from conv_lstm import ConvLSTM 10 | 11 | # Convolution unit 12 | class conv_unit(nn.Sequential): 13 | def __init__(self, ch_in, ch_out): 14 | super(conv_unit, self).__init__() 15 | self.layer1 = self.define_layer1( ch_in, ch_out ) 16 | #self.layer2 = self.define_layer2( ch_in, ch_out ) 17 | 18 | self.layer3 = self.define_layer1( ch_out, ch_out ) 19 | #self.layer4 = self.define_layer2( ch_out, ch_out ) 20 | 21 | self.lamda1 = 0 22 | self.lamda2 = 0 23 | 24 | def define_layer1(self, ch_in, ch_out): 25 | use_bias = True 26 | 27 | model = [] 28 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 29 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 30 | nn.ReLU(True) ] 31 | 32 | return nn.Sequential(*model) 33 | 34 | def define_layer2(self, ch_in, ch_out): 35 | use_bias = True 36 | 37 | model = [] 38 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 39 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 40 | nn.ReLU(True) ] 41 | 42 | return nn.Sequential(*model) 43 | 44 | def forward(self, x): 45 | x1 = self.layer1(x) 46 | output = self.layer3(x1) 47 | 48 | return output 49 | 50 | # Up convolution layer 51 | # input x and res_x 52 | # upsamle(x) -> reduce_demention -> concatenate x and res_x -> up_conv_layer 53 | class Up_Layer(nn.Sequential): 54 | def __init__(self, ch_in, ch_out): 55 | super(Up_Layer, self).__init__() 56 | #1st conv 57 | self.layer1 = self.define_layer1(ch_in, ch_out) 58 | #self.layer2 = self.define_layer2(ch_in, ch_out) 59 | #2nd conv 60 | self.layer3 = self.define_layer1(ch_out, ch_out) 61 | #self.layer4 = self.define_layer2(ch_out, ch_out) 62 | 63 | self.lamda1 = 0 64 | self.lamda2 = 0 65 | 66 | ''' 67 | self.lamda1 = np.random.rand() 68 | self.lamda2 = np.random.rand() 69 | ''' 70 | 71 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 72 | # add 0 padding on right and down to keep shape the same 73 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 74 | self.degradation = nn.Conv2d( ch_in, ch_out, kernel_size=2 ) 75 | 76 | def define_layer1(self, ch_in, ch_out): 77 | use_bias = True 78 | 79 | model = [] 80 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 81 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 82 | nn.ReLU(True)] 83 | 84 | return nn.Sequential(*model) 85 | 86 
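# define_layer2 (the factorized 5x5 branch) is kept for reference but effectively
# disabled in this version: forward() only runs layer1/layer3 and the lamda
# mixing weights above are pinned to 0, unlike R_Unet_ver_3 where the 3x3 and
# 5x5 branches are blended as (1 - lamda) * out3 + lamda * out5.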
| def define_layer2(self, ch_in, ch_out): 87 | use_bias = True 88 | 89 | model = [] 90 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 91 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 92 | nn.ReLU(True)] 93 | 94 | return nn.Sequential(*model) 95 | 96 | def forward(self, x, resx): 97 | output = self.degradation( self.pad( self.upsample(x) ) ) 98 | output = torch.cat((output, resx), dim = 1) 99 | 100 | output = self.layer1(output) # 3conv 101 | 102 | output = self.layer3(output) 103 | 104 | return output 105 | 106 | class Up_Layer0(nn.Sequential): 107 | def __init__(self, ch_in, ch_out): 108 | super(Up_Layer0, self).__init__() 109 | #1st conv 110 | self.layer1 = self.define_layer1(ch_in, ch_out) 111 | #self.layer2 = self.define_layer2(ch_in, ch_out) 112 | #2nd conv 113 | self.layer3 = self.define_layer1(ch_out, ch_out) 114 | #self.layer4 = self.define_layer2(ch_out, ch_out) 115 | #3rd conv 116 | self.layer5 = self.define_layer1(ch_in, ch_out) 117 | #self.layer6 = self.define_layer2(ch_in, ch_out) 118 | #4th conv 119 | self.layer7 = self.define_layer1(ch_out, ch_out) 120 | #self.layer8 = self.define_layer2(ch_out, ch_out) 121 | 122 | self.lamda1 = 0 123 | self.lamda2 = 0 124 | self.lamda3 = 0 125 | self.lamda4 = 0 126 | ''' 127 | self.lamda1 = np.random.rand() 128 | self.lamda2 = np.random.rand() 129 | self.lamda3 = np.random.rand() 130 | self.lamda4 = np.random.rand() 131 | ''' 132 | 133 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 134 | # add 0 padding on right and down to keep shape the same 135 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 136 | self.degradation = nn.Conv2d( ch_out, ch_out, kernel_size=2 ) 137 | 138 | def define_layer1(self, ch_in, ch_out): 139 | use_bias = True 140 | 141 | model = [] 142 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 143 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 144 | nn.ReLU(True)] 145 | 146 | return nn.Sequential(*model) 147 | 148 | def define_layer2(self, ch_in, ch_out): 149 | use_bias = True 150 | 151 | model = [] 152 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 153 | 154 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 155 | nn.ReLU(True)] 156 | 157 | return nn.Sequential(*model) 158 | 159 | def forward(self, x, resx): 160 | output = self.layer1(x) # 3conv 161 | output = self.layer3(output) 162 | 163 | output = self.degradation( self.pad( self.upsample(output) ) ) 164 | output = torch.cat((output, resx), dim = 1) 165 | 166 | output = self.layer5(output) # 3conv 167 | 168 | output = self.layer7(output) 169 | 170 | return output 171 | 172 | 173 | class unet(nn.Module): 174 | def __init__(self, tot_frame_num = 100, step_ = 6, predict_ = 3 ,Gary_Scale = False, size_index = 256): 175 | print("gray scale:", Gary_Scale) 176 | super( unet, self ).__init__() 177 | 178 | self.size_index = size_index 179 | if size_index != 256: 180 | self.resize_fraction = window_size = 256/size_index 181 | else: 182 | self.resize_fraction = 1 183 | 184 | cuda_gpu = torch.cuda.is_available() 185 | device = torch.device('cuda:0' if cuda_gpu else 'cpu') 186 | 187 | #self.threshold = torch.autograd.Variable( torch.Tensor([1]) ).to(device) 188 | 189 | self.latent_feature = 0 190 | self.lstm_buf = [] 191 | self.step = step_ 192 | self.pred = predict_ 193 | self.free_mem_counter = 0 194 | self.max_pool = nn.MaxPool2d(2) 195 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 196 | 197 | 
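# torch.nn.Softmax() without an explicit dim argument triggers a deprecation
# warning on recent PyTorch; note also that self.softmax is never called in
# forward(), where the two output channels go through relu and sigmoid instead.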
self.softmax = torch.nn.Softmax() 198 | 199 | self.convlstm1 = ConvLSTM(input_channels=512, hidden_channels=[512, 512, 512], kernel_size=3, step=3, 200 | effective_step=[2]) 201 | 202 | self.convlstm2 = ConvLSTM(input_channels=384, hidden_channels=[384, 256, 128], kernel_size=3, step=3, 203 | effective_step=[2]) 204 | 205 | self.convlstm3 = ConvLSTM(input_channels=224, hidden_channels=[224, 128, 32], kernel_size=3, step=3, 206 | effective_step=[2]) 207 | 208 | self.convlstm4 = ConvLSTM(input_channels=120, hidden_channels=[120, 64, 8], kernel_size=3, step=3, 209 | effective_step=[2]) 210 | 211 | self.convlstm5 = ConvLSTM(input_channels=62, hidden_channels=[62, 32, 2], kernel_size=3, step=3, 212 | effective_step=[2]) 213 | 214 | if Gary_Scale == True: 215 | self.down1 = conv_unit( 2, 62) 216 | else: 217 | self.down1 = conv_unit( 3, 62 ) 218 | 219 | self.down2 = conv_unit(62, 120) 220 | self.down3 = conv_unit( 120, 224 ) 221 | self.down4 = conv_unit( 224, 384 ) 222 | self.down5 = conv_unit( 384, 512 ) 223 | 224 | self.up1 = Up_Layer0(1024, 512) 225 | self.up2 = Up_Layer(512, 256) 226 | self.up3 = Up_Layer(256, 128) 227 | self.up4 = Up_Layer(128, 64) 228 | 229 | if Gary_Scale == True: 230 | self.up5 = nn.Conv2d( 64, 2, kernel_size = 1 ) 231 | else: 232 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 233 | 234 | def forward(self, x, free_token, test_model = False): 235 | self.free_token = free_token 236 | if ( self.free_token == True ): 237 | self.free_memory() 238 | 239 | # pop oldest buffer 240 | if( len(self.lstm_buf) >= self.step): 241 | self.lstm_buf = self.lstm_buf[1:] 242 | 243 | # down convolution 244 | x1 = self.down1(x) 245 | x2 = self.max_pool(x1) 246 | 247 | x2 = self.down2(x2) 248 | x3 = self.max_pool(x2) 249 | 250 | x3 = self.down3(x3) 251 | x4 = self.max_pool(x3) 252 | 253 | x4 = self.down4(x4) 254 | x5 = self.max_pool(x4) 255 | 256 | x5 = self.down5(x5) 257 | 258 | latent_feature1 = x5.view(1, -1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 259 | lstm_output1 = Variable(self.convlstm1(latent_feature1)[0]) 260 | 261 | lstm_output2 = Variable(self.convlstm2(x4)[0]) 262 | lstm_output3 = Variable(self.convlstm3(x3)[0]) 263 | lstm_output4 = Variable(self.convlstm4(x2)[0]) 264 | lstm_output5 = Variable(self.convlstm5(x1)[0]) 265 | 266 | 267 | x5 = torch.cat((x5, lstm_output1), dim = 1) 268 | #h = lstm_output.view(1, -1, x4.shape[2], x4.shape[3]) 269 | #x4 = self.one_conv4(x4) 270 | 271 | x4 = torch.cat((x4, lstm_output2), dim = 1) 272 | x = self.up1( x5, x4 ) 273 | 274 | #h = lstm_output.view(1, -1, x3.shape[2], x3.shape[3]) 275 | #x3 = self.one_conv5(x3) 276 | x3 = torch.cat((x3, lstm_output3), dim = 1) 277 | x = self.up2( x, x3 ) 278 | 279 | #h = lstm_output.view(1, -1, x2.shape[2], x2.shape[3]) 280 | #x2 = self.one_conv6(x2) 281 | x2 = torch.cat((x2, lstm_output4), dim = 1) 282 | x = self.up3( x, x2 ) 283 | 284 | #h = lstm_output.view(1, -1, x1.shape[2], x1.shape[3]) 285 | #x1 = self.one_conv7(x1) 286 | x1 = torch.cat((x1, lstm_output5), dim = 1) 287 | x = self.up4( x, x1 ) 288 | 289 | x = self.up5( x ) 290 | 291 | #x[0][1] = (x[0][1] > self.threshold).int() 292 | 293 | #x[0][1] = self.binary_th( x[0][1] ) 294 | 295 | x_sub1 = x[0][0].view( 1, 1, self.size_index, self.size_index ) 296 | x_sub2 = x[0][1].view( 1, 1, self.size_index, self.size_index ) 297 | #x_sub2 = torch.clamp( x_sub2, max = 1 ) 298 | 299 | x_sub1 = F.relu(x_sub1) 300 | x_sub2 = F.sigmoid(x_sub2) 301 | 302 | x = torch.cat( ( x_sub1, x_sub2 ), dim=1 ) 303 | 304 | return x 305 | 306 | def 
free_memory(self): 307 | 308 | #self.convlstm.hidden_channels = self.convlstm.hidden_channels.detach() 309 | 310 | self.free_mem_counter = 0 311 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import cv2 as cv 2 | import torch 3 | import torch.nn as nn 4 | import torch.optim as optim 5 | import numpy as np 6 | import csv 7 | import os 8 | import shutil 9 | import gc 10 | 11 | cuda_gpu = torch.cuda.is_available() 12 | 13 | ''' 14 | reshape a given image to a (channels, size, size) square array 15 | 16 | input: cv img, gray scale: boolean, size index 17 | output: numpy array 18 | ''' 19 | def reshape(img, gray_scale_r=False, size_idx = 256): 20 | if not gray_scale_r: 21 | return np.reshape(img, (3, size_idx, size_idx)) 22 | else: 23 | return np.reshape(img, (1, size_idx, size_idx)) 24 | 25 | ''' 26 | given a path, collect every child directory under it 27 | return a sorted list 28 | ''' 29 | def get_video_dir_list(video_path): 30 | cwd = os.getcwd() 31 | os.chdir(cwd + video_path[1:]) 32 | dir_list = next(os.walk('.'))[1] 33 | video_dir_list = [] 34 | for i in dir_list: 35 | i = video_path + str(i) + '/' 36 | video_dir_list.append(i) 37 | os.chdir(cwd) 38 | video_dir_list.sort() 39 | return video_dir_list 40 | 41 | ''' 42 | given a path, collect all '.jpg', '.png' and '.pt' files 43 | return a sorted list of paths 44 | ''' 45 | def get_file_path(video_path): 46 | frame_paths = [] 47 | for r, d, f in os.walk(video_path): 48 | for file in f: 49 | if file.endswith((".jpg", ".png", ".pt")): 50 | filepath = video_path + file 51 | frame_paths.append(filepath) 52 | frame_paths.sort() 53 | return frame_paths 54 | 55 | ''' 56 | load pictures in a batch 57 | 58 | ''' 59 | # returns step_size + 1 frames concatenated along dim 0 (one 5-dimensional tensor) 60 | def frame_batch_loader(f_start_num, f_path, step_size, normalize = False, gray_scale = False, size_index = 256): 61 | 62 | for i in range(0, step_size+1): 63 | if (i == 0): 64 | tensor = read_single_pic( f_start_num, f_path, normalize, gray_scale, size_index ) 65 | else: 66 | next_tensor = read_single_pic( f_start_num + i, f_path, normalize, gray_scale, size_index ) 67 | tensor = torch.cat( (tensor, next_tensor), dim = 0 ) 68 | 69 | return tensor 70 | 71 | 72 | # return a 5-dimensional torch tensor 73 | def read_single_pic(f_num, f_path, normalize = False, gray_scale = False, size_index = 256): 74 | if gray_scale: 75 | pic = cv.imread(f_path[f_num], cv.IMREAD_GRAYSCALE) 76 | else: 77 | pic = cv.imread(f_path[f_num]) # keep HWC layout here; cv.resize expects it 78 | if normalize: 79 | pic = pic_normalize(pic) 80 | 81 | pic = cv.resize(pic, (size_index, size_index), interpolation=cv.INTER_CUBIC) 82 | pic = torch.tensor(pic if gray_scale else pic.transpose(2, 0, 1), dtype=torch.float) 83 | 84 | if gray_scale: 85 | pic = pic.view(1, 1, 1, size_index, size_index) 86 | else: 87 | pic = pic.view(1, 1, 3, size_index, size_index) 88 | 89 | return pic 90 | 91 | ''' 92 | FOR TRAINING PREDICTION AND SEGMENTATION AT THE SAME TIME 93 | ''' 94 | 95 | ''' 96 | load saved pytorch tensors (not raw pictures): 97 | get step_size + 1 tensors starting at start_num and concatenate them 98 | ''' 99 | def batch_loader( start_num, frame_paths, step_size ): 100 | 101 | for i in range( 0, step_size+1 ): 102 | if i == 0: 103 | tensor = torch.load( frame_paths[start_num] ) 104 | else: 105 | next_tensor = torch.load( frame_paths[start_num + i] ) 106 | tensor = torch.cat( ( tensor, next_tensor ), dim = 0 ) 107 | 108 | return tensor 109 | 110 | 111 | def data_loader(f_num, f_path, gray_scale = False, size_index = 256): 112 | 113 | test_tensor =
torch.load(f_path[f_num]) 114 | target_tensor = torch.load(f_path[f_num+1]) 115 | 116 | return test_tensor, target_tensor 117 | 118 | def tensor_reshape(tensor, gray_scale = False, size_index = 256, imgflag = True): 119 | size_idx = size_index 120 | if cuda_gpu == True: 121 | tensor = tensor.clone().cpu() 122 | tensor = tensor.detach().numpy() 123 | 124 | if gray_scale == False: 125 | img = np.reshape(tensor, (size_idx, size_idx, 3)) 126 | else: 127 | img = np.reshape(tensor, (size_idx, size_idx)) 128 | 129 | # set to cv format for saving image 130 | if imgflag == True: 131 | img = np.asarray(img, dtype=np.uint8) 132 | else: 133 | img = np.asarray(img, dtype=float) 134 | 135 | return img 136 | 137 | def merge_image(tensor, size_index = 128, threshold = 0): 138 | img = tensor_reshape(tensor[0][0], True, size_index) 139 | mask = tensor[0][1].reshape( size_index, size_index ).clone().detach().cpu() 140 | 141 | mask = np.asarray( mask*255, dtype = np.uint8 ) 142 | 143 | mask_bol = (mask > int(threshold*255) )*1 144 | 145 | if threshold == 0: 146 | mask = mask*mask_bol 147 | else: 148 | mask = mask_bol*255 149 | 150 | img = cv.cvtColor(img, cv.COLOR_GRAY2BGR) 151 | 152 | B, G, R = cv.split(img) 153 | 154 | R = R + (255-R)*( mask/255 ) 155 | R = np.asarray(R, dtype = np.uint8) 156 | 157 | img = cv.merge( [B, G, R] ) 158 | 159 | return img 160 | 161 | 162 | ''' 163 | tensoe to image 164 | ''' 165 | def tensor_to_image(tensor, size_index = 128 ): 166 | img = tensor_reshape(tensor[0][0], True, size_index) 167 | return img 168 | 169 | ''' 170 | input a mask tensor and visualize mask only on white background 171 | 172 | ''' 173 | def mask_image(tensor, size_index = 128, threshold = 0): 174 | mask = tensor.reshape(size_index, size_index, 1).clone().detach().cpu() 175 | 176 | zero_mask1 = torch.zeros_like(mask) 177 | zero_mask2 = torch.zeros_like(mask) 178 | 179 | mask = torch.cat( (zero_mask1, mask), dim = 2 ) 180 | mask = torch.cat( (zero_mask2, mask), dim = 2 ).numpy() 181 | #print(mask.shape) 182 | 183 | img = np.ones( (size_index, size_index, 3), dtype=np.uint8 )*255 184 | 185 | img = img - 255*mask 186 | 187 | return img 188 | 189 | ''' 190 | put mask on gray background 191 | ''' 192 | def mask_image2(tensor, size_index = 128, threshold = 0): 193 | background_color = 100 194 | 195 | mask = np.asarray( tensor.reshape(size_index, size_index).clone().detach().cpu(), dtype = float ) 196 | 197 | mask_bol = (mask > threshold )*1.0 198 | 199 | if threshold == 0: 200 | mask = mask*mask_bol 201 | else: 202 | mask = mask_bol 203 | 204 | background = np.asarray(np.ones( (size_index, size_index), dtype=np.uint8 )*background_color, dtype = np.uint8) 205 | 206 | R = np.asarray( background + mask*(255 - background_color), dtype = np.uint8 ) 207 | 208 | img = cv.merge( [background, background, R] ) 209 | 210 | return img 211 | 212 | ''' 213 | picture tensor to picture 214 | ''' 215 | def tensor_to_pic(tensor, normalize = False, gray_scale = False, size_index = 256): 216 | size_idx = size_index 217 | if cuda_gpu == True: 218 | tensor = tensor.cpu() 219 | tensor = tensor.detach().numpy() 220 | if gray_scale == False: 221 | img = np.reshape(tensor, (size_idx, size_idx, 3)) 222 | else: 223 | img = np.reshape(tensor, (size_idx, size_idx)) 224 | 225 | if normalize == True: 226 | img = img*256 227 | 228 | # set to cv format for saving image 229 | img = np.asarray(img, dtype=np.uint8) 230 | 231 | return img 232 | 233 | def write_csv_file( filename, data): 234 | with open(filename, 'w', newline='') as csvfile: 235 | for 
i in range(0, len(data)): 236 | writer = csv.writer(csvfile) 237 | writer.writerow(data[i]) 238 | 239 | def buf_update( latent_feature, buf, step ): 240 | if len(buf) < step-1: 241 | buf.append( latent_feature ) 242 | return buf 243 | else: 244 | buf = buf[1:] 245 | buf.append( latent_feature ) 246 | return buf 247 | 248 | ''' 249 | get number after '@' sign 250 | ''' 251 | def get_epoch_num( string1 ): 252 | str_len = len(string1) 253 | num_flag = False 254 | num = '' 255 | 256 | for i in range( 0, str_len ): 257 | char = string1[i] 258 | if( char == '@' ): 259 | num_flag = True 260 | 261 | if( num_flag == True ): 262 | if char in ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '@']: 263 | num = num + char 264 | if i == str_len-1: 265 | return num 266 | else: 267 | return num 268 | 269 | def check_tensors(): 270 | for obj in gc.get_objects(): 271 | try: 272 | if torch.is_tensor(obj) or (hasattr(obj, 'data') and torch.is_tensor(obj.data)): 273 | print(type(obj), obj.size()) 274 | except: 275 | pass 276 | 277 | def str2bool(v): 278 | if v.lower() in ('yes', 'true', 'y', '1'): 279 | return True 280 | elif( v.lower() in ('no', 'false', 'n', '0') ): 281 | return False 282 | else: 283 | print('wrong') 284 | exit() 285 | 286 | 287 | def load_checkpoint(model, optimizer, filename='checkpoint.pth.tar'): 288 | # Note: Input model & optimizer should be pre-defined. This routine only updates their states. 289 | start_epoch = 0 290 | if os.path.isfile(filename): 291 | print("=> loading checkpoint '{}'".format(filename)) 292 | if cuda_gpu == False: 293 | checkpoint = torch.load(filename, map_location=torch.device('cpu')) 294 | else: 295 | checkpoint = torch.load(filename) 296 | 297 | start_epoch = checkpoint['epoch'] 298 | new_state_dict = {} 299 | for key, val in checkpoint['state_dict'].items(): 300 | key = key.replace('module.', '') 301 | new_state_dict[key] = val 302 | #model.load_state_dict(checkpoint['state_dict']) 303 | model.load_state_dict(new_state_dict) 304 | optimizer.load_state_dict(checkpoint['optimizer']) 305 | print("=> loaded checkpoint '{}' (epoch {})" 306 | .format(filename, checkpoint['epoch'])) 307 | else: 308 | print("=> no checkpoint found at '{}'".format(filename)) 309 | 310 | return model, optimizer, start_epoch 311 | 312 | ''' 313 | check if path exist, 314 | if exist delete and make a new one 315 | else make it 316 | ''' 317 | def refresh_dir(path): 318 | if ( os.path.isdir(path) ): 319 | print("remove", path) 320 | shutil.rmtree(path) 321 | print('make', path) 322 | os.mkdir(path) 323 | 324 | def network_loader(version, gray_scale_bol, size_idx, gpu_num): 325 | if version == 'v4' or version == 'V4': 326 | import R_Unet_ver_4 as net 327 | 328 | elif version == 'M' or version == 'M1' or version == 'm' or version == 'm1' : 329 | import R_Unet_ver_MB as net 330 | 331 | elif version == 'M2' or version == 'm2': 332 | import R_Unet_ver_M2 as net 333 | 334 | elif version == 'MS3' or version == 'M3' or version == 'ms3' or version == 'm3': 335 | import R_Unet_ver_MS3 as net 336 | 337 | elif version == 'v2_5' or version == 'V2_5': 338 | import R_Unet_ver_2_5 as net 339 | 340 | elif version == 'v2_7' or version == 'V2_7': 341 | import R_Unet_ver_2_7 as net 342 | 343 | elif version == 'v2' or version == 'V2': 344 | import R_Unet_ver_2 as net 345 | 346 | else: 347 | print("please specify correct version.") 348 | exit() 349 | 350 | network = net.unet(Gary_Scale = gray_scale_bol, size_index=size_idx, gpu_num=gpu_num) 351 | #network = torch.nn.DataParallel(net.unet(Gary_Scale = 
gray_scale_bol, size_index=size_idx, gpu_num=gpu_num)) 352 | 353 | return network 354 | 355 | def str2bool(v): 356 | if v.lower() in ('yes', 'true', 'y', '1'): 357 | return True 358 | elif v.lower() in ('no', 'false', 'n', '0'): 359 | return False 360 | else: 361 | raise ValueError('Boolean value expected') -------------------------------------------------------------------------------- /models/R_Unet_ver_M.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import torch.optim as optim 5 | from torch.autograd import Variable 6 | import numpy as np 7 | import os 8 | import gc 9 | from conv_lstm import ConvLSTM 10 | 11 | # Convolution unit 12 | class conv_unit(nn.Sequential): 13 | def __init__(self, ch_in, ch_out): 14 | super(conv_unit, self).__init__() 15 | self.layer1 = self.define_layer1( ch_in, ch_out ) 16 | #self.layer2 = self.define_layer2( ch_in, ch_out ) 17 | 18 | self.layer3 = self.define_layer1( ch_out, ch_out ) 19 | #self.layer4 = self.define_layer2( ch_out, ch_out ) 20 | 21 | self.lamda1 = 0 22 | self.lamda2 = 0 23 | 24 | def define_layer1(self, ch_in, ch_out): 25 | use_bias = True 26 | 27 | model = [] 28 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 29 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 30 | nn.ReLU(True) ] 31 | 32 | return nn.Sequential(*model) 33 | 34 | def define_layer2(self, ch_in, ch_out): 35 | use_bias = True 36 | 37 | model = [] 38 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 39 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 40 | nn.ReLU(True) ] 41 | 42 | return nn.Sequential(*model) 43 | 44 | def forward(self, x): 45 | x1 = self.layer1(x) 46 | output = self.layer3(x1) 47 | 48 | return output 49 | 50 | # Up convolution layer 51 | # input x and res_x 52 | # upsample(x) -> reduce dimension -> concatenate x and res_x -> up_conv_layer 53 | class Up_Layer(nn.Sequential): 54 | def __init__(self, ch_in, ch_out): 55 | super(Up_Layer, self).__init__() 56 | #1st conv 57 | self.layer1 = self.define_layer1(ch_in, ch_out) 58 | #self.layer2 = self.define_layer2(ch_in, ch_out) 59 | #2nd conv 60 | self.layer3 = self.define_layer1(ch_out, ch_out) 61 | #self.layer4 = self.define_layer2(ch_out, ch_out) 62 | 63 | self.lamda1 = 0 64 | self.lamda2 = 0 65 | 66 | ''' 67 | self.lamda1 = np.random.rand() 68 | self.lamda2 = np.random.rand() 69 | ''' 70 | 71 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 72 | # add 0 padding on right and down to keep shape the same 73 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 74 | self.degradation = nn.Conv2d( ch_in, ch_out, kernel_size=2 ) 75 | 76 | def define_layer1(self, ch_in, ch_out): 77 | use_bias = True 78 | 79 | model = [] 80 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 81 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 82 | nn.ReLU(True)] 83 | 84 | return nn.Sequential(*model) 85 | 86 | def define_layer2(self, ch_in, ch_out): 87 | use_bias = True 88 | 89 | model = [] 90 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 91 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 92 | nn.ReLU(True)] 93 | 94 | return nn.Sequential(*model) 95 | 96 | def forward(self, x, resx): 97 | output = self.degradation( self.pad( self.upsample(x) ) ) 98 | output = torch.cat((output, resx), dim = 1) 99
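# shape check for the two lines above: bilinear upsampling doubles H and W,
# the (0, 1, 0, 1) pad adds one extra row/column, and the stride-1 kernel-2
# "degradation" conv trims it back off while mapping ch_in -> ch_out, so the
# upsampled feature meets resx at exactly 2H x 2W.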
| 100 | output = self.layer1(output) # 3conv 101 | 102 | output = self.layer3(output) 103 | 104 | return output 105 | 106 | class Up_Layer0(nn.Sequential): 107 | def __init__(self, ch_in, ch_out): 108 | super(Up_Layer0, self).__init__() 109 | #1st conv 110 | self.layer1 = self.define_layer1(ch_in, ch_out) 111 | #self.layer2 = self.define_layer2(ch_in, ch_out) 112 | #2nd conv 113 | self.layer3 = self.define_layer1(ch_out, ch_out) 114 | #self.layer4 = self.define_layer2(ch_out, ch_out) 115 | #3rd conv 116 | self.layer5 = self.define_layer1(ch_in, ch_out) 117 | #self.layer6 = self.define_layer2(ch_in, ch_out) 118 | #4th conv 119 | self.layer7 = self.define_layer1(ch_out, ch_out) 120 | #self.layer8 = self.define_layer2(ch_out, ch_out) 121 | 122 | self.lamda1 = 0 123 | self.lamda2 = 0 124 | self.lamda3 = 0 125 | self.lamda4 = 0 126 | ''' 127 | self.lamda1 = np.random.rand() 128 | self.lamda2 = np.random.rand() 129 | self.lamda3 = np.random.rand() 130 | self.lamda4 = np.random.rand() 131 | ''' 132 | 133 | self.upsample = nn.UpsamplingBilinear2d(scale_factor=2) 134 | # add 0 padding on right and down to keep shape the same 135 | self.pad = nn.ConstantPad2d( (0, 1, 0, 1), 0 ) 136 | self.degradation = nn.Conv2d( ch_out, ch_out, kernel_size=2 ) 137 | 138 | def define_layer1(self, ch_in, ch_out): 139 | use_bias = True 140 | 141 | model = [] 142 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(3, 1), padding=1, bias=use_bias), 143 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 3), bias=use_bias), 144 | nn.ReLU(True)] 145 | 146 | return nn.Sequential(*model) 147 | 148 | def define_layer2(self, ch_in, ch_out): 149 | use_bias = True 150 | 151 | model = [] 152 | model += [ nn.Conv2d( ch_in, ch_out, kernel_size=(5, 1), padding=2, bias=use_bias), 153 | 154 | nn.Conv2d( ch_out, ch_out, kernel_size=(1, 5), bias=use_bias), 155 | nn.ReLU(True)] 156 | 157 | return nn.Sequential(*model) 158 | 159 | def forward(self, x, resx): 160 | output = self.layer1(x) # 3conv 161 | output = self.layer3(output) 162 | 163 | output = self.degradation( self.pad( self.upsample(output) ) ) 164 | output = torch.cat((output, resx), dim = 1) 165 | 166 | output = self.layer5(output) # 3conv 167 | 168 | output = self.layer7(output) 169 | 170 | return output 171 | 172 | 173 | 174 | class biased_relu(nn.Sequential): 175 | def __init__(self, start_value, end_value): 176 | super(biased_relu, self).__init__() 177 | self.start_value = start_value 178 | self.end_value = end_value 179 | 180 | def forward(self, x): 181 | x = torch.clamp(x, min = self.start_value, max = self.end_value) 182 | x = (x - self.start_value)/( self.end_value - self.start_value ) 183 | 184 | return x 185 | 186 | class unet(nn.Module): 187 | def __init__(self, tot_frame_num = 100, step_ = 6, predict_ = 3 ,Gary_Scale = False, size_index = 256): 188 | print("gray scale:", Gary_Scale) 189 | super( unet, self ).__init__() 190 | 191 | # for mask3 192 | self.biased_relu = biased_relu(-4, 4) 193 | 194 | self.size_index = size_index 195 | if size_index != 256: 196 | self.resize_fraction = window_size = 256/size_index 197 | else: 198 | self.resize_fraction = 1 199 | 200 | cuda_gpu = torch.cuda.is_available() 201 | device = torch.device('cuda:0' if cuda_gpu else 'cpu') 202 | 203 | #self.threshold = torch.autograd.Variable( torch.Tensor([0.5]) ).to(device) 204 | 205 | self.latent_feature = 0 206 | self.lstm_buf = [] 207 | self.step = step_ 208 | self.pred = predict_ 209 | self.free_mem_counter = 0 210 | self.max_pool = nn.MaxPool2d(2) 211 | self.upsample = 
nn.UpsamplingBilinear2d(scale_factor=2) 212 | 213 | self.softmax = torch.nn.Softmax() 214 | 215 | self.convlstm1 = ConvLSTM(input_channels=512, hidden_channels=[512, 512, 512], kernel_size=3, step=3, 216 | effective_step=[2]) 217 | 218 | self.convlstm2 = ConvLSTM(input_channels=384, hidden_channels=[384, 256, 128], kernel_size=3, step=3, 219 | effective_step=[2]) 220 | 221 | self.convlstm3 = ConvLSTM(input_channels=224, hidden_channels=[224, 128, 32], kernel_size=3, step=3, 222 | effective_step=[2]) 223 | 224 | self.convlstm4 = ConvLSTM(input_channels=120, hidden_channels=[120, 64, 8], kernel_size=3, step=3, 225 | effective_step=[2]) 226 | 227 | self.convlstm5 = ConvLSTM(input_channels=62, hidden_channels=[62, 32, 2], kernel_size=3, step=3, 228 | effective_step=[2]) 229 | 230 | if Gary_Scale == True: 231 | self.down1 = conv_unit( 2, 62) 232 | else: 233 | self.down1 = conv_unit( 3, 62 ) 234 | 235 | self.down2 = conv_unit(62, 120) 236 | self.down3 = conv_unit( 120, 224 ) 237 | self.down4 = conv_unit( 224, 384 ) 238 | self.down5 = conv_unit( 384, 512 ) 239 | 240 | self.up1 = Up_Layer0(1024, 512) 241 | self.up2 = Up_Layer(512, 256) 242 | self.up3 = Up_Layer(256, 128) 243 | self.up4 = Up_Layer(128, 64) 244 | 245 | if Gary_Scale == True: 246 | self.up5 = nn.Conv2d( 64, 2, kernel_size = 1 ) 247 | else: 248 | self.up5 = nn.Conv2d( 64, 3, kernel_size = 1 ) 249 | 250 | def forward(self, x, free_token, test_model = False): 251 | self.free_token = free_token 252 | if ( self.free_token == True ): 253 | self.free_memory() 254 | 255 | # pop oldest buffer 256 | if( len(self.lstm_buf) >= self.step): 257 | self.lstm_buf = self.lstm_buf[1:] 258 | 259 | # down convolution 260 | x1 = self.down1(x) 261 | x2 = self.max_pool(x1) 262 | 263 | x2 = self.down2(x2) 264 | x3 = self.max_pool(x2) 265 | 266 | x3 = self.down3(x3) 267 | x4 = self.max_pool(x3) 268 | 269 | x4 = self.down4(x4) 270 | x5 = self.max_pool(x4) 271 | 272 | x5 = self.down5(x5) 273 | 274 | latent_feature1 = x5.view(1, -1, int(16/self.resize_fraction), int(16/self.resize_fraction) ) 275 | lstm_output1 = Variable(self.convlstm1(latent_feature1)[0]) 276 | 277 | lstm_output2 = Variable(self.convlstm2(x4)[0]) 278 | lstm_output3 = Variable(self.convlstm3(x3)[0]) 279 | lstm_output4 = Variable(self.convlstm4(x2)[0]) 280 | lstm_output5 = Variable(self.convlstm5(x1)[0]) 281 | 282 | 283 | x5 = torch.cat((x5, lstm_output1), dim = 1) 284 | #h = lstm_output.view(1, -1, x4.shape[2], x4.shape[3]) 285 | #x4 = self.one_conv4(x4) 286 | 287 | x4 = torch.cat((x4, lstm_output2), dim = 1) 288 | x = self.up1( x5, x4 ) 289 | 290 | #h = lstm_output.view(1, -1, x3.shape[2], x3.shape[3]) 291 | #x3 = self.one_conv5(x3) 292 | x3 = torch.cat((x3, lstm_output3), dim = 1) 293 | x = self.up2( x, x3 ) 294 | 295 | #h = lstm_output.view(1, -1, x2.shape[2], x2.shape[3]) 296 | #x2 = self.one_conv6(x2) 297 | x2 = torch.cat((x2, lstm_output4), dim = 1) 298 | x = self.up3( x, x2 ) 299 | 300 | #h = lstm_output.view(1, -1, x1.shape[2], x1.shape[3]) 301 | #x1 = self.one_conv7(x1) 302 | x1 = torch.cat((x1, lstm_output5), dim = 1) 303 | x = self.up4( x, x1 ) 304 | 305 | x = self.up5( x ) 306 | 307 | #mask1 use this 308 | #x = F.relu(x) 309 | #x[0][1] = (x[0][1] > self.threshold).int() 310 | 311 | #x[0][1] = self.binary_th( x[0][1] ) 312 | 313 | x_sub1 = x[0][0].view( 1, 1, self.size_index, self.size_index ) 314 | x_sub2 = x[0][1].view( 1, 1, self.size_index, self.size_index ) 315 | 316 | #for mask2 317 | #x_sub1 = F.relu(x_sub1) 318 | #x_sub2 = F.relu(torch.clamp( x_sub2, min = 0, max = 1 )) 319 | 
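# mask3 (active below) squashes the raw mask logits with biased_relu:
# clamp to [-4, 4], then rescale linearly to [0, 1] - a piecewise-linear
# alternative to the sigmoid variant that is commented out underneath.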
320 | #for mask3 321 | x_sub1 = F.relu(x_sub1) 322 | x_sub2 = self.biased_relu(x_sub2) 323 | 324 | #for sigmoid 1 325 | #x_sub1 = F.relu(x_sub1) 326 | #x_sub2 = F.relu(F.sigmoid(x_sub2)) 327 | 328 | x = torch.cat( ( x_sub1, x_sub2 ), dim=1 ) 329 | 330 | return x 331 | ''' 332 | def binary_th(self, x): 333 | a = x.clone() 334 | x = (x > self.threshold) 335 | x = (a * x) 336 | 337 | return x 338 | ''' 339 | def free_memory(self): 340 | 341 | #self.convlstm.hidden_channels = self.convlstm.hidden_channels.detach() 342 | 343 | self.free_mem_counter = 0 344 | -------------------------------------------------------------------------------- /train_v2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torch.autograd import Variable 5 | import numpy as np 6 | import parse_argument 7 | from utils import * 8 | from tensorboardX import SummaryWriter 9 | import os 10 | import csv 11 | import datetime 12 | import time 13 | import psutil 14 | import gc 15 | #from torchviz import make_dot, make_dot_from_trace 16 | # possible size_index: 2^n, n >= 4, n is int 17 | 18 | # set arguements 19 | ''' 20 | video_path: directory of video frames 21 | learn_rate: learning rate 22 | gray_scale_bol: boolean, True for use gray scale image, False for use color image 23 | version: runet version to use 24 | output_path: directory for output image, model and tensorboard summary 25 | epoch_num: number of epochs to train 26 | size_idx: resize image to size_idx x size_idx square image 27 | loss_function: loss funtion. L1 loss as default 28 | skip_frame: sample rate, to reduce compactness 29 | predict_frame_num: number of prections 30 | Load: True: load model from checkpoint; False: start from zero 31 | load_model_name: checkpoint name to load and train 32 | ''' 33 | 34 | args = parse_argument.argrements() 35 | video_path, learn_rate, step, gray_scale_bol, version = args.videopath, float(args.lr), int(args.step), bool(args.gray_scale), args.version 36 | output_path = args.output_path 37 | epoch_num = int(args.epoch_num) 38 | size_idx = int(args.sz_idx) 39 | loss_function = str(args.loss_func) 40 | #input_frame = int( args.input_frame ) 41 | skip_frame = int(args.skip_frame) 42 | predict_frame_num = int(args.predict_frame) 43 | 44 | assert (os.path.isdir( output_path )) # check output path exist 45 | 46 | # if gpu exist, use cuda 47 | gpu_num = args.gpu 48 | device = torch.device('cuda:'+str(gpu_num) if cuda_gpu else 'cpu') 49 | 50 | # load network 51 | network = network_loader(version, gray_scale_bol, size_idx, gpu_num) 52 | network = network.to(device) 53 | 54 | # set optimizer and Loss fuction 55 | optimizer = optim.Adam( network.parameters(), lr = learn_rate ) 56 | writer = SummaryWriter(output_path + 'Summary_writer') 57 | 58 | # load model from check point or start from epoch 0 59 | Load = args.load 60 | if Load == True: 61 | load_model_name = args.load_model_name 62 | if os.path.isfile( load_model_name ): 63 | network, optimizer, start_epoch = load_checkpoint( network, optimizer, load_model_name ) 64 | else: 65 | print("checkpoint do not exist, set Load to False or specify correct checkpoint path") 66 | exit() 67 | else: 68 | start_epoch = 0 69 | 70 | 71 | # save image while training or not 72 | save_img = True 73 | 74 | # get lists of frame paths 75 | all_video_dir_list = get_video_dir_list(video_path) 76 | 77 | video_dir_list = all_video_dir_list[0:160] 78 | val_dir_list = all_video_dir_list[160:180] 79 
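# the fixed 160/20 split above silently yields fewer (or zero) validation videos
# when the dataset is small, since list slices never raise; an added guard makes
# the assumption explicit (a sketch, assuming at least 180 video directories):
assert len(all_video_dir_list) >= 180, "expected >= 180 video directories for the 160/20 train/val split"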
| 80 | # set gpu, set data, check gpu, define network 81 | gpus = [0] 82 | start_date = str(datetime.datetime.now())[0:10] 83 | cuda_gpu = torch.cuda.is_available() 84 | 85 | # Garbage Collector 86 | gc.enable() 87 | 88 | if loss_function == 'MSE' or loss_function == 'mse': 89 | critiria = nn.MSELoss() 90 | else: 91 | critiria = nn.SmoothL1Loss() 92 | 93 | loss_list = [] ## records loss through each step in training 94 | train_video_num = len(video_dir_list) # pool size = number of available training videos 95 | 96 | # print training info 97 | pytorch_total_params = sum(p.numel() for p in network.parameters()) 98 | print("==========================") 99 | print("model version:", version) 100 | print("training/validation video path", video_path) 101 | print("number of parameters:", pytorch_total_params) 102 | print("learning rate:", learn_rate) 103 | print("frame size:", size_idx, 'x', size_idx) 104 | print("input", step, "frames") 105 | print("predict", predict_frame_num, "frames") 106 | print("sample every ", skip_frame, "frame(s)") 107 | print("number of total epochs", (start_epoch + epoch_num) ) 108 | print("output path", output_path) 109 | print("optimizer", optimizer) 110 | print("val == True") 111 | print("==========================") 112 | 113 | input("press enter to continue\n\n") 114 | 115 | print("==========================") 116 | 117 | for epochs in range(start_epoch, start_epoch + epoch_num): 118 | # randomly choose 16 videos from the video pool as training videos for this epoch 119 | train_seq = np.random.permutation(train_video_num)[:16] # random train sequence 120 | train_video_size = len(train_seq) 121 | print('epoch_num', epochs, '/', start_epoch + epoch_num) 122 | 123 | # run validation every 50 epochs 124 | if( (epochs) % 50 == 0 ): 125 | validation = True 126 | else: 127 | validation = False 128 | 129 | for batch in range(0, train_video_size): 130 | frame_paths = get_file_path(video_dir_list[ train_seq[batch] ]) 131 | new_frame_paths = [ frame_paths[i] for i in range(0, len(frame_paths), skip_frame ) ] 132 | # with step = 5 and predict_frame_num = 5, 9 forward passes cover all 10 frames 133 | # eg. input t = 0, output t = 1 134 | # input t = 1, output t = 2 135 | # . 136 | # . 137 | # . 138 | # input t = 8, output t = 9 139 | # so step_size = step + predict_frame_num - 1 140 | # step_size: forward steps per batch 141 | # step: number of ground-truth input frames 142 | # predict_frame_num: number of frames to predict 143 | step_size = step + predict_frame_num - 1 144 | #print ('current batch:', video_dir_list[ train_seq[batch] ] ) 145 | 146 | available_len = len(new_frame_paths) 147 | start_frame = np.random.randint(0, available_len - step_size - 2 ) # random start point 148 | 149 | # ensure enough frames remain for training after the random start point is set 150 | if available_len - start_frame < step_size: 151 | print( 'not enough images' ) 152 | pass 153 | else: 154 | # to add variety, there is a 50% chance of performing 1 extra prediction 155 | if( np.random.rand() > 0.5 ): 156 | exception = True 157 | step_size = step_size + 1 158 | else: 159 | exception = False 160 | 161 | # load all images needed for this batch 162 | image_tensors = frame_batch_loader(start_frame, new_frame_paths, step_size, gray_scale = gray_scale_bol, size_index = size_idx).to(device) 163 | 164 | for steps in range(0, step_size): 165 | 166 | # init_lstm_token tells the model whether to (re)initialize the lstm internal state; it must be reset at the start of each batch.
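# This inner loop unrolls the recurrence: ground-truth frames are fed for the
# first `step` iterations (teacher forcing), after which the network consumes
# its own previous_output; note that backward(retain_graph=True) below keeps
# the whole unrolled graph alive, which is why memory use grows with step_size.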
167 | if (steps == 0): 168 | init_lstm_token = True 169 | else: 170 | init_lstm_token = False 171 | 172 | #print("epoch", epochs, "steps", steps) 173 | # Clear the gradients, since PyTorch accumulates them 174 | start_time = time.time() 175 | optimizer.zero_grad() 176 | 177 | # load picture; step = pic num 178 | # test: input image 179 | # target: next image, prediction target image 180 | test, target = image_tensors[steps], image_tensors[steps+1] 181 | 182 | # FORWARD INPUT: 183 | # groundtruth image as input if steps < step 184 | # else: take previous output as input 185 | # eg. 5 GT input, 5 predictions ( step = 5 and predict = 5 ) 186 | # steps = 0: input groundtruth frame 0 -> output predict 1 187 | # steps = 1: input groundtruth frame 1 -> output predict 2 188 | # steps = 2: input groundtruth frame 2 -> output predict 3 189 | # steps = 3: input groundtruth frame 3 -> output predict 4 190 | # steps = 4: input groundtruth frame 4 -> output predict 5 191 | # -------------------------------------------------------------- steps < step 192 | # steps = 5: input predicted frame 5 -> output predict 6 193 | # steps = 6: input predicted frame 6 -> output predict 7 194 | # steps = 7: input predicted frame 7 -> output predict 8 195 | # steps = 8: input predicted frame 8 -> output predict 9 196 | 197 | if steps < step: 198 | output = network.forward(test, init_lstm_token) 199 | #if ( steps == step - 1 ): 200 | #print('doing first prediction') 201 | else: 202 | #print('doing prediction') 203 | output = network.forward(previous_output, init_lstm_token) 204 | 205 | previous_output = output 206 | 207 | # Calculate loss 208 | # loss = critiria( Variable(output.long()), Variable(target.long())) 209 | loss = critiria( output, target) 210 | 211 | # record loss in to csv 212 | loss_value = float( loss.item() ) 213 | string = 'epoch_' + str(epochs) + '_batch_' + str(batch) + '_step_' + str(steps) 214 | loss_list.append( [ string, loss_value ]) 215 | 216 | # write training loss to tensorboard 217 | writer.add_scalar("train loss", loss.item(), epochs) 218 | 219 | # save img every 50 epochs ( 800 iterations ) 220 | if save_img == True or float(loss_value) > 400: 221 | if ( (epochs + 1) % 50 == 0) or ( epochs == 0 ) or ( (epochs+1) == ( start_epoch + epoch_num) ): 222 | if steps % 1 == 0: 223 | output_img = tensor_to_pic(output, normalize=False, gray_scale=gray_scale_bol, size_index = size_idx) 224 | output_img_name = output_path + str(start_date) + '_E' + str(epochs) + '_B'+ str(batch).zfill(2) + '_S'+ str(steps).zfill(2) +'_output.jpg' 225 | cv.imwrite(str(output_img_name), output_img) 226 | 227 | # Backward propagation 228 | loss.backward(retain_graph = True) 229 | 230 | # Update the gradients 231 | optimizer.step() 232 | 233 | # speed counter 234 | end_time = time.time() 235 | elapse_time = round((end_time - start_time), 2) 236 | 237 | # print memory used 238 | process = psutil.Process(os.getpid()) 239 | 240 | if( epochs % 20 == 0 ): 241 | print('epoch', epochs, 'batch', batch, 'step', steps, "loss:", loss, 'time used', elapse_time, 'sec') 242 | print('used memory', round((int(process.memory_info().rss)/(1024*1024)), 2), 'MB' ) 243 | print("-------------------------------------") 244 | 245 | gc.collect() 246 | 247 | if cuda_gpu: 248 | torch.cuda.empty_cache() 249 | 250 | # return origin step size if extra step is performed 251 | if exception == True: 252 | step_size = step_size - 1 253 | exception = False 254 | # releae memory 255 | if cuda_gpu: 256 | torch.cuda.empty_cache() 257 | 258 | # log loss after 
each epoch 259 | write_csv_file( output_path + start_date +'_loss_record.csv', loss_list ) 260 | 261 | # do validation (every 50 epochs by default) 262 | if validation == True: 263 | print("==== validation ====\n\n") 264 | 265 | for batch in range(0, len(val_dir_list)): 266 | frame_paths = get_file_path( val_dir_list[batch] ) 267 | new_frame_paths = [ frame_paths[i] for i in range(0, len(frame_paths), skip_frame ) ] 268 | val_start = time.time() 269 | print(' ----batch{}---- '.format(batch)) 270 | 271 | start_frame = 3 272 | image_tensors = frame_batch_loader(start_frame, new_frame_paths, step_size, gray_scale = gray_scale_bol, size_index = size_idx).to(device) 273 | 274 | for steps in range(0, step_size): 275 | test, target = image_tensors[steps], image_tensors[steps+1] 276 | 277 | if steps == 0: 278 | init_lstm_token = True 279 | else: 280 | init_lstm_token = False 281 | 282 | if steps < step: 283 | output = network.forward(test, init_lstm_token) 284 | else: 285 | output = network.forward(previous_output, init_lstm_token) 286 | 287 | previous_output = output 288 | loss = critiria( output, target) 289 | print('val: batch', batch, 'step', steps, "loss:", loss.item(), '\n') 290 | # write validation loss to tensorboard 291 | writer.add_scalar("val loss", loss.item(), epochs) 292 | writer.flush() 293 | 294 | val_end = time.time() 295 | print('time used:', round(( val_end - val_start ),2)) 296 | 297 | # save model every 500 epochs 298 | if ( ( ( (epochs+1) % 500 ) == 0 ) or ((epochs+1) == ( start_epoch + epoch_num)) or ( (epochs+1) == 1 ) ): 299 | path = output_path + start_date + 'epoch_' + str(epochs) +'_R_'+ str(step) + '_P_' + str(predict_frame_num) + '_size_idx_' + str(size_idx) + '_R_Unet.pt' 300 | state = { 'epoch': epochs+1, 'state_dict': network.state_dict(), 'optimizer':optimizer.state_dict() } 301 | 302 | torch.save( state, path) 303 | print('save model to:', path) 304 | 305 | if cuda_gpu: 306 | torch.cuda.empty_cache() 307 | 
--------------------------------------------------------------------------------
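A minimal checkpoint-reload sketch (hypothetical paths; assumes the models/ directory is on PYTHONPATH, since network_loader imports the model modules by name):

import torch.optim as optim
from utils import network_loader, load_checkpoint

# rebuild the network the same way train_v2.py does, then restore a saved state;
# checkpoints are {'epoch', 'state_dict', 'optimizer'} dicts written by train_v2.py
network = network_loader('v4', True, 128, 0)  # version, gray_scale_bol, size_idx, gpu_num
optimizer = optim.Adam(network.parameters(), lr=1e-4)
network, optimizer, start_epoch = load_checkpoint(network, optimizer, 'checkpoint.pth.tar')
network.eval()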