├── README.md ├── compile.sh ├── dataloader ├── DrivingStereoLoader.py ├── EXRloader.py ├── GANet │ ├── __init__.py │ ├── data.py │ └── dataset.py ├── KITTILoader.py ├── KITTI_submission_loader.py ├── KITTI_submission_loader2012.py ├── KITTIloader2012.py ├── KITTIloader2015.py ├── SceneFlowLoader.py ├── StereoLoader.py └── __init__.py ├── dltrainer.py ├── exp_configs ├── esnet_DrivingStereo.conf └── esnet_sceneflow.conf ├── finetune.sh ├── kitti_finetune.py ├── kitti_submission.py ├── layers_package ├── __init__.py ├── channelnorm_package │ ├── __init__.py │ ├── channelnorm.py │ ├── channelnorm_cuda.cc │ ├── channelnorm_cuda.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ └── top_level.txt │ ├── channelnorm_kernel.cu │ ├── channelnorm_kernel.cuh │ └── setup.py ├── correlation-pytorch-master │ ├── correlation-pytorch │ │ ├── __init__.py │ │ ├── build.py │ │ ├── correlation_package │ │ │ ├── __init__.py │ │ │ ├── _ext │ │ │ │ ├── __init__.py │ │ │ │ └── corr │ │ │ │ │ └── __init__.py │ │ │ ├── functions │ │ │ │ ├── __init__.py │ │ │ │ └── corr.py │ │ │ ├── modules │ │ │ │ ├── __init__.py │ │ │ │ └── corr.py │ │ │ └── src │ │ │ │ ├── corr.c │ │ │ │ ├── corr.h │ │ │ │ ├── corr1d.c │ │ │ │ ├── corr1d.h │ │ │ │ ├── corr1d_cuda.c │ │ │ │ ├── corr1d_cuda.h │ │ │ │ ├── corr1d_cuda_kernel.cu │ │ │ │ ├── corr1d_cuda_kernel.h │ │ │ │ ├── corr_cuda.c │ │ │ │ ├── corr_cuda.h │ │ │ │ ├── corr_cuda_kernel.cu │ │ │ │ └── corr_cuda_kernel.h │ │ ├── setup.py │ │ └── test │ │ │ ├── .ipynb_checkpoints │ │ │ └── test-checkpoint.ipynb │ │ │ ├── test.ipynb │ │ │ └── test.py │ ├── make_cuda.sh │ └── readme.MD ├── correlation_package │ ├── __init__.py │ ├── correlation.py │ ├── correlation_cuda.cc │ ├── correlation_cuda.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ └── top_level.txt │ ├── correlation_cuda_kernel.cu │ ├── correlation_cuda_kernel.cuh │ └── setup.py ├── install.sh └── resample2d_package │ ├── __init__.py │ ├── resample2d.py │ ├── resample2d_cuda.cc │ ├── resample2d_cuda.egg-info │ ├── PKG-INFO │ ├── SOURCES.txt │ ├── dependency_links.txt │ └── top_level.txt │ ├── resample2d_kernel.cu │ ├── resample2d_kernel.cuh │ └── setup.py ├── libs ├── GANet │ ├── __init__.py │ ├── functions │ │ ├── GANet.py │ │ └── __init__.py │ ├── modules │ │ ├── GANet.py │ │ └── __init__.py │ ├── setup.py │ └── src │ │ ├── GANet_cuda.cpp │ │ ├── GANet_cuda.h │ │ ├── GANet_kernel.cu │ │ ├── GANet_kernel.h │ │ └── GANet_kernel0.cu ├── __init__.py └── sync_bn │ ├── __init__.py │ ├── build │ ├── __init__.py │ └── lib │ │ └── __init__.py │ ├── functions │ ├── __init__.py │ └── sync_bn.py │ ├── modules │ ├── __init__.py │ └── sync_bn.py │ ├── setup.py │ └── src │ ├── __init__.py │ ├── cpu │ ├── operator.cpp │ ├── operator.h │ └── sync_bn.cpp │ └── gpu │ ├── common.h │ ├── device_tensor.h │ ├── operator.cpp │ ├── operator.h │ └── sync_bn_cuda.cu ├── lists ├── DrivingStereo_train.list ├── DrivingStereo_val.list ├── FlyingThings3D_release_TEST.list ├── FlyingThings3D_release_TRAIN.list ├── KITTI_TEST.list ├── KITTI_TRAIN.list ├── SceneFlow.list ├── all_unused_files.txt ├── driving_release.list ├── kitti15_train.txt ├── kitti15_val.txt └── monkaa_release.list ├── loss_configs ├── DrivingStereo.json ├── kitti.json └── sceneflow.json ├── losses ├── __init__.py └── multiscaleloss.py ├── main.py ├── net_builder.py ├── networks ├── DispNetC.py ├── DispNetRes.py ├── DispNetS.py ├── ESNet.py ├── ESNet_M.py ├── FADNet.py ├── GANet_deep.py ├── Refinement_modules.py ├── __init__.py 
├── basic.py ├── deform.py ├── deform_conv │ ├── __init__.py │ ├── build.sh │ ├── deform_conv.py │ ├── setup.py │ └── src │ │ ├── deform_conv_cuda.cpp │ │ └── deform_conv_cuda_kernel.cu ├── domain_classifier.py ├── resnet_modules.py ├── stackhourglass.py └── submodules.py ├── submission.sh ├── train.sh └── utils ├── AverageMeter.py ├── __init__.py ├── common.py ├── preprocess.py └── readpfm.py /README.md: -------------------------------------------------------------------------------- 1 | ## ESNet: An Efficient Stereo Matching Network 2 | 3 | PyTorch code for the paper "ES-Net: An Efficient Stereo Matching Network", submitted to IROS 2021 [[Paper Link]](https://arxiv.org/abs/2103.03922). 4 | 5 | ## Dependencies 6 | Python 3.6 7 | 8 | PyTorch (1.2.0+) 9 | 10 | torchvision 0.2.0 11 | 12 | ## Usage 13 | "networks/ESNet.py" and "networks/ESNet_M.py" contain the implementations of the proposed efficient stereo matching networks. 14 | 15 | To train ESNet on the SceneFlow dataset, first modify the dataset path in "exp_configs/esnet_sceneflow.conf", then run 16 | ``` 17 | dnn=esnet_sceneflow bash train.sh 18 | ``` 19 | 20 | ## Citation 21 | If you find the code and paper useful in your work, please cite 22 | ``` 23 | @misc{huang2021esnet, 24 | title={ES-Net: An Efficient Stereo Matching Network}, 25 | author={Zhengyu Huang and Theodore B. Norris and Panqu Wang}, 26 | year={2021}, 27 | eprint={2103.03922}, 28 | archivePrefix={arXiv}, 29 | primaryClass={cs.CV} 30 | } 31 | ``` -------------------------------------------------------------------------------- /compile.sh: -------------------------------------------------------------------------------- 1 | #export LD_LIBRARY_PATH="/home/feihu/anaconda3/lib:$LD_LIBRARY_PATH" 2 | #export LD_INCLUDE_PATH="/home/feihu/anaconda3/include:$LD_INCLUDE_PATH" 3 | #export CUDA_HOME="/usr/local/cuda-10.0" 4 | #export PATH="/home/feihu/anaconda3/bin:/usr/local/cuda-10.0/bin:$PATH" 5 | #export CPATH="/usr/local/cuda-10.0/include" 6 | #export CUDNN_INCLUDE_DIR="/usr/local/cuda-10.0/include" 7 | #export CUDNN_LIB_DIR="/usr/local/cuda-10.0/lib64" 8 | 9 | #export LD_LIBRARY_PATH="/home/zhangfeihu/anaconda3/lib:$LD_LIBRARY_PATH" 10 | #export LD_INCLUDE_PATH="/home/zhangfeihu/anaconda3/include:$LD_INCLUDE_PATH" 11 | #export CUDA_HOME="/home/work/cuda-9.2" 12 | #export PATH="/home/zhangfeihu/anaconda3/bin:/home/work/cuda-9.2/bin:$PATH" 13 | #export CPATH="/home/work/cuda-9.2/include" 14 | #export CUDNN_INCLUDE_DIR="/home/work/cudnn/cudnn_v7/include" 15 | #export CUDNN_LIB_DIR="/home/work/cudnn/cudnn_v7/lib64" 16 | TORCH=$(python -c "import os; import torch; print(os.path.dirname(torch.__file__))") 17 | #echo $TORCH 18 | cd libs/GANet 19 | python setup.py clean 20 | rm -rf build 21 | python setup.py build 22 | cp -r build/lib* build/lib 23 | 24 | cd ../sync_bn 25 | python setup.py clean 26 | rm -rf build 27 | python setup.py build 28 | cp -r build/lib* build/lib 29 | 30 | cd ../..
31 | echo "" > libs/GANet/build/__init__.py 32 | echo "" > libs/GANet/build/lib/__init__.py 33 | echo "" > libs/sync_bn/build/__init__.py 34 | echo "" > libs/sync_bn/build/lib/__init__.py 35 | 36 | -------------------------------------------------------------------------------- /dataloader/DrivingStereoLoader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import torch 4 | import pandas as pd 5 | from skimage import io, transform 6 | import numpy as np 7 | from torch.utils.data import Dataset, DataLoader 8 | from PIL import Image, ImageOps 9 | from utils.preprocess import * 10 | from torchvision import transforms 11 | import time 12 | from dataloader.EXRloader import load_exr 13 | 14 | class DrivingStereoDataset(Dataset): 15 | 16 | def __init__(self, txt_file, root_dir, phase='train', load_disp=True, scale_size=(448, 896), same_LR_aug=False): 17 | """ 18 | Args: 19 | txt_file [string]: Path to the image list 20 | transform (callable, optional): Optional transform to be applied on a sample 21 | scale_size: Only used when phase is 'test' or 'detect'; the input images are spatially resized to this size, which must be divisible by the network's downsampling rate. 22 | self.img_size: Only used when phase is 'test' or 'detect'; the original input image size, used to scale the output disparity map (predicted at scale_size) back to the original resolution. 23 | """ 24 | with open(txt_file, "r") as f: 25 | self.imgPairs = f.readlines() 26 | 27 | self.root_dir = root_dir 28 | self.phase = phase 29 | self.load_disp = load_disp 30 | self.scale_size = scale_size 31 | self.img_size = (400, 881) 32 | self.same_LR_aug = same_LR_aug 33 | def get_img_size(self): 34 | return self.img_size 35 | 36 | def get_scale_size(self): 37 | return self.scale_size 38 | 39 | def __len__(self): 40 | return len(self.imgPairs) 41 | 42 | def __getitem__(self, idx): 43 | 44 | img_names = self.imgPairs[idx].rstrip().split() 45 | 46 | img_left_name = os.path.join(self.root_dir, img_names[0]) 47 | img_right_name = os.path.join(self.root_dir, img_names[1]) 48 | if self.load_disp: 49 | gt_disp_name = os.path.join(self.root_dir, img_names[2]) 50 | 51 | def load_rgb(filename): 52 | 53 | img = None 54 | img = io.imread(filename) 55 | h, w, c = img.shape 56 | return img 57 | 58 | def load_disp(filename): 59 | gt_disp = None 60 | gt_disp = Image.open(filename) 61 | gt_disp = np.ascontiguousarray(gt_disp,dtype=np.float32)/256 62 | 63 | return gt_disp 64 | 65 | s = time.time() 66 | img_left = load_rgb(img_left_name) 67 | img_right = load_rgb(img_right_name) 68 | if self.load_disp: 69 | gt_disp = load_disp(gt_disp_name) 70 | 71 | #print("load data in %f s."
% (time.time() - s)) 72 | 73 | s = time.time() 74 | if self.phase == 'detect' or self.phase == 'test': 75 | img_left = transform.resize(img_left, self.scale_size, preserve_range=True) 76 | img_right = transform.resize(img_right, self.scale_size, preserve_range=True) 77 | 78 | # change image pixel value type to float32 79 | img_left = img_left.astype(np.float32) 80 | img_right = img_right.astype(np.float32) 81 | #scale = RandomRescale((1024, 1024)) 82 | #sample = scale(sample) 83 | 84 | if self.phase == 'detect' or self.phase == 'test': 85 | rgb_transform = default_transform() 86 | else: 87 | rgb_transform = inception_color_preproccess() 88 | 89 | if self.same_LR_aug: 90 | H,W = img_left.shape[:2] 91 | #Concatenate along width so both views receive the same brightness augmentation 92 | im = rgb_transform(np.concatenate((img_left,img_right),axis=1)) 93 | img_left, img_right = im[:,:,:W], im[:,:,W:] 94 | else: 95 | img_left = rgb_transform(img_left) 96 | img_right = rgb_transform(img_right) 97 | 98 | if self.load_disp: 99 | gt_disp = gt_disp[np.newaxis, :] 100 | gt_disp = torch.from_numpy(gt_disp.copy()).float() 101 | 102 | if self.phase == 'train': 103 | 104 | h, w = img_left.shape[1:3] 105 | #th, tw = 384, 768 106 | th, tw = 256, 768 107 | top = random.randint(120, h - th) 108 | left = random.randint(0, w - tw) 109 | 110 | img_left = img_left[:, top: top + th, left: left + tw] 111 | img_right = img_right[:, top: top + th, left: left + tw] 112 | if self.load_disp: 113 | gt_disp = gt_disp[:, top: top + th, left: left + tw] 114 | 115 | 116 | 117 | sample = { 'img_left': img_left, 118 | 'img_right': img_right, 119 | 'img_names': img_names 120 | } 121 | 122 | if self.load_disp: 123 | sample['gt_disp'] = gt_disp 124 | 125 | #print("deal data in %f s." % (time.time() - s)) 126 | 127 | return sample 128 | 129 | -------------------------------------------------------------------------------- /dataloader/EXRloader.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import OpenEXR 3 | import Imath 4 | import imageio 5 | import glob 6 | import os 7 | 8 | def exr2hdr(exrpath): 9 | File = OpenEXR.InputFile(exrpath) 10 | PixType = Imath.PixelType(Imath.PixelType.FLOAT) 11 | DW = File.header()['dataWindow'] 12 | CNum = len(File.header()['channels'].keys()) 13 | if (CNum > 1): 14 | Channels = ['R', 'G', 'B'] 15 | CNum = 3 16 | else: 17 | Channels = ['G'] 18 | Size = (DW.max.x - DW.min.x + 1, DW.max.y - DW.min.y + 1) 19 | Pixels = [numpy.frombuffer(File.channel(c, PixType), dtype=numpy.float32) for c in Channels] 20 | hdr = numpy.zeros((Size[1],Size[0],CNum),dtype=numpy.float32) 21 | if (CNum == 1): 22 | hdr[:,:,0] = numpy.reshape(Pixels[0],(Size[1],Size[0])) 23 | else: 24 | hdr[:,:,0] = numpy.reshape(Pixels[0],(Size[1],Size[0])) 25 | hdr[:,:,1] = numpy.reshape(Pixels[1],(Size[1],Size[0])) 26 | hdr[:,:,2] = numpy.reshape(Pixels[2],(Size[1],Size[0])) 27 | return hdr 28 | 29 | def writehdr(hdrpath,hdr): 30 | h, w, c = hdr.shape 31 | if c == 1: 32 | hdr = numpy.pad(hdr, ((0, 0), (0, 0), (0, 2)), 'constant') 33 | hdr[:,:,1] = hdr[:,:,0] 34 | hdr[:,:,2] = hdr[:,:,0] 35 | imageio.imwrite(hdrpath,hdr,format='hdr') 36 | 37 | def load_exr(filename): 38 | hdr = exr2hdr(filename) 39 | h, w, c = hdr.shape 40 | if c == 1: 41 | hdr = numpy.squeeze(hdr) 42 | return hdr 43 | 44 | 45 | def test_exr(): 46 | files = glob.glob('D:/MLProjects/data/home/*.exr') 47 | savepath = 'D:/MLProjects/data/home' 48 | total = len(files) 49 | count = 0 50 | print ('Files Num:', total) 51 | for file in files: 52 | 
hdr = exr2hdr(file) 53 | filename,file_ext = os.path.splitext(file) 54 | filename = os.path.basename(filename) 55 | filename = filename + '.hdr' 56 | curpath = os.path.join(savepath,filename) 57 | writehdr(curpath,hdr) 58 | count = count + 1 59 | print ('process:', count, '/', total) 60 | 61 | if __name__ == '__main__': 62 | test_exr() 63 | -------------------------------------------------------------------------------- /dataloader/GANet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/dataloader/GANet/__init__.py -------------------------------------------------------------------------------- /dataloader/GANet/data.py: -------------------------------------------------------------------------------- 1 | from .dataset import DatasetFromList 2 | 3 | def get_training_set(data_path, train_list, crop_size=[256,256], left_right=False, kitti=False, kitti2015=False, shift=0): 4 | return DatasetFromList(data_path, train_list, 5 | crop_size, True, left_right, kitti, kitti2015, shift) 6 | 7 | 8 | def get_test_set(data_path, test_list, crop_size=[256,256], left_right=False, kitti=False, kitti2015=False): 9 | return DatasetFromList(data_path, test_list, 10 | crop_size, False, left_right, kitti, kitti2015) 11 | -------------------------------------------------------------------------------- /dataloader/KITTILoader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data as data 4 | import torch 5 | import torchvision.transforms as transforms 6 | import random 7 | from PIL import Image, ImageOps 8 | import numpy as np 9 | from utils import preprocess 10 | from skimage import transform, io 11 | 12 | IMG_EXTENSIONS = [ 13 | '.jpg', '.JPG', '.jpeg', '.JPEG', 14 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 15 | ] 16 | 17 | def is_image_file(filename): 18 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 19 | 20 | def default_loader(path): 21 | return Image.open(path).convert('RGB') 22 | #return io.imread(path) 23 | 24 | def disparity_loader(path): 25 | return Image.open(path) 26 | #return Image.open(path) 27 | 28 | 29 | class myImageFloder(data.Dataset): 30 | def __init__(self, left, right, left_disparity, training, loader=default_loader, dploader= disparity_loader): 31 | 32 | self.left = left 33 | self.right = right 34 | self.disp_L = left_disparity 35 | self.loader = loader 36 | self.dploader = dploader 37 | self.training = training 38 | 39 | #self.scale_size = (384, 1280) 40 | self.scale_size = (1280, 384) 41 | 42 | def __getitem__(self, index): 43 | left = self.left[index] 44 | right = self.right[index] 45 | disp_L= self.disp_L[index] 46 | 47 | left_img = self.loader(left) 48 | right_img = self.loader(right) 49 | dataL = self.dploader(disp_L) 50 | 51 | #origin_width, origin_height = left_img.size 52 | #scale_width = self.scale_size[0] 53 | #print("ori_w: %d, sca_w: %d" % (origin_width, scale_width)) 54 | 55 | #left_img = left_img.resize(self.scale_size, Image.BILINEAR) 56 | #right_img = right_img.resize(self.scale_size, Image.BILINEAR) 57 | #dataL = dataL.resize(self.scale_size, Image.BILINEAR) 58 | #left_img = transform.resize(left_img, self.scale_size, preserve_range=True) 59 | #right_img = transform.resize(right_img, self.scale_size, preserve_range=True) 60 | #dataL = transform.resize(dataL, self.scale_size, preserve_range=True) * 1.0 * scale_width 
/ origin_width 61 | 62 | if self.training: 63 | w, h = left_img.size 64 | th, tw = 256, 512 65 | #th, tw = 256, 896 66 | #th, tw = 384, 768 67 | 68 | x1 = random.randint(0, w - tw) 69 | y1 = random.randint(h // 4, h - th) 70 | #y1 = h - th 71 | #y1 = random.randint((h-th)/2, h - th) 72 | 73 | left_img = left_img.crop((x1, y1, x1 + tw, y1 + th)) 74 | right_img = right_img.crop((x1, y1, x1 + tw, y1 + th)) 75 | 76 | dataL = np.ascontiguousarray(dataL,dtype=np.float32)/256 77 | #dataL = np.ascontiguousarray(dataL,dtype=np.float32)/256 * 1.0 * scale_width / origin_width 78 | dataL = dataL[y1:y1 + th, x1:x1 + tw] 79 | 80 | processed = preprocess.get_transform(augment=False) 81 | #processed = preprocess.get_transform(augment=True) 82 | left_img = processed(left_img) 83 | right_img = processed(right_img) 84 | #print('[index:%d]left: %s, rect(%d,%d,%d,%d)'%(index, self.left[index], x1,y1,x1+tw,y1+th)) 85 | 86 | return left_img, right_img, dataL 87 | else: 88 | w, h = left_img.size 89 | 90 | left_img = left_img.crop((w-1280, h-384, w, h)) 91 | right_img = right_img.crop((w-1280, h-384, w, h)) 92 | w1, h1 = left_img.size 93 | 94 | dataL = dataL.crop((w-1280, h-384, w, h)) 95 | #dataL = np.ascontiguousarray(dataL,dtype=np.float32)/256 * 1.0 * scale_width / origin_width 96 | dataL = np.ascontiguousarray(dataL,dtype=np.float32)/256 97 | 98 | processed = preprocess.get_transform(augment=False) 99 | left_img = processed(left_img) 100 | right_img = processed(right_img) 101 | 102 | return left_img, right_img, dataL 103 | 104 | def __len__(self): 105 | return len(self.left) 106 | -------------------------------------------------------------------------------- /dataloader/KITTI_submission_loader.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | import os 5 | import os.path 6 | import numpy as np 7 | 8 | IMG_EXTENSIONS = [ 9 | '.jpg', '.JPG', '.jpeg', '.JPEG', 10 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 11 | ] 12 | 13 | 14 | def is_image_file(filename): 15 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 16 | 17 | def dataloader(filepath): 18 | 19 | left_fold = 'image_2/' 20 | right_fold = 'image_3/' 21 | 22 | 23 | image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] 24 | 25 | 26 | left_test = [filepath+left_fold+img for img in image] 27 | right_test = [filepath+right_fold+img for img in image] 28 | 29 | return left_test, right_test 30 | -------------------------------------------------------------------------------- /dataloader/KITTI_submission_loader2012.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | import os 5 | import os.path 6 | import numpy as np 7 | 8 | IMG_EXTENSIONS = [ 9 | '.jpg', '.JPG', '.jpeg', '.JPEG', 10 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 11 | ] 12 | 13 | 14 | def is_image_file(filename): 15 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 16 | 17 | def dataloader(filepath): 18 | 19 | left_fold = 'colored_0/' 20 | right_fold = 'colored_1/' 21 | 22 | 23 | image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] 24 | 25 | 26 | left_test = [filepath+left_fold+img for img in image] 27 | right_test = [filepath+right_fold+img for img in image] 28 | 29 | return left_test, right_test 30 | -------------------------------------------------------------------------------- 
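The submission loaders above, like the training list builders in the next two files, only assemble lists of image paths; the `myImageFloder` class in `dataloader/KITTILoader.py` turns those lists into cropped, normalized tensors. A minimal usage sketch (the dataset root is a placeholder; `kitti_finetune.py` does the real wiring):

```python
# Sketch: combine the KITTI 2015 list builder (KITTIloader2015.py, below)
# with the dataset class from KITTILoader.py. The root path is a placeholder.
from torch.utils.data import DataLoader

from dataloader import KITTIloader2015 as lt
from dataloader import KITTILoader as DA

root = '/path/to/KITTI_2015_Stereo/training/'  # placeholder path
left_tr, right_tr, disp_tr, left_val, right_val, disp_val = lt.dataloader(root)

train_set = DA.myImageFloder(left_tr, right_tr, disp_tr, training=True)
train_loader = DataLoader(train_set, batch_size=4, shuffle=True, num_workers=4)

# Each training batch yields normalized 256x512 crops plus the matching
# ground-truth disparity crop (KITTI disparity PNGs are decoded as value/256).
left_img, right_img, disp_gt = next(iter(train_loader))
```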
/dataloader/KITTIloader2012.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | import os 5 | import os.path 6 | import numpy as np 7 | 8 | IMG_EXTENSIONS = [ 9 | '.jpg', '.JPG', '.jpeg', '.JPEG', 10 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 11 | ] 12 | 13 | 14 | def is_image_file(filename): 15 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 16 | 17 | def dataloader(filepath): 18 | 19 | left_fold = 'colored_0/' 20 | right_fold = 'colored_1/' 21 | disp_noc = 'disp_occ/' 22 | 23 | image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] 24 | image.sort() 25 | 26 | train = image[:180] 27 | val = image[180:] 28 | 29 | #train = image[:] 30 | #val = image[160:] 31 | #val = image[:] 32 | 33 | left_train = [filepath+left_fold+img for img in train] 34 | right_train = [filepath+right_fold+img for img in train] 35 | disp_train = [filepath+disp_noc+img for img in train] 36 | 37 | 38 | left_val = [filepath+left_fold+img for img in val] 39 | right_val = [filepath+right_fold+img for img in val] 40 | disp_val = [filepath+disp_noc+img for img in val] 41 | 42 | return left_train, right_train, disp_train, left_val, right_val, disp_val 43 | -------------------------------------------------------------------------------- /dataloader/KITTIloader2015.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | import os 5 | import os.path 6 | import numpy as np 7 | 8 | IMG_EXTENSIONS = [ 9 | '.jpg', '.JPG', '.jpeg', '.JPEG', 10 | '.png', '.PNG', '.ppm', '.PPM', '.bmp', '.BMP', 11 | ] 12 | 13 | 14 | def is_image_file(filename): 15 | return any(filename.endswith(extension) for extension in IMG_EXTENSIONS) 16 | 17 | def dataloader(filepath): 18 | 19 | left_fold = 'image_2/' 20 | right_fold = 'image_3/' 21 | disp_L = 'disp_occ_0/' 22 | disp_R = 'disp_occ_1/' 23 | 24 | image = [img for img in os.listdir(filepath+left_fold) if img.find('_10') > -1] 25 | image.sort() 26 | 27 | val_idx = [1, 6, 26, 38, 43, 49, 67, 81, 89, 109, 122, 129, 132, 141, 152, 159, 171, 179, 184, 187] 28 | train_idx = [i for i in range(200) if i not in val_idx] 29 | train = [image[i] for i in train_idx] 30 | val = [image[i] for i in val_idx] 31 | #train = image 32 | #val = image 33 | 34 | left_train = [filepath+left_fold+img for img in train] 35 | right_train = [filepath+right_fold+img for img in train] 36 | disp_train_L = [filepath+disp_L+img for img in train] 37 | #disp_train_R = [filepath+disp_R+img for img in train] 38 | 39 | left_val = [filepath+left_fold+img for img in val] 40 | right_val = [filepath+right_fold+img for img in val] 41 | disp_val_L = [filepath+disp_L+img for img in val] 42 | #disp_val_R = [filepath+disp_R+img for img in val] 43 | 44 | return left_train, right_train, disp_train_L, left_val, right_val, disp_val_L 45 | -------------------------------------------------------------------------------- /dataloader/SceneFlowLoader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import torch 4 | import pandas as pd 5 | from skimage import io, transform 6 | import numpy as np 7 | from torch.utils.data import Dataset, DataLoader 8 | from PIL import Image, ImageOps 9 | from utils.preprocess import * 10 | from torchvision import transforms 11 | import time 12 | from 
dataloader.EXRloader import load_exr 13 | 14 | class SceneFlowDataset(Dataset): 15 | 16 | def __init__(self, txt_file, root_dir, phase='train', load_disp=True, load_norm=False, to_angle=False, scale_size=(576, 960), same_LR_aug = False): 17 | """ 18 | Args: 19 | txt_file [string]: Path to the image list 20 | transform (callable, optional): Optional transform to be applied on a sample 21 | """ 22 | with open(txt_file, "r") as f: 23 | self.imgPairs = f.readlines() 24 | 25 | self.root_dir = root_dir 26 | self.phase = phase 27 | self.load_disp = load_disp 28 | self.load_norm = load_norm 29 | self.to_angle = to_angle 30 | self.scale_size = scale_size 31 | self.img_size = (540, 960) 32 | self.same_LR_aug = same_LR_aug 33 | 34 | def get_img_size(self): 35 | return self.img_size 36 | 37 | def get_scale_size(self): 38 | return self.scale_size 39 | 40 | def __len__(self): 41 | return len(self.imgPairs) 42 | 43 | def __getitem__(self, idx): 44 | 45 | img_names = self.imgPairs[idx].rstrip().split() 46 | 47 | img_left_name = os.path.join(self.root_dir, img_names[0]) 48 | img_right_name = os.path.join(self.root_dir, img_names[1]) 49 | if self.load_disp: 50 | gt_disp_name = os.path.join(self.root_dir, img_names[2]) 51 | if self.load_norm: 52 | gt_norm_name = os.path.join(self.root_dir, img_names[3]) 53 | 54 | def load_rgb(filename): 55 | 56 | img = None 57 | if filename.find('.npy') > 0: 58 | img = np.load(filename) 59 | else: 60 | img = io.imread(filename) 61 | if len(img.shape) == 2: 62 | img = img[:,:,np.newaxis] 63 | img = np.pad(img, ((0, 0), (0, 0), (0, 2)), 'constant') 64 | img[:,:,1] = img[:,:,0] 65 | img[:,:,2] = img[:,:,0] 66 | h, w, c = img.shape 67 | if c == 4: 68 | img = img[:,:,:3] 69 | return img 70 | 71 | def load_disp(filename): 72 | gt_disp = None 73 | if gt_disp_name.endswith('pfm'): 74 | gt_disp, scale = load_pfm(gt_disp_name) 75 | gt_disp = gt_disp[::-1, :] 76 | elif gt_disp_name.endswith('npy'): 77 | gt_disp = np.load(gt_disp_name) 78 | gt_disp = gt_disp[::-1, :] 79 | elif gt_disp_name.endswith('exr'): 80 | gt_disp = load_exr(filename) 81 | else: 82 | gt_disp = Image.open(gt_disp_name) 83 | gt_disp = np.ascontiguousarray(gt_disp,dtype=np.float32)/256 84 | 85 | return gt_disp 86 | 87 | def load_norm(filename): 88 | gt_norm = None 89 | if filename.endswith('exr'): 90 | gt_norm = load_exr(filename) 91 | 92 | # transform visualization normal to its true value 93 | gt_norm = gt_norm * 2.0 - 1.0 94 | 95 | ## fix opposite normal 96 | #m = gt_norm >= 0 97 | #m[:,:,0] = False 98 | #m[:,:,1] = False 99 | #gt_norm[m] = - gt_norm[m] 100 | 101 | return gt_norm 102 | 103 | s = time.time() 104 | img_left = load_rgb(img_left_name) 105 | img_right = load_rgb(img_right_name) 106 | if self.load_disp: 107 | gt_disp = load_disp(gt_disp_name) 108 | if self.load_norm: 109 | gt_norm = load_norm(gt_norm_name) 110 | #print("load data in %f s." 
% (time.time() - s)) 111 | 112 | s = time.time() 113 | if self.phase == 'detect' or self.phase == 'test': 114 | img_left = transform.resize(img_left, self.scale_size, preserve_range=True) 115 | img_right = transform.resize(img_right, self.scale_size, preserve_range=True) 116 | 117 | # change image pixel value type ot float32 118 | img_left = img_left.astype(np.float32) 119 | img_right = img_right.astype(np.float32) 120 | #scale = RandomRescale((1024, 1024)) 121 | #sample = scale(sample) 122 | 123 | if self.phase == 'detect' or self.phase == 'test': 124 | rgb_transform = default_transform() 125 | else: 126 | rgb_transform = inception_color_preproccess() 127 | if self.same_LR_aug: 128 | H,W = img_left.shape[:2] 129 | #Concate along width to have same bright augmentation 130 | im = rgb_transform(np.concatenate((img_left,img_right),axis=1)) 131 | img_left, img_right = im[:,:,:W], im[:,:,W:] 132 | else: 133 | img_left = rgb_transform(img_left) 134 | img_right = rgb_transform(img_right) 135 | 136 | if self.load_disp: 137 | gt_disp = gt_disp[np.newaxis, :] 138 | gt_disp = torch.from_numpy(gt_disp.copy()).float() 139 | 140 | if self.load_norm: 141 | gt_norm = gt_norm.transpose([2, 0, 1]) 142 | gt_norm = torch.from_numpy(gt_norm.copy()).float() 143 | 144 | if self.phase == 'train': 145 | 146 | h, w = img_left.shape[1:3] 147 | th, tw = 384, 768 148 | top = random.randint(0, h - th) 149 | left = random.randint(0, w - tw) 150 | 151 | img_left = img_left[:, top: top + th, left: left + tw] 152 | img_right = img_right[:, top: top + th, left: left + tw] 153 | if self.load_disp: 154 | gt_disp = gt_disp[:, top: top + th, left: left + tw] 155 | if self.load_norm: 156 | gt_norm = gt_norm[:, top: top + th, left: left + tw] 157 | 158 | if self.to_angle: 159 | norm_size = gt_norm.size() 160 | gt_angle = torch.empty(2, norm_size[1], norm_size[2], dtype=torch.float) 161 | gt_angle[0, :, :] = torch.atan(gt_norm[0, :, :] / gt_norm[2, :, :]) 162 | gt_angle[1, :, :] = torch.atan(gt_norm[1, :, :] / gt_norm[2, :, :]) 163 | 164 | 165 | sample = { 'img_left': img_left, 166 | 'img_right': img_right, 167 | 'img_names': img_names 168 | } 169 | 170 | if self.load_disp: 171 | sample['gt_disp'] = gt_disp 172 | if self.load_norm: 173 | if self.to_angle: 174 | sample['gt_angle'] = gt_angle 175 | else: 176 | sample['gt_norm'] = gt_norm 177 | 178 | #print("deal data in %f s." 
% (time.time() - s)) 179 | 180 | return sample 181 | 182 | -------------------------------------------------------------------------------- /dataloader/StereoLoader.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | import torch 4 | import pandas as pd 5 | from skimage import io, transform 6 | import numpy as np 7 | from torch.utils.data import Dataset, DataLoader 8 | from PIL import Image, ImageOps 9 | from utils.preprocess import * 10 | from torchvision import transforms 11 | 12 | class StereoDataset(Dataset): 13 | 14 | def __init__(self, txt_file, root_dir, phase='train', load_disp=True, load_norm=True, scale_size=(576, 960)): 15 | """ 16 | Args: 17 | txt_file [string]: Path to the image list 18 | transform (callable, optional): Optional transform to be applied on a sample 19 | """ 20 | with open(txt_file, "r") as f: 21 | self.imgPairs = f.readlines() 22 | 23 | self.root_dir = root_dir 24 | self.phase = phase 25 | self.load_disp = load_disp 26 | self.load_norm = load_norm 27 | self.scale_size = scale_size 28 | 29 | 30 | def __len__(self): 31 | return len(self.imgPairs) 32 | 33 | def __getitem__(self, idx): 34 | 35 | img_names = self.imgPairs[idx].rstrip().split() 36 | 37 | img_left_name = os.path.join(self.root_dir, img_names[0]) 38 | img_right_name = os.path.join(self.root_dir, img_names[1]) 39 | 40 | def load_rgb(filename): 41 | 42 | img = None 43 | if filename.find('.npy') > 0: 44 | img = np.load(filename) 45 | else: 46 | img = io.imread(filename) 47 | if len(img.shape) == 2: 48 | img = img[:,:,np.newaxis] 49 | img = np.pad(img, ((0, 0), (0, 0), (0, 2)), 'constant') 50 | img[:,:,1] = img[:,:,0] 51 | img[:,:,2] = img[:,:,0] 52 | h, w, c = img.shape 53 | if c == 4: 54 | img = img[:,:,:3] 55 | return img 56 | 57 | img_left = load_rgb(img_left_name) 58 | img_right = load_rgb(img_right_name) 59 | 60 | if self.phase == 'detect' or self.phase == 'test': 61 | img_left = transform.resize(img_left, self.scale_size, preserve_range=True) 62 | img_right = transform.resize(img_right, self.scale_size, preserve_range=True) 63 | 64 | # change image pixel value type ot float32 65 | img_left = img_left.astype(np.float32) 66 | img_right = img_right.astype(np.float32) 67 | #scale = RandomRescale((1024, 1024)) 68 | #sample = scale(sample) 69 | 70 | if self.phase == 'detect' or self.phase == 'test': 71 | rgb_transform = default_transform() 72 | else: 73 | rgb_transform = inception_color_preproccess() 74 | 75 | img_left = rgb_transform(img_left) 76 | img_right = rgb_transform(img_right) 77 | 78 | if self.phase == 'train': 79 | 80 | h, w = img_left.shape[1:3] 81 | th, tw = 384, 768 82 | top = random.randint(0, h - th) 83 | left = random.randint(0, w - tw) 84 | 85 | img_left = img_left[:, top: top + th, left: left + tw] 86 | img_right = img_right[:, top: top + th, left: left + tw] 87 | 88 | 89 | sample = {'img_left': img_left, 90 | 'img_right': img_right, 91 | 'img_names': img_names 92 | } 93 | 94 | return sample 95 | 96 | -------------------------------------------------------------------------------- /dataloader/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/dataloader/__init__.py -------------------------------------------------------------------------------- /exp_configs/esnet_DrivingStereo.conf: 
-------------------------------------------------------------------------------- 1 | net=esnet 2 | loss=loss_configs/DrivingStereo.json 3 | outf_model=outmodels/ 4 | logf=logs/esnet.log 5 | lr=1e-4 6 | devices=0,1,2,3 7 | dataset=DrivingStereo 8 | datapath=mynfs/DrivingStereo/ 9 | trainlist=lists/DrivingStereo_train.list 10 | vallist=lists/DrivingStereo_val.list 11 | startR=0 12 | startE=0 13 | batchSize=16 14 | maxdisp=-1 15 | model=none 16 | -------------------------------------------------------------------------------- /exp_configs/esnet_sceneflow.conf: -------------------------------------------------------------------------------- 1 | net=esnet 2 | loss=loss_configs/sceneflow.json 3 | outf_model=outmodels/ 4 | logf=logs/esnet.log 5 | lr=1e-4 6 | devices=0,1,2,3 7 | dataset=sceneflow 8 | datapath=mynfs/scene_flow/ 9 | trainlist=lists/SceneFlow.list 10 | vallist=lists/FlyingThings3D_release_TEST.list 11 | startR=0 12 | startE=0 13 | batchSize=16 14 | maxdisp=-1 15 | model=none 16 | 17 | -------------------------------------------------------------------------------- /finetune.sh: -------------------------------------------------------------------------------- 1 | # kitti 2015 2 | #savemodel: directory in which to save the fine-tuned model 3 | #loadmodel: path of the model to be fine-tuned. 4 | python3 kitti_finetune.py --maxdisp 192 \ 5 | --model esnet \ 6 | --devices 0,1 \ 7 | --datatype 2015 \ 8 | --datapath /mnt/wekanfs/scratch/zhengyu.huang/KITTI_2015_Stereo/training/ \ 9 | --loss loss_configs/kitti.json \ 10 | --savemodel kitti_models/ \ 11 | --loadmodel none 12 | 13 | 14 | -------------------------------------------------------------------------------- /kitti_submission.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import argparse 3 | import os, sys 4 | import random 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.parallel 8 | import torch.backends.cudnn as cudnn 9 | import torch.optim as optim 10 | import torch.utils.data 11 | from torch.autograd import Variable 12 | import torch.nn.functional as F 13 | import skimage 14 | import skimage.io 15 | import skimage.transform 16 | import numpy as np 17 | import time 18 | import math 19 | from utils.preprocess import scale_disp, default_transform 20 | 21 | from networks.FADNet import FADNet 22 | from networks.stackhourglass import PSMNet 23 | from networks.DispNetC import DispNetC; from networks.ESNet import ESNet; from networks.ESNet_M import ESNet_M 24 | # 2012 data /media/jiaren/ImageNet/data_scene_flow_2012/testing/ 25 | 26 | parser = argparse.ArgumentParser(description='FADNet') 27 | parser.add_argument('--KITTI', default='2015', 28 | help='KITTI version') 29 | parser.add_argument('--datapath', default='KITTI_2015_Stereo/testing/', 30 | help='path to the test data') 31 | parser.add_argument('--loadmodel', default='models/kitti_finetune/pretrained_on_sceneflow_run1/best.tar', 32 | help='loading model') 33 | parser.add_argument('--savepath', default='results/', 34 | help='path to save the results.') 35 | parser.add_argument('--model', default='fadnet', 36 | help='select model') 37 | parser.add_argument('--maxdisp', type=int, default=192, 38 | help='maximum disparity') 39 | parser.add_argument('--no-cuda', action='store_true', default=False, 40 | help='enables CUDA training') 41 | parser.add_argument('--devices', type=str, help='indicates CUDA devices, e.g. 
0,1,2', default='0') 42 | parser.add_argument('--seed', type=int, default=1, metavar='S', 43 | help='random seed (default: 1)') 44 | args = parser.parse_args() 45 | args.cuda = not args.no_cuda and torch.cuda.is_available() 46 | 47 | if not os.path.exists(args.savepath): 48 | os.makedirs(args.savepath) 49 | 50 | torch.manual_seed(args.seed) 51 | if args.cuda: 52 | torch.cuda.manual_seed(args.seed) 53 | 54 | if args.KITTI == '2015': 55 | from dataloader import KITTI_submission_loader as DA 56 | else: 57 | from dataloader import KITTI_submission_loader2012 as DA 58 | 59 | 60 | test_left_img, test_right_img = DA.dataloader(args.datapath) 61 | 62 | devices = [int(item) for item in args.devices.split(',')] 63 | ngpus = len(devices) 64 | 65 | if args.model == 'esnet': 66 | model = ESNet(batchNorm=False, lastRelu=True, maxdisp=-1) 67 | elif args.model == 'esnet_m': 68 | model = ESNet_M(batchNorm=False, lastRelu=True, maxdisp=-1) 69 | elif args.model == 'psmnet': 70 | model = PSMNet(args.maxdisp) 71 | elif args.model == 'fadnet': 72 | model = FADNet(False, True) 73 | elif args.model == 'dispnetc': 74 | model = DispNetC(batchNorm=False, lastRelu=True, maxdisp=-1) 75 | else: 76 | print('no model') 77 | sys.exit(-1) 78 | 79 | model = nn.DataParallel(model, device_ids=devices) 80 | model.cuda() 81 | 82 | if args.loadmodel is not None: 83 | state_dict = torch.load(args.loadmodel) 84 | model.load_state_dict(state_dict['state_dict']) 85 | 86 | print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()]))) 87 | 88 | def test(imgL,imgR): 89 | model.eval() 90 | 91 | if args.cuda: 92 | imgL = torch.FloatTensor(imgL).cuda() 93 | imgR = torch.FloatTensor(imgR).cuda() 94 | 95 | imgL, imgR= Variable(imgL), Variable(imgR) 96 | 97 | #print(imgL.size(), imgR.size()) 98 | with torch.no_grad(): 99 | if args.model == "fadnet": 100 | output_net1, output_net2 = model(torch.cat((imgL, imgR), 1)) 101 | output = torch.squeeze(output_net2) 102 | elif args.model == "psmnet": 103 | output = model(torch.cat((imgL, imgR), 1)) 104 | output = torch.squeeze(output) 105 | elif args.model in ['esnet', 'esnet_m', 'dispnetc', 'dispnets']: 106 | output = model(torch.cat((imgL, imgR), 1)) 107 | output = torch.squeeze(output[0]) 108 | 109 | pred_disp = output.data.cpu().numpy() 110 | 111 | print(pred_disp.shape) 112 | #print('larger than 192: %s' % pred_disp[pred_disp>0.75].shape) 113 | print('min: %f, max: %f, mean: %f' % (np.min(pred_disp), np.max(pred_disp), np.mean(pred_disp))) 114 | 115 | return pred_disp 116 | 117 | def main(): 118 | 119 | for inx in range(len(test_left_img)): 120 | print('image: %s'%test_left_img[inx]) 121 | 122 | imgL_o = (skimage.io.imread(test_left_img[inx]).astype('float32')) 123 | imgR_o = (skimage.io.imread(test_right_img[inx]).astype('float32')) 124 | 125 | rgb_transform = default_transform() 126 | imgL = rgb_transform(imgL_o).numpy() 127 | imgR = rgb_transform(imgR_o).numpy() 128 | 129 | # resize 130 | imgsize = imgL_o.shape[:2] 131 | 132 | # scale to resize 133 | ##target_size = (512, 1792) 134 | #target_size = (384, 1344) 135 | #scale_h = imgsize[0]*1.0/target_size[0] 136 | #scale_w = imgsize[1]*1.0/target_size[1] 137 | 138 | ##imgL_o = skimage.transform.resize(imgL_o, target_size, preserve_range=True) 139 | ##imgR_o = skimage.transform.resize(imgR_o, target_size, preserve_range=True) 140 | 141 | #imgL = processed(imgL_o).numpy() 142 | #imgR = processed(imgR_o).numpy() 143 | 144 | imgL = np.reshape(imgL,[1,3,imgL.shape[1],imgL.shape[2]]) 145 | imgR = 
np.reshape(imgR,[1,3,imgR.shape[1],imgR.shape[2]]) 146 | 147 | # pad to resize (384, 1280) 148 | top_pad = 384-imgL.shape[2] 149 | left_pad = 1280-imgL.shape[3] 150 | imgL = np.lib.pad(imgL,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) 151 | imgR = np.lib.pad(imgR,((0,0),(0,0),(top_pad,0),(0,left_pad)),mode='constant',constant_values=0) 152 | 153 | start_time = time.time() 154 | pred_disp = test(imgL,imgR) 155 | print('time = %.2f' %(time.time() - start_time)) 156 | 157 | top_pad = 384-imgL_o.shape[0] 158 | left_pad = 1280-imgL_o.shape[1] 159 | img = pred_disp[top_pad:,:-left_pad] 160 | 161 | # scale back 162 | #img = pred_disp 163 | #img = scale_disp(img, (imgsize[0], imgsize[1])) 164 | #round_img = skimage.transform.resize(round_img, imgsize, preserve_range=True) 165 | #print('out shape: ', img.shape) 166 | 167 | round_img = np.round(img*256) 168 | 169 | skimage.io.imsave(os.path.join(args.savepath, test_left_img[inx].split('/')[-1]),round_img.astype('uint16')) 170 | 171 | if __name__ == '__main__': 172 | main() 173 | 174 | 175 | 176 | 177 | 178 | 179 | -------------------------------------------------------------------------------- /layers_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/__init__.py -------------------------------------------------------------------------------- /layers_package/channelnorm_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/channelnorm_package/__init__.py -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm.py: -------------------------------------------------------------------------------- 1 | from torch.autograd import Function, Variable 2 | from torch.nn.modules.module import Module 3 | import channelnorm_cuda 4 | 5 | class ChannelNormFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, norm_deg=2): 9 | assert input1.is_contiguous() 10 | b, _, h, w = input1.size() 11 | output = input1.new(b, 1, h, w).zero_() 12 | 13 | channelnorm_cuda.forward(input1, output, norm_deg) 14 | ctx.save_for_backward(input1, output) 15 | ctx.norm_deg = norm_deg 16 | 17 | return output 18 | 19 | @staticmethod 20 | def backward(ctx, grad_output): 21 | input1, output = ctx.saved_tensors 22 | 23 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 24 | 25 | channelnorm_cuda.backward(input1, output, grad_output.data, 26 | grad_input1.data, ctx.norm_deg) 27 | 28 | return grad_input1, None 29 | 30 | 31 | class ChannelNorm(Module): 32 | 33 | def __init__(self, norm_deg=2): 34 | super(ChannelNorm, self).__init__() 35 | self.norm_deg = norm_deg 36 | 37 | def forward(self, input1): 38 | return ChannelNormFunction.apply(input1, self.norm_deg) 39 | 40 | -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm_cuda.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "channelnorm_kernel.cuh" 5 | 6 | int channelnorm_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& output, 9 | int norm_deg) { 10 | 11 | channelnorm_kernel_forward(input1, output, norm_deg); 12 | return 1; 13 | } 14 | 15 | 16 | 
int channelnorm_cuda_backward( 17 | at::Tensor& input1, 18 | at::Tensor& output, 19 | at::Tensor& gradOutput, 20 | at::Tensor& gradInput1, 21 | int norm_deg) { 22 | 23 | channelnorm_kernel_backward(input1, output, gradOutput, gradInput1, norm_deg); 24 | return 1; 25 | } 26 | 27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 28 | m.def("forward", &channelnorm_cuda_forward, "Channel norm forward (CUDA)"); 29 | m.def("backward", &channelnorm_cuda_backward, "Channel norm backward (CUDA)"); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: channelnorm-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 | -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | channelnorm_cuda.cc 2 | channelnorm_kernel.cu 3 | setup.py 4 | channelnorm_cuda.egg-info/PKG-INFO 5 | channelnorm_cuda.egg-info/SOURCES.txt 6 | channelnorm_cuda.egg-info/dependency_links.txt 7 | channelnorm_cuda.egg-info/top_level.txt -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | channelnorm_cuda 2 | -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm_kernel.cu: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "channelnorm_kernel.cuh" 6 | 7 | #define CUDA_NUM_THREADS 512 8 | 9 | #define DIM0(TENSOR) ((TENSOR).x) 10 | #define DIM1(TENSOR) ((TENSOR).y) 11 | #define DIM2(TENSOR) ((TENSOR).z) 12 | #define DIM3(TENSOR) ((TENSOR).w) 13 | 14 | #define DIM3_INDEX(TENSOR, xx, yy, zz, ww) ((TENSOR)[((xx) * (TENSOR##_stride.x)) + ((yy) * (TENSOR##_stride.y)) + ((zz) * (TENSOR##_stride.z)) + ((ww) * (TENSOR##_stride.w))]) 15 | 16 | using at::Half; 17 | 18 | template 19 | __global__ void kernel_channelnorm_update_output( 20 | const int n, 21 | const scalar_t* __restrict__ input1, 22 | const long4 input1_size, 23 | const long4 input1_stride, 24 | scalar_t* __restrict__ output, 25 | const long4 output_size, 26 | const long4 output_stride, 27 | int norm_deg) { 28 | 29 | int index = blockIdx.x * blockDim.x + threadIdx.x; 30 | 31 | if (index >= n) { 32 | return; 33 | } 34 | 35 | int dim_b = DIM0(output_size); 36 | int dim_c = DIM1(output_size); 37 | int dim_h = DIM2(output_size); 38 | int dim_w = DIM3(output_size); 39 | int dim_chw = dim_c * dim_h * dim_w; 40 | 41 | int b = ( index / dim_chw ) % dim_b; 42 | int y = ( index / dim_w ) % dim_h; 43 | int x = ( index ) % dim_w; 44 | 45 | int i1dim_c = DIM1(input1_size); 46 | int i1dim_h = DIM2(input1_size); 47 | int i1dim_w = DIM3(input1_size); 48 | int i1dim_chw = 
i1dim_c * i1dim_h * i1dim_w; 49 | int i1dim_hw = i1dim_h * i1dim_w; 50 | 51 | float result = 0.0; 52 | 53 | for (int c = 0; c < i1dim_c; ++c) { 54 | int i1Index = b * i1dim_chw + c * i1dim_hw + y * i1dim_w + x; 55 | scalar_t val = input1[i1Index]; 56 | result += static_cast(val * val); 57 | } 58 | result = sqrt(result); 59 | output[index] = static_cast(result); 60 | } 61 | 62 | 63 | template 64 | __global__ void kernel_channelnorm_backward_input1( 65 | const int n, 66 | const scalar_t* __restrict__ input1, const long4 input1_size, const long4 input1_stride, 67 | const scalar_t* __restrict__ output, const long4 output_size, const long4 output_stride, 68 | const scalar_t* __restrict__ gradOutput, const long4 gradOutput_size, const long4 gradOutput_stride, 69 | scalar_t* __restrict__ gradInput, const long4 gradInput_size, const long4 gradInput_stride, 70 | int norm_deg) { 71 | 72 | int index = blockIdx.x * blockDim.x + threadIdx.x; 73 | 74 | if (index >= n) { 75 | return; 76 | } 77 | 78 | float val = 0.0; 79 | 80 | int dim_b = DIM0(gradInput_size); 81 | int dim_c = DIM1(gradInput_size); 82 | int dim_h = DIM2(gradInput_size); 83 | int dim_w = DIM3(gradInput_size); 84 | int dim_chw = dim_c * dim_h * dim_w; 85 | int dim_hw = dim_h * dim_w; 86 | 87 | int b = ( index / dim_chw ) % dim_b; 88 | int y = ( index / dim_w ) % dim_h; 89 | int x = ( index ) % dim_w; 90 | 91 | 92 | int outIndex = b * dim_hw + y * dim_w + x; 93 | val = static_cast(gradOutput[outIndex]) * static_cast(input1[index]) / (static_cast(output[outIndex])+1e-9); 94 | gradInput[index] = static_cast(val); 95 | 96 | } 97 | 98 | void channelnorm_kernel_forward( 99 | at::Tensor& input1, 100 | at::Tensor& output, 101 | int norm_deg) { 102 | 103 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 104 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 105 | 106 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 107 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 108 | 109 | int n = output.numel(); 110 | 111 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_forward", ([&] { 112 | 113 | kernel_channelnorm_update_output<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 114 | //at::globalContext().getCurrentCUDAStream() >>>( 115 | n, 116 | input1.data(), 117 | input1_size, 118 | input1_stride, 119 | output.data(), 120 | output_size, 121 | output_stride, 122 | norm_deg); 123 | 124 | })); 125 | 126 | // TODO: ATen-equivalent check 127 | 128 | // THCudaCheck(cudaGetLastError()); 129 | } 130 | 131 | void channelnorm_kernel_backward( 132 | at::Tensor& input1, 133 | at::Tensor& output, 134 | at::Tensor& gradOutput, 135 | at::Tensor& gradInput1, 136 | int norm_deg) { 137 | 138 | const long4 input1_size = make_long4(input1.size(0), input1.size(1), input1.size(2), input1.size(3)); 139 | const long4 input1_stride = make_long4(input1.stride(0), input1.stride(1), input1.stride(2), input1.stride(3)); 140 | 141 | const long4 output_size = make_long4(output.size(0), output.size(1), output.size(2), output.size(3)); 142 | const long4 output_stride = make_long4(output.stride(0), output.stride(1), output.stride(2), output.stride(3)); 143 | 144 | const long4 gradOutput_size = make_long4(gradOutput.size(0), gradOutput.size(1), gradOutput.size(2), 
gradOutput.size(3)); 145 | const long4 gradOutput_stride = make_long4(gradOutput.stride(0), gradOutput.stride(1), gradOutput.stride(2), gradOutput.stride(3)); 146 | 147 | const long4 gradInput1_size = make_long4(gradInput1.size(0), gradInput1.size(1), gradInput1.size(2), gradInput1.size(3)); 148 | const long4 gradInput1_stride = make_long4(gradInput1.stride(0), gradInput1.stride(1), gradInput1.stride(2), gradInput1.stride(3)); 149 | 150 | int n = gradInput1.numel(); 151 | 152 | AT_DISPATCH_FLOATING_TYPES_AND_HALF(input1.type(), "channelnorm_backward_input1", ([&] { 153 | 154 | kernel_channelnorm_backward_input1<<< (n + CUDA_NUM_THREADS - 1)/CUDA_NUM_THREADS, CUDA_NUM_THREADS, 0, at::cuda::getCurrentCUDAStream() >>>( 155 | //at::globalContext().getCurrentCUDAStream() >>>( 156 | n, 157 | input1.data(), 158 | input1_size, 159 | input1_stride, 160 | output.data(), 161 | output_size, 162 | output_stride, 163 | gradOutput.data(), 164 | gradOutput_size, 165 | gradOutput_stride, 166 | gradInput1.data(), 167 | gradInput1_size, 168 | gradInput1_stride, 169 | norm_deg 170 | ); 171 | 172 | })); 173 | 174 | // TODO: Add ATen-equivalent check 175 | 176 | // THCudaCheck(cudaGetLastError()); 177 | } 178 | -------------------------------------------------------------------------------- /layers_package/channelnorm_package/channelnorm_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | 5 | void channelnorm_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& output, 8 | int norm_deg); 9 | 10 | 11 | void channelnorm_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& output, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | int norm_deg); 17 | -------------------------------------------------------------------------------- /layers_package/channelnorm_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_52,code=sm_52', 12 | '-gencode', 'arch=compute_60,code=sm_60', 13 | '-gencode', 'arch=compute_61,code=sm_61', 14 | '-gencode', 'arch=compute_70,code=sm_70', 15 | '-gencode', 'arch=compute_70,code=compute_70', 16 | '-gencode', 'arch=compute_75,code=sm_75', 17 | '-gencode', 'arch=compute_75,code=compute_75' 18 | ] 19 | 20 | setup( 21 | name='channelnorm_cuda', 22 | ext_modules=[ 23 | CUDAExtension('channelnorm_cuda', [ 24 | 'channelnorm_cuda.cc', 25 | 'channelnorm_kernel.cu' 26 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 27 | ], 28 | cmdclass={ 29 | 'build_ext': BuildExtension 30 | }) 31 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/correlation-pytorch-master/correlation-pytorch/__init__.py -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/build.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.ffi import create_extension 4 
| 5 | 6 | sources = ['correlation_package/src/corr.c'] 7 | headers = ['correlation_package/src/corr.h'] 8 | 9 | sources += ['correlation_package/src/corr1d.c'] 10 | headers += ['correlation_package/src/corr1d.h'] 11 | 12 | defines = [] 13 | with_cuda = False 14 | 15 | if torch.cuda.is_available(): 16 | print('Including CUDA code.') 17 | sources += ['correlation_package/src/corr_cuda.c'] 18 | headers += ['correlation_package/src/corr_cuda.h'] 19 | 20 | sources += ['correlation_package/src/corr1d_cuda.c'] 21 | headers += ['correlation_package/src/corr1d_cuda.h'] 22 | 23 | defines += [('WITH_CUDA', None)] 24 | with_cuda = True 25 | 26 | this_file = os.path.dirname(os.path.realpath(__file__)) 27 | extra_objects = ['correlation_package/src/corr_cuda_kernel.cu.o'] 28 | extra_objects += ['correlation_package/src/corr1d_cuda_kernel.cu.o'] 29 | extra_objects = [os.path.join(this_file, fname) for fname in extra_objects] 30 | 31 | ffi = create_extension( 32 | 'correlation_package._ext.corr', 33 | package=True, 34 | headers=headers, 35 | sources=sources, 36 | define_macros=defines, 37 | relative_to=__file__, 38 | with_cuda=with_cuda, 39 | extra_objects=extra_objects, 40 | ) 41 | 42 | if __name__ == '__main__': 43 | ffi.build() 44 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/__init__.py -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/_ext/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/_ext/__init__.py -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/_ext/corr/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._corr import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | locals[symbol] = _wrap_function(fn, _ffi) 10 | __all__.append(symbol) 11 | 12 | _import_symbols(locals()) 13 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/functions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/functions/__init__.py -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/functions/corr.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.autograd import Function 3 | from .._ext import 
corr 4 | 5 | class correlation(Function): 6 | 7 | def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=1, corr_multiply=1): 8 | super(correlation, self).__init__() 9 | self.pad_size = pad_size 10 | self.kernel_size = kernel_size 11 | self.max_displacement = max_displacement 12 | self.stride1 = stride1 13 | self.stride2 = stride2 14 | self.corr_multiply = corr_multiply 15 | 16 | def forward(self, input1, input2): 17 | 18 | self.save_for_backward(input1, input2) 19 | 20 | rbot1 = input1.new() 21 | rbot2 = input2.new() 22 | output = input1.new() 23 | 24 | corr.corr_cuda_forward(input1, input2, 25 | rbot1, rbot2, 26 | output, 27 | self.pad_size, 28 | self.kernel_size, 29 | self.max_displacement, 30 | self.stride1, 31 | self.stride2, 32 | self.corr_multiply) 33 | 34 | return output 35 | 36 | def backward(self, grad_output): 37 | 38 | input1, input2 = self.saved_tensors 39 | 40 | rbot1 = input1.new() 41 | rbot2 = input2.new() 42 | 43 | grad_input1 = torch.zeros(input1.size()).cuda() 44 | grad_input2 = torch.zeros(input2.size()).cuda() 45 | 46 | corr.corr_cuda_backward(input1, input2, 47 | rbot1, rbot2, 48 | grad_output, 49 | grad_input1, 50 | grad_input2, 51 | self.pad_size, 52 | self.kernel_size, 53 | self.max_displacement, 54 | self.stride1, 55 | self.stride2, 56 | self.corr_multiply) 57 | 58 | return grad_input1, grad_input2 59 | 60 | 61 | #----- 1D correlation (for disparity) Jinwei Gu ----- 62 | 63 | class correlation1d(Function): 64 | 65 | def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=1, corr_multiply=1): 66 | super(correlation1d, self).__init__() 67 | self.pad_size = pad_size 68 | self.kernel_size = kernel_size 69 | self.max_displacement = max_displacement 70 | self.stride1 = stride1 71 | self.stride2 = stride2 72 | self.corr_multiply = corr_multiply 73 | 74 | def forward(self, input1, input2): 75 | 76 | self.save_for_backward(input1, input2) 77 | 78 | rbot1 = input1.new() 79 | rbot2 = input2.new() 80 | output = input1.new() 81 | 82 | corr.corr1d_cuda_forward(input1, input2, 83 | rbot1, rbot2, 84 | output, 85 | self.pad_size, 86 | self.kernel_size, 87 | self.max_displacement, 88 | self.stride1, 89 | self.stride2, 90 | self.corr_multiply) 91 | 92 | return output 93 | 94 | def backward(self, grad_output): 95 | 96 | input1, input2 = self.saved_tensors 97 | 98 | rbot1 = input1.new() 99 | rbot2 = input2.new() 100 | 101 | grad_input1 = torch.zeros(input1.size()).cuda() 102 | grad_input2 = torch.zeros(input2.size()).cuda() 103 | 104 | #grad_input1 = grad_output.new() 105 | #grad_input2 = grad_output.new() 106 | 107 | corr.corr1d_cuda_backward(input1, input2, 108 | rbot1, rbot2, 109 | grad_output, 110 | grad_input1, 111 | grad_input2, 112 | self.pad_size, 113 | self.kernel_size, 114 | self.max_displacement, 115 | self.stride1, 116 | self.stride2, 117 | self.corr_multiply) 118 | 119 | return grad_input1, grad_input2 120 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/modules/__init__.py -------------------------------------------------------------------------------- 
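
A minimal usage sketch of the functional interface defined in `functions/corr.py` above, before the module wrappers that follow. This is an illustration rather than a file from the repository: it assumes the FFI extension has been built with `make_cuda.sh` (legacy `torch.utils.ffi`, PyTorch <= 0.4) and that a CUDA device is available; the shapes and hyper-parameters are arbitrary examples mirroring `test/test.py` further below.

```python
import torch
from torch.autograd import Variable
from correlation_package.functions.corr import correlation, correlation1d

# Two random feature maps (batch, channels, height, width) on the GPU.
left = Variable(torch.randn(2, 3, 100, 100).cuda(), requires_grad=True)
right = Variable(torch.randn(2, 3, 100, 100).cuda(), requires_grad=True)

# 2D correlation: the output has ((max_displacement/stride2)*2 + 1)^2 channels,
# one per (dx, dy) displacement combination (see corr_cuda.c below).
out2d = correlation(pad_size=3, kernel_size=3, max_displacement=20,
                    stride1=1, stride2=2, corr_multiply=1)(left, right)

# 1D correlation for disparity: displacements are searched along x only, so the
# output has (max_displacement/stride2)*2 + 1 channels (see corr1d_cuda.c below).
out1d = correlation1d(pad_size=20, kernel_size=1, max_displacement=20,
                      stride1=1, stride2=1, corr_multiply=1)(left, right)

# Both ops implement backward(), so gradients reach both inputs.
out1d.mean().backward()
```
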
/layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/modules/corr.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from ..functions.corr import correlation, correlation1d 3 | 4 | class Correlation(Module): 5 | 6 | def __init__(self, pad_size=None, kernel_size=None, max_displacement=None, 7 | stride1=None, stride2=None, corr_multiply=None): 8 | super(Correlation, self).__init__() 9 | self.pad_size = pad_size 10 | self.kernel_size = kernel_size 11 | self.max_displacement = max_displacement 12 | self.stride1 = stride1 13 | self.stride2 = stride2 14 | self.corr_multiply = corr_multiply 15 | 16 | def reset_params(self): 17 | return 18 | 19 | def forward(self, input1, input2): 20 | return correlation(self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)(input1, input2) 21 | 22 | def __repr__(self): 23 | return self.__class__.__name__ 24 | 25 | 26 | #----- correlation in 1D (for disparity) Jinwei Gu ----- 27 | 28 | class Correlation1d(Module): 29 | 30 | def __init__(self, pad_size=None, kernel_size=None, max_displacement=None, 31 | stride1=None, stride2=None, corr_multiply=None): 32 | super(Correlation1d, self).__init__() 33 | self.pad_size = pad_size 34 | self.kernel_size = kernel_size 35 | self.max_displacement = max_displacement 36 | self.stride1 = stride1 37 | self.stride2 = stride2 38 | self.corr_multiply = corr_multiply 39 | 40 | def reset_params(self): 41 | return 42 | 43 | def forward(self, input1, input2): 44 | return correlation1d(self.pad_size, self.kernel_size, self.max_displacement, self.stride1, self.stride2, self.corr_multiply)(input1, input2) 45 | 46 | def __repr__(self): 47 | return self.__class__.__name__ 48 |
-------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | 3 | int corr_cpu_forward(THFloatTensor *input1, 4 | THFloatTensor *input2, 5 | THFloatTensor *rbot1, 6 | THFloatTensor *rbot2, 7 | THFloatTensor *output, 8 | int pad_size, 9 | int kernel_size, 10 | int max_displacement, 11 | int stride1, 12 | int stride2, 13 | int corr_type_multiply) 14 | { 15 | return 1; 16 | } 17 | 18 | int corr_cpu_backward(THFloatTensor *input1, 19 | THFloatTensor *input2, 20 | THFloatTensor *rbot1, 21 | THFloatTensor *rbot2, 22 | THFloatTensor *gradOutput, 23 | THFloatTensor *gradInput1, 24 | THFloatTensor *gradInput2, 25 | int pad_size, 26 | int kernel_size, 27 | int max_displacement, 28 | int stride1, 29 | int stride2, 30 | int corr_type_multiply) 31 | { 32 | return 1; 33 | } 34 | 35 |
-------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr.h: -------------------------------------------------------------------------------- 1 | int corr_cpu_forward(THFloatTensor *input1, 2 | THFloatTensor *input2, 3 | THFloatTensor *rbot1, 4 | THFloatTensor *rbot2, 5 | THFloatTensor *output, 6 | int pad_size, 7 | int kernel_size, 8 | int max_displacement, 9 | int stride1, 10 | int stride2, 11 | int corr_type_multiply); 12 | 13 | int corr_cpu_backward(THFloatTensor *input1, 14 | THFloatTensor *input2, 15 | THFloatTensor *rbot1, 16 | THFloatTensor *rbot2, 17 | THFloatTensor *gradOutput, 18 | THFloatTensor *gradInput1, 19
| THFloatTensor *gradInput2, 20 | int pad_size, 21 | int kernel_size, 22 | int max_displacement, 23 | int stride1, 24 | int stride2, 25 | int corr_type_multiply); 26 |
-------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d.c: -------------------------------------------------------------------------------- 1 | #include <TH/TH.h> 2 | 3 | int corr1d_cpu_forward(THFloatTensor *input1, 4 | THFloatTensor *input2, 5 | THFloatTensor *rbot1, 6 | THFloatTensor *rbot2, 7 | THFloatTensor *output, 8 | int pad_size, 9 | int kernel_size, 10 | int max_displacement, 11 | int stride1, 12 | int stride2, 13 | int corr_type_multiply) 14 | { 15 | return 1; 16 | } 17 | 18 | int corr1d_cpu_backward(THFloatTensor *input1, 19 | THFloatTensor *input2, 20 | THFloatTensor *rbot1, 21 | THFloatTensor *rbot2, 22 | THFloatTensor *gradOutput, 23 | THFloatTensor *gradInput1, 24 | THFloatTensor *gradInput2, 25 | int pad_size, 26 | int kernel_size, 27 | int max_displacement, 28 | int stride1, 29 | int stride2, 30 | int corr_type_multiply) 31 | { 32 | return 1; 33 | } 34 | 35 |
-------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d.h: -------------------------------------------------------------------------------- 1 | int corr1d_cpu_forward(THFloatTensor *input1, 2 | THFloatTensor *input2, 3 | THFloatTensor *rbot1, 4 | THFloatTensor *rbot2, 5 | THFloatTensor *output, 6 | int pad_size, 7 | int kernel_size, 8 | int max_displacement, 9 | int stride1, 10 | int stride2, 11 | int corr_type_multiply); 12 | 13 | int corr1d_cpu_backward(THFloatTensor *input1, 14 | THFloatTensor *input2, 15 | THFloatTensor *rbot1, 16 | THFloatTensor *rbot2, 17 | THFloatTensor *gradOutput, 18 | THFloatTensor *gradInput1, 19 | THFloatTensor *gradInput2, 20 | int pad_size, 21 | int kernel_size, 22 | int max_displacement, 23 | int stride1, 24 | int stride2, 25 | int corr_type_multiply); 26 |
-------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include "corr1d_cuda_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | // == Forward 7 | int corr1d_cuda_forward(THCudaTensor *input1, 8 | THCudaTensor *input2, 9 | THCudaTensor *rbot1, 10 | THCudaTensor *rbot2, 11 | THCudaTensor *output, 12 | int pad_size, 13 | int kernel_size, 14 | int max_displacement, 15 | int stride1, 16 | int stride2, 17 | int corr_type_multiply 18 | //single_direction=0 19 | ) 20 | { 21 | 22 | // TODO: Shapechecks 23 | 24 | int batchSize = input1->size[0]; 25 | 26 | long nInputPlane = input1->size[1]; 27 | long nInputRows = input1->size[2]; 28 | long nInputCols = input1->size[3]; 29 | long inputWidthHeight = nInputRows * nInputCols; 30 | 31 | long kernel_radius_ = (kernel_size - 1) / 2; 32 | long border_size_ = max_displacement + kernel_radius_; // size of unreachable border region (on each side) 33 | 34 | long paddedbottomheight = nInputRows; 35 | long paddedbottomwidth = nInputCols + 2 * pad_size; 36 | 37 | long nOutputCols = ceil((float)(paddedbottomwidth - border_size_ * 2) / (float)stride1); 38 | long nOutputRows = ceil((float)(paddedbottomheight - kernel_radius_ * 2) / (float)stride1); 39 | 40 | // Given a center position in image 1, how
many displaced positions in -x / +x 41 | // direction do we consider in image2 (neighborhood_grid_width) 42 | long neighborhood_grid_radius_ = max_displacement / stride2; 43 | long neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1; 44 | int x_shift = -neighborhood_grid_radius_; 45 | 46 | // Number of output channels amounts to displacement combinations in X direction only!! 47 | int nOutputPlane = neighborhood_grid_width_;//Same, because 1D X-correlation 48 | 49 | // Inputs 50 | float * input1_data = THCudaTensor_data(state, input1); 51 | float * input2_data = THCudaTensor_data(state, input2); 52 | 53 | // Outputs 54 | THCudaTensor_resize4d(state, output, batchSize, nOutputPlane, nOutputRows, nOutputCols); 55 | THCudaTensor_zero(state, output); // added by Jinwei 56 | float * output_data = THCudaTensor_data(state, output); 57 | 58 | THCudaTensor_resize4d(state, rbot1, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 59 | THCudaTensor_resize4d(state, rbot2, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 60 | 61 | THCudaTensor_zero(state, rbot1); // added by Jinwei 62 | THCudaTensor_zero(state, rbot2); // added by Jinwei 63 | 64 | float * rbot1_data = THCudaTensor_data(state, rbot1); 65 | float * rbot2_data = THCudaTensor_data(state, rbot2); 66 | 67 | cudaStream_t stream = THCState_getCurrentStream(state); 68 | 69 | int pwidthheight = paddedbottomwidth * paddedbottomheight; 70 | 71 | blob_rearrange_ongpu_1d(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 72 | 73 | blob_rearrange_ongpu_1d(input2_data,rbot2_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 74 | 75 | CorrelateData_ongpu_1d(rbot1_data,rbot2_data,output_data,batchSize,nOutputCols,nOutputRows,nOutputPlane,max_displacement,x_shift,neighborhood_grid_width_,kernel_radius_,kernel_size,stride1,stride2,paddedbottomwidth,paddedbottomheight,nInputPlane,corr_type_multiply,stream); 76 | 77 | // THCudaTensor_free(state, input1); 78 | // THCudaTensor_free(state, input2); 79 | THCudaTensor_free(state, rbot1); 80 | THCudaTensor_free(state, rbot2); 81 | 82 | return 1; 83 | 84 | } 85 | 86 | int corr1d_cuda_backward(THCudaTensor *input1, 87 | THCudaTensor *input2, 88 | THCudaTensor *rbot1, 89 | THCudaTensor *rbot2, 90 | THCudaTensor *gradOutput, 91 | THCudaTensor *gradInput1, 92 | THCudaTensor *gradInput2, 93 | int pad_size, 94 | int kernel_size, 95 | int max_displacement, 96 | int stride1, 97 | int stride2, 98 | int corr_type_multiply 99 | // single_direction=0 100 | ) 101 | { 102 | 103 | float * input1_data = THCudaTensor_data(state, input1); 104 | float * input2_data = THCudaTensor_data(state, input2); 105 | 106 | long nInputCols = input1->size[3]; 107 | long nInputRows = input1->size[2]; 108 | long nInputPlane = input1->size[1]; 109 | long batchSize = input1->size[0]; 110 | 111 | // THCudaTensor_resizeAs(state, gradInput1, input1); 112 | // THCudaTensor_resizeAs(state, gradInput2, input2); 113 | float * gradOutput_data = THCudaTensor_data(state, gradOutput); 114 | float * gradInput1_data = THCudaTensor_data(state, gradInput1); 115 | float * gradInput2_data = THCudaTensor_data(state, gradInput2); 116 | 117 | long inputWidthHeight = nInputRows * nInputCols; 118 | 119 | long kernel_radius_ = (kernel_size - 1) / 2; 120 | long border_size_ = max_displacement + kernel_radius_; // size of unreachable border region (on each side) 121 | 122 | long paddedbottomheight = nInputRows; 123 | long 
paddedbottomwidth = nInputCols + 2 * pad_size; 124 | 125 | long nOutputCols = ceil((float)(paddedbottomwidth - border_size_ * 2) / (float)stride1); 126 | long nOutputRows = ceil((float)(paddedbottomheight - kernel_radius_ * 2) / (float)stride1); 127 | 128 | // Given a center position in image 1, how many displaced positions in -x / +x 129 | // direction do we consider in image2 (neighborhood_grid_width) 130 | long neighborhood_grid_radius_ = max_displacement / stride2; 131 | long neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1; 132 | int x_shift = -neighborhood_grid_radius_; 133 | 134 | // Number of output channels amounts to displacement combinations in X direction only!! 135 | int nOutputPlane = neighborhood_grid_width_; // Same, because 1D X-correlation 136 | 137 | THCudaTensor_resize4d(state, rbot1, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 138 | THCudaTensor_resize4d(state, rbot2, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 139 | 140 | THCudaTensor_zero(state, rbot1); // added by Jinwei 141 | THCudaTensor_zero(state, rbot2); // added by Jinwei 142 | 143 | float * rbot1_data = THCudaTensor_data(state, rbot1); 144 | float * rbot2_data = THCudaTensor_data(state, rbot2); 145 | 146 | int pwidthheight = paddedbottomwidth * paddedbottomheight; 147 | 148 | cudaStream_t stream = THCState_getCurrentStream(state); 149 | 150 | blob_rearrange_ongpu_1d(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 151 | 152 | blob_rearrange_ongpu_1d(input2_data,rbot2_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 153 | 154 | // CorrelationLayerBackward 155 | 156 | CorrelateDataBackward_ongpu_1d(rbot1_data,rbot2_data,gradOutput_data,gradInput1_data,gradInput2_data,batchSize,nOutputCols,nOutputRows,nOutputPlane,max_displacement,x_shift,neighborhood_grid_width_,kernel_radius_,stride1,stride2,nInputCols,nInputRows,paddedbottomwidth,paddedbottomheight,nInputPlane,pad_size,corr_type_multiply,stream); 157 | 158 | // THCudaTensor_free(state, input1); 159 | // THCudaTensor_free(state, input2); 160 | THCudaTensor_free(state, rbot1); 161 | THCudaTensor_free(state, rbot2); 162 | 163 | return 1; 164 | 165 | } 166 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda.h: -------------------------------------------------------------------------------- 1 | int corr1d_cuda_forward(THCudaTensor *input1, 2 | THCudaTensor *input2, 3 | THCudaTensor *rbot1, 4 | THCudaTensor *rbot2, 5 | THCudaTensor *output, 6 | int pad_size, 7 | int kernel_size, 8 | int max_displacement, 9 | int stride1, 10 | int stride2, 11 | int corr_type_multiply); 12 | 13 | int corr1d_cuda_backward(THCudaTensor *input1, 14 | THCudaTensor *input2, 15 | THCudaTensor *rbot1, 16 | THCudaTensor *rbot2, 17 | THCudaTensor *grad_output, 18 | THCudaTensor *grad_input1, 19 | THCudaTensor *grad_input2, 20 | int pad_size, 21 | int kernel_size, 22 | int max_displacement, 23 | int stride1, 24 | int stride2, 25 | int corr_type_multiply); 26 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr1d_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CORR_CUDA_KERNEL 2 | #define _CORR_CUDA_KERNEL 3 | 4 | #ifdef 
__cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void blob_rearrange_ongpu_1d(const float *in, float *out, int num, int channels, int width, int height, int widthheight, int padding, int pwidthheight, cudaStream_t stream); 9 | 10 | void CorrelateData_ongpu_1d(const float *rbot1, const float *rbot2, float *output, int batchSize, int nOutputCols, int nOutputRows, int nOutputPlane, int max_displacement, int x_shift, int neighborhood_grid_width_, int kernel_radius_, int kernel_size, int stride1, int stride2, int paddedbottomwidth, int paddedbottomheight, int nInputPlane, int corr_type_multiply, cudaStream_t stream); 11 | 12 | void CorrelateDataBackward_ongpu_1d(const float *rbot1, const float *rbot2, const float *gradOutput, float *gradInput1, float *gradInput2, int batchSize, int nOutputCols, int nOutputRows, int nOutputPlane, int max_displacement, int x_shift, int neighborhood_grid_width_, int kernel_radius_, int stride1, int stride2, int nInputCols, int nInputRows, int paddedbottomwidth, int paddedbottomheight, int nInputPlane, int pad_size, int corr_type_multiply, cudaStream_t stream); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 |
-------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.c: -------------------------------------------------------------------------------- 1 | #include <THC/THC.h> 2 | #include "corr_cuda_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | // == Forward 7 | int corr_cuda_forward(THCudaTensor *input1, 8 | THCudaTensor *input2, 9 | THCudaTensor *rbot1, 10 | THCudaTensor *rbot2, 11 | THCudaTensor *output, 12 | int pad_size, 13 | int kernel_size, 14 | int max_displacement, 15 | int stride1, 16 | int stride2, 17 | int corr_type_multiply 18 | ) 19 | { 20 | 21 | // TODO: Shapechecks 22 | 23 | int batchSize = input1->size[0]; 24 | 25 | long nInputPlane = input1->size[1]; 26 | long nInputRows = input1->size[2]; 27 | long nInputCols = input1->size[3]; 28 | long inputWidthHeight = nInputRows * nInputCols; 29 | 30 | long kernel_radius_ = (kernel_size - 1) / 2; 31 | long border_size_ = max_displacement + kernel_radius_; // size of unreachable border region (on each side) 32 | 33 | long paddedbottomheight = nInputRows + 2 * pad_size; 34 | long paddedbottomwidth = nInputCols + 2 * pad_size; 35 | 36 | long nOutputCols = ceil((float)(paddedbottomwidth - border_size_ * 2) / (float)stride1); 37 | long nOutputRows = ceil((float)(paddedbottomheight - border_size_ * 2) / (float)stride1); 38 | 39 | // Given a center position in image 1, how many displaced positions in -x / +x 40 | // direction do we consider in image2 (neighborhood_grid_width) 41 | long neighborhood_grid_radius_ = max_displacement / stride2; 42 | long neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1; 43 | 44 | // Number of output channels amounts to displacement combinations in X and Y direction 45 | int nOutputPlane = neighborhood_grid_width_ * neighborhood_grid_width_; 46 | 47 | // Inputs 48 | float * input1_data = THCudaTensor_data(state, input1); 49 | float * input2_data = THCudaTensor_data(state, input2); 50 | 51 | // Outputs 52 | THCudaTensor_resize4d(state, output, batchSize, nOutputPlane, nOutputRows, nOutputCols); 53 | THCudaTensor_zero(state, output); // added by Jinwei 54 | float * output_data = THCudaTensor_data(state, output); 55 | 56 | THCudaTensor_resize4d(state, rbot1, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 57 |
THCudaTensor_resize4d(state, rbot2, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 58 | 59 | THCudaTensor_zero(state, rbot1); // added by Jinwei 60 | THCudaTensor_zero(state, rbot2); // added by Jinwei 61 | 62 | float * rbot1_data = THCudaTensor_data(state, rbot1); 63 | float * rbot2_data = THCudaTensor_data(state, rbot2); 64 | 65 | cudaStream_t stream = THCState_getCurrentStream(state); 66 | 67 | int pwidthheight = paddedbottomwidth * paddedbottomheight; 68 | 69 | blob_rearrange_ongpu(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 70 | 71 | blob_rearrange_ongpu(input2_data,rbot2_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 72 | 73 | CorrelateData_ongpu(rbot1_data,rbot2_data,output_data,batchSize,nOutputCols,nOutputRows,nOutputPlane,max_displacement,neighborhood_grid_radius_,neighborhood_grid_width_,kernel_radius_,kernel_size,stride1,stride2,paddedbottomwidth,paddedbottomheight,nInputPlane,corr_type_multiply,stream); 74 | 75 | // THCudaTensor_free(state, input1); 76 | // THCudaTensor_free(state, input2); 77 | THCudaTensor_free(state, rbot1); 78 | THCudaTensor_free(state, rbot2); 79 | 80 | return 1; 81 | 82 | } 83 | 84 | int corr_cuda_backward(THCudaTensor *input1, 85 | THCudaTensor *input2, 86 | THCudaTensor *rbot1, 87 | THCudaTensor *rbot2, 88 | THCudaTensor *gradOutput, 89 | THCudaTensor *gradInput1, 90 | THCudaTensor *gradInput2, 91 | int pad_size, 92 | int kernel_size, 93 | int max_displacement, 94 | int stride1, 95 | int stride2, 96 | int corr_type_multiply 97 | ) 98 | { 99 | 100 | float * input1_data = THCudaTensor_data(state, input1); 101 | float * input2_data = THCudaTensor_data(state, input2); 102 | 103 | long nInputCols = input1->size[3]; 104 | long nInputRows = input1->size[2]; 105 | long nInputPlane = input1->size[1]; 106 | long batchSize = input1->size[0]; 107 | 108 | // THCudaTensor_resizeAs(state, gradInput1, input1); 109 | // THCudaTensor_resizeAs(state, gradInput2, input2); 110 | float * gradOutput_data = THCudaTensor_data(state, gradOutput); 111 | float * gradInput1_data = THCudaTensor_data(state, gradInput1); 112 | float * gradInput2_data = THCudaTensor_data(state, gradInput2); 113 | 114 | long inputWidthHeight = nInputRows * nInputCols; 115 | 116 | long kernel_radius_ = (kernel_size - 1) / 2; 117 | long border_size_ = max_displacement + kernel_radius_; // size of unreachable border region (on each side) 118 | 119 | long paddedbottomheight = nInputRows + 2 * pad_size; 120 | long paddedbottomwidth = nInputCols + 2 * pad_size; 121 | 122 | long nOutputCols = ceil((float)(paddedbottomwidth - border_size_ * 2) / (float)stride1); 123 | long nOutputRows = ceil((float)(paddedbottomheight - border_size_ * 2) / (float)stride1); 124 | 125 | // Given a center position in image 1, how many displaced positions in -x / +x 126 | // direction do we consider in image2 (neighborhood_grid_width) 127 | long neighborhood_grid_radius_ = max_displacement / stride2; 128 | long neighborhood_grid_width_ = neighborhood_grid_radius_ * 2 + 1; 129 | 130 | // Number of output channels amounts to displacement combinations in X and Y direction 131 | int nOutputPlane = neighborhood_grid_width_ * neighborhood_grid_width_; 132 | 133 | THCudaTensor_resize4d(state, rbot1, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 134 | THCudaTensor_resize4d(state, rbot2, batchSize, nInputPlane, paddedbottomheight, paddedbottomwidth); 135 | 136 | THCudaTensor_zero(state, 
rbot1); // added by Jinwei 137 | THCudaTensor_zero(state, rbot2); // added by Jinwei 138 | 139 | float * rbot1_data = THCudaTensor_data(state, rbot1); 140 | float * rbot2_data = THCudaTensor_data(state, rbot2); 141 | 142 | int pwidthheight = paddedbottomwidth * paddedbottomheight; 143 | 144 | cudaStream_t stream = THCState_getCurrentStream(state); 145 | 146 | blob_rearrange_ongpu(input1_data,rbot1_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 147 | 148 | blob_rearrange_ongpu(input2_data,rbot2_data,batchSize,nInputPlane,nInputCols,nInputRows,inputWidthHeight,pad_size,pwidthheight,stream); 149 | 150 | // CorrelationLayerBackward 151 | 152 | CorrelateDataBackward_ongpu(rbot1_data,rbot2_data,gradOutput_data,gradInput1_data,gradInput2_data,batchSize,nOutputCols,nOutputRows,nOutputPlane,max_displacement,neighborhood_grid_radius_,neighborhood_grid_width_,kernel_radius_,stride1,stride2,nInputCols,nInputRows,paddedbottomwidth,paddedbottomheight,nInputPlane,pad_size,corr_type_multiply,stream); 153 | 154 | // THCudaTensor_free(state, input1); 155 | // THCudaTensor_free(state, input2); 156 | THCudaTensor_free(state, rbot1); 157 | THCudaTensor_free(state, rbot2); 158 | 159 | return 1; 160 | } 161 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda.h: -------------------------------------------------------------------------------- 1 | int corr_cuda_forward(THCudaTensor *input1, 2 | THCudaTensor *input2, 3 | THCudaTensor *rbot1, 4 | THCudaTensor *rbot2, 5 | THCudaTensor *output, 6 | int pad_size, 7 | int kernel_size, 8 | int max_displacement, 9 | int stride1, 10 | int stride2, 11 | int corr_type_multiply); 12 | 13 | int corr_cuda_backward(THCudaTensor *input1, 14 | THCudaTensor *input2, 15 | THCudaTensor *rbot1, 16 | THCudaTensor *rbot2, 17 | THCudaTensor *grad_output, 18 | THCudaTensor *grad_input1, 19 | THCudaTensor *grad_input2, 20 | int pad_size, 21 | int kernel_size, 22 | int max_displacement, 23 | int stride1, 24 | int stride2, 25 | int corr_type_multiply); 26 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/correlation_package/src/corr_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _CORR_CUDA_KERNEL 2 | #define _CORR_CUDA_KERNEL 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void blob_rearrange_ongpu(const float *in, float *out, int num, int channels, int width, int height, int widthheight, int padding, int pwidthheight, cudaStream_t stream); 9 | 10 | void CorrelateData_ongpu(const float *rbot1, const float *rbot2, float *output, int batchSize, int nOutputCols, int nOutputRows, int nOutputPlane, int max_displacement, int neighborhood_grid_radius_, int neighborhood_grid_width_, int kernel_radius_, int kernel_size, int stride1, int stride2, int paddedbottomwidth, int paddedbottomheight, int nInputPlane, int corr_type_multiply, cudaStream_t stream); 11 | 12 | void CorrelateDataBackward_ongpu(const float *rbot1, const float *rbot2, const float *gradOutput, float *gradInput1, float *gradInput2, int batchSize, int nOutputCols, int nOutputRows, int nOutputPlane, int max_displacement, int neighborhood_grid_radius_, int neighborhood_grid_width_, int kernel_radius_, int stride1, int stride2, int nInputCols, int nInputRows, int paddedbottomwidth, int 
paddedbottomheight, int nInputPlane, int pad_size, int corr_type_multiply, cudaStream_t stream); 13 | 14 | #ifdef __cplusplus 15 | } 16 | #endif 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os 4 | 5 | from setuptools import setup, find_packages 6 | 7 | this_file = os.path.dirname(__file__) 8 | 9 | setup( 10 | name="correlation_package", 11 | version="0.1", 12 | description="Correlation layer from FlowNetC", 13 | url="https://github.com/jbarker-nvidia/pytorch-correlation", 14 | author="Jon Barker", 15 | author_email="jbarker@nvidia.com", 16 | # Require cffi 17 | install_requires=["cffi>=1.0.0"], 18 | setup_requires=["cffi>=1.0.0"], 19 | # Exclude the build files. 20 | packages=find_packages(exclude=["build"]), 21 | # Package where to put the extensions. Has to be a prefix of build.py 22 | ext_package="", 23 | # Extensions to compile 24 | cffi_modules=[ 25 | os.path.join(this_file, "build.py:ffi") 26 | ], 27 | ) 28 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/test/.ipynb_checkpoints/test-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 0 6 | } 7 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/test/test.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import torch\n", 12 | "from torch.autograd import Variable\n", 13 | "from correlation_package.modules.corr import Correlation" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "a=Variable(torch.rand(1,128,100,100),requires_grad=True)\n", 25 | "b=Variable(torch.rand(1,128,100,100),requires_grad=True)\n", 26 | "m=Correlation(40,1,40,1,1,1)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "a=a.cuda()\n", 38 | "b=b.cuda()" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "collapsed": true 46 | }, 47 | "outputs": [], 48 | "source": [ 49 | "y=m(a,b)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": { 56 | "collapsed": false 57 | }, 58 | "outputs": [], 59 | "source": [ 60 | "y.size()" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": true 68 | }, 69 | "outputs": [], 70 | "source": [] 71 | } 72 | ], 73 | "metadata": { 74 | "kernelspec": { 75 | "display_name": "Python 2", 76 | "language": "python", 77 | "name": "python2" 78 | }, 79 | "language_info": { 80 | "codemirror_mode": { 81 | "name": "ipython", 82 | "version": 2 83 | }, 84 | "file_extension": ".py", 85 | "mimetype": "text/x-python", 86 | "name": "python", 87 | "nbconvert_exporter": "python", 88 | "pygments_lexer": 
"ipython2", 89 | "version": "2.7.12" 90 | } 91 | }, 92 | "nbformat": 4, 93 | "nbformat_minor": 0 94 | } 95 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/correlation-pytorch/test/test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | from correlation_package.modules.corr import Correlation, Correlation1d 5 | from correlation_package.functions.corr import correlation, correlation1d 6 | 7 | from torch.autograd import gradcheck 8 | 9 | import numpy as np 10 | 11 | def test_correlation(): 12 | # model = correlation(1, 1, 1, 1, 1, 1) 13 | # A = Variable(torch.randn(1,1,3,3)) 14 | # A_ = A.cuda() 15 | # B = Variable(torch.randn(1,1,3,3)) 16 | # B_ = B.cuda() 17 | # 18 | # #import pdb; pdb.set_trace() 19 | # #model = correlation1d(3, 1, 20, 1, 1, 1) 20 | # y = model(A_, B_) 21 | # print(y.size()) 22 | # 23 | # print(y) 24 | # return 25 | 26 | 27 | A = Variable(torch.randn(2,3,100,100), requires_grad=True) 28 | A_ = A.cuda() 29 | B = Variable(torch.randn(2,3,100,100), requires_grad=True) 30 | B_ = B.cuda() 31 | 32 | model = correlation(3, 3, 20, 1, 2, 1) 33 | y = model(A_, B_) 34 | print(y.size()) 35 | 36 | print('Functional interface test passed') 37 | 38 | z = torch.mean(y) 39 | z.backward() 40 | print(A.grad.size()) 41 | print(B.grad.size()) 42 | 43 | if A.grad is not None and B.grad is not None: 44 | print('Backward pass test passed') 45 | 46 | A = Variable(torch.randn(2,3,100,100), requires_grad=True) 47 | A_ = A.cuda() 48 | B = Variable(torch.randn(2,3,100,100), requires_grad=True) 49 | B_ = B.cuda() 50 | 51 | y = Correlation(3, 3, 20, 1, 2, 1)(A_, B_) 52 | print(y.size()) 53 | 54 | print('Module interface test passed') 55 | 56 | z = torch.mean(y) 57 | z.backward() 58 | print(A.grad.size()) 59 | print(B.grad.size()) 60 | 61 | if A.grad is not None and B.grad is not None: 62 | print('Backward pass test passed') 63 | 64 | def test_correlation_0(): 65 | #model = correlation(0, 1, 0, 1, 1, 1) 66 | 67 | A = torch.Tensor([[1,2],[3,4]]) 68 | B = torch.Tensor([[5,6],[7,8]]) 69 | A = A.view((1,1,2,2)) 70 | B = B.view((1,1,2,2)) 71 | A = Variable(A) 72 | B = Variable(B) 73 | A_ = A.cuda() 74 | B_ = B.cuda() 75 | 76 | #y = model(A_, B_) 77 | #print(y) # should be 1x1x2x2 [[5,12],[21,32]] 78 | 79 | model2 = correlation(1, 1, 1, 1, 1, 1) 80 | y2 = model2(A_, B_) 81 | print(y2) # should be 1x9x2x2 82 | 83 | 84 | 85 | def test_correlation1d_0(): 86 | #model = correlation1d(0, 1, 0, 1, 1, 1) 87 | 88 | A = torch.Tensor([[1,2],[3,4]]) 89 | B = torch.Tensor([[5,6],[7,8]]) 90 | A = A.view((1,1,2,2)) 91 | B = B.view((1,1,2,2)) 92 | A = Variable(A) 93 | B = Variable(B) 94 | A_ = A.cuda() 95 | B_ = B.cuda() 96 | 97 | #y = model(A_, B_) 98 | #print(y) # should be 1x1x2x2 [[5,12],[21,32]] 99 | 100 | 101 | model2 = correlation1d(1, 1, 1, 1, 1, 1) 102 | y2 = model2(A_, B_) 103 | print(y2) # should be 1x3x2x2 104 | 105 | return 106 | 107 | 108 | def test_correlation1d(): 109 | A = Variable(torch.randn(2,3,100,100), requires_grad=True) 110 | A_ = A.cuda() 111 | B = Variable(torch.randn(2,3,100,100), requires_grad=True) 112 | B_ = B.cuda() 113 | 114 | #import pdb; pdb.set_trace() 115 | model = correlation1d(20, 1, 20, 1, 1, 1) 116 | y = model(A_, B_) 117 | print(y.size()) 118 | 119 | print('Functional interface test passed') 120 | 121 | z = torch.mean(y) 122 | z.backward() 123 | print(A.grad.size()) 124 | 
print(B.grad.size()) 125 | 126 | if A.grad is not None and B.grad is not None: 127 | print('Backward pass test passed') 128 | 129 | A = Variable(torch.randn(2,3,100,100), requires_grad=True) 130 | A_ = A.cuda() 131 | B = Variable(torch.randn(2,3,100,100), requires_grad=True) 132 | B_ = B.cuda() 133 | 134 | y = Correlation1d(20, 1, 20, 1, 1, 1)(A_, B_) 135 | print(y.size()) 136 | 137 | print('Module interface test passed') 138 | 139 | z = torch.mean(y) 140 | z.backward() 141 | print(A.grad.size()) 142 | print(B.grad.size()) 143 | 144 | if A.grad is not None and B.grad is not None: 145 | print('Backward pass test passed') 146 | 147 | 148 | if __name__=='__main__': 149 | 150 | #test_correlation() 151 | 152 | #test_correlation1d() 153 | 154 | #test_correlation_0() 155 | 156 | test_correlation1d_0() 157 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/make_cuda.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CUDA_PATH=/usr/local/cuda-10.0 4 | 5 | cd correlation-pytorch/correlation_package/src 6 | echo "Compiling correlation layer kernels by nvcc..." 7 | 8 | # TODO (JEB): Check which arches we need 9 | nvcc -c -o corr_cuda_kernel.cu.o corr_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 10 | nvcc -c -o corr1d_cuda_kernel.cu.o corr1d_cuda_kernel.cu -x cu -Xcompiler -fPIC -arch=sm_61 11 | 12 | cd ../../ 13 | python setup.py build install 14 | -------------------------------------------------------------------------------- /layers_package/correlation-pytorch-master/readme.MD: -------------------------------------------------------------------------------- 1 | This repository contains a custom pytorch package that adds a module and functional interface for the correlation layer described in "FlowNet: Learning Optical Flow with Convolutional Networks" (https://arxiv.org/abs/1504.06852) 2 | 3 | To install: 4 | 5 | 1. Run `pip install cffi` 6 | 7 | 2. Run `make_cuda.sh` 8 | 9 | 3. Run `python setup.py build install` 10 | 11 | 4. (optional) Run `python test/test.py` 12 | 13 | 14 | #### Acknowledgement 15 | - Thanks to Dr. 
Fitsum Reda for providing the wrapper to the correlation code 16 | 17 | If you find this implementation useful in your work, please acknowledge it appropriately and cite the paper: 18 | ``` 19 | @misc{flownet2-pytorch, 20 | author = {Fitsum Reda and Robert Pottorff and Jon Barker and Bryan Catanzaro}, 21 | title = {flownet2-pytorch: Pytorch implementation of FlowNet 2.0: Evolution of Optical Flow Estimation with Deep Networks}, 22 | year = {2017}, 23 | publisher = {GitHub}, 24 | journal = {GitHub repository}, 25 | howpublished = {\url{https://github.com/NVIDIA/flownet2-pytorch}} 26 | } 27 | ``` 28 | -------------------------------------------------------------------------------- /layers_package/correlation_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/correlation_package/__init__.py -------------------------------------------------------------------------------- /layers_package/correlation_package/correlation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.nn.modules.module import Module 3 | from torch.autograd import Function 4 | import correlation_cuda 5 | 6 | class CorrelationFunction(Function): 7 | 8 | def __init__(self, pad_size=3, kernel_size=3, max_displacement=20, stride1=1, stride2=2, corr_multiply=1): 9 | super(CorrelationFunction, self).__init__() 10 | self.pad_size = pad_size 11 | self.kernel_size = kernel_size 12 | self.max_displacement = max_displacement 13 | self.stride1 = stride1 14 | self.stride2 = stride2 15 | self.corr_multiply = corr_multiply 16 | # self.out_channel = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1) 17 | 18 | def forward(self, input1, input2): 19 | self.save_for_backward(input1, input2) 20 | 21 | with torch.cuda.device_of(input1): 22 | rbot1 = input1.new() 23 | rbot2 = input2.new() 24 | output = input1.new() 25 | 26 | correlation_cuda.forward(input1, input2, rbot1, rbot2, output, 27 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 28 | 29 | return output 30 | 31 | def backward(self, grad_output): 32 | input1, input2 = self.saved_tensors 33 | 34 | with torch.cuda.device_of(input1): 35 | rbot1 = input1.new() 36 | rbot2 = input2.new() 37 | 38 | grad_input1 = input1.new() 39 | grad_input2 = input2.new() 40 | 41 | correlation_cuda.backward(input1, input2, rbot1, rbot2, grad_output, grad_input1, grad_input2, 42 | self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply) 43 | 44 | return grad_input1, grad_input2 45 | 46 | 47 | class Correlation(Module): 48 | def __init__(self, pad_size=0, kernel_size=0, max_displacement=0, stride1=1, stride2=2, corr_multiply=1): 49 | super(Correlation, self).__init__() 50 | self.pad_size = pad_size 51 | self.kernel_size = kernel_size 52 | self.max_displacement = max_displacement 53 | self.stride1 = stride1 54 | self.stride2 = stride2 55 | self.corr_multiply = corr_multiply 56 | 57 | def forward(self, input1, input2): 58 | 59 | result = CorrelationFunction(self.pad_size, self.kernel_size, self.max_displacement,self.stride1, self.stride2, self.corr_multiply)(input1, input2) 60 | 61 | return result 62 | 63 | -------------------------------------------------------------------------------- /layers_package/correlation_package/correlation_cuda.cc: 
-------------------------------------------------------------------------------- 1 | #include <torch/torch.h> 2 | #include <ATen/ATen.h> 3 | #include <ATen/Context.h> 4 | #include <ATen/cuda/CUDAContext.h> 5 | #include <stdio.h> 6 | #include <iostream> 7 | 8 | #include "correlation_cuda_kernel.cuh" 9 | 10 | int correlation_forward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& output, 11 | int pad_size, 12 | int kernel_size, 13 | int max_displacement, 14 | int stride1, 15 | int stride2, 16 | int corr_type_multiply) 17 | { 18 | 19 | int batchSize = input1.size(0); 20 | 21 | int nInputChannels = input1.size(1); 22 | int inputHeight = input1.size(2); 23 | int inputWidth = input1.size(3); 24 | 25 | int kernel_radius = (kernel_size - 1) / 2; 26 | int border_radius = kernel_radius + max_displacement; 27 | 28 | int paddedInputHeight = inputHeight + 2 * pad_size; 29 | int paddedInputWidth = inputWidth + 2 * pad_size; 30 | 31 | int nOutputChannels = ((max_displacement/stride2)*2 + 1) * ((max_displacement/stride2)*2 + 1); 32 | 33 | int outputHeight = ceil(static_cast<float>(paddedInputHeight - 2 * border_radius) / static_cast<float>(stride1)); 34 | int outputwidth = ceil(static_cast<float>(paddedInputWidth - 2 * border_radius) / static_cast<float>(stride1)); 35 | 36 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 37 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 38 | output.resize_({batchSize, nOutputChannels, outputHeight, outputwidth}); 39 | 40 | rInput1.fill_(0); 41 | rInput2.fill_(0); 42 | output.fill_(0); 43 | 44 | int success = correlation_forward_cuda_kernel( 45 | output, 46 | output.size(0), 47 | output.size(1), 48 | output.size(2), 49 | output.size(3), 50 | output.stride(0), 51 | output.stride(1), 52 | output.stride(2), 53 | output.stride(3), 54 | input1, 55 | input1.size(1), 56 | input1.size(2), 57 | input1.size(3), 58 | input1.stride(0), 59 | input1.stride(1), 60 | input1.stride(2), 61 | input1.stride(3), 62 | input2, 63 | input2.size(1), 64 | input2.stride(0), 65 | input2.stride(1), 66 | input2.stride(2), 67 | input2.stride(3), 68 | rInput1, 69 | rInput2, 70 | pad_size, 71 | kernel_size, 72 | max_displacement, 73 | stride1, 74 | stride2, 75 | corr_type_multiply, 76 | at::cuda::getCurrentCUDAStream() 77 | //at::globalContext().getCurrentCUDAStream() 78 | ); 79 | 80 | //check for errors 81 | if (!success) { 82 | AT_ERROR("CUDA call failed"); 83 | } 84 | 85 | return 1; 86 | 87 | } 88 | 89 | int correlation_backward_cuda(at::Tensor& input1, at::Tensor& input2, at::Tensor& rInput1, at::Tensor& rInput2, at::Tensor& gradOutput, 90 | at::Tensor& gradInput1, at::Tensor& gradInput2, 91 | int pad_size, 92 | int kernel_size, 93 | int max_displacement, 94 | int stride1, 95 | int stride2, 96 | int corr_type_multiply) 97 | { 98 | 99 | int batchSize = input1.size(0); 100 | int nInputChannels = input1.size(1); 101 | int paddedInputHeight = input1.size(2)+ 2 * pad_size; 102 | int paddedInputWidth = input1.size(3)+ 2 * pad_size; 103 | 104 | int height = input1.size(2); 105 | int width = input1.size(3); 106 | 107 | rInput1.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 108 | rInput2.resize_({batchSize, paddedInputHeight, paddedInputWidth, nInputChannels}); 109 | gradInput1.resize_({batchSize, nInputChannels, height, width}); 110 | gradInput2.resize_({batchSize, nInputChannels, height, width}); 111 | 112 | rInput1.fill_(0); 113 | rInput2.fill_(0); 114 | gradInput1.fill_(0); 115 | gradInput2.fill_(0); 116 | 117 | int success = correlation_backward_cuda_kernel(gradOutput, 118 |
gradOutput.size(0), 119 | gradOutput.size(1), 120 | gradOutput.size(2), 121 | gradOutput.size(3), 122 | gradOutput.stride(0), 123 | gradOutput.stride(1), 124 | gradOutput.stride(2), 125 | gradOutput.stride(3), 126 | input1, 127 | input1.size(1), 128 | input1.size(2), 129 | input1.size(3), 130 | input1.stride(0), 131 | input1.stride(1), 132 | input1.stride(2), 133 | input1.stride(3), 134 | input2, 135 | input2.stride(0), 136 | input2.stride(1), 137 | input2.stride(2), 138 | input2.stride(3), 139 | gradInput1, 140 | gradInput1.stride(0), 141 | gradInput1.stride(1), 142 | gradInput1.stride(2), 143 | gradInput1.stride(3), 144 | gradInput2, 145 | gradInput2.size(1), 146 | gradInput2.stride(0), 147 | gradInput2.stride(1), 148 | gradInput2.stride(2), 149 | gradInput2.stride(3), 150 | rInput1, 151 | rInput2, 152 | pad_size, 153 | kernel_size, 154 | max_displacement, 155 | stride1, 156 | stride2, 157 | corr_type_multiply, 158 | at::cuda::getCurrentCUDAStream() 159 | //at::globalContext().getCurrentCUDAStream() 160 | ); 161 | 162 | if (!success) { 163 | AT_ERROR("CUDA call failed"); 164 | } 165 | 166 | return 1; 167 | } 168 | 169 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 170 | m.def("forward", &correlation_forward_cuda, "Correlation forward (CUDA)"); 171 | m.def("backward", &correlation_backward_cuda, "Correlation backward (CUDA)"); 172 | } 173 | 174 |
-------------------------------------------------------------------------------- /layers_package/correlation_package/correlation_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: correlation-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 |
-------------------------------------------------------------------------------- /layers_package/correlation_package/correlation_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | correlation_cuda.cc 2 | correlation_cuda_kernel.cu 3 | setup.py 4 | correlation_cuda.egg-info/PKG-INFO 5 | correlation_cuda.egg-info/SOURCES.txt 6 | correlation_cuda.egg-info/dependency_links.txt 7 | correlation_cuda.egg-info/top_level.txt
-------------------------------------------------------------------------------- /layers_package/correlation_package/correlation_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 |
-------------------------------------------------------------------------------- /layers_package/correlation_package/correlation_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | correlation_cuda 2 |
-------------------------------------------------------------------------------- /layers_package/correlation_package/correlation_cuda_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <ATen/ATen.h> 4 | #include <ATen/Context.h> 5 | #include <ATen/cuda/CUDAContext.h> 6 | 7 | int correlation_forward_cuda_kernel(at::Tensor& output, 8 | int ob, 9 | int oc, 10 | int oh, 11 | int ow, 12 | int osb, 13 | int osc, 14 | int osh, 15 | int osw, 16 | 17 | at::Tensor& input1, 18 | int ic, 19 | int ih, 20 | int iw, 21 | int isb, 22 | int isc, 23 | int ish, 24 | int isw, 25 | 26 | at::Tensor& input2, 27 | int gc, 28 | int gsb, 29 | int gsc, 30 | int gsh, 31 | int gsw, 32 | 33 | at::Tensor& rInput1, 34 |
at::Tensor& rInput2, 35 | int pad_size, 36 | int kernel_size, 37 | int max_displacement, 38 | int stride1, 39 | int stride2, 40 | int corr_type_multiply, 41 | cudaStream_t stream); 42 | 43 | 44 | int correlation_backward_cuda_kernel( 45 | at::Tensor& gradOutput, 46 | int gob, 47 | int goc, 48 | int goh, 49 | int gow, 50 | int gosb, 51 | int gosc, 52 | int gosh, 53 | int gosw, 54 | 55 | at::Tensor& input1, 56 | int ic, 57 | int ih, 58 | int iw, 59 | int isb, 60 | int isc, 61 | int ish, 62 | int isw, 63 | 64 | at::Tensor& input2, 65 | int gsb, 66 | int gsc, 67 | int gsh, 68 | int gsw, 69 | 70 | at::Tensor& gradInput1, 71 | int gisb, 72 | int gisc, 73 | int gish, 74 | int gisw, 75 | 76 | at::Tensor& gradInput2, 77 | int ggc, 78 | int ggsb, 79 | int ggsc, 80 | int ggsh, 81 | int ggsw, 82 | 83 | at::Tensor& rInput1, 84 | at::Tensor& rInput2, 85 | int pad_size, 86 | int kernel_size, 87 | int max_displacement, 88 | int stride1, 89 | int stride2, 90 | int corr_type_multiply, 91 | cudaStream_t stream); 92 | -------------------------------------------------------------------------------- /layers_package/correlation_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup, find_packages 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70', 17 | '-gencode', 'arch=compute_75,code=sm_75', 18 | '-gencode', 'arch=compute_75,code=compute_75' 19 | ] 20 | 21 | setup( 22 | name='correlation_cuda', 23 | ext_modules=[ 24 | CUDAExtension('correlation_cuda', [ 25 | 'correlation_cuda.cc', 26 | 'correlation_cuda_kernel.cu' 27 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 28 | ], 29 | cmdclass={ 30 | 'build_ext': BuildExtension 31 | }) 32 | -------------------------------------------------------------------------------- /layers_package/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #cd ./correlation_package 3 | #python setup.py install --user 4 | cd ./resample2d_package 5 | rm -rf *_cuda.egg-info build dist __pycache__ 6 | python setup.py install --user 7 | 8 | cd ../channelnorm_package 9 | rm -rf *_cuda.egg-info build dist __pycache__ 10 | python setup.py install --user 11 | cd .. 
12 |
-------------------------------------------------------------------------------- /layers_package/resample2d_package/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/layers_package/resample2d_package/__init__.py
-------------------------------------------------------------------------------- /layers_package/resample2d_package/resample2d.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | from torch.autograd import Function, Variable 3 | import resample2d_cuda 4 | 5 | class Resample2dFunction(Function): 6 | 7 | @staticmethod 8 | def forward(ctx, input1, input2, kernel_size=1): 9 | assert input1.is_contiguous() 10 | assert input2.is_contiguous() 11 | 12 | ctx.save_for_backward(input1, input2) 13 | ctx.kernel_size = kernel_size 14 | 15 | _, d, _, _ = input1.size() 16 | b, _, h, w = input2.size() 17 | output = input1.new(b, d, h, w).zero_() 18 | 19 | resample2d_cuda.forward(input1, input2, output, kernel_size) 20 | 21 | return output 22 | 23 | @staticmethod 24 | def backward(ctx, grad_output): 25 | assert grad_output.is_contiguous() 26 | 27 | input1, input2 = ctx.saved_tensors 28 | 29 | grad_input1 = Variable(input1.new(input1.size()).zero_()) 30 | grad_input2 = Variable(input1.new(input2.size()).zero_()) 31 | 32 | resample2d_cuda.backward(input1, input2, grad_output.data, 33 | grad_input1.data, grad_input2.data, 34 | ctx.kernel_size) 35 | 36 | return grad_input1, grad_input2, None 37 | 38 | class Resample2d(Module): 39 | 40 | def __init__(self, kernel_size=1): 41 | super(Resample2d, self).__init__() 42 | self.kernel_size = kernel_size 43 | 44 | def forward(self, input1, input2): 45 | input1_c = input1.contiguous() 46 | return Resample2dFunction.apply(input1_c, input2, self.kernel_size) 47 |
-------------------------------------------------------------------------------- /layers_package/resample2d_package/resample2d_cuda.cc: -------------------------------------------------------------------------------- 1 | #include <ATen/ATen.h> 2 | #include <ATen/cuda/CUDAContext.h> 3 | 4 | #include "resample2d_kernel.cuh" 5 | 6 | int resample2d_cuda_forward( 7 | at::Tensor& input1, 8 | at::Tensor& input2, 9 | at::Tensor& output, 10 | int kernel_size) { 11 | resample2d_kernel_forward(input1, input2, output, kernel_size); 12 | return 1; 13 | } 14 | 15 | int resample2d_cuda_backward( 16 | at::Tensor& input1, 17 | at::Tensor& input2, 18 | at::Tensor& gradOutput, 19 | at::Tensor& gradInput1, 20 | at::Tensor& gradInput2, 21 | int kernel_size) { 22 | resample2d_kernel_backward(input1, input2, gradOutput, gradInput1, gradInput2, kernel_size); 23 | return 1; 24 | } 25 | 26 | 27 | 28 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 29 | m.def("forward", &resample2d_cuda_forward, "Resample2D forward (CUDA)"); 30 | m.def("backward", &resample2d_cuda_backward, "Resample2D backward (CUDA)"); 31 | } 32 | 33 |
-------------------------------------------------------------------------------- /layers_package/resample2d_package/resample2d_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: resample2d-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description: UNKNOWN 10 | Platform: UNKNOWN 11 |
-------------------------------------------------------------------------------- /layers_package/resample2d_package/resample2d_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | resample2d_cuda.cc 2 | resample2d_kernel.cu 3 | setup.py 4 | resample2d_cuda.egg-info/PKG-INFO 5 | resample2d_cuda.egg-info/SOURCES.txt 6 | resample2d_cuda.egg-info/dependency_links.txt 7 | resample2d_cuda.egg-info/top_level.txt
-------------------------------------------------------------------------------- /layers_package/resample2d_package/resample2d_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 |
-------------------------------------------------------------------------------- /layers_package/resample2d_package/resample2d_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | resample2d_cuda 2 |
-------------------------------------------------------------------------------- /layers_package/resample2d_package/resample2d_kernel.cuh: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include <ATen/ATen.h> 4 | 5 | void resample2d_kernel_forward( 6 | at::Tensor& input1, 7 | at::Tensor& input2, 8 | at::Tensor& output, 9 | int kernel_size); 10 | 11 | void resample2d_kernel_backward( 12 | at::Tensor& input1, 13 | at::Tensor& input2, 14 | at::Tensor& gradOutput, 15 | at::Tensor& gradInput1, 16 | at::Tensor& gradInput2, 17 | int kernel_size); 18 |
-------------------------------------------------------------------------------- /layers_package/resample2d_package/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import os 3 | import torch 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | cxx_args = ['-std=c++11'] 9 | 10 | nvcc_args = [ 11 | '-gencode', 'arch=compute_50,code=sm_50', 12 | '-gencode', 'arch=compute_52,code=sm_52', 13 | '-gencode', 'arch=compute_60,code=sm_60', 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70', 17 | '-gencode', 'arch=compute_75,code=sm_75', 18 | '-gencode', 'arch=compute_75,code=compute_75' 19 | ] 20 | 21 | setup( 22 | name='resample2d_cuda', 23 | ext_modules=[ 24 | CUDAExtension('resample2d_cuda', [ 25 | 'resample2d_cuda.cc', 26 | 'resample2d_kernel.cu' 27 | ], extra_compile_args={'cxx': cxx_args, 'nvcc': nvcc_args}) 28 | ], 29 | cmdclass={ 30 | 'build_ext': BuildExtension 31 | }) 32 |
-------------------------------------------------------------------------------- /libs/GANet/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/libs/GANet/__init__.py
-------------------------------------------------------------------------------- /libs/GANet/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .GANet import * 2 |
-------------------------------------------------------------------------------- /libs/GANet/modules/GANet.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.module import Module 2 | import torch 3 | import numpy as np 4 | from torch.autograd import Variable 5 | from
..functions import * 6 | 7 | from ..functions.GANet import MyLossFunction 8 | from ..functions.GANet import SgaFunction 9 | from ..functions.GANet import LgaFunction 10 | from ..functions.GANet import Lga2Function 11 | from ..functions.GANet import Lga3Function 12 | from ..functions.GANet import Lga3dFunction 13 | from ..functions.GANet import Lga3d2Function 14 | from ..functions.GANet import Lga3d3Function 15 | from ..functions.GANet import MyLoss2Function 16 | 17 | 18 | class MyNormalize(Module): 19 | def __init__(self, dim): 20 | self.dim = dim 21 | super(MyNormalize, self).__init__() 22 | def forward(self, x): 23 | # assert(x.is_contiguous() == True) 24 | with torch.cuda.device_of(x): 25 | norm = torch.sum(torch.abs(x), self.dim) 26 | norm[norm <= 0] = norm[norm <= 0] - 1e-6 27 | norm[norm >= 0] = norm[norm >= 0] + 1e-6 # shift the L1 norm away from zero to avoid division by zero 28 | norm = torch.unsqueeze(norm, self.dim) 29 | size = np.ones(x.dim(), dtype='int') 30 | size[self.dim] = x.size()[self.dim] 31 | norm = norm.repeat(*size) 32 | x = torch.div(x, norm) 33 | return x 34 | class MyLoss2(Module): 35 | def __init__(self, thresh=1, alpha=2): 36 | super(MyLoss2, self).__init__() 37 | self.thresh = thresh 38 | self.alpha = alpha 39 | def forward(self, input1, input2): 40 | result = MyLoss2Function(self.thresh, self.alpha)(input1, input2) 41 | return result 42 | class MyLoss(Module): 43 | def __init__(self, upper_thresh=5, lower_thresh=1): 44 | super(MyLoss, self).__init__() 45 | self.upper_thresh = upper_thresh 46 | self.lower_thresh = lower_thresh 47 | def forward(self, input1, input2): 48 | result = MyLossFunction(self.upper_thresh, self.lower_thresh)(input1, input2) 49 | return result 50 | 51 | 52 | 53 | class SGA(Module): 54 | def __init__(self): 55 | super(SGA, self).__init__() 56 | 57 | def forward(self, input, g0, g1, g2, g3): 58 | result = SgaFunction()(input, g0, g1, g2, g3) 59 | return result 60 | 61 | 62 | 63 | class LGA3D3(Module): 64 | def __init__(self, radius=2): 65 | super(LGA3D3, self).__init__() 66 | self.radius = radius 67 | 68 | def forward(self, input1, input2): 69 | result = Lga3d3Function(self.radius)(input1, input2) 70 | return result 71 | class LGA3D2(Module): 72 | def __init__(self, radius=2): 73 | super(LGA3D2, self).__init__() 74 | self.radius = radius 75 | 76 | def forward(self, input1, input2): 77 | result = Lga3d2Function(self.radius)(input1, input2) 78 | return result 79 | class LGA3D(Module): 80 | def __init__(self, radius=2): 81 | super(LGA3D, self).__init__() 82 | self.radius = radius 83 | 84 | def forward(self, input1, input2): 85 | result = Lga3dFunction(self.radius)(input1, input2) 86 | return result 87 | 88 | class LGA3(Module): 89 | def __init__(self, radius=2): 90 | super(LGA3, self).__init__() 91 | self.radius = radius 92 | 93 | def forward(self, input1, input2): 94 | result = Lga3Function(self.radius)(input1, input2) 95 | return result 96 | class LGA2(Module): 97 | def __init__(self, radius=2): 98 | super(LGA2, self).__init__() 99 | self.radius = radius 100 | 101 | def forward(self, input1, input2): 102 | result = Lga2Function(self.radius)(input1, input2) 103 | return result 104 | class LGA(Module): 105 | def __init__(self, radius=2): 106 | super(LGA, self).__init__() 107 | self.radius = radius 108 | 109 | def forward(self, input1, input2): 110 | result = LgaFunction(self.radius)(input1, input2) 111 | return result 112 | 113 | 114 | 115 | class GetCostVolume(Module): 116 | def __init__(self, maxdisp): 117 | super(GetCostVolume, self).__init__() 118 | self.maxdisp = maxdisp + 1 119 | 120 | def forward(self, x, y): 121 | 
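# Sketch of what this forward pass builds, assuming (N, C, H, W) left/right feature maps x and y:
# the result is a 5-D cost volume of shape (N, 2*C, D, H, W) with D = self.maxdisp, where for each
# candidate disparity i, cost[:, :C, i, :, w] = x[:, :, :, w] and cost[:, C:, i, :, w] = y[:, :, :, w-i];
# columns with w < i have no valid right-image match and are left at zero.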
assert(x.is_contiguous() == True) 122 | with torch.cuda.device_of(x): 123 | num, channels, height, width = x.size() 124 | cost = x.new().resize_(num, channels * 2, self.maxdisp, height, width).zero_() 125 | # cost = Variable(torch.FloatTensor(x.size()[0], x.size()[1]*2, self.maxdisp, x.size()[2], x.size()[3]).zero_(), volatile= not self.training).cuda() 126 | for i in range(self.maxdisp): 127 | if i > 0 : 128 | cost[:, :x.size()[1], i, :,i:] = x[:,:,:,i:] 129 | cost[:, x.size()[1]:, i, :,i:] = y[:,:,:,:-i] 130 | else: 131 | cost[:, :x.size()[1], i, :,:] = x 132 | cost[:, x.size()[1]:, i, :,:] = y 133 | 134 | cost = cost.contiguous() 135 | return cost 136 | 137 | class DisparityRegression(Module): 138 | def __init__(self, maxdisp): 139 | super(DisparityRegression, self).__init__() 140 | self.maxdisp = maxdisp + 1 141 | # self.disp = Variable(torch.Tensor(np.reshape(np.array(range(self.maxdisp)),[1,self.maxdisp,1,1])).cuda(), requires_grad=False) 142 | 143 | def forward(self, x): 144 | assert(x.is_contiguous() == True) 145 | with torch.cuda.device_of(x): 146 | disp = Variable(torch.Tensor(np.reshape(np.array(range(self.maxdisp)),[1, self.maxdisp, 1, 1])).cuda(), requires_grad=False) 147 | disp = disp.repeat(x.size()[0], 1, x.size()[2], x.size()[3]) 148 | out = torch.sum(x * disp, 1) 149 | return out 150 | 151 | -------------------------------------------------------------------------------- /libs/GANet/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .GANet import * 2 | -------------------------------------------------------------------------------- /libs/GANet/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import CppExtension, BuildExtension, CUDAExtension 3 | 4 | 5 | setup( 6 | name='GANet', 7 | ext_modules=[ 8 | CUDAExtension('GANet', [ 9 | 'src/GANet_cuda.cpp', 10 | 'src/GANet_kernel.cu', 11 | ]) 12 | ], 13 | cmdclass={ 14 | 'build_ext': BuildExtension 15 | }) 16 | -------------------------------------------------------------------------------- /libs/GANet/src/GANet_cuda.cpp: -------------------------------------------------------------------------------- 1 | //#include 2 | #include 3 | #include "GANet_kernel.h" 4 | 5 | extern "C" int 6 | lga_cuda_backward (at::Tensor input, at::Tensor filters, 7 | at::Tensor gradOutput, at::Tensor gradInput, 8 | at::Tensor gradFilters, const int radius) 9 | { 10 | lga_backward (input, filters, gradOutput, gradInput, gradFilters, radius); 11 | return 1; 12 | } 13 | 14 | extern "C" int 15 | lga_cuda_forward (at::Tensor input, at::Tensor filters, at::Tensor output, 16 | const int radius) 17 | { 18 | lga_forward (input, filters, output, radius); 19 | return 1; 20 | } 21 | 22 | extern "C" int 23 | lga3d_cuda_backward (at::Tensor input, at::Tensor filters, 24 | at::Tensor gradOutput, at::Tensor gradInput, 25 | at::Tensor gradFilters, const int radius) 26 | { 27 | lga3d_backward (input, filters, gradOutput, gradInput, gradFilters, radius); 28 | return 1; 29 | } 30 | 31 | extern "C" int 32 | lga3d_cuda_forward (at::Tensor input, at::Tensor filters, at::Tensor output, 33 | const int radius) 34 | { 35 | lga3d_forward (input, filters, output, radius); 36 | return 1; 37 | } 38 | 39 | extern "C" int 40 | sga_cuda_forward (at::Tensor input, at::Tensor guidance_down, 41 | at::Tensor guidance_up, at::Tensor guidance_right, 42 | at::Tensor guidance_left, at::Tensor temp_out, 43 | at::Tensor output, 
at::Tensor mask) 44 | { 45 | sga_kernel_forward (input, guidance_down, guidance_up, guidance_right, 46 | guidance_left, temp_out, output, mask); 47 | return 1; 48 | } 49 | 50 | extern "C" int 51 | sga_cuda_backward (at::Tensor input, at::Tensor guidance_down, 52 | at::Tensor guidance_up, at::Tensor guidance_right, 53 | at::Tensor guidance_left, at::Tensor temp_out, 54 | at::Tensor mask, at::Tensor max_idx, at::Tensor gradOutput, 55 | at::Tensor temp_grad, at::Tensor gradInput, 56 | at::Tensor grad_down, at::Tensor grad_up, 57 | at::Tensor grad_right, at::Tensor grad_left) 58 | { 59 | sga_kernel_backward (input, guidance_down, guidance_up, guidance_right, 60 | guidance_left, temp_out, mask, max_idx, gradOutput, 61 | temp_grad, gradInput, grad_down, grad_up, grad_right, 62 | grad_left); 63 | return 1; 64 | } 65 | 66 | 67 | PYBIND11_MODULE (TORCH_EXTENSION_NAME, GANet) 68 | { 69 | GANet.def ("lga_cuda_forward", &lga_cuda_forward, "lga forward (CUDA)"); 70 | GANet.def ("lga_cuda_backward", &lga_cuda_backward, "lga backward (CUDA)"); 71 | GANet.def ("lga3d_cuda_forward", &lga3d_cuda_forward, "lga3d forward (CUDA)"); 72 | GANet.def ("lga3d_cuda_backward", &lga3d_cuda_backward, "lga3d backward (CUDA)"); 73 | GANet.def ("sga_cuda_backward", &sga_cuda_backward, "sga backward (CUDA)"); 74 | GANet.def ("sga_cuda_forward", &sga_cuda_forward, "sga forward (CUDA)"); 75 | } 76 | 77 | -------------------------------------------------------------------------------- /libs/GANet/src/GANet_cuda.h: -------------------------------------------------------------------------------- 1 | int lga_cuda_backward (at::Tensor input, at::Tensor filters, 2 | at::Tensor gradOutput, at::Tensor gradInput, 3 | at::Tensor gradFilters, const int radius); 4 | int lga_cuda_forward (at::Tensor input, at::Tensor filters, at::Tensor output, 5 | const int radius); 6 | int lga3d_cuda_backward (at::Tensor input, at::Tensor filters, 7 | at::Tensor gradOutput, at::Tensor gradInput, 8 | at::Tensor gradFilters, const int radius); 9 | int lga3d_cuda_forward (at::Tensor input, at::Tensor filters, 10 | at::Tensor output, const int radius); 11 | int sga_cuda_forward (at::Tensor input, at::Tensor guidance_down, 12 | at::Tensor guidance_up, at::Tensor guidance_right, 13 | at::Tensor guidance_left, at::Tensor temp_out, 14 | at::Tensor output, at::Tensor mask); 15 | int sga_cuda_backward (at::Tensor input, at::Tensor guidance_down, 16 | at::Tensor guidance_up, at::Tensor guidance_right, 17 | at::Tensor guidance_left, at::Tensor temp_out, 18 | at::Tensor mask, at::Tensor max_idx, 19 | at::Tensor gradOutput, at::Tensor temp_grad, 20 | at::Tensor gradInput, at::Tensor grad_down, 21 | at::Tensor grad_up, at::Tensor grad_right, 22 | at::Tensor grad_left); 23 | 24 | -------------------------------------------------------------------------------- /libs/GANet/src/GANet_kernel.h: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | void sga_kernel_forward (at::Tensor input, at::Tensor guidance_down, 9 | at::Tensor guidance_up, at::Tensor guidance_right, 10 | at::Tensor guidance_left, at::Tensor temp_out, 11 | at::Tensor output, at::Tensor mask); 12 | void sga_kernel_backward (at::Tensor input, at::Tensor guidance_down, 13 | at::Tensor guidance_up, at::Tensor guidance_right, 14 | at::Tensor guidance_left, at::Tensor temp_out, 15 | at::Tensor mask, at::Tensor max_idx, 16 | at::Tensor gradOutput, at::Tensor temp_grad, 17 | at::Tensor gradInput, 
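// the four outputs below receive the gradients w.r.t. the directional guidance maps (down/up/right/left):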
at::Tensor grad_down, 18 | at::Tensor grad_up, at::Tensor grad_right, 19 | at::Tensor grad_left); 20 | 21 | void lga_backward (at::Tensor input, at::Tensor filters, 22 | at::Tensor gradOutput, at::Tensor gradInput, 23 | at::Tensor gradFilters, const int radius); 24 | void lga_forward (at::Tensor input, at::Tensor filters, at::Tensor output, 25 | const int radius); 26 | 27 | void lga3d_backward (at::Tensor input, at::Tensor filters, 28 | at::Tensor gradOutput, at::Tensor gradInput, 29 | at::Tensor gradFilters, const int radius); 30 | void lga3d_forward (at::Tensor input, at::Tensor filters, at::Tensor output, 31 | const int radius); 32 | 33 | 34 | #ifdef __cplusplus 35 | } 36 | #endif 37 | -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/libs/__init__.py -------------------------------------------------------------------------------- /libs/sync_bn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/libs/sync_bn/__init__.py -------------------------------------------------------------------------------- /libs/sync_bn/build/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/sync_bn/build/lib/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /libs/sync_bn/functions/__init__.py: -------------------------------------------------------------------------------- 1 | from .sync_bn import * 2 | -------------------------------------------------------------------------------- /libs/sync_bn/modules/__init__.py: -------------------------------------------------------------------------------- 1 | from .sync_bn import * 2 | -------------------------------------------------------------------------------- /libs/sync_bn/modules/sync_bn.py: -------------------------------------------------------------------------------- 1 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 | ## Created by: Hang Zhang 3 | ## ECE Department, Rutgers University 4 | ## Email: zhang.hang@rutgers.edu 5 | ## Copyright (c) 2017 6 | ## 7 | ## This source code is licensed under the MIT-style license found in the 8 | ## LICENSE file in the root directory of this source tree 9 | ##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 | 11 | """Synchronized Cross-GPU Batch Normalization Module""" 12 | import warnings 13 | try: 14 | from queue import Queue 15 | except ImportError: 16 | from Queue import Queue 17 | 18 | import torch 19 | from torch.nn.modules.batchnorm import _BatchNorm 20 | 21 | from ..functions import * 22 | 23 | 24 | __all__ = ['SyncBatchNorm', 'BatchNorm1d', 'BatchNorm2d', 'BatchNorm3d'] 25 | 26 | 27 | class SyncBatchNorm(_BatchNorm): 28 | r"""Cross-GPU Synchronized Batch normalization (SyncBN) 29 | 30 | Standard BN [1]_ implementation only normalize the data within each device (GPU). 31 | SyncBN normalizes the input within the whole mini-batch. 32 | We follow the sync-onece implmentation described in the paper [2]_ . 
33 | Please see the design idea in the `notes <./notes/syncbn.html>`_. 34 | 35 | .. math:: 36 | 37 | y = \frac{x - mean[x]}{ \sqrt{Var[x] + \epsilon}} * gamma + beta 38 | 39 | The mean and standard-deviation are calculated per-channel over 40 | the mini-batches and gamma and beta are learnable parameter vectors 41 | of size C (where C is the input size). 42 | 43 | During training, this layer keeps a running estimate of its computed mean 44 | and variance. The running sum is kept with a default momentum of 0.1. 45 | 46 | During evaluation, this running mean/variance is used for normalization. 47 | 48 | Because the BatchNorm is done over the `C` dimension, computing statistics 49 | on `(N, H, W)` slices, it's common terminology to call this Spatial BatchNorm 50 | 51 | Args: 52 | num_features: num_features from an expected input of 53 | size batch_size x num_features x height x width 54 | eps: a value added to the denominator for numerical stability. 55 | Default: 1e-5 56 | momentum: the value used for the running_mean and running_var 57 | computation. Default: 0.1 58 | sync: a boolean value that when set to ``True``, synchronize across 59 | different gpus. Default: ``True`` 60 | activation : str 61 | Name of the activation functions, one of: `leaky_relu` or `none`. 62 | slope : float 63 | Negative slope for the `leaky_relu` activation. 64 | 65 | Shape: 66 | - Input: :math:`(N, C, H, W)` 67 | - Output: :math:`(N, C, H, W)` (same shape as input) 68 | 69 | Reference: 70 | .. [1] Ioffe, Sergey, and Christian Szegedy. "Batch normalization: Accelerating deep network training by reducing internal covariate shift." *ICML 2015* 71 | .. [2] Hang Zhang, Kristin Dana, Jianping Shi, Zhongyue Zhang, Xiaogang Wang, Ambrish Tyagi, and Amit Agrawal. "Context Encoding for Semantic Segmentation." *CVPR 2018* 72 | 73 | Examples: 74 | >>> m = SyncBatchNorm(100) 75 | >>> net = torch.nn.DataParallel(m) 76 | >>> output = net(input) 77 | """ 78 | 79 | def __init__(self, num_features, eps=1e-5, momentum=0.1, sync=True, activation="none", slope=0.01, 80 | inplace=True): 81 | super(SyncBatchNorm, self).__init__(num_features, eps=eps, momentum=momentum, affine=True) 82 | self.activation = activation 83 | self.inplace = False if activation == 'none' else inplace 84 | #self.inplace = inplace 85 | self.slope = slope 86 | self.devices = list(range(torch.cuda.device_count())) 87 | self.sync = sync if len(self.devices) > 1 else False 88 | # Initialize queues 89 | self.worker_ids = self.devices[1:] 90 | self.master_queue = Queue(len(self.worker_ids)) 91 | self.worker_queues = [Queue(1) for _ in self.worker_ids] 92 | # running_exs 93 | #self.register_buffer('running_exs', torch.ones(num_features)) 94 | 95 | def forward(self, x): 96 | # Resize the input to (B, C, -1). 
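# Cross-GPU protocol (wired up via the queues created in __init__): the replica on
# devices[0] acts as master, each worker pushes its per-device statistics into
# master_queue, the master reduces them to whole-mini-batch statistics, and every
# worker reads the result back from its own worker_queue, so all replicas
# normalize with identical mean/var.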
97 | input_shape = x.size() 98 | x = x.view(input_shape[0], self.num_features, -1) 99 | if x.get_device() == self.devices[0]: 100 | # Master mode 101 | extra = { 102 | "is_master": True, 103 | "master_queue": self.master_queue, 104 | "worker_queues": self.worker_queues, 105 | "worker_ids": self.worker_ids 106 | } 107 | else: 108 | # Worker mode 109 | extra = { 110 | "is_master": False, 111 | "master_queue": self.master_queue, 112 | "worker_queue": self.worker_queues[self.worker_ids.index(x.get_device())] 113 | } 114 | if self.inplace: 115 | return inp_syncbatchnorm(x, self.weight, self.bias, self.running_mean, self.running_var, 116 | extra, self.sync, self.training, self.momentum, self.eps, 117 | self.activation, self.slope).view(input_shape) 118 | else: 119 | return syncbatchnorm(x, self.weight, self.bias, self.running_mean, self.running_var, 120 | extra, self.sync, self.training, self.momentum, self.eps, 121 | self.activation, self.slope).view(input_shape) 122 | 123 | def extra_repr(self): 124 | if self.activation == 'none': 125 | return 'sync={}'.format(self.sync) 126 | else: 127 | return 'sync={}, act={}, slope={}, inplace={}'.format( 128 | self.sync, self.activation, self.slope, self.inplace 129 | ) 130 | 131 | class BatchNorm1d(SyncBatchNorm): 132 | r""" 133 | .. warning:: 134 | BatchNorm1d is deprecated in favor of :class:`encoding.nn.SyncBatchNorm`. 135 | """ 136 | def __init__(self, *args, **kwargs): 137 | super(BatchNorm1d, self).__init__(*args, **kwargs) 138 | 139 | class BatchNorm2d(SyncBatchNorm): 140 | r""" 141 | .. warning:: 142 | BatchNorm2d is deprecated in favor of :class:`encoding.nn.SyncBatchNorm`. 143 | """ 144 | def __init__(self, *args, **kwargs): 145 | super(BatchNorm2d, self).__init__(*args, **kwargs) 146 | 147 | class BatchNorm3d(SyncBatchNorm): 148 | r""" 149 | .. warning:: 150 | BatchNorm3d is deprecated in favor of :class:`encoding.nn.SyncBatchNorm`. 
151 | """ 152 | def __init__(self, *args, **kwargs): 153 | super(BatchNorm3d, self).__init__(*args, **kwargs) 154 | -------------------------------------------------------------------------------- /libs/sync_bn/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import CppExtension, BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='sync_bn_cpu', 6 | ext_modules=[ 7 | CppExtension('sync_bn_cpu', [ 8 | 'src/cpu/operator.cpp', 9 | 'src/cpu/sync_bn.cpp', 10 | ]) 11 | ], 12 | cmdclass={ 13 | 'build_ext': BuildExtension 14 | }) 15 | 16 | 17 | setup( 18 | name='sync_bn_gpu', 19 | ext_modules=[ 20 | CUDAExtension('sync_bn_gpu', [ 21 | 'src/gpu/operator.cpp', 22 | 'src/gpu/sync_bn_cuda.cu', 23 | ]) 24 | ], 25 | cmdclass={ 26 | 'build_ext': BuildExtension 27 | }) 28 | -------------------------------------------------------------------------------- /libs/sync_bn/src/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.cpp_extension import load 4 | 5 | cwd = os.path.dirname(os.path.realpath(__file__)) 6 | cpu_path = os.path.join(cwd, 'cpu') 7 | gpu_path = os.path.join(cwd, 'gpu') 8 | 9 | cpu = load('sync_bn_cpu', [ 10 | os.path.join(cpu_path, 'operator.cpp'), 11 | os.path.join(cpu_path, 'sync_bn.cpp'), 12 | ], build_directory=cpu_path, verbose=False) 13 | 14 | if torch.cuda.is_available(): 15 | gpu = load('sync_bn_gpu', [ 16 | os.path.join(gpu_path, 'operator.cpp'), 17 | os.path.join(gpu_path, 'sync_bn_cuda.cu'), 18 | ], build_directory=gpu_path, verbose=False) 19 | -------------------------------------------------------------------------------- /libs/sync_bn/src/cpu/operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("batchnorm_forward", &BatchNorm_Forward_CPU, "BatchNorm forward (CPU)"); 5 | m.def("batchnorm_backward", &BatchNorm_Backward_CPU, "BatchNorm backward (CPU)"); 6 | m.def("sumsquare_forward", &Sum_Square_Forward_CPU, "SumSqu forward (CPU)"); 7 | m.def("sumsquare_backward", &Sum_Square_Backward_CPU, "SumSqu backward (CPU)"); 8 | } 9 | -------------------------------------------------------------------------------- /libs/sync_bn/src/cpu/operator.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | at::Tensor BatchNorm_Forward_CPU( 5 | const at::Tensor input_, 6 | const at::Tensor mean_, 7 | const at::Tensor std_, 8 | const at::Tensor gamma_, 9 | const at::Tensor beta_); 10 | 11 | std::vector BatchNorm_Backward_CPU( 12 | const at::Tensor gradoutput_, 13 | const at::Tensor input_, 14 | const at::Tensor mean_, 15 | const at::Tensor std_, 16 | const at::Tensor gamma_, 17 | const at::Tensor beta_, 18 | bool train); 19 | 20 | std::vector Sum_Square_Forward_CPU( 21 | const at::Tensor input_); 22 | 23 | at::Tensor Sum_Square_Backward_CPU( 24 | const at::Tensor input_, 25 | const at::Tensor gradSum_, 26 | const at::Tensor gradSquare_); 27 | -------------------------------------------------------------------------------- /libs/sync_bn/src/cpu/sync_bn.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | at::Tensor broadcast_to(at::Tensor v, at::Tensor x) { 6 | if (x.ndimension() == 2) { 7 | return v; 8 | } else { 9 | std::vector 
broadcast_size = {1, -1}; 10 | for (int64_t i = 2; i < x.ndimension(); ++i) 11 | broadcast_size.push_back(1); 12 | 13 | return v.view(broadcast_size); 14 | } 15 | } 16 | 17 | at::Tensor BatchNorm_Forward_CPU( 18 | const at::Tensor input, 19 | const at::Tensor mean, 20 | const at::Tensor std, 21 | const at::Tensor gamma, 22 | const at::Tensor beta) { 23 | auto output = (input - broadcast_to(mean, input)) / broadcast_to(std, input); 24 | output = output * broadcast_to(gamma, input) + broadcast_to(beta, input); 25 | return output; 26 | } 27 | 28 | // Not implementing CPU backward for now 29 | std::vector BatchNorm_Backward_CPU( 30 | const at::Tensor gradoutput, 31 | const at::Tensor input, 32 | const at::Tensor mean, 33 | const at::Tensor std, 34 | const at::Tensor gamma, 35 | const at::Tensor beta, 36 | bool train) { 37 | /* outputs*/ 38 | at::Tensor gradinput = at::zeros_like(input); 39 | at::Tensor gradgamma = at::zeros_like(gamma); 40 | at::Tensor gradbeta = at::zeros_like(beta); 41 | at::Tensor gradMean = at::zeros_like(mean); 42 | at::Tensor gradStd = at::zeros_like(std); 43 | return {gradinput, gradMean, gradStd, gradgamma, gradbeta}; 44 | } 45 | 46 | std::vector Sum_Square_Forward_CPU( 47 | const at::Tensor input) { 48 | /* outputs */ 49 | at::Tensor sum = torch::zeros({input.size(1)}, input.options()); 50 | at::Tensor square = torch::zeros({input.size(1)}, input.options()); 51 | return {sum, square}; 52 | } 53 | 54 | at::Tensor Sum_Square_Backward_CPU( 55 | const at::Tensor input, 56 | const at::Tensor gradSum, 57 | const at::Tensor gradSquare) { 58 | /* outputs */ 59 | at::Tensor gradInput = at::zeros_like(input); 60 | return gradInput; 61 | } 62 | -------------------------------------------------------------------------------- /libs/sync_bn/src/gpu/common.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | static const unsigned WARP_SIZE = 32; 5 | 6 | // The maximum number of threads in a block 7 | static const unsigned MAX_BLOCK_SIZE = 512U; 8 | 9 | template 10 | struct ScalarConvert { 11 | static __host__ __device__ __forceinline__ Out to(const In v) { return (Out) v; } 12 | }; 13 | 14 | // Number of threads in a block given an input size up to MAX_BLOCK_SIZE 15 | static int getNumThreads(int nElem) { 16 | int threadSizes[5] = { 32, 64, 128, 256, MAX_BLOCK_SIZE }; 17 | for (int i = 0; i != 5; ++i) { 18 | if (nElem <= threadSizes[i]) { 19 | return threadSizes[i]; 20 | } 21 | } 22 | return MAX_BLOCK_SIZE; 23 | } 24 | 25 | // Returns the index of the most significant 1 bit in `val`. 
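// For example, getMSB(32) == 5, so warpSum below runs getMSB(WARP_SIZE) = log2(32) = 5
// shuffle-XOR butterfly steps to reduce a full warp.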
26 | __device__ __forceinline__ int getMSB(int val) { 27 | return 31 - __clz(val); 28 | } 29 | 30 | template 31 | __device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff) 32 | { 33 | #if CUDA_VERSION >= 9000 34 | return __shfl_xor_sync(mask, value, laneMask, width); 35 | #else 36 | return __shfl_xor(value, laneMask, width); 37 | #endif 38 | } 39 | 40 | // Sum across all threads within a warp 41 | template 42 | static __device__ __forceinline__ T warpSum(T val) { 43 | #if __CUDA_ARCH__ >= 300 44 | for (int i = 0; i < getMSB(WARP_SIZE); ++i) { 45 | val += WARP_SHFL_XOR(val, 1 << i, WARP_SIZE); 46 | } 47 | #else 48 | __shared__ T values[MAX_BLOCK_SIZE]; 49 | values[threadIdx.x] = val; 50 | __threadfence_block(); 51 | const int base = (threadIdx.x / WARP_SIZE) * WARP_SIZE; 52 | for (int i = 1; i < WARP_SIZE; i++) { 53 | val += values[base + ((i + threadIdx.x) % WARP_SIZE)]; 54 | } 55 | #endif 56 | return val; 57 | } 58 | 59 | template 60 | struct Float2 { 61 | Acctype v1, v2; 62 | __device__ Float2() {} 63 | __device__ Float2(DType v1, DType v2) : v1(ScalarConvert::to(v1)), v2(ScalarConvert::to(v2)) {} 64 | __device__ Float2(DType v) : v1(ScalarConvert::to(v)), v2(ScalarConvert::to(v)) {} 65 | __device__ Float2(int v) : v1(ScalarConvert::to(v)), v2(ScalarConvert::to(v)) {} 66 | __device__ Float2& operator+=(const Float2& a) { 67 | v1 += a.v1; 68 | v2 += a.v2; 69 | return *this; 70 | } 71 | }; 72 | 73 | template 74 | static __device__ __forceinline__ Float2 warpSum(Float2 value) { 75 | value.v1 = warpSum(value.v1); 76 | value.v2 = warpSum(value.v2); 77 | return value; 78 | } 79 | 80 | template 81 | __device__ T reduceD( 82 | Op op, int b, int i, int k, int D) { 83 | T sum = 0; 84 | for (int x = threadIdx.x; x < D; x += blockDim.x) { 85 | sum += op(b,i,k,x); 86 | } 87 | // sum over NumThreads within a warp 88 | sum = warpSum(sum); 89 | 90 | // 'transpose', and reduce within warp again 91 | __shared__ T shared[32]; 92 | 93 | __syncthreads(); 94 | if (threadIdx.x % WARP_SIZE == 0) { 95 | if (threadIdx.x / WARP_SIZE < 32) { 96 | shared[threadIdx.x / WARP_SIZE] = sum; 97 | } 98 | } 99 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 100 | // zero out the other entries in shared 101 | shared[threadIdx.x] = (T) 0; 102 | } 103 | __syncthreads(); 104 | if (threadIdx.x / WARP_SIZE == 0) { 105 | sum = warpSum(shared[threadIdx.x]); 106 | if (threadIdx.x == 0) { 107 | shared[0] = sum; 108 | } 109 | } 110 | __syncthreads(); 111 | 112 | // Everyone picks it up, should be broadcast into the whole gradInput 113 | return shared[0]; 114 | } 115 | 116 | template 117 | __device__ T reduceN( 118 | Op op, int b, int k, int d, int N) { 119 | T sum = 0; 120 | for (int x = threadIdx.x; x < N; x += blockDim.x) { 121 | sum += op(b,x,k,d); 122 | } 123 | // sum over NumThreads within a warp 124 | sum = warpSum(sum); 125 | 126 | // 'transpose', and reduce within warp again 127 | __shared__ T shared[32]; 128 | 129 | __syncthreads(); 130 | if (threadIdx.x % WARP_SIZE == 0) { 131 | if (threadIdx.x / WARP_SIZE < 32) { 132 | shared[threadIdx.x / WARP_SIZE] = sum; 133 | } 134 | } 135 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 136 | // zero out the other entries in shared 137 | shared[threadIdx.x] = (T) 0; 138 | } 139 | __syncthreads(); 140 | if (threadIdx.x / WARP_SIZE == 0) { 141 | sum = warpSum(shared[threadIdx.x]); 142 | if (threadIdx.x == 0) { 143 | shared[0] = sum; 144 | } 145 | } 146 | __syncthreads(); 147 | 
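// Note: reduceD above and reduceK/reduceBN below reuse the same two-level pattern as this
// function: shuffle-XOR butterflies reduce each warp, lane 0 of every warp parks its
// partial sum in shared[], warp 0 folds those partials into shared[0], and the final
// __syncthreads() makes the total visible to every thread in the block.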
148 | // Everyone picks it up, should be broadcast into the whole gradInput 149 | return shared[0]; 150 | } 151 | 152 | template 153 | __device__ T reduceK( 154 | Op op, int b, int i, int d, int K) { 155 | T sum = 0; 156 | for (int x = threadIdx.x; x < K; x += blockDim.x) { 157 | sum += op(b,i,x,d); 158 | } 159 | // sum over NumThreads within a warp 160 | sum = warpSum(sum); 161 | 162 | // 'transpose', and reduce within warp again 163 | __shared__ T shared[32]; 164 | 165 | __syncthreads(); 166 | if (threadIdx.x % WARP_SIZE == 0) { 167 | if (threadIdx.x / WARP_SIZE < 32) { 168 | shared[threadIdx.x / WARP_SIZE] = sum; 169 | } 170 | } 171 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 172 | // zero out the other entries in shared 173 | shared[threadIdx.x] = (T) 0; 174 | } 175 | __syncthreads(); 176 | if (threadIdx.x / WARP_SIZE == 0) { 177 | sum = warpSum(shared[threadIdx.x]); 178 | if (threadIdx.x == 0) { 179 | shared[0] = sum; 180 | } 181 | } 182 | __syncthreads(); 183 | 184 | // Everyone picks it up, should be broadcast into the whole gradInput 185 | return shared[0]; 186 | } 187 | 188 | template 189 | __device__ T reduceBN( 190 | Op op, 191 | int k, int d, int B, int N) { 192 | T sum = 0; 193 | for (int batch = 0; batch < B; ++batch) { 194 | for (int x = threadIdx.x; x < N; x += blockDim.x) { 195 | sum += op(batch,x,k,d); 196 | } 197 | } 198 | // sum over NumThreads within a warp 199 | sum = warpSum(sum); 200 | // 'transpose', and reduce within warp again 201 | __shared__ T shared[32]; 202 | 203 | __syncthreads(); 204 | if (threadIdx.x % WARP_SIZE == 0) { 205 | if (threadIdx.x / WARP_SIZE < 32) { 206 | shared[threadIdx.x / WARP_SIZE] = sum; 207 | } 208 | } 209 | if (threadIdx.x >= blockDim.x / WARP_SIZE && threadIdx.x < WARP_SIZE) { 210 | // zero out the other entries in shared 211 | shared[threadIdx.x] = (T) 0; 212 | } 213 | __syncthreads(); 214 | if (threadIdx.x / WARP_SIZE == 0) { 215 | sum = warpSum(shared[threadIdx.x]); 216 | if (threadIdx.x == 0) { 217 | shared[0] = sum; 218 | } 219 | } 220 | __syncthreads(); 221 | 222 | // Everyone picks it up, should be broadcast into the whole gradInput 223 | return shared[0]; 224 | } 225 | -------------------------------------------------------------------------------- /libs/sync_bn/src/gpu/device_tensor.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | template 4 | struct DeviceTensor { 5 | public: 6 | inline __device__ __host__ DeviceTensor(DType *p, const int *size) 7 | : dptr_(p) { 8 | for (int i = 0; i < Dim; ++i) { 9 | size_[i] = size ? 
size[i] : 0; 10 | } 11 | } 12 | 13 | inline __device__ __host__ unsigned getSize(const int i) const { 14 | assert(i < Dim); 15 | return size_[i]; 16 | } 17 | 18 | inline __device__ __host__ int numElements() const { 19 | int n = 1; 20 | for (int i = 0; i < Dim; ++i) { 21 | n *= size_[i]; 22 | } 23 | return n; 24 | } 25 | 26 | inline __device__ __host__ DeviceTensor select(const size_t x) const { 27 | assert(Dim > 1); 28 | int offset = x; 29 | for (int i = 1; i < Dim; ++i) { 30 | offset *= size_[i]; 31 | } 32 | DeviceTensor tensor(dptr_ + offset, nullptr); 33 | for (int i = 0; i < Dim - 1; ++i) { 34 | tensor.size_[i] = this->size_[i+1]; 35 | } 36 | return tensor; 37 | } 38 | 39 | inline __device__ __host__ DeviceTensor operator[](const size_t x) const { 40 | assert(Dim > 1); 41 | int offset = x; 42 | for (int i = 1; i < Dim; ++i) { 43 | offset *= size_[i]; 44 | } 45 | DeviceTensor tensor(dptr_ + offset, nullptr); 46 | for (int i = 0; i < Dim - 1; ++i) { 47 | tensor.size_[i] = this->size_[i+1]; 48 | } 49 | return tensor; 50 | } 51 | 52 | inline __device__ __host__ size_t InnerSize() const { 53 | assert(Dim >= 3); 54 | size_t sz = 1; 55 | for (size_t i = 2; i < Dim; ++i) { 56 | sz *= size_[i]; 57 | } 58 | return sz; 59 | } 60 | 61 | inline __device__ __host__ size_t ChannelCount() const { 62 | assert(Dim >= 3); 63 | return size_[1]; 64 | } 65 | 66 | inline __device__ __host__ DType* data_ptr() const { 67 | return dptr_; 68 | } 69 | 70 | DType *dptr_; 71 | int size_[Dim]; 72 | }; 73 | 74 | template 75 | struct DeviceTensor { 76 | inline __device__ __host__ DeviceTensor(DType *p, const int *size) 77 | : dptr_(p) { 78 | size_[0] = size ? size[0] : 0; 79 | } 80 | 81 | inline __device__ __host__ unsigned getSize(const int i) const { 82 | assert(i == 0); 83 | return size_[0]; 84 | } 85 | 86 | inline __device__ __host__ int numElements() const { 87 | return size_[0]; 88 | } 89 | 90 | inline __device__ __host__ DType &operator[](const size_t x) const { 91 | return *(dptr_ + x); 92 | } 93 | 94 | inline __device__ __host__ DType* data_ptr() const { 95 | return dptr_; 96 | } 97 | 98 | DType *dptr_; 99 | int size_[1]; 100 | }; 101 | 102 | template 103 | static DeviceTensor devicetensor(const at::Tensor &blob) { 104 | DType *data = blob.data(); 105 | DeviceTensor tensor(data, nullptr); 106 | for (int i = 0; i < Dim; ++i) { 107 | tensor.size_[i] = blob.size(i); 108 | } 109 | return tensor; 110 | } 111 | -------------------------------------------------------------------------------- /libs/sync_bn/src/gpu/operator.cpp: -------------------------------------------------------------------------------- 1 | #include "operator.h" 2 | 3 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 4 | m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)"); 5 | m.def("batchnorm_inp_forward", &BatchNorm_Forward_Inp_CUDA, "BatchNorm forward (CUDA)"); 6 | m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward (CUDA)"); 7 | m.def("batchnorm_inp_backward", &BatchNorm_Inp_Backward_CUDA, "BatchNorm backward (CUDA)"); 8 | m.def("expectation_forward", &Expectation_Forward_CUDA, "Expectation forward (CUDA)"); 9 | m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)"); 10 | m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA, 11 | "Inplace Expectation backward (CUDA)"); 12 | } 13 | -------------------------------------------------------------------------------- /libs/sync_bn/src/gpu/operator.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | at::Tensor BatchNorm_Forward_CUDA( 6 | const at::Tensor input_, 7 | const at::Tensor mean_, 8 | const at::Tensor std_, 9 | const at::Tensor gamma_, 10 | const at::Tensor beta_, 11 | float eps); 12 | 13 | at::Tensor BatchNorm_Forward_Inp_CUDA( 14 | const at::Tensor input_, 15 | const at::Tensor ex_, 16 | const at::Tensor exs_, 17 | const at::Tensor gamma_, 18 | const at::Tensor beta_, 19 | float eps); 20 | 21 | std::vector BatchNorm_Backward_CUDA( 22 | const at::Tensor gradoutput_, 23 | const at::Tensor input_, 24 | const at::Tensor ex_, 25 | const at::Tensor exs_, 26 | const at::Tensor gamma_, 27 | const at::Tensor beta_, 28 | float eps); 29 | 30 | std::vector BatchNorm_Inp_Backward_CUDA( 31 | const at::Tensor gradoutput_, 32 | const at::Tensor output_, 33 | const at::Tensor ex_, 34 | const at::Tensor exs_, 35 | const at::Tensor gamma_, 36 | const at::Tensor beta_, 37 | float eps); 38 | 39 | std::vector Expectation_Forward_CUDA( 40 | const at::Tensor input_); 41 | 42 | at::Tensor Expectation_Backward_CUDA( 43 | const at::Tensor input_, 44 | const at::Tensor gradEx_, 45 | const at::Tensor gradExs_); 46 | 47 | at::Tensor Expectation_Inp_Backward_CUDA( 48 | const at::Tensor gradInput_, 49 | const at::Tensor output_, 50 | const at::Tensor gradEx_, 51 | const at::Tensor gradExs_, 52 | const at::Tensor ex_, 53 | const at::Tensor exs_, 54 | const at::Tensor gamma_, 55 | const at::Tensor beta_, 56 | float eps); 57 | -------------------------------------------------------------------------------- /lists/kitti15_val.txt: -------------------------------------------------------------------------------- 1 | training/image_2/000001_10.png training/image_3/000001_10.png training/disp_occ_0/000001_10.png 2 | training/image_2/000006_10.png training/image_3/000006_10.png training/disp_occ_0/000006_10.png 3 | training/image_2/000026_10.png training/image_3/000026_10.png training/disp_occ_0/000026_10.png 4 | training/image_2/000038_10.png training/image_3/000038_10.png training/disp_occ_0/000038_10.png 5 | training/image_2/000043_10.png training/image_3/000043_10.png training/disp_occ_0/000043_10.png 6 | training/image_2/000049_10.png training/image_3/000049_10.png training/disp_occ_0/000049_10.png 7 | training/image_2/000067_10.png training/image_3/000067_10.png training/disp_occ_0/000067_10.png 8 | training/image_2/000081_10.png training/image_3/000081_10.png training/disp_occ_0/000081_10.png 9 | training/image_2/000089_10.png training/image_3/000089_10.png training/disp_occ_0/000089_10.png 10 | training/image_2/000109_10.png training/image_3/000109_10.png training/disp_occ_0/000109_10.png 11 | training/image_2/000122_10.png training/image_3/000122_10.png training/disp_occ_0/000122_10.png 12 | training/image_2/000129_10.png training/image_3/000129_10.png training/disp_occ_0/000129_10.png 13 | training/image_2/000132_10.png training/image_3/000132_10.png training/disp_occ_0/000132_10.png 14 | training/image_2/000141_10.png training/image_3/000141_10.png training/disp_occ_0/000141_10.png 15 | training/image_2/000152_10.png training/image_3/000152_10.png training/disp_occ_0/000152_10.png 16 | training/image_2/000159_10.png training/image_3/000159_10.png training/disp_occ_0/000159_10.png 17 | training/image_2/000171_10.png training/image_3/000171_10.png training/disp_occ_0/000171_10.png 18 | training/image_2/000179_10.png training/image_3/000179_10.png training/disp_occ_0/000179_10.png 
19 | training/image_2/000184_10.png training/image_3/000184_10.png training/disp_occ_0/000184_10.png 20 | training/image_2/000187_10.png training/image_3/000187_10.png training/disp_occ_0/000187_10.png 21 | -------------------------------------------------------------------------------- /loss_configs/DrivingStereo.json: -------------------------------------------------------------------------------- 1 | { 2 | "loss_scale":7, 3 | "round":4, 4 | "loss_weights":[[0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005], 5 | [0.6, 0.32, 0.08, 0.04, 0.02, 0.01, 0.005], 6 | [0.8, 0.16, 0.04, 0.02, 0.01, 0.005, 0.0025], 7 | [1.0, 0., 0., 0., 0., 0., 0.]], 8 | "epoches":[7, 7, 7, 10] 9 | } 10 | -------------------------------------------------------------------------------- /loss_configs/kitti.json: -------------------------------------------------------------------------------- 1 | { 2 | "loss_scale":7, 3 | "round":3, 4 | "loss_weights":[[0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005], 5 | [0.6, 0.32, 0.08, 0.04, 0.02, 0.01, 0.005], 6 | [0.8, 0.16, 0.04, 0.02, 0.01, 0.005, 0.0025]], 7 | "epoches":[1200, 1200, 1200] 8 | } 9 | -------------------------------------------------------------------------------- /loss_configs/sceneflow.json: -------------------------------------------------------------------------------- 1 | { 2 | "loss_scale":7, 3 | "round":4, 4 | "loss_weights":[[0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005], 5 | [0.6, 0.32, 0.08, 0.04, 0.02, 0.01, 0.005], 6 | [0.8, 0.16, 0.04, 0.02, 0.01, 0.005, 0.0025], 7 | [1.0, 0., 0., 0., 0., 0., 0.]], 8 | "epoches":[20, 20, 20, 30] 9 | } 10 | -------------------------------------------------------------------------------- /losses/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/losses/__init__.py -------------------------------------------------------------------------------- /losses/multiscaleloss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | import numpy as np 5 | import torch.nn.functional as F 6 | 7 | 8 | def EPE(input_flow, target_flow, size_average = True): 9 | target_valid = (target_flow < 192) & (target_flow > 0) # keep only pixels with a valid disparity in (0, 192) 10 | return F.l1_loss(input_flow[target_valid], target_flow[target_valid], size_average=size_average) 11 | def smoothL1(input_flow, target_flow, size_average = True): 12 | """ 13 | When size_average = True (used in FADNet), the loss is averaged over pixels. 14 | When size_average = False (for PWCNet), the loss is summed over pixels. 15 | """ 16 | target_valid = (target_flow < 192) & (target_flow > 0) 17 | return F.smooth_l1_loss(input_flow[target_valid], target_flow[target_valid], size_average=size_average) 18 | def robust_EPE(input_flow, target_flow, _div_flow = 0.05): 19 | N = input_flow.shape[0] 20 | target_flow = target_flow * _div_flow 21 | target_valid = (target_flow < 192 * _div_flow) & (target_flow > 0) 22 | return torch.pow((torch.abs(target_flow[target_valid] - input_flow[target_valid]) + 0.01),0.4).sum()/N 23 | 24 | class MultiScaleLoss(nn.Module): 25 | 26 | def __init__(self, scales, downscale, weights=None, train_loss = 'smoothL1', test_loss='L1', mask=False): 27 | """ 28 | downscale is 1 in FADNet and therefore has no effect there. 29 | test_loss selects the loss used at validation/test time (L1 in FADNet). During training, the loss is the 30 | train_loss chosen below: smoothL1 (the FADNet default) or the masked L1 EPE defined above.
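Worked example (numbers taken from loss_configs/sceneflow.json, round 0): with
scales=7, downscale=1 and weights [0.32, 0.16, 0.08, 0.04, 0.02, 0.01, 0.005],
prediction i is compared against the target average-pooled by a factor of 2**i,
and the total loss is sum_i weights[i] * train_loss(pred_i, pooled_target_i).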
31 | """ 32 | super(MultiScaleLoss, self).__init__() 33 | self.downscale = downscale 34 | self.mask = mask 35 | self.weights = torch.Tensor(scales).fill_(1).cuda() if weights is None else torch.Tensor(weights).cuda() 36 | assert(len(self.weights) == scales) 37 | if train_loss == 'smoothL1': 38 | self.train_loss = smoothL1 39 | elif train_loss == 'L1': 40 | self.train_loss = EPE 41 | else: 42 | raise NotImplementedError 43 | if type(test_loss) is str: 44 | 45 | if test_loss == 'L1': 46 | self.test_loss = nn.L1Loss() 47 | else: 48 | raise NotImplementedError 49 | else: 50 | self.test_loss = test_loss 51 | self.multiScales = [nn.AvgPool2d(self.downscale*(2**i), self.downscale*(2**i)) for i in range(scales)] 52 | 53 | print('self.multiScales: ', self.multiScales, ' self.downscale: ', self.downscale) 54 | 55 | def forward(self, input, target): 56 | #print(len(input)) 57 | if (type(input) is tuple) or (type(input) is list): 58 | out = 0 59 | for i, input_ in enumerate(input): 60 | 61 | target_ = self.multiScales[i](target) 62 | 63 | if self.mask: 64 | # work for sparse 65 | mask = target > 0 66 | mask.detach_() 67 | 68 | mask = mask.type(torch.cuda.FloatTensor) 69 | pooling_mask = self.multiScales[i](mask) 70 | 71 | # use unbalanced avg 72 | target_ = target_ / pooling_mask # div by 0 generates nan 73 | 74 | 75 | mask = target_ > 0 # exclude nan pixel 76 | mask.detach_() 77 | input_ = input_[mask] 78 | target_ = target_[mask] 79 | 80 | loss_ = self.train_loss(input_, target_) 81 | out += self.weights[i] * loss_ 82 | else: 83 | #This is used in trainer validate for calculating val loss, but val loss is not recored or used anywhere. 84 | out = self.test_loss(input, self.multiScales[0](target)) 85 | return out 86 | 87 | def multiscaleloss(scales=5, downscale=4, weights=None, train_loss = 'smoothL1', test_loss='L1', mask=False): 88 | if weights is None: 89 | weights = (0.005, 0.01, 0.02, 0.08, 0.32) 90 | if scales == 1 and type(weights) is not tuple: 91 | weights = (weights, ) 92 | return MultiScaleLoss(scales, downscale, weights, train_loss, test_loss, mask) 93 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import argparse 4 | import datetime 5 | import random 6 | import torch 7 | import logging 8 | import shutil 9 | 10 | import torch.nn as nn 11 | import torch.backends.cudnn as cudnn 12 | 13 | from utils.common import * 14 | from dltrainer import DisparityTrainer 15 | from net_builder import SUPPORT_NETS 16 | from losses.multiscaleloss import multiscaleloss 17 | 18 | cudnn.benchmark = True 19 | 20 | def save_checkpoint(state, is_best, filename='checkpoint.pth'): 21 | #if state['epoch'] % 10 == 0: 22 | torch.save(state, os.path.join(opt.outf,filename)) 23 | if is_best: 24 | torch.save(state, os.path.join(opt.outf,'model_best.pth')) 25 | #shutil.copyfile(os.path.join(opt.outf,filename), os.path.join(opt.outf,'model_best.pth')) 26 | 27 | def main(opt): 28 | 29 | # load the training loss scheme 30 | loss_json = load_loss_scheme(opt.loss) 31 | train_round = loss_json["round"] 32 | loss_scale = loss_json["loss_scale"] 33 | loss_weights = loss_json["loss_weights"] 34 | epoches = loss_json["epoches"] 35 | logger.info(loss_weights) 36 | 37 | #whethe calculate multiscaleloss only on..... 
38 | if opt.dataset == 'sceneflow': 39 | mask = None 40 | elif opt.dataset == 'DrivingStereo': 41 | mask = True 42 | 43 | #high_res_EPE = multiscaleloss(scales=1, downscale=1, weights=(1), loss='L1', sparse=False) 44 | # initialize a trainer 45 | trainer = DisparityTrainer(opt.net, opt.lr, opt.devices, opt.dataset, opt.trainlist, opt.vallist, opt.datapath, opt.batch_size, opt.maxdisp, opt.model) 46 | # validate the pretrained model on test data 47 | best_EPE = -1 48 | if trainer.is_pretrain: 49 | best_EPE = trainer.validate() 50 | 51 | start_epoch = opt.startEpoch 52 | for r in range(opt.startRound, train_round): 53 | criterion = multiscaleloss(loss_scale, 1, loss_weights[r], train_loss = 'smoothL1', test_loss='L1', mask = mask) 54 | trainer.set_criterion(criterion) 55 | end_epoch = epoches[r] 56 | #end_epoch = min(epoches[r], opt.endEpoch) 57 | 58 | logger.info('round %d: %s' % (r, str(loss_weights[r]))) 59 | logger.info('num of epoches: %d' % end_epoch) 60 | logger.info('\t'.join(['epoch', 'time_stamp', 'train_loss', 'train_EPE', 'EPE', 'lr'])) 61 | for i in range(start_epoch, end_epoch): 62 | avg_loss, avg_EPE = trainer.train_one_epoch(i) 63 | val_EPE = trainer.validate() 64 | is_best = best_EPE < 0 or val_EPE < best_EPE 65 | if is_best: 66 | best_EPE = val_EPE 67 | 68 | save_checkpoint({ 69 | 'round': r + 1, 70 | 'epoch': i + 1, 71 | 'arch': 'dispnet', 72 | 'state_dict': trainer.get_model(), 73 | 'best_EPE': best_EPE, 74 | }, is_best, '%s_%d_%d.pth' % (opt.net, r, i)) 75 | 76 | logger.info('Validation[epoch:%d]: '%i+'\t'.join([datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"), str(avg_loss), str(avg_EPE), str(val_EPE), str(trainer.current_lr)])) 77 | start_epoch = 0 78 | 79 | 80 | if __name__ == '__main__': 81 | #For resolving vscode debugger bug:AssertionError: can only join a child process 82 | # import multiprocessing 83 | # multiprocessing.set_start_method('spawn', True) 84 | 85 | parser = argparse.ArgumentParser() 86 | parser.add_argument('--net', type=str, help='indicates the name of net', default='simplenet', choices=SUPPORT_NETS) 87 | parser.add_argument('--loss', type=str, help='indicates the loss scheme', default='simplenet_flying') 88 | #parser.add_argument('--trainloss', type=str, help='indicates the train loss scheme, only used for pwcnet', default = 'L1_pwc', choices=['L1_pwc','smoothL1_pwc','robust_EPE']) 89 | parser.add_argument('--workers', type=int, help='number of data loading workers', default=8) 90 | parser.add_argument('--batch_size', type=int, default=8, help='input batch size') 91 | parser.add_argument('--lr', type=float, default=0.0002, help='learning rate, default=0.0002') 92 | parser.add_argument('--momentum', type=float, default=0.9, help='momentum for sgd, alpha parameter for adam. default=0.9') 93 | parser.add_argument('--beta', type=float, default=0.999, help='beta parameter for adam. default=0.999') 94 | parser.add_argument('--cuda', action='store_true', help='enables, cuda') 95 | parser.add_argument('--devices', type=str, help='indicates CUDA devices, e.g. 
0,1,2', default='0') 96 | parser.add_argument('--outf', default='.', help='folder to output images and model checkpoints') 97 | parser.add_argument('--manualSeed', type=int, help='manual seed') 98 | parser.add_argument('--model', type=str, help='model for finetuning', default='') 99 | parser.add_argument('--startRound', type=int, help='the round number to start training, useful for the lr scheduler', default=0) 100 | parser.add_argument('--startEpoch', type=int, help='the epoch number to start training, useful for the lr scheduler', default=0) 101 | parser.add_argument('--logFile', type=str, help='logging file', default='./train.log') 102 | parser.add_argument('--showFreq', type=int, help='display frequency', default=100) 103 | parser.add_argument('--flowDiv', type=float, help='the number by which the flow is divided.', default=1.0) 104 | parser.add_argument('--maxdisp', type=int, help='disparity search range.', default=-1) 105 | parser.add_argument('--dataset', type=str, help='provide the dataset name', default='sceneflow') 106 | parser.add_argument('--datapath', type=str, help='provide the root path of the data', default='mynfs/scene_flow/') 107 | parser.add_argument('--trainlist', type=str, help='provide the train file (with file list)', default='FlyingThings3D_release_TRAIN.list') 108 | parser.add_argument('--vallist', type=str, help='provide the val file (with file list)', default='FlyingThings3D_release_TEST.list') 109 | parser.add_argument('--augment', type=int, help='if augment data in training', default=0) 110 | 111 | opt = parser.parse_args() 112 | 113 | os.makedirs(opt.outf, exist_ok=True) 114 | 115 | hdlr = logging.FileHandler(opt.logFile) 116 | hdlr.setFormatter(formatter) 117 | logger.addHandler(hdlr) 118 | logger.info('Configurations: %s', opt) 119 | 120 | if opt.manualSeed is None: 121 | opt.manualSeed = random.randint(1, 10000) 122 | logger.info("Random Seed: %s", opt.manualSeed) 123 | random.seed(opt.manualSeed) 124 | torch.manual_seed(opt.manualSeed) 125 | if opt.cuda: 126 | torch.cuda.manual_seed_all(opt.manualSeed) 127 | 128 | if torch.cuda.is_available() and not opt.cuda: 129 | logger.warning("WARNING: You should run with --cuda since you have a CUDA device.") 130 | main(opt) 131 | 132 | -------------------------------------------------------------------------------- /net_builder.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from networks.ESNet import ESNet 4 | from networks.ESNet_M import ESNet_M 5 | from networks.DispNetC import DispNetC 6 | from networks.DispNetS import DispNetS 7 | from networks.FADNet import FADNet 8 | from networks.stackhourglass import PSMNet 9 | from networks.GANet_deep import GANet 10 | from utils.common import logger 11 | 12 | SUPPORT_NETS = { 13 | 'esnet': ESNet, 14 | 'esnet_m':ESNet_M, 15 | 'fadnet': FADNet, 16 | 'dispnetc': DispNetC, 17 | 'dispnets': DispNetS, 18 | 'psmnet': PSMNet, 19 | 'ganet':GANet, 20 | } 21 | 22 | def build_net(net_name): 23 | net = SUPPORT_NETS.get(net_name, None) 24 | if net is None: 25 | logger.error('Currently supported nets: %s , unsupported net: %s', SUPPORT_NETS.keys(), net_name) 26 | raise ValueError('Unsupported net: %s' % net_name) 27 | return net 28 | -------------------------------------------------------------------------------- /networks/DispNetRes.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import 
torch.nn.functional as F 5 | import numpy as np 6 | from torch.autograd import Function 7 | from torch.nn import init 8 | from torch.nn.init import kaiming_normal 9 | #from layers_package.resample2d_package.modules.resample2d import Resample2d 10 | #from layers_package.channelnorm_package.modules.channelnorm import ChannelNorm 11 | #from correlation_package.modules.corr import Correlation1d # from PWC-Net 12 | from networks.submodules import * 13 | 14 | 15 | class DispNetRes(nn.Module): 16 | 17 | def __init__(self, in_planes, batchNorm=True, lastRelu=False, maxdisp=-1, input_channel=3): 18 | super(DispNetRes, self).__init__() 19 | 20 | self.input_channel = input_channel 21 | self.batchNorm = batchNorm 22 | self.lastRelu = lastRelu 23 | self.maxdisp = maxdisp 24 | self.res_scale = 7 # number of residuals 25 | 26 | # improved with shrink res-block layers 27 | self.conv1 = conv(in_planes, 64, 7, 2, batchNorm=self.batchNorm) 28 | self.conv2 = ResBlock(64, 128, 2) 29 | self.conv3 = ResBlock(128, 256, 2) 30 | self.conv3_1 = ResBlock(256, 256) 31 | self.conv4 = ResBlock(256, 512, stride=2) 32 | self.conv4_1 = ResBlock(512, 512) 33 | self.conv5 = ResBlock(512, 512, stride=2) 34 | self.conv5_1 = ResBlock(512, 512) 35 | self.conv6 = ResBlock(512, 1024, stride=2) 36 | self.conv6_1 = ResBlock(1024, 1024) 37 | 38 | # original shrink conv layers 39 | #self.conv2 = conv(self.batchNorm, 64, 128, 5, 2) 40 | #self.conv3 = conv(self.batchNorm, 128, 256, 5, 2) 41 | #self.conv3_1 = conv(self.batchNorm, 256, 256) 42 | #self.conv4 = conv(self.batchNorm, 256, 512, stride=2) 43 | #self.conv4_1 = conv(self.batchNorm, 512, 512) 44 | #self.conv5 = conv(self.batchNorm, 512, 512, stride=2) 45 | #self.conv5_1 = conv(self.batchNorm, 512, 512) 46 | #self.conv6 = conv(self.batchNorm, 512, 1024, stride=2) 47 | #self.conv6_1 = conv(self.batchNorm, 1024, 1024) 48 | 49 | self.pred_res6 = predict_flow(1024) 50 | 51 | # iconv with deconv layers 52 | self.iconv5 = nn.ConvTranspose2d(1025, 512, 3, 1, 1) 53 | self.iconv4 = nn.ConvTranspose2d(769, 256, 3, 1, 1) 54 | self.iconv3 = nn.ConvTranspose2d(385, 128, 3, 1, 1) 55 | self.iconv2 = nn.ConvTranspose2d(193, 64, 3, 1, 1) 56 | self.iconv1 = nn.ConvTranspose2d(97, 32, 3, 1, 1) 57 | self.iconv0 = nn.ConvTranspose2d(17+self.input_channel, 16, 3, 1, 1) 58 | 59 | # expand and produce disparity 60 | self.upconv5 = deconv(1024, 512) 61 | self.upflow6to5 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 62 | self.pred_res5 = predict_flow(512) 63 | 64 | self.upconv4 = deconv(512, 256) 65 | self.upflow5to4 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 66 | self.pred_res4 = predict_flow(256) 67 | 68 | self.upconv3 = deconv(256, 128) 69 | self.upflow4to3 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 70 | self.pred_res3 = predict_flow(128) 71 | 72 | self.upconv2 = deconv(128, 64) 73 | self.upflow3to2 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 74 | self.pred_res2 = predict_flow(64) 75 | 76 | self.upconv1 = deconv(64, 32) 77 | self.upflow2to1 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 78 | self.pred_res1 = predict_flow(32) 79 | 80 | self.upconv0 = deconv(32, 16) 81 | self.upflow1to0 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 82 | 83 | if self.maxdisp == -1: 84 | self.pred_res0 = predict_flow(16) 85 | self.relu = nn.ReLU(inplace=False) 86 | else: 87 | self.disp_expand = ResBlock(16, self.maxdisp) 88 | 89 | # weight initialization 90 | for m in self.modules(): 91 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 92 | # n = m.kernel_size[0] * m.kernel_size[1] * 
m.out_channels 93 | # m.weight.data.normal_(0, 0.02 / n) 94 | # m.weight.data.normal_(0, 0.02) 95 | kaiming_normal(m.weight.data) 96 | if m.bias is not None: 97 | m.bias.data.zero_() 98 | elif isinstance(m, nn.BatchNorm2d): 99 | m.weight.data.fill_(1) 100 | m.bias.data.zero_() 101 | 102 | def forward(self, inputs, get_feature=False): 103 | 104 | input = inputs[0] 105 | base_flow = inputs[1] 106 | 107 | conv1 = self.conv1(input) 108 | conv2 = self.conv2(conv1) 109 | conv3a = self.conv3(conv2) 110 | conv3b = self.conv3_1(conv3a) 111 | conv4a = self.conv4(conv3b) 112 | conv4b = self.conv4_1(conv4a) 113 | conv5a = self.conv5(conv4b) 114 | conv5b = self.conv5_1(conv5a) 115 | conv6a = self.conv6(conv5b) 116 | conv6b = self.conv6_1(conv6a) 117 | 118 | pr6_res = self.pred_res6(conv6b) 119 | pr6 = pr6_res + base_flow[6] 120 | 121 | upconv5 = self.upconv5(conv6b) 122 | upflow6 = self.upflow6to5(pr6) 123 | concat5 = torch.cat((upconv5, upflow6, conv5b), 1) 124 | iconv5 = self.iconv5(concat5) 125 | 126 | pr5_res = self.pred_res5(iconv5) 127 | pr5 = pr5_res + base_flow[5] 128 | 129 | upconv4 = self.upconv4(iconv5) 130 | upflow5 = self.upflow5to4(pr5) 131 | concat4 = torch.cat((upconv4, upflow5, conv4b), 1) 132 | iconv4 = self.iconv4(concat4) 133 | 134 | pr4_res = self.pred_res4(iconv4) 135 | pr4 = pr4_res + base_flow[4] 136 | 137 | upconv3 = self.upconv3(iconv4) 138 | upflow4 = self.upflow4to3(pr4) 139 | concat3 = torch.cat((upconv3, upflow4, conv3b), 1) 140 | iconv3 = self.iconv3(concat3) 141 | 142 | pr3_res = self.pred_res3(iconv3) 143 | pr3 = pr3_res + base_flow[3] 144 | 145 | upconv2 = self.upconv2(iconv3) 146 | upflow3 = self.upflow3to2(pr3) 147 | concat2 = torch.cat((upconv2, upflow3, conv2), 1) 148 | iconv2 = self.iconv2(concat2) 149 | 150 | pr2_res = self.pred_res2(iconv2) 151 | pr2 = pr2_res + base_flow[2] 152 | 153 | upconv1 = self.upconv1(iconv2) 154 | upflow2 = self.upflow2to1(pr2) 155 | concat1 = torch.cat((upconv1, upflow2, conv1), 1) 156 | iconv1 = self.iconv1(concat1) 157 | 158 | pr1_res = self.pred_res1(iconv1) 159 | pr1 = pr1_res + base_flow[1] 160 | 161 | upconv0 = self.upconv0(iconv1) 162 | upflow1 = self.upflow1to0(pr1) 163 | concat0 = torch.cat((upconv0, upflow1, input[:, :self.input_channel, :, :]), 1) 164 | iconv0 = self.iconv0(concat0) 165 | 166 | # predict flow residual 167 | if self.maxdisp == -1: 168 | pr0_res = self.pred_res0(iconv0) 169 | pr0 = pr0_res + base_flow[0] 170 | 171 | if self.lastRelu: 172 | pr0 = self.relu(pr0) 173 | pr1 = self.relu(pr1) 174 | pr2 = self.relu(pr2) 175 | pr3 = self.relu(pr3) 176 | pr4 = self.relu(pr4) 177 | pr5 = self.relu(pr5) 178 | pr6 = self.relu(pr6) 179 | else: 180 | pr0_res = self.disp_expand(iconv0) 181 | pr0_res = F.softmax(pr0_res, dim=1) 182 | pr0_res = disparity_regression(pr0_res, self.maxdisp) 183 | 184 | if get_feature: 185 | return pr0, pr1, pr2, pr3, pr4, pr5, pr6, iconv0 186 | else: 187 | return pr0, pr1, pr2, pr3, pr4, pr5, pr6 188 | 189 | def weight_parameters(self): 190 | return [param for name, param in self.named_parameters() if 'weight' in name] 191 | 192 | def bias_parameters(self): 193 | return [param for name, param in self.named_parameters() if 'bias' in name] 194 | 195 | -------------------------------------------------------------------------------- /networks/DispNetS.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from torch.autograd 
import Function 7 | from torch.nn import init 8 | from torch.nn.init import kaiming_normal 9 | from networks.submodules import * 10 | from layers_package.resample2d_package.resample2d import Resample2d 11 | from layers_package.channelnorm_package.channelnorm import ChannelNorm 12 | #from correlation_package.modules.corr import Correlation1d # from PWC-Net 13 | 14 | class DispNetS(nn.Module): 15 | 16 | def __init__(self, in_planes=6, batchNorm=True, resBlock=True, maxdisp=-1, input_channel=3): 17 | super(DispNetS, self).__init__() 18 | 19 | self.batchNorm = batchNorm 20 | self.in_planes = in_planes 21 | self.maxdisp = maxdisp 22 | self.input_channel = input_channel 23 | 24 | # shrink and extract features 25 | self.conv1 = conv(self.in_planes, 64, 7, 2) 26 | 27 | if resBlock: 28 | self.conv2 = ResBlock(64, 128, 2) 29 | self.conv3 = ResBlock(128, 256, 2) 30 | self.conv3_1 = ResBlock(256, 256) 31 | self.conv4 = ResBlock(256, 512, stride=2) 32 | self.conv4_1 = ResBlock(512, 512) 33 | self.conv5 = ResBlock(512, 512, stride=2) 34 | self.conv5_1 = ResBlock(512, 512) 35 | self.conv6 = ResBlock(512, 1024, stride=2) 36 | self.conv6_1 = ResBlock(1024, 1024) 37 | else: 38 | self.conv2 = conv(64, 128, 2) 39 | self.conv3 = conv(128, 256, 2) 40 | self.conv3_1 = conv(256, 256) 41 | self.conv4 = conv(256, 512, stride=2) 42 | self.conv4_1 = conv(512, 512) 43 | self.conv5 = conv(512, 512, stride=2) 44 | self.conv5_1 = conv(512, 512) 45 | self.conv6 = conv(512, 1024, stride=2) 46 | self.conv6_1 = conv(1024, 1024) 47 | 48 | self.pred_flow6 = predict_flow(1024) 49 | 50 | # expand and produce disparity 51 | self.upconv5 = deconv(1024, 512) 52 | self.upflow6to5 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 53 | self.iconv5 = nn.ConvTranspose2d(1025, 512, 3, 1, 1) 54 | self.pred_flow5 = predict_flow(512) 55 | 56 | self.upconv4 = deconv(512, 256) 57 | self.upflow5to4 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 58 | self.iconv4 = nn.ConvTranspose2d(769, 256, 3, 1, 1) 59 | self.pred_flow4 = predict_flow(256) 60 | 61 | self.upconv3 = deconv(256, 128) 62 | self.upflow4to3 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 63 | self.iconv3 = nn.ConvTranspose2d(385, 128, 3, 1, 1) 64 | self.pred_flow3 = predict_flow(128) 65 | 66 | self.upconv2 = deconv(128, 64) 67 | self.upflow3to2 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 68 | self.iconv2 = nn.ConvTranspose2d(193, 64, 3, 1, 1) 69 | self.pred_flow2 = predict_flow(64) 70 | 71 | self.upconv1 = deconv(64, 32) 72 | self.upflow2to1 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 73 | self.iconv1 = nn.ConvTranspose2d(97, 32, 3, 1, 1) 74 | self.pred_flow1 = predict_flow(32) 75 | 76 | self.upconv0 = deconv(32, 16) 77 | self.upflow1to0 = nn.ConvTranspose2d(1, 1, 4, 2, 1, bias=False) 78 | self.iconv0 = nn.ConvTranspose2d(17+self.input_channel, 16, 3, 1, 1) 79 | if self.maxdisp == -1: 80 | self.pred_flow0 = predict_flow(16) 81 | else: 82 | self.disp_expand = ResBlock(16, self.maxdisp) 83 | 84 | 85 | # weight initialization 86 | for m in self.modules(): 87 | if isinstance(m, nn.Conv2d) or isinstance(m, nn.ConvTranspose2d): 88 | # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 89 | # m.weight.data.normal_(0, 0.02 / n) 90 | # m.weight.data.normal_(0, 0.02) 91 | kaiming_normal(m.weight.data) 92 | if m.bias is not None: 93 | m.bias.data.zero_() 94 | elif isinstance(m, nn.BatchNorm2d): 95 | m.weight.data.fill_(1) 96 | m.bias.data.zero_() 97 | 98 | def forward(self, input): 99 | 100 | # split left image and right image 101 | # print(input.size()) 102 | img_left = input[:, 
:3, :, :] 103 | 104 | conv1 = self.conv1(input) 105 | conv2 = self.conv2(conv1) 106 | conv3a = self.conv3(conv2) 107 | conv3b = self.conv3_1(conv3a) 108 | conv4a = self.conv4(conv3b) 109 | conv4b = self.conv4_1(conv4a) 110 | conv5a = self.conv5(conv4b) 111 | conv5b = self.conv5_1(conv5a) 112 | conv6a = self.conv6(conv5b) 113 | conv6b = self.conv6_1(conv6a) 114 | 115 | pr6 = self.pred_flow6(conv6b) 116 | 117 | upconv5 = self.upconv5(conv6b) 118 | upflow6 = self.upflow6to5(pr6) 119 | concat5 = torch.cat((upconv5, upflow6, conv5b), 1) 120 | iconv5 = self.iconv5(concat5) 121 | pr5 = self.pred_flow5(iconv5) 122 | 123 | upconv4 = self.upconv4(iconv5) 124 | upflow5 = self.upflow5to4(pr5) 125 | concat4 = torch.cat((upconv4, upflow5, conv4b), 1) 126 | iconv4 = self.iconv4(concat4) 127 | pr4 = self.pred_flow4(iconv4) 128 | 129 | upconv3 = self.upconv3(iconv4) 130 | upflow4 = self.upflow4to3(pr4) 131 | concat3 = torch.cat((upconv3, upflow4, conv3b), 1) 132 | iconv3 = self.iconv3(concat3) 133 | pr3 = self.pred_flow3(iconv3) 134 | 135 | upconv2 = self.upconv2(iconv3) 136 | upflow3 = self.upflow3to2(pr3) 137 | concat2 = torch.cat((upconv2, upflow3, conv2), 1) 138 | iconv2 = self.iconv2(concat2) 139 | pr2 = self.pred_flow2(iconv2) 140 | 141 | upconv1 = self.upconv1(iconv2) 142 | upflow2 = self.upflow2to1(pr2) 143 | concat1 = torch.cat((upconv1, upflow2, conv1), 1) 144 | iconv1 = self.iconv1(concat1) 145 | pr1 = self.pred_flow1(iconv1) 146 | 147 | upconv0 = self.upconv0(iconv1) 148 | upflow1 = self.upflow1to0(pr1) 149 | concat0 = torch.cat((upconv0, upflow1, img_left), 1) 150 | iconv0 = self.iconv0(concat0) 151 | 152 | # predict flow 153 | if self.maxdisp == -1: 154 | pr0 = self.pred_flow0(iconv0) 155 | else: 156 | pr0 = self.disp_expand(iconv0) 157 | pr0 = F.softmax(pr0, dim=1) 158 | pr0 = disparity_regression(pr0, self.maxdisp) 159 | 160 | # img_right_rec = warp(img_left, pr0) 161 | 162 | # if self.training: 163 | # # print("finish forwarding.") 164 | # return pr0, pr1, pr2, pr3, pr4, pr5, pr6 165 | # else: 166 | # return pr0 167 | 168 | # can be chosen outside 169 | return pr0, pr1, pr2, pr3, pr4, pr5, pr6 170 | 171 | def weight_parameters(self): 172 | return [param for name, param in self.named_parameters() if 'weight' in name] 173 | 174 | def bias_parameters(self): 175 | return [param for name, param in self.named_parameters() if 'bias' in name] 176 | 177 | 178 | -------------------------------------------------------------------------------- /networks/FADNet.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import numpy as np 6 | from torch.autograd import Function 7 | from torch.nn import init 8 | from torch.nn.init import kaiming_normal 9 | from layers_package.resample2d_package.resample2d import Resample2d 10 | from layers_package.channelnorm_package.channelnorm import ChannelNorm 11 | from networks.DispNetC import DispNetC 12 | from networks.DispNetRes import DispNetRes 13 | from networks.submodules import * 14 | 15 | class FADNet(nn.Module): 16 | 17 | def __init__(self, batchNorm=True, lastRelu=False, resBlock=True, maxdisp=-1, input_channel=3): 18 | super(FADNet, self).__init__() 19 | self.input_channel = input_channel 20 | self.batchNorm = batchNorm 21 | self.lastRelu = lastRelu 22 | self.maxdisp = maxdisp 23 | self.resBlock = resBlock 24 | 25 | # First Block (DispNetC) 26 | self.dispnetc = DispNetC(self.batchNorm, 
maxdisp=self.maxdisp, input_channel=input_channel) 27 | 28 | # warp layer and channelnorm layer 29 | self.channelnorm = ChannelNorm() 30 | self.resample1 = Resample2d() 31 | 32 | # Second Block (DispNetRes), input is 11 channels (img0, img1, img1->img0, flow, diff-mag) 33 | in_planes = 3 * 3 + 1 + 1 34 | self.dispnetres = DispNetRes(in_planes, self.batchNorm, lastRelu=self.lastRelu, maxdisp=self.maxdisp, input_channel=input_channel) 35 | 36 | self.relu = nn.ReLU(inplace=False) 37 | 38 | # # parameter initialization 39 | # for m in self.modules(): 40 | # if isinstance(m, nn.Conv2d): 41 | # if m.bias is not None: 42 | # init.uniform(m.bias) 43 | # init.xavier_uniform(m.weight) 44 | 45 | # if isinstance(m, nn.ConvTranspose2d): 46 | # if m.bias is not None: 47 | # init.uniform(m.bias) 48 | # init.xavier_uniform(m.weight) 49 | 50 | def forward(self, inputs): 51 | 52 | # split left image and right image 53 | # inputs = inputs_target[0] 54 | # target = inputs_target[1] 55 | imgs = torch.chunk(inputs, 2, dim = 1) 56 | img_left = imgs[0] 57 | img_right = imgs[1] 58 | 59 | # dispnetc 60 | dispnetc_flows = self.dispnetc(inputs) 61 | dispnetc_final_flow = dispnetc_flows[0] 62 | 63 | # warp img1 to img0; magnitude of diff between img0 and warped_img1 64 | dummy_flow = torch.autograd.Variable(torch.zeros(dispnetc_final_flow.data.shape).cuda()) 65 | # dispnetc_final_flow_2d = torch.cat((target, dummy_flow), dim = 1) 66 | dispnetc_final_flow_2d = torch.cat((dispnetc_final_flow, dummy_flow), dim = 1) 67 | resampled_img1 = self.resample1(inputs[:, self.input_channel:, :, :], -dispnetc_final_flow_2d) 68 | diff_img0 = inputs[:, :self.input_channel, :, :] - resampled_img1 69 | norm_diff_img0 = self.channelnorm(diff_img0) 70 | 71 | # concat img0, img1, img1->img0, flow, diff-mag 72 | inputs_net2 = torch.cat((inputs, resampled_img1, dispnetc_final_flow, norm_diff_img0), dim = 1) 73 | 74 | # dispnetres 75 | dispnetres_flows = self.dispnetres([inputs_net2, dispnetc_flows]) 76 | index = 0 77 | #print('Index: ', index) 78 | dispnetres_final_flow = dispnetres_flows[index] 79 | 80 | 81 | if self.training: 82 | return dispnetc_flows, dispnetres_flows 83 | else: 84 | return dispnetc_final_flow, dispnetres_final_flow  # , inputs[:, :3, :, :], inputs[:, 3:, :, :], resampled_img1 85 | 86 | 87 | def weight_parameters(self): 88 | return [param for name, param in self.named_parameters() if 'weight' in name] 89 | 90 | def bias_parameters(self): 91 | return [param for name, param in self.named_parameters() if 'bias' in name] 92 | 93 | 94 | -------------------------------------------------------------------------------- /networks/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/networks/__init__.py -------------------------------------------------------------------------------- /networks/basic.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.utils.data 5 | from torch.autograd import Variable 6 | import torch.nn.functional as F 7 | import math 8 | from networks.submodules import *  # was 'from submodule import *'; no such module exists in this repo 9 | 10 | class PSMNet(nn.Module): 11 | def __init__(self, maxdisp): 12 | super(PSMNet, self).__init__() 13 | self.maxdisp = maxdisp 14 | self.feature_extraction = feature_extraction() 15 | 16 | ######## 17 | self.dres0 = nn.Sequential(convbn_3d(64, 32, 3, 1, 1), 18 | nn.ReLU(inplace=True), 
19 | convbn_3d(32, 32, 3, 1, 1), 20 | nn.ReLU(inplace=True)) 21 | 22 | self.dres1 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 23 | nn.ReLU(inplace=True), 24 | convbn_3d(32, 32, 3, 1, 1)) 25 | 26 | self.dres2 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 27 | nn.ReLU(inplace=True), 28 | convbn_3d(32, 32, 3, 1, 1)) 29 | 30 | self.dres3 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 31 | nn.ReLU(inplace=True), 32 | convbn_3d(32, 32, 3, 1, 1)) 33 | 34 | self.dres4 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 35 | nn.ReLU(inplace=True), 36 | convbn_3d(32, 32, 3, 1, 1)) 37 | 38 | self.classify = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 39 | nn.ReLU(inplace=True), 40 | nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1,bias=False)) 41 | 42 | 43 | for m in self.modules(): 44 | if isinstance(m, nn.Conv2d): 45 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 46 | m.weight.data.normal_(0, math.sqrt(2. / n)) 47 | elif isinstance(m, nn.Conv3d): 48 | n = m.kernel_size[0] * m.kernel_size[1]*m.kernel_size[2] * m.out_channels 49 | m.weight.data.normal_(0, math.sqrt(2. / n)) 50 | elif isinstance(m, nn.BatchNorm2d): 51 | m.weight.data.fill_(1) 52 | m.bias.data.zero_() 53 | elif isinstance(m, nn.BatchNorm3d): 54 | m.weight.data.fill_(1) 55 | m.bias.data.zero_() 56 | elif isinstance(m, nn.Linear): 57 | m.bias.data.zero_() 58 | 59 | 60 | def forward(self, left, right): 61 | 62 | refimg_fea = self.feature_extraction(left) 63 | targetimg_fea = self.feature_extraction(right) 64 | 65 | #matching 66 | cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp//4, refimg_fea.size()[2], refimg_fea.size()[3]).zero_(), volatile= not self.training).cuda()  # '//' keeps the disparity dimension an int under Python 3 67 | 68 | for i in range(self.maxdisp//4): 69 | if i > 0 : 70 | cost[:, :refimg_fea.size()[1], i, :,i:] = refimg_fea[:,:,:,i:] 71 | cost[:, refimg_fea.size()[1]:, i, :,i:] = targetimg_fea[:,:,:,:-i] 72 | else: 73 | cost[:, :refimg_fea.size()[1], i, :,:] = refimg_fea 74 | cost[:, refimg_fea.size()[1]:, i, :,:] = targetimg_fea 75 | cost = cost.contiguous() 76 | 77 | cost0 = self.dres0(cost) 78 | cost0 = self.dres1(cost0) + cost0 79 | cost0 = self.dres2(cost0) + cost0 80 | cost0 = self.dres3(cost0) + cost0 81 | cost0 = self.dres4(cost0) + cost0 82 | 83 | cost = self.classify(cost0) 84 | cost = F.upsample(cost, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') 85 | cost = torch.squeeze(cost,1) 86 | pred = F.softmax(cost, dim=1)  # softmax over the disparity dimension; the implicit-dim form is deprecated 87 | pred = disparityregression(self.maxdisp)(pred) 88 | 89 | return pred 90 | -------------------------------------------------------------------------------- /networks/deform.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | from networks.deform_conv import DeformConv, ModulatedDeformConv 4 | 5 | 6 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 7 | """3x3 convolution with padding""" 8 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 9 | padding=dilation, groups=groups, bias=False, dilation=dilation) 10 | 11 | 12 | def conv1x1(in_planes, out_planes, stride=1): 13 | """1x1 convolution""" 14 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 15 | 16 | 17 | class DeformConv2d(nn.Module): 18 | """A single (modulated) deformable conv layer""" 19 | 20 | def __init__(self, in_channels, 21 | out_channels, 22 | kernel_size=3, 23 | stride=1, 24 | dilation=2, 25 | groups=1, 26 | deformable_groups=2, 27 | modulation=True, 28 | double_mask=True, 29 | bias=False): 30 | 
super(DeformConv2d, self).__init__() 31 | 32 | self.modulation = modulation 33 | self.deformable_groups = deformable_groups 34 | self.kernel_size = kernel_size 35 | self.double_mask = double_mask 36 | 37 | if self.modulation: 38 | self.deform_conv = ModulatedDeformConv(in_channels, 39 | out_channels, 40 | kernel_size=kernel_size, 41 | stride=stride, 42 | padding=dilation, 43 | dilation=dilation, 44 | groups=groups, 45 | deformable_groups=deformable_groups, 46 | bias=bias) 47 | else: 48 | self.deform_conv = DeformConv(in_channels, 49 | out_channels, 50 | kernel_size=kernel_size, 51 | stride=stride, 52 | padding=dilation, 53 | dilation=dilation, 54 | groups=groups, 55 | deformable_groups=deformable_groups, 56 | bias=bias) 57 | 58 | k = 3 if self.modulation else 2 59 | 60 | offset_out_channels = deformable_groups * k * kernel_size * kernel_size 61 | 62 | # Group-wise offset learning when deformable_groups > 1 63 | self.offset_conv = nn.Conv2d(in_channels, offset_out_channels, kernel_size=kernel_size, 64 | stride=stride, padding=dilation, dilation=dilation, 65 | groups=deformable_groups, bias=True) 66 | 67 | # Initialize the weight for offset_conv as 0 to act like regular conv 68 | nn.init.constant_(self.offset_conv.weight, 0.) 69 | nn.init.constant_(self.offset_conv.bias, 0.) 70 | 71 | def forward(self, x): 72 | if self.modulation: 73 | offset_mask = self.offset_conv(x) 74 | 75 | offset_channel = self.deformable_groups * 2 * self.kernel_size * self.kernel_size 76 | offset = offset_mask[:, :offset_channel, :, :] 77 | 78 | mask = offset_mask[:, offset_channel:, :, :] 79 | mask = mask.sigmoid() # [0, 1] 80 | 81 | if self.double_mask: 82 | mask = mask * 2 # initialize as 1 to work as regular conv 83 | 84 | out = self.deform_conv(x, offset, mask) 85 | 86 | else: 87 | offset = self.offset_conv(x) 88 | out = self.deform_conv(x, offset) 89 | 90 | return out 91 | 92 | 93 | class DeformBottleneck(nn.Module): 94 | expansion = 4 95 | __constants__ = ['downsample'] 96 | 97 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 98 | base_width=64, dilation=1, norm_layer=None): 99 | super(DeformBottleneck, self).__init__() 100 | if norm_layer is None: 101 | norm_layer = nn.BatchNorm2d 102 | width = int(planes * (base_width / 64.)) * groups 103 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 104 | self.conv1 = conv1x1(inplanes, width) 105 | self.bn1 = norm_layer(width) 106 | self.conv2 = DeformConv2d(width, width, stride=stride) 107 | self.bn2 = norm_layer(width) 108 | self.conv3 = conv1x1(width, planes * self.expansion) 109 | self.bn3 = norm_layer(planes * self.expansion) 110 | self.relu = nn.ReLU(inplace=True) 111 | self.downsample = downsample 112 | self.stride = stride 113 | 114 | def forward(self, x): 115 | identity = x 116 | 117 | out = self.conv1(x) 118 | out = self.bn1(out) 119 | out = self.relu(out) 120 | 121 | out = self.conv2(out) 122 | out = self.bn2(out) 123 | out = self.relu(out) 124 | 125 | out = self.conv3(out) 126 | out = self.bn3(out) 127 | 128 | if self.downsample is not None: 129 | identity = self.downsample(x) 130 | 131 | out += identity 132 | out = self.relu(out) 133 | 134 | return out 135 | 136 | 137 | class SimpleBottleneck(nn.Module): 138 | """Simple bottleneck block without channel expansion""" 139 | 140 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 141 | base_width=64, dilation=1, norm_layer=None): 142 | super(SimpleBottleneck, self).__init__() 143 | if norm_layer is None: 144 | norm_layer = 
nn.BatchNorm2d 145 | width = int(planes * (base_width / 64.)) * groups 146 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 147 | self.conv1 = conv1x1(inplanes, width) 148 | self.bn1 = norm_layer(width) 149 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 150 | self.bn2 = norm_layer(width) 151 | self.conv3 = conv1x1(width, planes) 152 | self.bn3 = norm_layer(planes) 153 | self.relu = nn.ReLU(inplace=True) 154 | self.downsample = downsample 155 | self.stride = stride 156 | 157 | def forward(self, x): 158 | identity = x 159 | 160 | out = self.conv1(x) 161 | out = self.bn1(out) 162 | out = self.relu(out) 163 | 164 | out = self.conv2(out) 165 | out = self.bn2(out) 166 | out = self.relu(out) 167 | 168 | out = self.conv3(out) 169 | out = self.bn3(out) 170 | 171 | if self.downsample is not None: 172 | identity = self.downsample(x) 173 | 174 | out += identity 175 | out = self.relu(out) 176 | 177 | return out 178 | 179 | 180 | class DeformSimpleBottleneck(nn.Module): 181 | """Used for cost aggregation""" 182 | 183 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 184 | base_width=64, norm_layer=None, 185 | mdconv_dilation=2, 186 | deformable_groups=2, 187 | modulation=True, 188 | double_mask=True, 189 | ): 190 | super(DeformSimpleBottleneck, self).__init__() 191 | if norm_layer is None: 192 | norm_layer = nn.BatchNorm2d 193 | width = int(planes * (base_width / 64.)) * groups 194 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 195 | self.conv1 = conv1x1(inplanes, width) 196 | self.bn1 = norm_layer(width) 197 | self.conv2 = DeformConv2d(width, width, stride=stride, 198 | dilation=mdconv_dilation, 199 | deformable_groups=deformable_groups, 200 | modulation=modulation, 201 | double_mask=double_mask) 202 | self.bn2 = norm_layer(width) 203 | self.conv3 = conv1x1(width, planes) 204 | self.bn3 = norm_layer(planes) 205 | self.relu = nn.ReLU(inplace=True) 206 | self.downsample = downsample 207 | self.stride = stride 208 | 209 | def forward(self, x): 210 | identity = x 211 | 212 | out = self.conv1(x) 213 | out = self.bn1(out) 214 | out = self.relu(out) 215 | 216 | out = self.conv2(out) 217 | out = self.bn2(out) 218 | out = self.relu(out) 219 | 220 | out = self.conv3(out) 221 | out = self.bn3(out) 222 | 223 | if self.downsample is not None: 224 | identity = self.downsample(x) 225 | 226 | out += identity 227 | out = self.relu(out) 228 | 229 | return out 230 | -------------------------------------------------------------------------------- /networks/deform_conv/__init__.py: -------------------------------------------------------------------------------- 1 | from .deform_conv import (DeformConv, DeformConvPack, ModulatedDeformConv, 2 | ModulatedDeformConvPack, deform_conv, 3 | modulated_deform_conv) 4 | 5 | __all__ = [ 6 | 'DeformConv', 'DeformConvPack', 'ModulatedDeformConv', 7 | 'ModulatedDeformConvPack', 'deform_conv', 'modulated_deform_conv', 8 | ] 9 | -------------------------------------------------------------------------------- /networks/deform_conv/build.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PYTHON=${PYTHON:-"python3"} 4 | 5 | if [ -d "build" ]; then 6 | rm -r build 7 | fi 8 | $PYTHON setup.py build_ext --inplace 9 | -------------------------------------------------------------------------------- /networks/deform_conv/setup.py: -------------------------------------------------------------------------------- 1 | from 
setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | 4 | setup( 5 | name='deform_conv', 6 | ext_modules=[ 7 | CUDAExtension('deform_conv_cuda', [ 8 | 'src/deform_conv_cuda.cpp', 9 | 'src/deform_conv_cuda_kernel.cu', 10 | ]), 11 | ], 12 | cmdclass={'build_ext': BuildExtension}) 13 | -------------------------------------------------------------------------------- /networks/domain_classifier.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class GradReverse(torch.autograd.Function): 6 | # legacy-style autograd Function (was subclassing the non-existent nn.function.Function); 7 | # this instance-based style works on the PyTorch 1.x pinned by this repo, while newer 8 | # PyTorch requires the @staticmethod forward/backward API 9 | def __init__(self, lambd): 10 | self.lambd = lambd 11 | 12 | def forward(self, x): 13 | return x.view_as(x) 14 | 15 | def backward(self, grad_output): 16 | return grad_output * -self.lambd  # was -self.lamdb (typo), which raised AttributeError 17 | 18 | def grad_reverse(x, lambd): 19 | return GradReverse(lambd)(x) 20 | 21 | 22 | class DomainClassifier(nn.Module): 23 | def __init__(self): 24 | super(DomainClassifier, self).__init__() 25 | self.fc1 = nn.Linear(1024, 1024) 26 | self.fc2 = nn.Linear(1024, 1) 27 | self.drop = nn.Dropout2d(0.25) 28 | self.relu = nn.ReLU(inplace=False) 29 | 30 | def forward(self, x, lambd=0.5): 31 | x = grad_reverse(x, lambd) 32 | x = self.relu(self.drop(self.fc1(x))) 33 | x = self.fc2(x) 34 | return torch.sigmoid(x) 35 | -------------------------------------------------------------------------------- /networks/resnet_modules.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | def resnet18(**kwargs): 6 | r"""ResNet-18 model from 7 | `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_ 8 | """ 9 | return ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 10 | 11 | def resnet50(**kwargs): 12 | r"""ResNet-50 model from 13 | `"Deep Residual Learning for Image Recognition" <https://arxiv.org/abs/1512.03385>`_ 14 | """ 15 | # the local ResNet takes (block, layers, ...) only, so the torchvision-style 'resnet50'/pretrained/progress arguments were dropped 16 | return ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 17 | def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1): 18 | """3x3 convolution with padding""" 19 | return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, 20 | padding=dilation, groups=groups, bias=False, dilation=dilation) 21 | 22 | 23 | def conv1x1(in_planes, out_planes, stride=1): 24 | """1x1 convolution""" 25 | return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False) 26 | 27 | 28 | class BasicBlock(nn.Module): 29 | expansion = 1 30 | 31 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 32 | base_width=64, dilation=1, norm_layer=None): 33 | super(BasicBlock, self).__init__() 34 | if norm_layer is None: 35 | norm_layer = nn.BatchNorm2d 36 | if groups != 1 or base_width != 64: 37 | raise ValueError('BasicBlock only supports groups=1 and base_width=64') 38 | if dilation > 1: 39 | raise NotImplementedError("Dilation > 1 not supported in BasicBlock") 40 | # Both self.conv1 and self.downsample layers downsample the input when stride != 1 41 | self.conv1 = conv3x3(inplanes, planes, stride) 42 | self.bn1 = norm_layer(planes) 43 | self.relu = nn.ReLU(inplace=True) 44 | self.conv2 = conv3x3(planes, planes) 45 | self.bn2 = norm_layer(planes) 46 | self.downsample = downsample 47 | self.stride = stride 48 | 49 | def forward(self, x): 50 | residual = x 51 | if self.downsample is not None: 52 | residual = self.downsample(x) 53 | out = self.conv1(x) 54 | out = self.bn1(out) 55 | out = self.relu(out) 56 | out = self.conv2(out) 57 | out = self.bn2(out) 58 | 59 | out += residual 60 | out = 
self.relu(out) 61 | return out 62 | 63 | #Not used for ResNet 18 64 | class Bottleneck(nn.Module): 65 | 66 | expansion = 4 67 | 68 | def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, 69 | base_width=64, dilation=1, norm_layer=None): 70 | super(Bottleneck, self).__init__() 71 | if norm_layer is None: 72 | norm_layer = nn.BatchNorm2d 73 | width = int(planes * (base_width / 64.)) * groups 74 | # Both self.conv2 and self.downsample layers downsample the input when stride != 1 75 | self.conv1 = conv1x1(inplanes, width) 76 | self.bn1 = norm_layer(width) 77 | self.conv2 = conv3x3(width, width, stride, groups, dilation) 78 | self.bn2 = norm_layer(width) 79 | self.conv3 = conv1x1(width, planes * self.expansion) 80 | self.bn3 = norm_layer(planes * self.expansion) 81 | self.relu = nn.ReLU(inplace=True) 82 | self.downsample = downsample 83 | self.stride = stride 84 | 85 | def forward(self, x): 86 | identity = x 87 | 88 | out = self.conv1(x) 89 | out = self.bn1(out) 90 | out = self.relu(out) 91 | 92 | out = self.conv2(out) 93 | out = self.bn2(out) 94 | out = self.relu(out) 95 | 96 | out = self.conv3(out) 97 | out = self.bn3(out) 98 | 99 | if self.downsample is not None: 100 | identity = self.downsample(x) 101 | 102 | out += identity 103 | out = self.relu(out) 104 | 105 | return out 106 | 107 | 108 | class ResNet(nn.Module): 109 | 110 | def __init__(self, block, layers, 111 | groups=1, width_per_group=64, replace_stride_with_dilation=None, 112 | norm_layer=None): 113 | super(ResNet, self).__init__() 114 | if norm_layer is None: 115 | norm_layer = nn.BatchNorm2d 116 | self._norm_layer = norm_layer 117 | 118 | self.inplanes = 64 119 | self.dilation = 1 120 | if replace_stride_with_dilation is None: 121 | # each element in the tuple indicates if we should replace 122 | # the 2x2 stride with a dilated convolution instead 123 | replace_stride_with_dilation = [False, False, False] 124 | if len(replace_stride_with_dilation) != 3: 125 | raise ValueError("replace_stride_with_dilation should be None " 126 | "or a 3-element tuple, got {}".format(replace_stride_with_dilation)) 127 | self.groups = groups 128 | self.base_width = width_per_group 129 | self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, 130 | bias=False) 131 | self.bn1 = norm_layer(self.inplanes) 132 | self.relu = nn.ReLU(inplace=True) 133 | self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) 134 | self.layer1 = self._make_layer(block, 64, layers[0]) 135 | self.layer2 = self._make_layer(block, 128, layers[1], stride=2, 136 | dilate=replace_stride_with_dilation[0]) 137 | self.layer3 = self._make_layer(block, 256, layers[2], stride=2, 138 | dilate=replace_stride_with_dilation[1]) 139 | self.layer4 = self._make_layer(block, 512, layers[3], stride=2, 140 | dilate=replace_stride_with_dilation[2]) 141 | # self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) 142 | # self.fc = nn.Linear(512 * block.expansion, num_classes) 143 | 144 | for m in self.modules(): 145 | if isinstance(m, nn.Conv2d): 146 | nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') 147 | elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): 148 | nn.init.constant_(m.weight, 1) 149 | nn.init.constant_(m.bias, 0) 150 | 151 | def _make_layer(self, block, planes, blocks, stride=1, dilate=False): 152 | norm_layer = self._norm_layer 153 | downsample = None 154 | previous_dilation = self.dilation 155 | if dilate: 156 | self.dilation *= stride 157 | stride = 1 158 | if stride != 1 or self.inplanes != planes * block.expansion: 159 | 
downsample = nn.Sequential( 160 | conv1x1(self.inplanes, planes * block.expansion, stride), 161 | norm_layer(planes * block.expansion), 162 | ) 163 | 164 | layers = [] 165 | layers.append(block(self.inplanes, planes, stride, downsample, self.groups, 166 | self.base_width, previous_dilation, norm_layer)) 167 | self.inplanes = planes * block.expansion 168 | for _ in range(1, blocks): 169 | layers.append(block(self.inplanes, planes, groups=self.groups, 170 | base_width=self.base_width, dilation=self.dilation, 171 | norm_layer=norm_layer)) 172 | 173 | return nn.Sequential(*layers) 174 | 175 | def _forward_impl(self, x): 176 | x = self.conv1(x) 177 | x = self.bn1(x) 178 | x = self.relu(x) 179 | x = self.maxpool(x) 180 | 181 | x = self.layer1(x) 182 | x = self.layer2(x) 183 | x = self.layer3(x) 184 | x = self.layer4(x) 185 | 186 | # x = self.avgpool(x) 187 | # x = torch.flatten(x, 1) 188 | # x = self.fc(x) 189 | 190 | return x 191 | 192 | def forward(self, x): 193 | return self._forward_impl(x) 194 | -------------------------------------------------------------------------------- /networks/stackhourglass.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import torch 3 | import torch.nn as nn 4 | import torch.utils.data 5 | from torch.autograd import Variable 6 | import torch.nn.functional as F 7 | import math 8 | from networks.submodules import * 9 | 10 | class hourglass(nn.Module): 11 | def __init__(self, inplanes): 12 | super(hourglass, self).__init__() 13 | 14 | self.conv1 = nn.Sequential(convbn_3d(inplanes, inplanes*2, kernel_size=3, stride=2, pad=1), 15 | nn.ReLU(inplace=True)) 16 | 17 | self.conv2 = convbn_3d(inplanes*2, inplanes*2, kernel_size=3, stride=1, pad=1) 18 | 19 | self.conv3 = nn.Sequential(convbn_3d(inplanes*2, inplanes*2, kernel_size=3, stride=2, pad=1), 20 | nn.ReLU(inplace=True)) 21 | 22 | self.conv4 = nn.Sequential(convbn_3d(inplanes*2, inplanes*2, kernel_size=3, stride=1, pad=1), 23 | nn.ReLU(inplace=True)) 24 | 25 | self.conv5 = nn.Sequential(nn.ConvTranspose3d(inplanes*2, inplanes*2, kernel_size=3, padding=1, output_padding=1, stride=2,bias=False), 26 | nn.BatchNorm3d(inplanes*2)) #+conv2 27 | 28 | self.conv6 = nn.Sequential(nn.ConvTranspose3d(inplanes*2, inplanes, kernel_size=3, padding=1, output_padding=1, stride=2,bias=False), 29 | nn.BatchNorm3d(inplanes)) #+x 30 | 31 | def forward(self, x ,presqu, postsqu): 32 | 33 | out = self.conv1(x) #in:1/4 out:1/8 34 | pre = self.conv2(out) #in:1/8 out:1/8 35 | if postsqu is not None: 36 | pre = F.relu(pre + postsqu, inplace=True) 37 | else: 38 | pre = F.relu(pre, inplace=True) 39 | 40 | out = self.conv3(pre) #in:1/8 out:1/16 41 | out = self.conv4(out) #in:1/16 out:1/16 42 | 43 | if presqu is not None: 44 | post = F.relu(self.conv5(out)+presqu, inplace=True) #in:1/16 out:1/8 45 | else: 46 | post = F.relu(self.conv5(out)+pre, inplace=True) 47 | 48 | out = self.conv6(post) #in:1/8 out:1/4 49 | 50 | return out, pre, post 51 | 52 | class PSMNet(nn.Module): 53 | def __init__(self, maxdisp=192): 54 | super(PSMNet, self).__init__() 55 | self.maxdisp = maxdisp 56 | 57 | self.feature_extraction = feature_extraction() 58 | 59 | self.dres0 = nn.Sequential(convbn_3d(64, 32, 3, 1, 1), 60 | nn.ReLU(inplace=True), 61 | convbn_3d(32, 32, 3, 1, 1), 62 | nn.ReLU(inplace=True)) 63 | 64 | self.dres1 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 65 | nn.ReLU(inplace=True), 66 | convbn_3d(32, 32, 3, 1, 1)) 67 | 68 | self.dres2 = hourglass(32) 69 | 70 | self.dres3 = 
hourglass(32) 71 | 72 | self.dres4 = hourglass(32) 73 | 74 | self.classif1 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 75 | nn.ReLU(inplace=True), 76 | nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1,bias=False)) 77 | 78 | self.classif2 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 79 | nn.ReLU(inplace=True), 80 | nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1,bias=False)) 81 | 82 | self.classif3 = nn.Sequential(convbn_3d(32, 32, 3, 1, 1), 83 | nn.ReLU(inplace=True), 84 | nn.Conv3d(32, 1, kernel_size=3, padding=1, stride=1,bias=False)) 85 | 86 | for m in self.modules(): 87 | if isinstance(m, nn.Conv2d): 88 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 89 | m.weight.data.normal_(0, math.sqrt(2. / n)) 90 | elif isinstance(m, nn.Conv3d): 91 | n = m.kernel_size[0] * m.kernel_size[1]*m.kernel_size[2] * m.out_channels 92 | m.weight.data.normal_(0, math.sqrt(2. / n)) 93 | elif isinstance(m, nn.BatchNorm2d): 94 | m.weight.data.fill_(1) 95 | m.bias.data.zero_() 96 | elif isinstance(m, nn.BatchNorm3d): 97 | m.weight.data.fill_(1) 98 | m.bias.data.zero_() 99 | elif isinstance(m, nn.Linear): 100 | m.bias.data.zero_() 101 | 102 | 103 | def forward(self, input): 104 | 105 | imgs = torch.chunk(input, 2, dim = 1) 106 | left = imgs[0] 107 | right = imgs[1] 108 | 109 | refimg_fea = self.feature_extraction(left) 110 | targetimg_fea = self.feature_extraction(right) 111 | 112 | 113 | #matching 114 | cost = Variable(torch.FloatTensor(refimg_fea.size()[0], refimg_fea.size()[1]*2, self.maxdisp//4, refimg_fea.size()[2], refimg_fea.size()[3]).zero_()).cuda()  # '//' keeps the disparity dimension an int under Python 3 115 | 116 | for i in range(self.maxdisp//4): 117 | if i > 0 : 118 | cost[:, :refimg_fea.size()[1], i, :,i:] = refimg_fea[:,:,:,i:] 119 | cost[:, refimg_fea.size()[1]:, i, :,i:] = targetimg_fea[:,:,:,:-i] 120 | else: 121 | cost[:, :refimg_fea.size()[1], i, :,:] = refimg_fea 122 | cost[:, refimg_fea.size()[1]:, i, :,:] = targetimg_fea 123 | cost = cost.contiguous() 124 | 125 | cost0 = self.dres0(cost) 126 | cost0 = self.dres1(cost0) + cost0 127 | 128 | out1, pre1, post1 = self.dres2(cost0, None, None) 129 | out1 = out1+cost0 130 | 131 | out2, pre2, post2 = self.dres3(out1, pre1, post1) 132 | out2 = out2+cost0 133 | 134 | out3, pre3, post3 = self.dres4(out2, pre1, post2) 135 | out3 = out3+cost0 136 | 137 | cost1 = self.classif1(out1) 138 | cost2 = self.classif2(out2) + cost1 139 | cost3 = self.classif3(out3) + cost2 140 | 141 | if self.training: 142 | cost1 = F.upsample(cost1, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') 143 | cost2 = F.upsample(cost2, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') 144 | 145 | cost1 = torch.squeeze(cost1,1) 146 | pred1 = F.softmax(cost1,dim=1) 147 | pred1 = disparityregression(self.maxdisp)(pred1) 148 | 149 | cost2 = torch.squeeze(cost2,1) 150 | pred2 = F.softmax(cost2,dim=1) 151 | pred2 = disparityregression(self.maxdisp)(pred2) 152 | 153 | cost3 = F.upsample(cost3, [self.maxdisp,left.size()[2],left.size()[3]], mode='trilinear') 154 | cost3 = torch.squeeze(cost3,1) 155 | pred3 = F.softmax(cost3,dim=1) 156 | #For your information: This formulation 'softmax(c)' learned "similarity" 157 | #while 'softmax(-c)' learned 'matching cost' as mentioned in the paper. 158 | #However, 'c' or '-c' do not affect the performance because feature-based cost volume provided flexibility. 
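# The disparityregression calls in this forward pass implement PSMNet's soft-argmin readout:
# with p_d = softmax(cost)_d taken over the disparity dimension, the prediction is
# sum_{d=0}^{maxdisp-1} d * p_d, which keeps the estimate differentiable w.r.t. the cost volume.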
159 | pred3 = disparityregression(self.maxdisp)(pred3) 160 | 161 | if self.training: 162 | return pred1, pred2, pred3 163 | else: 164 | return pred3 165 | -------------------------------------------------------------------------------- /submission.sh: -------------------------------------------------------------------------------- 1 | model_path=/mnt/wekanfs/scratch/zhengyu.huang/FADNet_result/models/kitti_finetune/pretrained_on_sceneflow_run1/best.tar 2 | save_path=./submit_results/fadnet-KITTI2015-split_run1/ 3 | net=fadnet 4 | # model_path=./trained/psmnet-imn-KITTI2015-split/best.tar 5 | # save_path=./submit_results/psmnet-imn-KITTI2015-split/ 6 | # net=psmnet 7 | python3 kitti_submission.py --maxdisp 192 \ 8 | --model $net \ 9 | --KITTI 2015 \ 10 | --datapath /mnt/wekanfs/scratch/zhengyu.huang/KITTI_2015_Stereo/testing/ \ 11 | --savepath $save_path \ 12 | --loadmodel $model_path \ 13 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | dnn="${dnn:-esnet_sceneflow}" 2 | source exp_configs/$dnn.conf 3 | 4 | python3 -W ignore main.py --cuda --net $net --loss $loss --lr $lr \ 5 | --outf $outf_model --logFile $logf \ 6 | --devices $devices --batch_size $batchSize \ 7 | --datapath $datapath \ 8 | --dataset $dataset --trainlist $trainlist --vallist $vallist \ 9 | --startRound $startR --startEpoch $startE \ 10 | --model $model \ 11 | --maxdisp $maxdisp \ 12 | --manualSeed 1024 \ 13 | 14 | -------------------------------------------------------------------------------- /utils/AverageMeter.py: -------------------------------------------------------------------------------- 1 | 2 | class AverageMeter(object): 3 | 4 | def __init__(self): 5 | self.reset() 6 | 7 | def reset(self): 8 | self.val = 0 9 | self.avg = 0 10 | self.sum = 0 11 | self.count = 0 12 | 13 | def update(self, val, n=1): 14 | self.val = val 15 | self.sum += val * n 16 | self.count += n 17 | self.avg = self.sum / self.count 18 | 19 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/macrohuang1993/ESNet/40e606772d0ab6b773b080225449791d95dff138/utils/__init__.py -------------------------------------------------------------------------------- /utils/common.py: -------------------------------------------------------------------------------- 1 | import json, yaml 2 | import logging 3 | 4 | def load_loss_scheme(loss_config): 5 | 6 | with open(loss_config, 'r') as f: 7 | loss_json = yaml.safe_load(f) 8 | 9 | return loss_json 10 | 11 | DEBUG =0 12 | logger = logging.getLogger() 13 | 14 | if DEBUG: 15 | #coloredlogs.install(level='DEBUG') 16 | logger.setLevel(logging.DEBUG) 17 | else: 18 | #coloredlogs.install(level='INFO') 19 | logger.setLevel(logging.INFO) 20 | 21 | strhdlr = logging.StreamHandler() 22 | logger.addHandler(strhdlr) 23 | formatter = logging.Formatter('%(asctime)s [%(filename)s:%(lineno)d] %(levelname)s %(message)s') 24 | strhdlr.setFormatter(formatter) 25 | 26 | def count_parameters(model): 27 | return sum(p.numel() for p in model.parameters() if p.requires_grad) 28 | -------------------------------------------------------------------------------- /utils/readpfm.py: -------------------------------------------------------------------------------- 1 | import re 2 | import numpy as np 3 | import sys 4 | 5 | 6 | def readPFM(file): 7 | file 
= open(file, 'rb') 8 | 9 | color = None 10 | width = None 11 | height = None 12 | scale = None 13 | endian = None 14 | 15 | header = file.readline().rstrip().decode('utf-8')  # file is opened in binary mode, so decode bytes before comparing with str (Python 3) 16 | if header == 'PF': 17 | color = True 18 | elif header == 'Pf': 19 | color = False 20 | else: 21 | raise Exception('Not a PFM file.') 22 | 23 | dim_match = re.match(r'^(\d+)\s(\d+)\s$', file.readline().decode('utf-8')) 24 | if dim_match: 25 | width, height = map(int, dim_match.groups()) 26 | else: 27 | raise Exception('Malformed PFM header.') 28 | 29 | scale = float(file.readline().rstrip().decode('utf-8')) 30 | if scale < 0: # little-endian 31 | endian = '<' 32 | scale = -scale 33 | else: 34 | endian = '>' # big-endian 35 | 36 | data = np.fromfile(file, endian + 'f') 37 | shape = (height, width, 3) if color else (height, width) 38 | 39 | data = np.reshape(data, shape) 40 | data = np.flipud(data) 41 | return data, scale 42 | 43 | --------------------------------------------------------------------------------
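For reference, a minimal sketch (not part of the original repo) of how `readPFM` might be called to load a SceneFlow-style disparity map; the file path below is hypothetical:
```
from utils.readpfm import readPFM

# load a ground-truth disparity map stored as PFM (hypothetical path)
disp, scale = readPFM('frames_disparity/0006.pfm')
print(disp.shape, disp.dtype, scale)  # e.g. (540, 960) float32 1.0
```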