├── .gitignore ├── Data ├── ['DUTS-TR']_statistics.pth ├── __init__.py ├── dataloader.py └── preprocess.py ├── Models ├── PCSA.py ├── PCSA │ ├── .cproject │ ├── .project │ ├── PCSA_Module │ │ ├── .cproject │ │ ├── .project │ │ ├── CMakeLists.txt │ │ ├── cmakebuild │ │ │ ├── CMakeCache.txt │ │ │ ├── CMakeFiles │ │ │ │ ├── 3.10.2 │ │ │ │ │ ├── CMakeCCompiler.cmake │ │ │ │ │ ├── CMakeCXXCompiler.cmake │ │ │ │ │ ├── CMakeDetermineCompilerABI_C.bin │ │ │ │ │ ├── CMakeDetermineCompilerABI_CXX.bin │ │ │ │ │ ├── CMakeSystem.cmake │ │ │ │ │ ├── CompilerIdC │ │ │ │ │ │ ├── CMakeCCompilerId.c │ │ │ │ │ │ └── a.out │ │ │ │ │ └── CompilerIdCXX │ │ │ │ │ │ ├── CMakeCXXCompilerId.cpp │ │ │ │ │ │ └── a.out │ │ │ │ ├── CMakeDirectoryInformation.cmake │ │ │ │ ├── CMakeError.log │ │ │ │ ├── CMakeOutput.log │ │ │ │ ├── CMakeRuleHashes.txt │ │ │ │ ├── Makefile.cmake │ │ │ │ ├── Makefile2 │ │ │ │ ├── SA.dir │ │ │ │ │ ├── CXX.includecache │ │ │ │ │ ├── DependInfo.cmake │ │ │ │ │ ├── SA_generated_sa.cu.o │ │ │ │ │ ├── SA_generated_sa.cu.o.cmake │ │ │ │ │ ├── SA_generated_sa.cu.o.cmake.pre-gen │ │ │ │ │ ├── SA_generated_sa.cu.o.depend │ │ │ │ │ ├── build.make │ │ │ │ │ ├── cmake_clean.cmake │ │ │ │ │ ├── depend.internal │ │ │ │ │ ├── depend.make │ │ │ │ │ ├── flags.make │ │ │ │ │ ├── link.txt │ │ │ │ │ ├── progress.make │ │ │ │ │ └── sa_ext.cpp.o │ │ │ │ ├── TargetDirectories.txt │ │ │ │ ├── cmake.check_cache │ │ │ │ ├── feature_tests.bin │ │ │ │ ├── feature_tests.c │ │ │ │ ├── feature_tests.cxx │ │ │ │ └── progress.marks │ │ │ ├── Makefile │ │ │ ├── SA │ │ │ ├── cmake_install.cmake │ │ │ ├── detect_cuda_compute_capabilities.cpp │ │ │ └── detect_cuda_version.cc │ │ ├── reference.cpp │ │ ├── reference.h │ │ ├── sa.cu │ │ ├── sa.cu.bak │ │ ├── sa_ext.cpp │ │ ├── self_cuda │ │ │ └── function.py │ │ ├── timer.h │ │ └── utils.h │ ├── build │ │ ├── lib.linux-x86_64-3.6 │ │ │ └── self_cuda_backend.cpython-36m-x86_64-linux-gnu.so │ │ └── temp.linux-x86_64-3.6 │ │ │ └── SelfAttention_Module │ │ │ ├── reference.o │ │ │ ├── sa.o │ │ │ └── sa_ext.o │ ├── dist │ │ └── self_cuda-0.0.0-py3.6-linux-x86_64.egg │ ├── self_cuda.egg-info │ │ ├── PKG-INFO │ │ ├── SOURCES.txt │ │ ├── dependency_links.txt │ │ └── top_level.txt │ └── setup.py ├── __init__.py ├── lightrfb.py ├── mobilenetv3_pretrain.py ├── mobilenetv3temporal_PCSA.py └── statedict │ └── mobilenetv3-large.pth ├── README.md ├── build.sh ├── config.py ├── finetune.sh ├── finetune_temporal_distribute.py ├── pretrain.sh ├── pretrain_distribute.py ├── speed.py └── utils ├── Distribute └── engine.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__ 2 | .idea/ 3 | 4 | -------------------------------------------------------------------------------- /Data/['DUTS-TR']_statistics.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Data/['DUTS-TR']_statistics.pth -------------------------------------------------------------------------------- /Data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Data/__init__.py -------------------------------------------------------------------------------- /Data/dataloader.py: -------------------------------------------------------------------------------- 1 | import os 2 | from torch.utils.data import 
Dataset,DataLoader 3 | from Data.preprocess import * 4 | from PIL import Image 5 | import torch 6 | from config import config 7 | import glob 8 | ##pretrain_dataset 9 | class Pretrain(Dataset): 10 | def __init__(self,img_dataset_list,video_dataset_list,transform): 11 | self.file_list=[] 12 | for dataset in img_dataset_list: 13 | listfile=os.path.join(config.img_dataset_root,dataset+'.lst') 14 | with open(listfile,"r") as f: 15 | file_list=f.readlines() 16 | file_list=[filename.strip() for filename in file_list] 17 | self.file_list.extend([(os.path.join(config.img_dataset_root,filename.split(' ')[0][1:]),os.path.join(config.img_dataset_root,filename.split(' ')[1][1:])) for filename in file_list]) 18 | 19 | for dataset in video_dataset_list: 20 | path=os.path.join(config.video_dataset_root,dataset) 21 | video_list=glob.glob(path+"/**/*.jpg",recursive=True) 22 | self.file_list.extend([(filepath,filepath.replace("Imgs","ground-truth").replace("jpg","png")) for filepath in video_list]) 23 | 24 | self.img_label_transform = transform 25 | 26 | def __getitem__(self, idx): 27 | img_path, label_path = self.file_list[idx] 28 | img = Image.open(img_path).convert('RGB') 29 | label = Image.open(label_path).convert('L') 30 | img, label = self._process(img, label) 31 | return img,label 32 | 33 | def _process(self, img, label): 34 | img, label = self.img_label_transform(img, label) 35 | return img, label 36 | 37 | def __len__(self): 38 | return len(self.file_list) 39 | 40 | def get_pretrain_loader(): 41 | statistics = torch.load(config.data_statistics) 42 | trsf_main=Compose_imglabel([ 43 | Resize(config.size[0],config.size[1]), 44 | Random_crop_Resize(15), 45 | Random_horizontal_flip(0.5), 46 | toTensor(), 47 | Normalize(statistics["mean"],statistics["std"]) 48 | ]) 49 | trsf_scale1 = Compose_imglabel([ 50 | Resize(int(config.size[0] * 1.5), int(config.size[1] * 1.5)), 51 | # ColorAug(), 52 | Random_crop_Resize(50), 53 | Random_horizontal_flip(0.5), 54 | toTensor(), 55 | Normalize(statistics["mean"], statistics["std"]) 56 | ]) 57 | trsf_scale2 = Compose_imglabel([ 58 | Resize(int(config.size[0] * 1.25), int(config.size[1] * 1.25)), 59 | Random_crop_Resize(25), 60 | Random_horizontal_flip(0.5), 61 | toTensor(), 62 | Normalize(statistics["mean"], statistics["std"]) 63 | ]) 64 | trsf_scale3 = Compose_imglabel([ 65 | Resize(int(config.size[0] * 0.75), int(config.size[1] * 0.75)), 66 | Random_crop_Resize(15), 67 | Random_horizontal_flip(0.5), 68 | toTensor(), 69 | Normalize(statistics["mean"], statistics["std"]) 70 | ]) 71 | 72 | trsf_scale4 = Compose_imglabel([ 73 | Resize(int(config.size[0] * 0.5), int(config.size[1] * 0.5)), 74 | Random_horizontal_flip(0.5), 75 | toTensor(), 76 | Normalize(statistics["mean"], statistics["std"]) 77 | ]) 78 | train_loader=DataLoader(Pretrain(config.img_dataset_list,config.video_dataset_list,transform=trsf_main),batch_size=config.pretrain_batchsize,shuffle=True,drop_last=False,num_workers=8) 79 | multicale_loader=[ 80 | DataLoader(Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale1), 81 | batch_size=config.pretrain_batchsize, shuffle=True, drop_last=False,num_workers=8), 82 | DataLoader(Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale2), 83 | batch_size=config.pretrain_batchsize, shuffle=True, drop_last=False,num_workers=8), 84 | DataLoader(Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale3), 85 | batch_size=config.pretrain_batchsize, shuffle=True, drop_last=False,num_workers=8), 86 
| DataLoader(Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale4), 87 | batch_size=config.pretrain_batchsize, shuffle=True, drop_last=False,num_workers=8) 88 | ] 89 | return train_loader,multicale_loader,statistics 90 | 91 | def get_pretrain_dataset(): 92 | statistics = torch.load(config.data_statistics) 93 | trsf_main=Compose_imglabel([ 94 | Resize(config.size[0],config.size[1]), 95 | Random_crop_Resize(15), 96 | Random_horizontal_flip(0.5), 97 | toTensor(), 98 | Normalize(statistics["mean"],statistics["std"]) 99 | ]) 100 | trsf_scale1 = Compose_imglabel([ 101 | Resize(int(config.size[0] * 1.5), int(config.size[1] * 1.5)), 102 | # ColorAug(), 103 | Random_crop_Resize(50), 104 | Random_horizontal_flip(0.5), 105 | toTensor(), 106 | Normalize(statistics["mean"], statistics["std"]) 107 | ]) 108 | trsf_scale2 = Compose_imglabel([ 109 | Resize(int(config.size[0] * 1.25), int(config.size[1] * 1.25)), 110 | Random_crop_Resize(25), 111 | Random_horizontal_flip(0.5), 112 | toTensor(), 113 | Normalize(statistics["mean"], statistics["std"]) 114 | ]) 115 | trsf_scale3 = Compose_imglabel([ 116 | Resize(int(config.size[0] * 0.75), int(config.size[1] * 0.75)), 117 | Random_crop_Resize(15), 118 | Random_horizontal_flip(0.5), 119 | toTensor(), 120 | Normalize(statistics["mean"], statistics["std"]) 121 | ]) 122 | 123 | trsf_scale4 = Compose_imglabel([ 124 | Resize(int(config.size[0] * 0.5), int(config.size[1] * 0.5)), 125 | Random_horizontal_flip(0.5), 126 | toTensor(), 127 | Normalize(statistics["mean"], statistics["std"]) 128 | ]) 129 | train_loader=Pretrain(config.img_dataset_list,config.video_dataset_list,transform=trsf_main) 130 | multicale_loader=[ 131 | Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale1), 132 | Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale2), 133 | Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale3), 134 | Pretrain(config.img_dataset_list, config.video_dataset_list, transform=trsf_scale4) 135 | ] 136 | return train_loader,multicale_loader,statistics 137 | 138 | class VideoDataset(Dataset): 139 | def __init__(self,video_dataset_list,transform=None,time_interval=1): 140 | super(VideoDataset, self).__init__() 141 | self.video_filelist=video_dataset_list 142 | self.time_clips=config.video_time_clips 143 | self.video_train_list = [] 144 | 145 | for video_name in video_dataset_list: 146 | video_root=os.path.join(config.video_dataset_root,video_name) 147 | cls_list=os.listdir(video_root) 148 | self.video_filelist={} 149 | for cls in cls_list: 150 | self.video_filelist[cls]=[] 151 | cls_path=os.path.join(video_root,cls) 152 | cls_img_path=os.path.join(cls_path,"Imgs") 153 | cls_label_path=os.path.join(cls_path,"ground-truth") 154 | tmp_list=os.listdir(cls_img_path) 155 | tmp_list.sort() 156 | for filename in tmp_list: 157 | self.video_filelist[cls].append(( 158 | os.path.join(cls_img_path,filename), 159 | os.path.join(cls_label_path,filename.replace(".jpg",".png")) 160 | )) 161 | #emsemble 162 | for cls in cls_list: 163 | li=self.video_filelist[cls] 164 | for begin in range(len(li)-(self.time_clips-1)*time_interval): 165 | batch_clips=[] 166 | for t in range(self.time_clips): 167 | batch_clips.append(li[begin+time_interval*t]) 168 | self.video_train_list.append(batch_clips) 169 | self.img_label_transform=transform 170 | 171 | def __getitem__(self, idx): 172 | img_label_li = self.video_train_list[idx] 173 | IMG = None 174 | LABEL = None 175 | img_li=[] 176 | 
label_li=[] 177 | for idx, (img_path, label_path) in enumerate(img_label_li): 178 | img = Image.open(img_path).convert('RGB') 179 | label = Image.open(label_path).convert('L') 180 | img_li.append(img) 181 | label_li.append(label) 182 | img_li,label_li=self.img_label_transform(img_li,label_li) 183 | for idx,(img,label) in enumerate(zip(img_li,label_li)): 184 | if IMG is not None: 185 | IMG[idx,:,:,:]=img 186 | LABEL[idx,:,:,:]=label 187 | else: 188 | IMG=torch.zeros(len(img_li),*(img.shape)) 189 | LABEL=torch.zeros(len(img_li),*(label.shape)) 190 | IMG[idx, :, :, :] = img 191 | LABEL[idx, :, :, :] = label 192 | return IMG, LABEL 193 | 194 | def __len__(self): 195 | return len(self.video_train_list) 196 | 197 | def get_video_loader(): 198 | statistics = torch.load(config.data_statistics) 199 | trsf_main = Compose_imglabel([ 200 | Resize_video(config.size[0], config.size[1]), 201 | Random_crop_Resize_Video(7), 202 | Random_horizontal_flip_video(0.5), 203 | toTensor_video(), 204 | Normalize_video(statistics["mean"], statistics["std"]) 205 | ]) 206 | 207 | trsf_scale1 = Compose_imglabel([ 208 | Resize_video(int(config.size[0]), int(config.size[1])), 209 | Random_crop_Resize_Video(22), 210 | Random_horizontal_flip_video(0.5), 211 | toTensor_video(), 212 | Normalize_video(statistics["mean"], statistics["std"]) 213 | ]) 214 | 215 | train_loader = DataLoader(VideoDataset(config.video_dataset_list, transform=trsf_main, time_interval=1), 216 | batch_size=config.video_batchsize, shuffle=True, num_workers=8) 217 | multiscale_loader = [ 218 | DataLoader(VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=1), 219 | batch_size=config.video_batchsize, 220 | shuffle=True, num_workers=8), 221 | DataLoader(VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=2), 222 | batch_size=config.video_batchsize, 223 | shuffle=True, num_workers=8), 224 | DataLoader(VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=3), 225 | batch_size=config.video_batchsize, 226 | shuffle=True, num_workers=8), 227 | DataLoader(VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=4), 228 | batch_size=config.video_batchsize, 229 | shuffle=True, num_workers=8), 230 | DataLoader(VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=5), 231 | batch_size=config.video_batchsize, 232 | shuffle=True, num_workers=8), 233 | DataLoader(VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=6), 234 | batch_size=config.video_batchsize, 235 | shuffle=True, num_workers=8)] 236 | return train_loader,multiscale_loader,statistics 237 | 238 | def get_video_dataset(): 239 | statistics = torch.load(config.data_statistics) 240 | trsf_main = Compose_imglabel([ 241 | Resize_video(config.size[0], config.size[1]), 242 | Random_crop_Resize_Video(7), 243 | Random_horizontal_flip_video(0.5), 244 | toTensor_video(), 245 | Normalize_video(statistics["mean"], statistics["std"]) 246 | ]) 247 | 248 | trsf_scale1 = Compose_imglabel([ 249 | Resize_video(int(config.size[0]), int(config.size[1])), 250 | Random_crop_Resize_Video(22), 251 | Random_horizontal_flip_video(0.5), 252 | toTensor_video(), 253 | Normalize_video(statistics["mean"], statistics["std"]) 254 | ]) 255 | 256 | train_loader = VideoDataset(config.video_dataset_list, transform=trsf_main, time_interval=1) 257 | multiscale_loader = [ 258 | VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=1), 259 | VideoDataset(config.video_dataset_list, 
transform=trsf_scale1, time_interval=2), 260 | VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=3), 261 | VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=4), 262 | VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=5), 263 | VideoDataset(config.video_dataset_list, transform=trsf_scale1, time_interval=6)] 264 | return train_loader,multiscale_loader,statistics 265 | 266 | if __name__=="__main__": 267 | pass -------------------------------------------------------------------------------- /Data/preprocess.py: -------------------------------------------------------------------------------- 1 | from PIL import Image 2 | import random 3 | from torchvision.transforms import Normalize as torchnorm 4 | from torchvision.transforms import ToTensor as torchtotensor 5 | class Compose_imglabel(object): 6 | def __init__(self, transforms): 7 | self.transforms = transforms 8 | 9 | def __call__(self, img,label): 10 | for t in self.transforms: 11 | img,label = t(img,label) 12 | return img,label 13 | 14 | class Random_vertical_flip(object): 15 | def _vertical_flip(self,img,label): 16 | return img.transpose(Image.FLIP_TOP_BOTTOM),label.transpose(Image.FLIP_TOP_BOTTOM) 17 | def __init__(self,prob): 18 | ''' 19 | :param prob: should be (0,1) 20 | ''' 21 | assert prob>=0 and prob<=1,"prob should be [0,1]" 22 | self.prob=prob 23 | def __call__(self, img,label): 24 | ''' 25 | flip img and label simultaneously 26 | :param img:should be PIL image 27 | :param label:should be PIL image 28 | :return: 29 | ''' 30 | assert isinstance(img, Image.Image),"should be PIL image" 31 | assert isinstance(label, Image.Image),"should be PIL image" 32 | if random.random()<self.prob: 45 | assert prob>=0 and prob<=1,"prob should be [0,1]" 46 | self.prob=prob 47 | 48 | def __call__(self, img,label): 49 | ''' 50 | flip img and label simultaneously 51 | :param img:should be PIL image 52 | :param label:should be PIL image 53 | :return: 54 | ''' 55 | assert isinstance(img, Image.Image),"should be PIL image" 56 | assert isinstance(label, Image.Image),"should be PIL image" 57 | if random.random()<self.prob: 155 | assert prob>=0 and prob<=1,"prob should be [0,1]" 156 | self.prob=prob 157 | 158 | def __call__(self, imgs,labels): 159 | ''' 160 | flip img and label simultaneously 161 | :param img:should be PIL image 162 | :param label:should be PIL image 163 | :return: 164 | ''' 165 | if random.random()<self.prob: -------------------------------------------------------------------------------- /Models/PCSA/.cproject: 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /Models/PCSA/.project: 1 | 2 | 3 | plocaltnonlocal 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/.cproject: 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 
18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | selfatt 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.cdt.managedbuilder.core.genmakebuilder 10 | clean,full,incremental, 11 | 12 | 13 | 14 | 15 | org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder 16 | full,incremental, 17 | 18 | 19 | 20 | 21 | 22 | org.eclipse.cdt.core.cnature 23 | org.eclipse.cdt.core.ccnature 24 | org.eclipse.cdt.managedbuilder.core.managedBuildNature 25 | org.eclipse.cdt.managedbuilder.core.ScannerConfigNature 26 | 27 | 28 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | ############################################################################ 2 | # 2019-08-05 3 | # Guyuchao 4 | ############################################################################ 5 | cmake_minimum_required(VERSION 3.0 FATAL_ERROR) 6 | project(SA) 7 | 8 | set(Torch_DIR /home/guyuchao/software/libtorch/share/cmake/Torch) 9 | 10 | find_package(Torch REQUIRED) 11 | find_package(CUDA REQUIRED) 12 | 13 | set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") 14 | set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") 15 | 16 | if(CUDA_FOUND) 17 | # add -Wextra compiler flag for gcc compilations 18 | if (UNIX) 19 | set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler -D_GLIBCXX_USE_CXX11_ABI=0") 20 | endif (UNIX) 21 | 22 | # add debugging to CUDA NVCC flags. For NVidia's NSight tools. 
23 | set(CUDA_NVCC_FLAGS_DEBUG ${CUDA_NVCC_FLAGS_DEBUG} "-G") 24 | 25 | file( GLOB cu *.cu) 26 | file( GLOB hdr *.hpp *.h ) 27 | SET (CPP_FILES sa_ext.cpp) 28 | CUDA_ADD_EXECUTABLE(SA ${CPP_FILES} ${cu} ${hdr}) 29 | target_link_libraries(SA "${TORCH_LIBRARIES}") 30 | set_property(TARGET SA PROPERTY CXX_STANDARD 11) 31 | else(CUDA_FOUND) 32 | message("CUDA is not found!") 33 | endif() 34 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CMakeCCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_C_COMPILER "/usr/bin/cc") 2 | set(CMAKE_C_COMPILER_ARG1 "") 3 | set(CMAKE_C_COMPILER_ID "GNU") 4 | set(CMAKE_C_COMPILER_VERSION "7.4.0") 5 | set(CMAKE_C_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_C_COMPILER_WRAPPER "") 7 | set(CMAKE_C_STANDARD_COMPUTED_DEFAULT "11") 8 | set(CMAKE_C_COMPILE_FEATURES "c_std_90;c_function_prototypes;c_std_99;c_restrict;c_variadic_macros;c_std_11;c_static_assert") 9 | set(CMAKE_C90_COMPILE_FEATURES "c_std_90;c_function_prototypes") 10 | set(CMAKE_C99_COMPILE_FEATURES "c_std_99;c_restrict;c_variadic_macros") 11 | set(CMAKE_C11_COMPILE_FEATURES "c_std_11;c_static_assert") 12 | 13 | set(CMAKE_C_PLATFORM_ID "Linux") 14 | set(CMAKE_C_SIMULATE_ID "") 15 | set(CMAKE_C_SIMULATE_VERSION "") 16 | 17 | 18 | 19 | set(CMAKE_AR "/usr/bin/ar") 20 | set(CMAKE_C_COMPILER_AR "/usr/bin/gcc-ar-7") 21 | set(CMAKE_RANLIB "/usr/bin/ranlib") 22 | set(CMAKE_C_COMPILER_RANLIB "/usr/bin/gcc-ranlib-7") 23 | set(CMAKE_LINKER "/usr/bin/ld") 24 | set(CMAKE_COMPILER_IS_GNUCC 1) 25 | set(CMAKE_C_COMPILER_LOADED 1) 26 | set(CMAKE_C_COMPILER_WORKS TRUE) 27 | set(CMAKE_C_ABI_COMPILED TRUE) 28 | set(CMAKE_COMPILER_IS_MINGW ) 29 | set(CMAKE_COMPILER_IS_CYGWIN ) 30 | if(CMAKE_COMPILER_IS_CYGWIN) 31 | set(CYGWIN 1) 32 | set(UNIX 1) 33 | endif() 34 | 35 | set(CMAKE_C_COMPILER_ENV_VAR "CC") 36 | 37 | if(CMAKE_COMPILER_IS_MINGW) 38 | set(MINGW 1) 39 | endif() 40 | set(CMAKE_C_COMPILER_ID_RUN 1) 41 | set(CMAKE_C_SOURCE_FILE_EXTENSIONS c;m) 42 | set(CMAKE_C_IGNORE_EXTENSIONS h;H;o;O;obj;OBJ;def;DEF;rc;RC) 43 | set(CMAKE_C_LINKER_PREFERENCE 10) 44 | 45 | # Save compiler ABI information. 
46 | set(CMAKE_C_SIZEOF_DATA_PTR "8") 47 | set(CMAKE_C_COMPILER_ABI "ELF") 48 | set(CMAKE_C_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 49 | 50 | if(CMAKE_C_SIZEOF_DATA_PTR) 51 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_C_SIZEOF_DATA_PTR}") 52 | endif() 53 | 54 | if(CMAKE_C_COMPILER_ABI) 55 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_C_COMPILER_ABI}") 56 | endif() 57 | 58 | if(CMAKE_C_LIBRARY_ARCHITECTURE) 59 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 60 | endif() 61 | 62 | set(CMAKE_C_CL_SHOWINCLUDES_PREFIX "") 63 | if(CMAKE_C_CL_SHOWINCLUDES_PREFIX) 64 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_C_CL_SHOWINCLUDES_PREFIX}") 65 | endif() 66 | 67 | 68 | 69 | 70 | 71 | set(CMAKE_C_IMPLICIT_LINK_LIBRARIES "gcc;gcc_s;c;gcc;gcc_s") 72 | set(CMAKE_C_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/7;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib") 73 | set(CMAKE_C_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 74 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CMakeCXXCompiler.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_CXX_COMPILER "/usr/bin/c++") 2 | set(CMAKE_CXX_COMPILER_ARG1 "") 3 | set(CMAKE_CXX_COMPILER_ID "GNU") 4 | set(CMAKE_CXX_COMPILER_VERSION "7.4.0") 5 | set(CMAKE_CXX_COMPILER_VERSION_INTERNAL "") 6 | set(CMAKE_CXX_COMPILER_WRAPPER "") 7 | set(CMAKE_CXX_STANDARD_COMPUTED_DEFAULT "14") 8 | set(CMAKE_CXX_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters;cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates;cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates;cxx_std_17") 9 | set(CMAKE_CXX98_COMPILE_FEATURES "cxx_std_98;cxx_template_template_parameters") 10 | set(CMAKE_CXX11_COMPILE_FEATURES 
"cxx_std_11;cxx_alias_templates;cxx_alignas;cxx_alignof;cxx_attributes;cxx_auto_type;cxx_constexpr;cxx_decltype;cxx_decltype_incomplete_return_types;cxx_default_function_template_args;cxx_defaulted_functions;cxx_defaulted_move_initializers;cxx_delegating_constructors;cxx_deleted_functions;cxx_enum_forward_declarations;cxx_explicit_conversions;cxx_extended_friend_declarations;cxx_extern_templates;cxx_final;cxx_func_identifier;cxx_generalized_initializers;cxx_inheriting_constructors;cxx_inline_namespaces;cxx_lambdas;cxx_local_type_template_args;cxx_long_long_type;cxx_noexcept;cxx_nonstatic_member_init;cxx_nullptr;cxx_override;cxx_range_for;cxx_raw_string_literals;cxx_reference_qualified_functions;cxx_right_angle_brackets;cxx_rvalue_references;cxx_sizeof_member;cxx_static_assert;cxx_strong_enums;cxx_thread_local;cxx_trailing_return_types;cxx_unicode_literals;cxx_uniform_initialization;cxx_unrestricted_unions;cxx_user_literals;cxx_variadic_macros;cxx_variadic_templates") 11 | set(CMAKE_CXX14_COMPILE_FEATURES "cxx_std_14;cxx_aggregate_default_initializers;cxx_attribute_deprecated;cxx_binary_literals;cxx_contextual_conversions;cxx_decltype_auto;cxx_digit_separators;cxx_generic_lambdas;cxx_lambda_init_captures;cxx_relaxed_constexpr;cxx_return_type_deduction;cxx_variable_templates") 12 | set(CMAKE_CXX17_COMPILE_FEATURES "cxx_std_17") 13 | 14 | set(CMAKE_CXX_PLATFORM_ID "Linux") 15 | set(CMAKE_CXX_SIMULATE_ID "") 16 | set(CMAKE_CXX_SIMULATE_VERSION "") 17 | 18 | 19 | 20 | set(CMAKE_AR "/usr/bin/ar") 21 | set(CMAKE_CXX_COMPILER_AR "/usr/bin/gcc-ar-7") 22 | set(CMAKE_RANLIB "/usr/bin/ranlib") 23 | set(CMAKE_CXX_COMPILER_RANLIB "/usr/bin/gcc-ranlib-7") 24 | set(CMAKE_LINKER "/usr/bin/ld") 25 | set(CMAKE_COMPILER_IS_GNUCXX 1) 26 | set(CMAKE_CXX_COMPILER_LOADED 1) 27 | set(CMAKE_CXX_COMPILER_WORKS TRUE) 28 | set(CMAKE_CXX_ABI_COMPILED TRUE) 29 | set(CMAKE_COMPILER_IS_MINGW ) 30 | set(CMAKE_COMPILER_IS_CYGWIN ) 31 | if(CMAKE_COMPILER_IS_CYGWIN) 32 | set(CYGWIN 1) 33 | set(UNIX 1) 34 | endif() 35 | 36 | set(CMAKE_CXX_COMPILER_ENV_VAR "CXX") 37 | 38 | if(CMAKE_COMPILER_IS_MINGW) 39 | set(MINGW 1) 40 | endif() 41 | set(CMAKE_CXX_COMPILER_ID_RUN 1) 42 | set(CMAKE_CXX_IGNORE_EXTENSIONS inl;h;hpp;HPP;H;o;O;obj;OBJ;def;DEF;rc;RC) 43 | set(CMAKE_CXX_SOURCE_FILE_EXTENSIONS C;M;c++;cc;cpp;cxx;mm;CPP) 44 | set(CMAKE_CXX_LINKER_PREFERENCE 30) 45 | set(CMAKE_CXX_LINKER_PREFERENCE_PROPAGATES 1) 46 | 47 | # Save compiler ABI information. 
48 | set(CMAKE_CXX_SIZEOF_DATA_PTR "8") 49 | set(CMAKE_CXX_COMPILER_ABI "ELF") 50 | set(CMAKE_CXX_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 51 | 52 | if(CMAKE_CXX_SIZEOF_DATA_PTR) 53 | set(CMAKE_SIZEOF_VOID_P "${CMAKE_CXX_SIZEOF_DATA_PTR}") 54 | endif() 55 | 56 | if(CMAKE_CXX_COMPILER_ABI) 57 | set(CMAKE_INTERNAL_PLATFORM_ABI "${CMAKE_CXX_COMPILER_ABI}") 58 | endif() 59 | 60 | if(CMAKE_CXX_LIBRARY_ARCHITECTURE) 61 | set(CMAKE_LIBRARY_ARCHITECTURE "x86_64-linux-gnu") 62 | endif() 63 | 64 | set(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX "") 65 | if(CMAKE_CXX_CL_SHOWINCLUDES_PREFIX) 66 | set(CMAKE_CL_SHOWINCLUDES_PREFIX "${CMAKE_CXX_CL_SHOWINCLUDES_PREFIX}") 67 | endif() 68 | 69 | 70 | 71 | 72 | 73 | set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "stdc++;m;gcc_s;gcc;c;gcc_s;gcc") 74 | set(CMAKE_CXX_IMPLICIT_LINK_DIRECTORIES "/usr/lib/gcc/x86_64-linux-gnu/7;/usr/lib/x86_64-linux-gnu;/usr/lib;/lib/x86_64-linux-gnu;/lib") 75 | set(CMAKE_CXX_IMPLICIT_LINK_FRAMEWORK_DIRECTORIES "") 76 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CMakeDetermineCompilerABI_C.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CMakeDetermineCompilerABI_C.bin -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CMakeDetermineCompilerABI_CXX.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CMakeDetermineCompilerABI_CXX.bin -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CMakeSystem.cmake: -------------------------------------------------------------------------------- 1 | set(CMAKE_HOST_SYSTEM "Linux-5.0.0-23-generic") 2 | set(CMAKE_HOST_SYSTEM_NAME "Linux") 3 | set(CMAKE_HOST_SYSTEM_VERSION "5.0.0-23-generic") 4 | set(CMAKE_HOST_SYSTEM_PROCESSOR "x86_64") 5 | 6 | 7 | 8 | set(CMAKE_SYSTEM "Linux-5.0.0-23-generic") 9 | set(CMAKE_SYSTEM_NAME "Linux") 10 | set(CMAKE_SYSTEM_VERSION "5.0.0-23-generic") 11 | set(CMAKE_SYSTEM_PROCESSOR "x86_64") 12 | 13 | set(CMAKE_CROSSCOMPILING "FALSE") 14 | 15 | set(CMAKE_SYSTEM_LOADED 1) 16 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CompilerIdC/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CompilerIdC/a.out -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CompilerIdCXX/a.out: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/3.10.2/CompilerIdCXX/a.out -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/CMakeDirectoryInformation.cmake: 
-------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.10 3 | 4 | # Relative path conversion top directories. 5 | set(CMAKE_RELATIVE_PATH_TOP_SOURCE "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module") 6 | set(CMAKE_RELATIVE_PATH_TOP_BINARY "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild") 7 | 8 | # Force unix paths in dependencies. 9 | set(CMAKE_FORCE_UNIX_PATHS 1) 10 | 11 | 12 | # The C and CXX include file regular expressions for this directory. 13 | set(CMAKE_C_INCLUDE_REGEX_SCAN "^.*$") 14 | set(CMAKE_C_INCLUDE_REGEX_COMPLAIN "^$") 15 | set(CMAKE_CXX_INCLUDE_REGEX_SCAN ${CMAKE_C_INCLUDE_REGEX_SCAN}) 16 | set(CMAKE_CXX_INCLUDE_REGEX_COMPLAIN ${CMAKE_C_INCLUDE_REGEX_COMPLAIN}) 17 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/CMakeError.log: -------------------------------------------------------------------------------- 1 | Determining if the pthread_create exist failed with the following output: 2 | Change Dir: /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp 3 | 4 | Run Build Command:"/usr/bin/make" "cmTC_38ddb/fast" 5 | /usr/bin/make -f CMakeFiles/cmTC_38ddb.dir/build.make CMakeFiles/cmTC_38ddb.dir/build 6 | make[1]: 进入目录“/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp” 7 | Building C object CMakeFiles/cmTC_38ddb.dir/CheckSymbolExists.c.o 8 | /usr/bin/cc -o CMakeFiles/cmTC_38ddb.dir/CheckSymbolExists.c.o -c /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp/CheckSymbolExists.c 9 | Linking C executable cmTC_38ddb 10 | /usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_38ddb.dir/link.txt --verbose=1 11 | /usr/bin/cc -rdynamic CMakeFiles/cmTC_38ddb.dir/CheckSymbolExists.c.o -o cmTC_38ddb 12 | CMakeFiles/cmTC_38ddb.dir/CheckSymbolExists.c.o:在函数‘main’中: 13 | CheckSymbolExists.c:(.text+0x1b):对‘pthread_create’未定义的引用 14 | collect2: error: ld returned 1 exit status 15 | CMakeFiles/cmTC_38ddb.dir/build.make:97: recipe for target 'cmTC_38ddb' failed 16 | make[1]: *** [cmTC_38ddb] Error 1 17 | make[1]: 离开目录“/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp” 18 | Makefile:126: recipe for target 'cmTC_38ddb/fast' failed 19 | make: *** [cmTC_38ddb/fast] Error 2 20 | 21 | File /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp/CheckSymbolExists.c: 22 | /* */ 23 | #include 24 | 25 | int main(int argc, char** argv) 26 | { 27 | (void)argv; 28 | #ifndef pthread_create 29 | return ((int*)(&pthread_create))[argc]; 30 | #else 31 | (void)argc; 32 | return 0; 33 | #endif 34 | } 35 | 36 | Determining if the function pthread_create exists in the pthreads failed with the following output: 37 | Change Dir: /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp 38 | 39 | Run Build Command:"/usr/bin/make" "cmTC_8df24/fast" 40 | /usr/bin/make -f CMakeFiles/cmTC_8df24.dir/build.make CMakeFiles/cmTC_8df24.dir/build 41 | make[1]: 
进入目录“/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp” 42 | Building C object CMakeFiles/cmTC_8df24.dir/CheckFunctionExists.c.o 43 | /usr/bin/cc -DCHECK_FUNCTION_EXISTS=pthread_create -o CMakeFiles/cmTC_8df24.dir/CheckFunctionExists.c.o -c /usr/share/cmake-3.10/Modules/CheckFunctionExists.c 44 | Linking C executable cmTC_8df24 45 | /usr/bin/cmake -E cmake_link_script CMakeFiles/cmTC_8df24.dir/link.txt --verbose=1 46 | /usr/bin/cc -DCHECK_FUNCTION_EXISTS=pthread_create -rdynamic CMakeFiles/cmTC_8df24.dir/CheckFunctionExists.c.o -o cmTC_8df24 -lpthreads 47 | /usr/bin/ld: 找不到 -lpthreads 48 | collect2: error: ld returned 1 exit status 49 | CMakeFiles/cmTC_8df24.dir/build.make:97: recipe for target 'cmTC_8df24' failed 50 | make[1]: *** [cmTC_8df24] Error 1 51 | make[1]: 离开目录“/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/CMakeTmp” 52 | Makefile:126: recipe for target 'cmTC_8df24/fast' failed 53 | make: *** [cmTC_8df24/fast] Error 2 54 | 55 | 56 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/CMakeRuleHashes.txt: -------------------------------------------------------------------------------- 1 | # Hashes of file build rules. 2 | 51619ed7518b028b471246c895ad49fc CMakeFiles/SA.dir/SA_generated_sa.cu.o 3 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/Makefile.cmake: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.10 3 | 4 | # The generator used is: 5 | set(CMAKE_DEPENDS_GENERATOR "Unix Makefiles") 6 | 7 | # The top level Makefile was generated from the following files: 8 | set(CMAKE_MAKEFILE_DEPENDS 9 | "CMakeCache.txt" 10 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Caffe2Config.cmake" 11 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Caffe2ConfigVersion.cmake" 12 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Caffe2Targets-release.cmake" 13 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Caffe2Targets.cmake" 14 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/FindCUDA.cmake" 15 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/CMakeInitializeConfigs.cmake" 16 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA.cmake" 17 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/run_nvcc.cmake" 18 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/select_compute_arch.cmake" 19 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindPackageHandleStandardArgs.cmake" 20 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindPackageMessage.cmake" 21 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/public/cuda.cmake" 22 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/public/mkl.cmake" 23 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/public/mkldnn.cmake" 24 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/public/threads.cmake" 25 | "/home/guyuchao/software/libtorch/share/cmake/Caffe2/public/utils.cmake" 26 | "/home/guyuchao/software/libtorch/share/cmake/Torch/TorchConfig.cmake" 27 | 
"/home/guyuchao/software/libtorch/share/cmake/Torch/TorchConfigVersion.cmake" 28 | "../CMakeLists.txt" 29 | "CMakeFiles/3.10.2/CMakeCCompiler.cmake" 30 | "CMakeFiles/3.10.2/CMakeCXXCompiler.cmake" 31 | "CMakeFiles/3.10.2/CMakeSystem.cmake" 32 | "CMakeFiles/SA.dir/SA_generated_sa.cu.o.cmake.pre-gen" 33 | "CMakeFiles/SA.dir/SA_generated_sa.cu.o.depend" 34 | "detect_cuda_compute_capabilities.cpp" 35 | "detect_cuda_version.cc" 36 | "/usr/share/cmake-3.10/Modules/CMakeCInformation.cmake" 37 | "/usr/share/cmake-3.10/Modules/CMakeCXXInformation.cmake" 38 | "/usr/share/cmake-3.10/Modules/CMakeCommonLanguageInclude.cmake" 39 | "/usr/share/cmake-3.10/Modules/CMakeGenericSystem.cmake" 40 | "/usr/share/cmake-3.10/Modules/CMakeLanguageInformation.cmake" 41 | "/usr/share/cmake-3.10/Modules/CMakeSystemSpecificInformation.cmake" 42 | "/usr/share/cmake-3.10/Modules/CMakeSystemSpecificInitialize.cmake" 43 | "/usr/share/cmake-3.10/Modules/CheckIncludeFile.cmake" 44 | "/usr/share/cmake-3.10/Modules/CheckLibraryExists.cmake" 45 | "/usr/share/cmake-3.10/Modules/CheckSymbolExists.cmake" 46 | "/usr/share/cmake-3.10/Modules/Compiler/CMakeCommonCompilerMacros.cmake" 47 | "/usr/share/cmake-3.10/Modules/Compiler/GNU-C.cmake" 48 | "/usr/share/cmake-3.10/Modules/Compiler/GNU-CXX.cmake" 49 | "/usr/share/cmake-3.10/Modules/Compiler/GNU.cmake" 50 | "/usr/share/cmake-3.10/Modules/FindPackageHandleStandardArgs.cmake" 51 | "/usr/share/cmake-3.10/Modules/FindPackageMessage.cmake" 52 | "/usr/share/cmake-3.10/Modules/FindThreads.cmake" 53 | "/usr/share/cmake-3.10/Modules/Platform/Linux-GNU-C.cmake" 54 | "/usr/share/cmake-3.10/Modules/Platform/Linux-GNU-CXX.cmake" 55 | "/usr/share/cmake-3.10/Modules/Platform/Linux-GNU.cmake" 56 | "/usr/share/cmake-3.10/Modules/Platform/Linux.cmake" 57 | "/usr/share/cmake-3.10/Modules/Platform/UnixPaths.cmake" 58 | ) 59 | 60 | # The corresponding makefile is: 61 | set(CMAKE_MAKEFILE_OUTPUTS 62 | "Makefile" 63 | "CMakeFiles/cmake.check_cache" 64 | ) 65 | 66 | # Byproducts of CMake generate step: 67 | set(CMAKE_MAKEFILE_PRODUCTS 68 | "CMakeFiles/SA.dir/SA_generated_sa.cu.o.cmake.pre-gen" 69 | "CMakeFiles/SA.dir/SA_generated_sa.cu.o.cmake" 70 | "CMakeFiles/CMakeDirectoryInformation.cmake" 71 | ) 72 | 73 | # Dependency information for all targets: 74 | set(CMAKE_DEPEND_INFO_FILES 75 | "CMakeFiles/SA.dir/DependInfo.cmake" 76 | ) 77 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/Makefile2: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.10 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | 7 | .PHONY : default_target 8 | 9 | # The main recursive all target 10 | all: 11 | 12 | .PHONY : all 13 | 14 | # The main recursive preinstall target 15 | preinstall: 16 | 17 | .PHONY : preinstall 18 | 19 | #============================================================================= 20 | # Special targets provided by cmake. 21 | 22 | # Disable implicit rules so canonical targets will work. 23 | .SUFFIXES: 24 | 25 | 26 | # Remove some rules from gmake that .SUFFIXES does not remove. 27 | SUFFIXES = 28 | 29 | .SUFFIXES: .hpux_make_needs_suffix_list 30 | 31 | 32 | # Suppress display of executed commands. 33 | $(VERBOSE).SILENT: 34 | 35 | 36 | # A target that is always out of date. 
37 | cmake_force: 38 | 39 | .PHONY : cmake_force 40 | 41 | #============================================================================= 42 | # Set environment variables for the build. 43 | 44 | # The shell in which to execute make rules. 45 | SHELL = /bin/sh 46 | 47 | # The CMake executable. 48 | CMAKE_COMMAND = /usr/bin/cmake 49 | 50 | # The command to remove a file. 51 | RM = /usr/bin/cmake -E remove -f 52 | 53 | # Escaping for special characters. 54 | EQUALS = = 55 | 56 | # The top-level source directory on which CMake was run. 57 | CMAKE_SOURCE_DIR = /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module 58 | 59 | # The top-level build directory on which CMake was run. 60 | CMAKE_BINARY_DIR = /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild 61 | 62 | #============================================================================= 63 | # Target rules for target CMakeFiles/SA.dir 64 | 65 | # All Build rule for target. 66 | CMakeFiles/SA.dir/all: 67 | $(MAKE) -f CMakeFiles/SA.dir/build.make CMakeFiles/SA.dir/depend 68 | $(MAKE) -f CMakeFiles/SA.dir/build.make CMakeFiles/SA.dir/build 69 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --progress-dir=/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles --progress-num=1,2,3 "Built target SA" 70 | .PHONY : CMakeFiles/SA.dir/all 71 | 72 | # Include target in all. 73 | all: CMakeFiles/SA.dir/all 74 | 75 | .PHONY : all 76 | 77 | # Build rule for subdir invocation for target. 78 | CMakeFiles/SA.dir/rule: cmake_check_build_system 79 | $(CMAKE_COMMAND) -E cmake_progress_start /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles 3 80 | $(MAKE) -f CMakeFiles/Makefile2 CMakeFiles/SA.dir/all 81 | $(CMAKE_COMMAND) -E cmake_progress_start /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles 0 82 | .PHONY : CMakeFiles/SA.dir/rule 83 | 84 | # Convenience name for target. 85 | SA: CMakeFiles/SA.dir/rule 86 | 87 | .PHONY : SA 88 | 89 | # clean rule for target. 90 | CMakeFiles/SA.dir/clean: 91 | $(MAKE) -f CMakeFiles/SA.dir/build.make CMakeFiles/SA.dir/clean 92 | .PHONY : CMakeFiles/SA.dir/clean 93 | 94 | # clean rule for target. 95 | clean: CMakeFiles/SA.dir/clean 96 | 97 | .PHONY : clean 98 | 99 | #============================================================================= 100 | # Special targets to cleanup operation of make. 101 | 102 | # Special rule to run CMake to check the build system integrity. 103 | # No rule that depends on this can have commands that come from listfiles 104 | # because they might be regenerated. 
105 | cmake_check_build_system: 106 | $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 107 | .PHONY : cmake_check_build_system 108 | 109 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/DependInfo.cmake: -------------------------------------------------------------------------------- 1 | # The set of languages for which implicit dependencies are needed: 2 | set(CMAKE_DEPENDS_LANGUAGES 3 | "CXX" 4 | ) 5 | # The set of files for implicit dependencies of each language: 6 | set(CMAKE_DEPENDS_CHECK_CXX 7 | "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/sa_ext.cpp" "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir/sa_ext.cpp.o" 8 | ) 9 | set(CMAKE_CXX_COMPILER_ID "GNU") 10 | 11 | # The include file search paths: 12 | set(CMAKE_CXX_TARGET_INCLUDE_PATH 13 | "/usr/local/cuda-10.0/include" 14 | "/home/guyuchao/software/libtorch/include" 15 | "/home/guyuchao/software/libtorch/include/torch/csrc/api/include" 16 | ) 17 | 18 | # Targets to which this target links. 19 | set(CMAKE_TARGET_LINKED_INFO_FILES 20 | ) 21 | 22 | # Fortran module output directory. 23 | set(CMAKE_Fortran_TARGET_MODULE_DIR "") 24 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/SA_generated_sa.cu.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/SA_generated_sa.cu.o -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/SA_generated_sa.cu.o.cmake: -------------------------------------------------------------------------------- 1 | # James Bigler, NVIDIA Corp (nvidia.com - jbigler) 2 | # 3 | # Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. 4 | # 5 | # This code is licensed under the MIT License. See the FindCUDA.cmake script 6 | # for the text of the license. 7 | 8 | # The MIT License 9 | # 10 | # License for the specific language governing rights and limitations under 11 | # Permission is hereby granted, free of charge, to any person obtaining a 12 | # copy of this software and associated documentation files (the "Software"), 13 | # to deal in the Software without restriction, including without limitation 14 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 15 | # and/or sell copies of the Software, and to permit persons to whom the 16 | # Software is furnished to do so, subject to the following conditions: 17 | # 18 | # The above copyright notice and this permission notice shall be included 19 | # in all copies or substantial portions of the Software. 20 | # 21 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 22 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 24 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 27 | # DEALINGS IN THE SOFTWARE. 
28 | 29 | 30 | ########################################################################## 31 | # This file runs the nvcc commands to produce the desired output file along with 32 | # the dependency file needed by CMake to compute dependencies. In addition the 33 | # file checks the output of each command and if the command fails it deletes the 34 | # output files. 35 | 36 | # Input variables 37 | # 38 | # verbose:BOOL=<> OFF: Be as quiet as possible (default) 39 | # ON : Describe each step 40 | # 41 | # build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or 42 | # RelWithDebInfo, but it should match one of the 43 | # entries in CUDA_HOST_FLAGS. This is the build 44 | # configuration used when compiling the code. If 45 | # blank or unspecified Debug is assumed as this is 46 | # what CMake does. 47 | # 48 | # generated_file:STRING=<> File to generate. This argument must be passed in. 49 | # 50 | # generated_cubin_file:STRING=<> File to generate. This argument must be passed 51 | # in if build_cubin is true. 52 | 53 | cmake_policy(PUSH) 54 | cmake_policy(SET CMP0007 NEW) 55 | if(NOT generated_file) 56 | message(FATAL_ERROR "You must specify generated_file on the command line") 57 | endif() 58 | 59 | # Set these up as variables to make reading the generated file easier 60 | set(CMAKE_COMMAND "/usr/bin/cmake") # path 61 | set(source_file "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/sa.cu") # path 62 | set(NVCC_generated_dependency_file "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//SA_generated_sa.cu.o.NVCC-depend") # path 63 | set(cmake_dependency_file "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//SA_generated_sa.cu.o.depend") # path 64 | set(CUDA_make2cmake "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake") # path 65 | set(CUDA_parse_cubin "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake") # path 66 | set(build_cubin OFF) # bool 67 | set(CUDA_HOST_COMPILER "/usr/bin/cc") # path 68 | # We won't actually use these variables for now, but we need to set this, in 69 | # order to force this file to be run again if it changes. 
70 | set(generated_file_path "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//.") # path 71 | set(generated_file_internal "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//./SA_generated_sa.cu.o") # path 72 | set(generated_cubin_file_internal "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//./SA_generated_sa.cu.o.cubin.txt") # path 73 | 74 | set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda-10.0/bin/nvcc") # path 75 | set(CUDA_NVCC_FLAGS -DONNX_NAMESPACE=onnx_c2;-gencode;arch=compute_61,code=sm_61;-Xcudafe;--diag_suppress=cc_clobber_ignored;-Xcudafe;--diag_suppress=integer_sign_change;-Xcudafe;--diag_suppress=useless_using_declaration;-Xcudafe;--diag_suppress=set_but_not_used;-std=c++11;-Xcompiler;-fPIC;--expt-relaxed-constexpr;--expt-extended-lambda;-Xcompiler -D_GLIBCXX_USE_CXX11_ABI=0 ;; ) # list 76 | # Build specific configuration flags 77 | set(CUDA_NVCC_FLAGS_DEBUG -G ; ) 78 | set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) 79 | set(CUDA_NVCC_FLAGS_RELEASE ; ) 80 | set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) 81 | set(nvcc_flags -m64) # list 82 | set(CUDA_NVCC_INCLUDE_DIRS "/usr/local/cuda-10.0/include;/usr/local/cuda-10.0/include;/home/guyuchao/software/libtorch/include;/home/guyuchao/software/libtorch/include/torch/csrc/api/include;/home/guyuchao/software/libtorch/include;/home/guyuchao/software/libtorch/include;/home/guyuchao/software/libtorch/include;/usr/local/cuda-10.0/include;/home/guyuchao/software/libtorch/include;/usr/local/cuda-10.0/include;/usr/local/cuda-10.0/include;/usr/include;/usr/local/cuda-10.0/include") # list (needs to be in quotes to handle spaces properly). 83 | set(CUDA_NVCC_COMPILE_DEFINITIONS [==[]==]) # list (needs to be in lua quotes see #16510 ). 84 | set(format_flag "-c") # string 85 | set(cuda_language_flag ) # list 86 | 87 | # Clean up list of include directories and add -I flags 88 | list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) 89 | set(CUDA_NVCC_INCLUDE_ARGS) 90 | foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) 91 | # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 92 | list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") 93 | endforeach() 94 | 95 | # Clean up list of compile definitions, add -D flags, and append to nvcc_flags 96 | list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) 97 | foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) 98 | list(APPEND nvcc_flags "-D${def}") 99 | endforeach() 100 | 101 | if(build_cubin AND NOT generated_cubin_file) 102 | message(FATAL_ERROR "You must specify generated_cubin_file on the command line") 103 | endif() 104 | 105 | # This is the list of host compilation flags. It C or CXX should already have 106 | # been chosen by FindCUDA.cmake. 107 | set(CMAKE_HOST_FLAGS -fPIC ) 108 | set(CMAKE_HOST_FLAGS_DEBUG -g) 109 | set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) 110 | set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) 111 | set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) 112 | 113 | # Take the compiler flags and package them up to be sent to the compiler via -Xcompiler 114 | set(nvcc_host_compiler_flags "") 115 | # If we weren't given a build_configuration, use Debug. 
116 | if(NOT build_configuration) 117 | set(build_configuration Debug) 118 | endif() 119 | string(TOUPPER "${build_configuration}" build_configuration) 120 | #message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") 121 | foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) 122 | # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 123 | string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") 124 | endforeach() 125 | if (nvcc_host_compiler_flags) 126 | set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) 127 | endif() 128 | #message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") 129 | # Add the build specific configuration flags 130 | list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) 131 | 132 | # Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority 133 | list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) 134 | list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) 135 | if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) 136 | if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) 137 | set(CCBIN -ccbin "${CCBIN}") 138 | else() 139 | set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") 140 | endif() 141 | endif() 142 | 143 | # cuda_execute_process - Executes a command with optional command echo and status message. 144 | # 145 | # status - Status message to print if verbose is true 146 | # command - COMMAND argument from the usual execute_process argument structure 147 | # ARGN - Remaining arguments are the command with arguments 148 | # 149 | # CUDA_result - return value from running the command 150 | # 151 | # Make this a macro instead of a function, so that things like RESULT_VARIABLE 152 | # and other return variables are present after executing the process. 153 | macro(cuda_execute_process status command) 154 | set(_command ${command}) 155 | if(NOT "x${_command}" STREQUAL "xCOMMAND") 156 | message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") 157 | endif() 158 | if(verbose) 159 | execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) 160 | # Now we need to build up our command string. We are accounting for quotes 161 | # and spaces, anything else is left up to the user to fix if they want to 162 | # copy and paste a runnable command line. 163 | set(cuda_execute_process_string) 164 | foreach(arg ${ARGN}) 165 | # If there are quotes, excape them, so they come through. 166 | string(REPLACE "\"" "\\\"" arg ${arg}) 167 | # Args with spaces need quotes around them to get them to be parsed as a single argument. 168 | if(arg MATCHES " ") 169 | list(APPEND cuda_execute_process_string "\"${arg}\"") 170 | else() 171 | list(APPEND cuda_execute_process_string ${arg}) 172 | endif() 173 | endforeach() 174 | # Echo the command 175 | execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) 176 | endif() 177 | # Run the command 178 | execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) 179 | endmacro() 180 | 181 | # Delete the target file 182 | cuda_execute_process( 183 | "Removing ${generated_file}" 184 | COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" 185 | ) 186 | 187 | # For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag 188 | # for dependency generation and hope for the best. 
189 | set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") 190 | set(CUDA_VERSION 10.0) 191 | if(CUDA_VERSION VERSION_LESS "3.0") 192 | # Note that this will remove all occurrences of -G. 193 | list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") 194 | endif() 195 | 196 | # nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This 197 | # can cause incorrect dependencies when #including files based on this macro which is 198 | # defined in the generating passes of nvcc invocation. We will go ahead and manually 199 | # define this for now until a future version fixes this bug. 200 | set(CUDACC_DEFINE -D__CUDACC__) 201 | 202 | # Generate the dependency file 203 | cuda_execute_process( 204 | "Generating dependency file: ${NVCC_generated_dependency_file}" 205 | COMMAND "${CUDA_NVCC_EXECUTABLE}" 206 | -M 207 | ${CUDACC_DEFINE} 208 | "${source_file}" 209 | -o "${NVCC_generated_dependency_file}" 210 | ${CCBIN} 211 | ${nvcc_flags} 212 | ${nvcc_host_compiler_flags} 213 | ${depends_CUDA_NVCC_FLAGS} 214 | -DNVCC 215 | ${CUDA_NVCC_INCLUDE_ARGS} 216 | ) 217 | 218 | if(CUDA_result) 219 | message(FATAL_ERROR "Error generating ${generated_file}") 220 | endif() 221 | 222 | # Generate the cmake readable dependency file to a temp file. Don't put the 223 | # quotes just around the filenames for the input_file and output_file variables. 224 | # CMake will pass the quotes through and not be able to find the file. 225 | cuda_execute_process( 226 | "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" 227 | COMMAND "${CMAKE_COMMAND}" 228 | -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" 229 | -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" 230 | -D "verbose=${verbose}" 231 | -P "${CUDA_make2cmake}" 232 | ) 233 | 234 | if(CUDA_result) 235 | message(FATAL_ERROR "Error generating ${generated_file}") 236 | endif() 237 | 238 | # Copy the file if it is different 239 | cuda_execute_process( 240 | "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" 241 | COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" 242 | ) 243 | 244 | if(CUDA_result) 245 | message(FATAL_ERROR "Error generating ${generated_file}") 246 | endif() 247 | 248 | # Delete the temporary file 249 | cuda_execute_process( 250 | "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" 251 | COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" 252 | ) 253 | 254 | if(CUDA_result) 255 | message(FATAL_ERROR "Error generating ${generated_file}") 256 | endif() 257 | 258 | # Generate the code 259 | cuda_execute_process( 260 | "Generating ${generated_file}" 261 | COMMAND "${CUDA_NVCC_EXECUTABLE}" 262 | "${source_file}" 263 | ${cuda_language_flag} 264 | ${format_flag} -o "${generated_file}" 265 | ${CCBIN} 266 | ${nvcc_flags} 267 | ${nvcc_host_compiler_flags} 268 | ${CUDA_NVCC_FLAGS} 269 | -DNVCC 270 | ${CUDA_NVCC_INCLUDE_ARGS} 271 | ) 272 | 273 | if(CUDA_result) 274 | # Since nvcc can sometimes leave half done files make sure that we delete the output file. 275 | cuda_execute_process( 276 | "Removing ${generated_file}" 277 | COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" 278 | ) 279 | message(FATAL_ERROR "Error generating file ${generated_file}") 280 | else() 281 | if(verbose) 282 | message("Generated ${generated_file} successfully.") 283 | endif() 284 | endif() 285 | 286 | # Cubin resource report commands. 
287 | if( build_cubin ) 288 | # Run with -cubin to produce resource usage report. 289 | cuda_execute_process( 290 | "Generating ${generated_cubin_file}" 291 | COMMAND "${CUDA_NVCC_EXECUTABLE}" 292 | "${source_file}" 293 | ${CUDA_NVCC_FLAGS} 294 | ${nvcc_flags} 295 | ${CCBIN} 296 | ${nvcc_host_compiler_flags} 297 | -DNVCC 298 | -cubin 299 | -o "${generated_cubin_file}" 300 | ${CUDA_NVCC_INCLUDE_ARGS} 301 | ) 302 | 303 | # Execute the parser script. 304 | cuda_execute_process( 305 | "Executing the parser script" 306 | COMMAND "${CMAKE_COMMAND}" 307 | -D "input_file:STRING=${generated_cubin_file}" 308 | -P "${CUDA_parse_cubin}" 309 | ) 310 | 311 | endif() 312 | 313 | cmake_policy(POP) 314 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/SA_generated_sa.cu.o.cmake.pre-gen: -------------------------------------------------------------------------------- 1 | # James Bigler, NVIDIA Corp (nvidia.com - jbigler) 2 | # 3 | # Copyright (c) 2008 - 2009 NVIDIA Corporation. All rights reserved. 4 | # 5 | # This code is licensed under the MIT License. See the FindCUDA.cmake script 6 | # for the text of the license. 7 | 8 | # The MIT License 9 | # 10 | # License for the specific language governing rights and limitations under 11 | # Permission is hereby granted, free of charge, to any person obtaining a 12 | # copy of this software and associated documentation files (the "Software"), 13 | # to deal in the Software without restriction, including without limitation 14 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 15 | # and/or sell copies of the Software, and to permit persons to whom the 16 | # Software is furnished to do so, subject to the following conditions: 17 | # 18 | # The above copyright notice and this permission notice shall be included 19 | # in all copies or substantial portions of the Software. 20 | # 21 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 22 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 23 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 24 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 25 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 26 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 27 | # DEALINGS IN THE SOFTWARE. 28 | 29 | 30 | ########################################################################## 31 | # This file runs the nvcc commands to produce the desired output file along with 32 | # the dependency file needed by CMake to compute dependencies. In addition the 33 | # file checks the output of each command and if the command fails it deletes the 34 | # output files. 35 | 36 | # Input variables 37 | # 38 | # verbose:BOOL=<> OFF: Be as quiet as possible (default) 39 | # ON : Describe each step 40 | # 41 | # build_configuration:STRING=<> Typically one of Debug, MinSizeRel, Release, or 42 | # RelWithDebInfo, but it should match one of the 43 | # entries in CUDA_HOST_FLAGS. This is the build 44 | # configuration used when compiling the code. If 45 | # blank or unspecified Debug is assumed as this is 46 | # what CMake does. 47 | # 48 | # generated_file:STRING=<> File to generate. This argument must be passed in. 49 | # 50 | # generated_cubin_file:STRING=<> File to generate. This argument must be passed 51 | # in if build_cubin is true. 
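#
# Scripts of this form are not run by hand; the generated build rules invoke them
# in CMake script mode, passing the inputs documented above with -D. An
# illustrative, abbreviated invocation (exact paths and flags come from build.make):
#
#   cmake -D verbose:BOOL=OFF \
#         -D build_configuration:STRING=Release \
#         -D generated_file:STRING=CMakeFiles/SA.dir/SA_generated_sa.cu.o \
#         -P CMakeFiles/SA.dir/SA_generated_sa.cu.o.cmake
#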
52 | 53 | cmake_policy(PUSH) 54 | cmake_policy(SET CMP0007 NEW) 55 | if(NOT generated_file) 56 | message(FATAL_ERROR "You must specify generated_file on the command line") 57 | endif() 58 | 59 | # Set these up as variables to make reading the generated file easier 60 | set(CMAKE_COMMAND "/usr/bin/cmake") # path 61 | set(source_file "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/sa.cu") # path 62 | set(NVCC_generated_dependency_file "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//SA_generated_sa.cu.o.NVCC-depend") # path 63 | set(cmake_dependency_file "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//SA_generated_sa.cu.o.depend") # path 64 | set(CUDA_make2cmake "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/make2cmake.cmake") # path 65 | set(CUDA_parse_cubin "/home/guyuchao/software/libtorch/share/cmake/Caffe2/Modules_CUDA_fix/upstream/FindCUDA/parse_cubin.cmake") # path 66 | set(build_cubin OFF) # bool 67 | set(CUDA_HOST_COMPILER "/usr/bin/cc") # path 68 | # We won't actually use these variables for now, but we need to set this, in 69 | # order to force this file to be run again if it changes. 70 | set(generated_file_path "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//.") # path 71 | set(generated_file_internal "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//./SA_generated_sa.cu.o") # path 72 | set(generated_cubin_file_internal "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir//./SA_generated_sa.cu.o.cubin.txt") # path 73 | 74 | set(CUDA_NVCC_EXECUTABLE "/usr/local/cuda-10.0/bin/nvcc") # path 75 | set(CUDA_NVCC_FLAGS -DONNX_NAMESPACE=onnx_c2;-gencode;arch=compute_61,code=sm_61;-Xcudafe;--diag_suppress=cc_clobber_ignored;-Xcudafe;--diag_suppress=integer_sign_change;-Xcudafe;--diag_suppress=useless_using_declaration;-Xcudafe;--diag_suppress=set_but_not_used;-std=c++11;-Xcompiler;-fPIC;--expt-relaxed-constexpr;--expt-extended-lambda;-Xcompiler -D_GLIBCXX_USE_CXX11_ABI=0 ;; ) # list 76 | # Build specific configuration flags 77 | set(CUDA_NVCC_FLAGS_DEBUG -G ; ) 78 | set(CUDA_NVCC_FLAGS_MINSIZEREL ; ) 79 | set(CUDA_NVCC_FLAGS_RELEASE ; ) 80 | set(CUDA_NVCC_FLAGS_RELWITHDEBINFO ; ) 81 | set(nvcc_flags -m64) # list 82 | set(CUDA_NVCC_INCLUDE_DIRS "/usr/local/cuda-10.0/include;$") # list (needs to be in quotes to handle spaces properly). 83 | set(CUDA_NVCC_COMPILE_DEFINITIONS [==[$]==]) # list (needs to be in lua quotes see #16510 ). 84 | set(format_flag "-c") # string 85 | set(cuda_language_flag ) # list 86 | 87 | # Clean up list of include directories and add -I flags 88 | list(REMOVE_DUPLICATES CUDA_NVCC_INCLUDE_DIRS) 89 | set(CUDA_NVCC_INCLUDE_ARGS) 90 | foreach(dir ${CUDA_NVCC_INCLUDE_DIRS}) 91 | # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 
92 | list(APPEND CUDA_NVCC_INCLUDE_ARGS "-I${dir}") 93 | endforeach() 94 | 95 | # Clean up list of compile definitions, add -D flags, and append to nvcc_flags 96 | list(REMOVE_DUPLICATES CUDA_NVCC_COMPILE_DEFINITIONS) 97 | foreach(def ${CUDA_NVCC_COMPILE_DEFINITIONS}) 98 | list(APPEND nvcc_flags "-D${def}") 99 | endforeach() 100 | 101 | if(build_cubin AND NOT generated_cubin_file) 102 | message(FATAL_ERROR "You must specify generated_cubin_file on the command line") 103 | endif() 104 | 105 | # This is the list of host compilation flags. It C or CXX should already have 106 | # been chosen by FindCUDA.cmake. 107 | set(CMAKE_HOST_FLAGS -fPIC ) 108 | set(CMAKE_HOST_FLAGS_DEBUG -g) 109 | set(CMAKE_HOST_FLAGS_MINSIZEREL -Os -DNDEBUG) 110 | set(CMAKE_HOST_FLAGS_RELEASE -O3 -DNDEBUG) 111 | set(CMAKE_HOST_FLAGS_RELWITHDEBINFO -O2 -g -DNDEBUG) 112 | 113 | # Take the compiler flags and package them up to be sent to the compiler via -Xcompiler 114 | set(nvcc_host_compiler_flags "") 115 | # If we weren't given a build_configuration, use Debug. 116 | if(NOT build_configuration) 117 | set(build_configuration Debug) 118 | endif() 119 | string(TOUPPER "${build_configuration}" build_configuration) 120 | #message("CUDA_NVCC_HOST_COMPILER_FLAGS = ${CUDA_NVCC_HOST_COMPILER_FLAGS}") 121 | foreach(flag ${CMAKE_HOST_FLAGS} ${CMAKE_HOST_FLAGS_${build_configuration}}) 122 | # Extra quotes are added around each flag to help nvcc parse out flags with spaces. 123 | string(APPEND nvcc_host_compiler_flags ",\"${flag}\"") 124 | endforeach() 125 | if (nvcc_host_compiler_flags) 126 | set(nvcc_host_compiler_flags "-Xcompiler" ${nvcc_host_compiler_flags}) 127 | endif() 128 | #message("nvcc_host_compiler_flags = \"${nvcc_host_compiler_flags}\"") 129 | # Add the build specific configuration flags 130 | list(APPEND CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS_${build_configuration}}) 131 | 132 | # Any -ccbin existing in CUDA_NVCC_FLAGS gets highest priority 133 | list( FIND CUDA_NVCC_FLAGS "-ccbin" ccbin_found0 ) 134 | list( FIND CUDA_NVCC_FLAGS "--compiler-bindir" ccbin_found1 ) 135 | if( ccbin_found0 LESS 0 AND ccbin_found1 LESS 0 AND CUDA_HOST_COMPILER ) 136 | if (CUDA_HOST_COMPILER STREQUAL "" AND DEFINED CCBIN) 137 | set(CCBIN -ccbin "${CCBIN}") 138 | else() 139 | set(CCBIN -ccbin "${CUDA_HOST_COMPILER}") 140 | endif() 141 | endif() 142 | 143 | # cuda_execute_process - Executes a command with optional command echo and status message. 144 | # 145 | # status - Status message to print if verbose is true 146 | # command - COMMAND argument from the usual execute_process argument structure 147 | # ARGN - Remaining arguments are the command with arguments 148 | # 149 | # CUDA_result - return value from running the command 150 | # 151 | # Make this a macro instead of a function, so that things like RESULT_VARIABLE 152 | # and other return variables are present after executing the process. 153 | macro(cuda_execute_process status command) 154 | set(_command ${command}) 155 | if(NOT "x${_command}" STREQUAL "xCOMMAND") 156 | message(FATAL_ERROR "Malformed call to cuda_execute_process. Missing COMMAND as second argument. (command = ${command})") 157 | endif() 158 | if(verbose) 159 | execute_process(COMMAND "${CMAKE_COMMAND}" -E echo -- ${status}) 160 | # Now we need to build up our command string. We are accounting for quotes 161 | # and spaces, anything else is left up to the user to fix if they want to 162 | # copy and paste a runnable command line. 
163 | set(cuda_execute_process_string) 164 | foreach(arg ${ARGN}) 165 | # If there are quotes, excape them, so they come through. 166 | string(REPLACE "\"" "\\\"" arg ${arg}) 167 | # Args with spaces need quotes around them to get them to be parsed as a single argument. 168 | if(arg MATCHES " ") 169 | list(APPEND cuda_execute_process_string "\"${arg}\"") 170 | else() 171 | list(APPEND cuda_execute_process_string ${arg}) 172 | endif() 173 | endforeach() 174 | # Echo the command 175 | execute_process(COMMAND ${CMAKE_COMMAND} -E echo ${cuda_execute_process_string}) 176 | endif() 177 | # Run the command 178 | execute_process(COMMAND ${ARGN} RESULT_VARIABLE CUDA_result ) 179 | endmacro() 180 | 181 | # Delete the target file 182 | cuda_execute_process( 183 | "Removing ${generated_file}" 184 | COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" 185 | ) 186 | 187 | # For CUDA 2.3 and below, -G -M doesn't work, so remove the -G flag 188 | # for dependency generation and hope for the best. 189 | set(depends_CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}") 190 | set(CUDA_VERSION 10.0) 191 | if(CUDA_VERSION VERSION_LESS "3.0") 192 | # Note that this will remove all occurrences of -G. 193 | list(REMOVE_ITEM depends_CUDA_NVCC_FLAGS "-G") 194 | endif() 195 | 196 | # nvcc doesn't define __CUDACC__ for some reason when generating dependency files. This 197 | # can cause incorrect dependencies when #including files based on this macro which is 198 | # defined in the generating passes of nvcc invocation. We will go ahead and manually 199 | # define this for now until a future version fixes this bug. 200 | set(CUDACC_DEFINE -D__CUDACC__) 201 | 202 | # Generate the dependency file 203 | cuda_execute_process( 204 | "Generating dependency file: ${NVCC_generated_dependency_file}" 205 | COMMAND "${CUDA_NVCC_EXECUTABLE}" 206 | -M 207 | ${CUDACC_DEFINE} 208 | "${source_file}" 209 | -o "${NVCC_generated_dependency_file}" 210 | ${CCBIN} 211 | ${nvcc_flags} 212 | ${nvcc_host_compiler_flags} 213 | ${depends_CUDA_NVCC_FLAGS} 214 | -DNVCC 215 | ${CUDA_NVCC_INCLUDE_ARGS} 216 | ) 217 | 218 | if(CUDA_result) 219 | message(FATAL_ERROR "Error generating ${generated_file}") 220 | endif() 221 | 222 | # Generate the cmake readable dependency file to a temp file. Don't put the 223 | # quotes just around the filenames for the input_file and output_file variables. 224 | # CMake will pass the quotes through and not be able to find the file. 
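# (make2cmake.cmake rewrites the Makefile-style output of "nvcc -M" into a file
# that CMake itself can read, which is how header dependencies of the .cu source
# are tracked by the generated build system.)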
225 | cuda_execute_process( 226 | "Generating temporary cmake readable file: ${cmake_dependency_file}.tmp" 227 | COMMAND "${CMAKE_COMMAND}" 228 | -D "input_file:FILEPATH=${NVCC_generated_dependency_file}" 229 | -D "output_file:FILEPATH=${cmake_dependency_file}.tmp" 230 | -D "verbose=${verbose}" 231 | -P "${CUDA_make2cmake}" 232 | ) 233 | 234 | if(CUDA_result) 235 | message(FATAL_ERROR "Error generating ${generated_file}") 236 | endif() 237 | 238 | # Copy the file if it is different 239 | cuda_execute_process( 240 | "Copy if different ${cmake_dependency_file}.tmp to ${cmake_dependency_file}" 241 | COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${cmake_dependency_file}.tmp" "${cmake_dependency_file}" 242 | ) 243 | 244 | if(CUDA_result) 245 | message(FATAL_ERROR "Error generating ${generated_file}") 246 | endif() 247 | 248 | # Delete the temporary file 249 | cuda_execute_process( 250 | "Removing ${cmake_dependency_file}.tmp and ${NVCC_generated_dependency_file}" 251 | COMMAND "${CMAKE_COMMAND}" -E remove "${cmake_dependency_file}.tmp" "${NVCC_generated_dependency_file}" 252 | ) 253 | 254 | if(CUDA_result) 255 | message(FATAL_ERROR "Error generating ${generated_file}") 256 | endif() 257 | 258 | # Generate the code 259 | cuda_execute_process( 260 | "Generating ${generated_file}" 261 | COMMAND "${CUDA_NVCC_EXECUTABLE}" 262 | "${source_file}" 263 | ${cuda_language_flag} 264 | ${format_flag} -o "${generated_file}" 265 | ${CCBIN} 266 | ${nvcc_flags} 267 | ${nvcc_host_compiler_flags} 268 | ${CUDA_NVCC_FLAGS} 269 | -DNVCC 270 | ${CUDA_NVCC_INCLUDE_ARGS} 271 | ) 272 | 273 | if(CUDA_result) 274 | # Since nvcc can sometimes leave half done files make sure that we delete the output file. 275 | cuda_execute_process( 276 | "Removing ${generated_file}" 277 | COMMAND "${CMAKE_COMMAND}" -E remove "${generated_file}" 278 | ) 279 | message(FATAL_ERROR "Error generating file ${generated_file}") 280 | else() 281 | if(verbose) 282 | message("Generated ${generated_file} successfully.") 283 | endif() 284 | endif() 285 | 286 | # Cubin resource report commands. 287 | if( build_cubin ) 288 | # Run with -cubin to produce resource usage report. 289 | cuda_execute_process( 290 | "Generating ${generated_cubin_file}" 291 | COMMAND "${CUDA_NVCC_EXECUTABLE}" 292 | "${source_file}" 293 | ${CUDA_NVCC_FLAGS} 294 | ${nvcc_flags} 295 | ${CCBIN} 296 | ${nvcc_host_compiler_flags} 297 | -DNVCC 298 | -cubin 299 | -o "${generated_cubin_file}" 300 | ${CUDA_NVCC_INCLUDE_ARGS} 301 | ) 302 | 303 | # Execute the parser script. 304 | cuda_execute_process( 305 | "Executing the parser script" 306 | COMMAND "${CMAKE_COMMAND}" 307 | -D "input_file:STRING=${generated_cubin_file}" 308 | -P "${CUDA_parse_cubin}" 309 | ) 310 | 311 | endif() 312 | 313 | cmake_policy(POP) 314 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/cmake_clean.cmake: -------------------------------------------------------------------------------- 1 | file(REMOVE_RECURSE 2 | "CMakeFiles/SA.dir/SA_generated_sa.cu.o" 3 | "CMakeFiles/SA.dir/sa_ext.cpp.o" 4 | "SA.pdb" 5 | "SA" 6 | ) 7 | 8 | # Per-language clean rules from dependency scanning. 
9 | foreach(lang CXX) 10 | include(CMakeFiles/SA.dir/cmake_clean_${lang}.cmake OPTIONAL) 11 | endforeach() 12 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/flags.make: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.10 3 | 4 | # compile CXX with /usr/bin/c++ 5 | CXX_FLAGS = -fPIC -D_GLIBCXX_USE_CXX11_ABI=0 -std=c++11 -std=gnu++11 6 | 7 | CXX_DEFINES = 8 | 9 | CXX_INCLUDES = -isystem /usr/local/cuda-10.0/include -isystem /home/guyuchao/software/libtorch/include -isystem /home/guyuchao/software/libtorch/include/torch/csrc/api/include 10 | 11 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/link.txt: -------------------------------------------------------------------------------- 1 | /usr/bin/c++ -fPIC -rdynamic CMakeFiles/SA.dir/sa_ext.cpp.o CMakeFiles/SA.dir/SA_generated_sa.cu.o -o SA -Wl,-rpath,/usr/local/cuda-10.0/lib64:/home/guyuchao/software/libtorch/lib /usr/local/cuda-10.0/lib64/libcudart.so /home/guyuchao/software/libtorch/lib/libtorch.so /home/guyuchao/software/libtorch/lib/libc10.so -lcuda /usr/local/cuda-10.0/lib64/libnvrtc.so /usr/local/cuda-10.0/lib64/libnvToolsExt.so /usr/local/cuda-10.0/lib64/libcudart.so /home/guyuchao/software/libtorch/lib/libc10_cuda.so -Wl,--no-as-needed,/home/guyuchao/software/libtorch/lib/libcaffe2.so -Wl,--as-needed -Wl,--no-as-needed,/home/guyuchao/software/libtorch/lib/libcaffe2_gpu.so -Wl,--as-needed /home/guyuchao/software/libtorch/lib/libc10_cuda.so /home/guyuchao/software/libtorch/lib/libcaffe2.so /home/guyuchao/software/libtorch/lib/libc10.so -lpthread /usr/local/cuda-10.0/lib64/libcufft.so /usr/local/cuda-10.0/lib64/libcurand.so /usr/lib/x86_64-linux-gnu/libcudnn.so /usr/local/cuda/lib64/libculibos.a -ldl /usr/local/cuda/lib64/libculibos.a -ldl /usr/local/cuda-10.0/lib64/libcublas.so /usr/local/cuda-10.0/lib64/libcudart.so 2 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/progress.make: -------------------------------------------------------------------------------- 1 | CMAKE_PROGRESS_1 = 1 2 | CMAKE_PROGRESS_2 = 2 3 | CMAKE_PROGRESS_3 = 3 4 | 5 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/sa_ext.cpp.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/SA.dir/sa_ext.cpp.o -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/TargetDirectories.txt: -------------------------------------------------------------------------------- 1 | /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/rebuild_cache.dir 2 | /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/edit_cache.dir 3 | /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/SA.dir 4 | -------------------------------------------------------------------------------- 
/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/cmake.check_cache: -------------------------------------------------------------------------------- 1 | # This file is generated by cmake for dependency checking of the CMakeCache.txt file 2 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/feature_tests.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/feature_tests.bin -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/feature_tests.c: -------------------------------------------------------------------------------- 1 | 2 | const char features[] = {"\n" 3 | "C_FEATURE:" 4 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 304 5 | "1" 6 | #else 7 | "0" 8 | #endif 9 | "c_function_prototypes\n" 10 | "C_FEATURE:" 11 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 304 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 12 | "1" 13 | #else 14 | "0" 15 | #endif 16 | "c_restrict\n" 17 | "C_FEATURE:" 18 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201000L 19 | "1" 20 | #else 21 | "0" 22 | #endif 23 | "c_static_assert\n" 24 | "C_FEATURE:" 25 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 304 && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 26 | "1" 27 | #else 28 | "0" 29 | #endif 30 | "c_variadic_macros\n" 31 | 32 | }; 33 | 34 | int main(int argc, char** argv) { (void)argv; return features[argc]; } 35 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/feature_tests.cxx: -------------------------------------------------------------------------------- 1 | 2 | const char features[] = {"\n" 3 | "CXX_FEATURE:" 4 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L 5 | "1" 6 | #else 7 | "0" 8 | #endif 9 | "cxx_aggregate_default_initializers\n" 10 | "CXX_FEATURE:" 11 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L 12 | "1" 13 | #else 14 | "0" 15 | #endif 16 | "cxx_alias_templates\n" 17 | "CXX_FEATURE:" 18 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L 19 | "1" 20 | #else 21 | "0" 22 | #endif 23 | "cxx_alignas\n" 24 | "CXX_FEATURE:" 25 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L 26 | "1" 27 | #else 28 | "0" 29 | #endif 30 | "cxx_alignof\n" 31 | "CXX_FEATURE:" 32 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L 33 | "1" 34 | #else 35 | "0" 36 | #endif 37 | "cxx_attributes\n" 38 | "CXX_FEATURE:" 39 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 40 | "1" 41 | #else 42 | "0" 43 | #endif 44 | "cxx_attribute_deprecated\n" 45 | "CXX_FEATURE:" 46 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 47 | "1" 48 | #else 49 | "0" 50 | #endif 51 | "cxx_auto_type\n" 52 | "CXX_FEATURE:" 53 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 54 | "1" 55 | #else 56 | "0" 57 | #endif 58 | "cxx_binary_literals\n" 59 | "CXX_FEATURE:" 60 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 61 | "1" 62 | #else 63 | "0" 64 
| #endif 65 | "cxx_constexpr\n" 66 | "CXX_FEATURE:" 67 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 68 | "1" 69 | #else 70 | "0" 71 | #endif 72 | "cxx_contextual_conversions\n" 73 | "CXX_FEATURE:" 74 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 75 | "1" 76 | #else 77 | "0" 78 | #endif 79 | "cxx_decltype\n" 80 | "CXX_FEATURE:" 81 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 82 | "1" 83 | #else 84 | "0" 85 | #endif 86 | "cxx_decltype_auto\n" 87 | "CXX_FEATURE:" 88 | #if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L 89 | "1" 90 | #else 91 | "0" 92 | #endif 93 | "cxx_decltype_incomplete_return_types\n" 94 | "CXX_FEATURE:" 95 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 96 | "1" 97 | #else 98 | "0" 99 | #endif 100 | "cxx_default_function_template_args\n" 101 | "CXX_FEATURE:" 102 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 103 | "1" 104 | #else 105 | "0" 106 | #endif 107 | "cxx_defaulted_functions\n" 108 | "CXX_FEATURE:" 109 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 110 | "1" 111 | #else 112 | "0" 113 | #endif 114 | "cxx_defaulted_move_initializers\n" 115 | "CXX_FEATURE:" 116 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L 117 | "1" 118 | #else 119 | "0" 120 | #endif 121 | "cxx_delegating_constructors\n" 122 | "CXX_FEATURE:" 123 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 124 | "1" 125 | #else 126 | "0" 127 | #endif 128 | "cxx_deleted_functions\n" 129 | "CXX_FEATURE:" 130 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 131 | "1" 132 | #else 133 | "0" 134 | #endif 135 | "cxx_digit_separators\n" 136 | "CXX_FEATURE:" 137 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 138 | "1" 139 | #else 140 | "0" 141 | #endif 142 | "cxx_enum_forward_declarations\n" 143 | "CXX_FEATURE:" 144 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 145 | "1" 146 | #else 147 | "0" 148 | #endif 149 | "cxx_explicit_conversions\n" 150 | "CXX_FEATURE:" 151 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L 152 | "1" 153 | #else 154 | "0" 155 | #endif 156 | "cxx_extended_friend_declarations\n" 157 | "CXX_FEATURE:" 158 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 159 | "1" 160 | #else 161 | "0" 162 | #endif 163 | "cxx_extern_templates\n" 164 | "CXX_FEATURE:" 165 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L 166 | "1" 167 | #else 168 | "0" 169 | #endif 170 | "cxx_final\n" 171 | "CXX_FEATURE:" 172 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 173 | "1" 174 | #else 175 | "0" 176 | #endif 177 | "cxx_func_identifier\n" 178 | "CXX_FEATURE:" 179 
| #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 180 | "1" 181 | #else 182 | "0" 183 | #endif 184 | "cxx_generalized_initializers\n" 185 | "CXX_FEATURE:" 186 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 187 | "1" 188 | #else 189 | "0" 190 | #endif 191 | "cxx_generic_lambdas\n" 192 | "CXX_FEATURE:" 193 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L 194 | "1" 195 | #else 196 | "0" 197 | #endif 198 | "cxx_inheriting_constructors\n" 199 | "CXX_FEATURE:" 200 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 201 | "1" 202 | #else 203 | "0" 204 | #endif 205 | "cxx_inline_namespaces\n" 206 | "CXX_FEATURE:" 207 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 208 | "1" 209 | #else 210 | "0" 211 | #endif 212 | "cxx_lambdas\n" 213 | "CXX_FEATURE:" 214 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 215 | "1" 216 | #else 217 | "0" 218 | #endif 219 | "cxx_lambda_init_captures\n" 220 | "CXX_FEATURE:" 221 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 222 | "1" 223 | #else 224 | "0" 225 | #endif 226 | "cxx_local_type_template_args\n" 227 | "CXX_FEATURE:" 228 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 229 | "1" 230 | #else 231 | "0" 232 | #endif 233 | "cxx_long_long_type\n" 234 | "CXX_FEATURE:" 235 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 236 | "1" 237 | #else 238 | "0" 239 | #endif 240 | "cxx_noexcept\n" 241 | "CXX_FEATURE:" 242 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L 243 | "1" 244 | #else 245 | "0" 246 | #endif 247 | "cxx_nonstatic_member_init\n" 248 | "CXX_FEATURE:" 249 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 250 | "1" 251 | #else 252 | "0" 253 | #endif 254 | "cxx_nullptr\n" 255 | "CXX_FEATURE:" 256 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L 257 | "1" 258 | #else 259 | "0" 260 | #endif 261 | "cxx_override\n" 262 | "CXX_FEATURE:" 263 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 264 | "1" 265 | #else 266 | "0" 267 | #endif 268 | "cxx_range_for\n" 269 | "CXX_FEATURE:" 270 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 405 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 271 | "1" 272 | #else 273 | "0" 274 | #endif 275 | "cxx_raw_string_literals\n" 276 | "CXX_FEATURE:" 277 | #if ((__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) >= 40801) && __cplusplus >= 201103L 278 | "1" 279 | #else 280 | "0" 281 | #endif 282 | "cxx_reference_qualified_functions\n" 283 | "CXX_FEATURE:" 284 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L 285 | "1" 286 | #else 287 | "0" 288 | #endif 289 | "cxx_relaxed_constexpr\n" 290 | "CXX_FEATURE:" 291 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 409 && __cplusplus > 201103L 292 
| "1" 293 | #else 294 | "0" 295 | #endif 296 | "cxx_return_type_deduction\n" 297 | "CXX_FEATURE:" 298 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 299 | "1" 300 | #else 301 | "0" 302 | #endif 303 | "cxx_right_angle_brackets\n" 304 | "CXX_FEATURE:" 305 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 306 | "1" 307 | #else 308 | "0" 309 | #endif 310 | "cxx_rvalue_references\n" 311 | "CXX_FEATURE:" 312 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 313 | "1" 314 | #else 315 | "0" 316 | #endif 317 | "cxx_sizeof_member\n" 318 | "CXX_FEATURE:" 319 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 320 | "1" 321 | #else 322 | "0" 323 | #endif 324 | "cxx_static_assert\n" 325 | "CXX_FEATURE:" 326 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 327 | "1" 328 | #else 329 | "0" 330 | #endif 331 | "cxx_strong_enums\n" 332 | "CXX_FEATURE:" 333 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && __cplusplus 334 | "1" 335 | #else 336 | "0" 337 | #endif 338 | "cxx_template_template_parameters\n" 339 | "CXX_FEATURE:" 340 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 408 && __cplusplus >= 201103L 341 | "1" 342 | #else 343 | "0" 344 | #endif 345 | "cxx_thread_local\n" 346 | "CXX_FEATURE:" 347 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 348 | "1" 349 | #else 350 | "0" 351 | #endif 352 | "cxx_trailing_return_types\n" 353 | "CXX_FEATURE:" 354 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 355 | "1" 356 | #else 357 | "0" 358 | #endif 359 | "cxx_unicode_literals\n" 360 | "CXX_FEATURE:" 361 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 362 | "1" 363 | #else 364 | "0" 365 | #endif 366 | "cxx_uniform_initialization\n" 367 | "CXX_FEATURE:" 368 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 406 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 369 | "1" 370 | #else 371 | "0" 372 | #endif 373 | "cxx_unrestricted_unions\n" 374 | "CXX_FEATURE:" 375 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 407 && __cplusplus >= 201103L 376 | "1" 377 | #else 378 | "0" 379 | #endif 380 | "cxx_user_literals\n" 381 | "CXX_FEATURE:" 382 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 500 && __cplusplus >= 201402L 383 | "1" 384 | #else 385 | "0" 386 | #endif 387 | "cxx_variable_templates\n" 388 | "CXX_FEATURE:" 389 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 390 | "1" 391 | #else 392 | "0" 393 | #endif 394 | "cxx_variadic_macros\n" 395 | "CXX_FEATURE:" 396 | #if (__GNUC__ * 100 + __GNUC_MINOR__) >= 404 && (__cplusplus >= 201103L || (defined(__GXX_EXPERIMENTAL_CXX0X__) && __GXX_EXPERIMENTAL_CXX0X__)) 397 | "1" 398 | #else 399 | "0" 400 | #endif 401 | "cxx_variadic_templates\n" 402 | 403 | }; 404 | 405 | int main(int argc, char** argv) 
{ (void)argv; return features[argc]; } 406 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/CMakeFiles/progress.marks: -------------------------------------------------------------------------------- 1 | 3 2 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/Makefile: -------------------------------------------------------------------------------- 1 | # CMAKE generated file: DO NOT EDIT! 2 | # Generated by "Unix Makefiles" Generator, CMake Version 3.10 3 | 4 | # Default target executed when no arguments are given to make. 5 | default_target: all 6 | 7 | .PHONY : default_target 8 | 9 | # Allow only one "make -f Makefile2" at a time, but pass parallelism. 10 | .NOTPARALLEL: 11 | 12 | 13 | #============================================================================= 14 | # Special targets provided by cmake. 15 | 16 | # Disable implicit rules so canonical targets will work. 17 | .SUFFIXES: 18 | 19 | 20 | # Remove some rules from gmake that .SUFFIXES does not remove. 21 | SUFFIXES = 22 | 23 | .SUFFIXES: .hpux_make_needs_suffix_list 24 | 25 | 26 | # Suppress display of executed commands. 27 | $(VERBOSE).SILENT: 28 | 29 | 30 | # A target that is always out of date. 31 | cmake_force: 32 | 33 | .PHONY : cmake_force 34 | 35 | #============================================================================= 36 | # Set environment variables for the build. 37 | 38 | # The shell in which to execute make rules. 39 | SHELL = /bin/sh 40 | 41 | # The CMake executable. 42 | CMAKE_COMMAND = /usr/bin/cmake 43 | 44 | # The command to remove a file. 45 | RM = /usr/bin/cmake -E remove -f 46 | 47 | # Escaping for special characters. 48 | EQUALS = = 49 | 50 | # The top-level source directory on which CMake was run. 51 | CMAKE_SOURCE_DIR = /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module 52 | 53 | # The top-level build directory on which CMake was run. 54 | CMAKE_BINARY_DIR = /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild 55 | 56 | #============================================================================= 57 | # Targets provided globally by CMake. 58 | 59 | # Special rule for the target rebuild_cache 60 | rebuild_cache: 61 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake to regenerate build system..." 62 | /usr/bin/cmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 63 | .PHONY : rebuild_cache 64 | 65 | # Special rule for the target rebuild_cache 66 | rebuild_cache/fast: rebuild_cache 67 | 68 | .PHONY : rebuild_cache/fast 69 | 70 | # Special rule for the target edit_cache 71 | edit_cache: 72 | @$(CMAKE_COMMAND) -E cmake_echo_color --switch=$(COLOR) --cyan "Running CMake cache editor..." 
73 | /usr/bin/ccmake -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) 74 | .PHONY : edit_cache 75 | 76 | # Special rule for the target edit_cache 77 | edit_cache/fast: edit_cache 78 | 79 | .PHONY : edit_cache/fast 80 | 81 | # The main all target 82 | all: cmake_check_build_system 83 | $(CMAKE_COMMAND) -E cmake_progress_start /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles/progress.marks 84 | $(MAKE) -f CMakeFiles/Makefile2 all 85 | $(CMAKE_COMMAND) -E cmake_progress_start /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/CMakeFiles 0 86 | .PHONY : all 87 | 88 | # The main clean target 89 | clean: 90 | $(MAKE) -f CMakeFiles/Makefile2 clean 91 | .PHONY : clean 92 | 93 | # The main clean target 94 | clean/fast: clean 95 | 96 | .PHONY : clean/fast 97 | 98 | # Prepare targets for installation. 99 | preinstall: all 100 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 101 | .PHONY : preinstall 102 | 103 | # Prepare targets for installation. 104 | preinstall/fast: 105 | $(MAKE) -f CMakeFiles/Makefile2 preinstall 106 | .PHONY : preinstall/fast 107 | 108 | # clear depends 109 | depend: 110 | $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 1 111 | .PHONY : depend 112 | 113 | #============================================================================= 114 | # Target rules for targets named SA 115 | 116 | # Build rule for target. 117 | SA: cmake_check_build_system 118 | $(MAKE) -f CMakeFiles/Makefile2 SA 119 | .PHONY : SA 120 | 121 | # fast build rule for target. 122 | SA/fast: 123 | $(MAKE) -f CMakeFiles/SA.dir/build.make CMakeFiles/SA.dir/build 124 | .PHONY : SA/fast 125 | 126 | sa_ext.o: sa_ext.cpp.o 127 | 128 | .PHONY : sa_ext.o 129 | 130 | # target to build an object file 131 | sa_ext.cpp.o: 132 | $(MAKE) -f CMakeFiles/SA.dir/build.make CMakeFiles/SA.dir/sa_ext.cpp.o 133 | .PHONY : sa_ext.cpp.o 134 | 135 | sa_ext.i: sa_ext.cpp.i 136 | 137 | .PHONY : sa_ext.i 138 | 139 | # target to preprocess a source file 140 | sa_ext.cpp.i: 141 | $(MAKE) -f CMakeFiles/SA.dir/build.make CMakeFiles/SA.dir/sa_ext.cpp.i 142 | .PHONY : sa_ext.cpp.i 143 | 144 | sa_ext.s: sa_ext.cpp.s 145 | 146 | .PHONY : sa_ext.s 147 | 148 | # target to generate assembly for a file 149 | sa_ext.cpp.s: 150 | $(MAKE) -f CMakeFiles/SA.dir/build.make CMakeFiles/SA.dir/sa_ext.cpp.s 151 | .PHONY : sa_ext.cpp.s 152 | 153 | # Help Target 154 | help: 155 | @echo "The following are some of the valid targets for this Makefile:" 156 | @echo "... all (the default if no target is provided)" 157 | @echo "... clean" 158 | @echo "... depend" 159 | @echo "... rebuild_cache" 160 | @echo "... edit_cache" 161 | @echo "... SA" 162 | @echo "... sa_ext.o" 163 | @echo "... sa_ext.i" 164 | @echo "... sa_ext.s" 165 | .PHONY : help 166 | 167 | 168 | 169 | #============================================================================= 170 | # Special targets to cleanup operation of make. 171 | 172 | # Special rule to run CMake to check the build system integrity. 173 | # No rule that depends on this can have commands that come from listfiles 174 | # because they might be regenerated. 
175 | cmake_check_build_system: 176 | $(CMAKE_COMMAND) -H$(CMAKE_SOURCE_DIR) -B$(CMAKE_BINARY_DIR) --check-build-system CMakeFiles/Makefile.cmake 0 177 | .PHONY : cmake_check_build_system 178 | 179 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/SA: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/PCSA_Module/cmakebuild/SA -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/cmake_install.cmake: -------------------------------------------------------------------------------- 1 | # Install script for directory: /home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module 2 | 3 | # Set the install prefix 4 | if(NOT DEFINED CMAKE_INSTALL_PREFIX) 5 | set(CMAKE_INSTALL_PREFIX "/usr/local") 6 | endif() 7 | string(REGEX REPLACE "/$" "" CMAKE_INSTALL_PREFIX "${CMAKE_INSTALL_PREFIX}") 8 | 9 | # Set the install configuration name. 10 | if(NOT DEFINED CMAKE_INSTALL_CONFIG_NAME) 11 | if(BUILD_TYPE) 12 | string(REGEX REPLACE "^[^A-Za-z0-9_]+" "" 13 | CMAKE_INSTALL_CONFIG_NAME "${BUILD_TYPE}") 14 | else() 15 | set(CMAKE_INSTALL_CONFIG_NAME "") 16 | endif() 17 | message(STATUS "Install configuration: \"${CMAKE_INSTALL_CONFIG_NAME}\"") 18 | endif() 19 | 20 | # Set the component getting installed. 21 | if(NOT CMAKE_INSTALL_COMPONENT) 22 | if(COMPONENT) 23 | message(STATUS "Install component: \"${COMPONENT}\"") 24 | set(CMAKE_INSTALL_COMPONENT "${COMPONENT}") 25 | else() 26 | set(CMAKE_INSTALL_COMPONENT) 27 | endif() 28 | endif() 29 | 30 | # Install shared libraries without execute permission? 31 | if(NOT DEFINED CMAKE_INSTALL_SO_NO_EXE) 32 | set(CMAKE_INSTALL_SO_NO_EXE "1") 33 | endif() 34 | 35 | # Is this installation the result of a crosscompile? 
36 | if(NOT DEFINED CMAKE_CROSSCOMPILING) 37 | set(CMAKE_CROSSCOMPILING "FALSE") 38 | endif() 39 | 40 | if(CMAKE_INSTALL_COMPONENT) 41 | set(CMAKE_INSTALL_MANIFEST "install_manifest_${CMAKE_INSTALL_COMPONENT}.txt") 42 | else() 43 | set(CMAKE_INSTALL_MANIFEST "install_manifest.txt") 44 | endif() 45 | 46 | string(REPLACE ";" "\n" CMAKE_INSTALL_MANIFEST_CONTENT 47 | "${CMAKE_INSTALL_MANIFEST_FILES}") 48 | file(WRITE "/home/guyuchao/ssd/PycharmProjects/videofastsal/Models/plocaltnonlocal/SelfAttention_Module/cmakebuild/${CMAKE_INSTALL_MANIFEST}" 49 | "${CMAKE_INSTALL_MANIFEST_CONTENT}") 50 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/detect_cuda_compute_capabilities.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int main() 4 | { 5 | int count = 0; 6 | if (cudaSuccess != cudaGetDeviceCount(&count)) return -1; 7 | if (count == 0) return -1; 8 | for (int device = 0; device < count; ++device) 9 | { 10 | cudaDeviceProp prop; 11 | if (cudaSuccess == cudaGetDeviceProperties(&prop, device)) 12 | std::printf("%d.%d ", prop.major, prop.minor); 13 | } 14 | return 0; 15 | } 16 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/cmakebuild/detect_cuda_version.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | int main() { 4 | printf("%d.%d", CUDA_VERSION / 1000, (CUDA_VERSION / 10) % 100); 5 | return 0; 6 | } 7 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/reference.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | //#include 3 | #include 4 | #include 5 | 6 | //串行比对 7 | void sa_weight_forward_Ref(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int B,int T,int C,int H,int W,int radius,int dilation){ 8 | int diameter=2*radius+1; 9 | 10 | for(int batch=0;batch=0&&dw+w>=0){ 22 | for(int c=0;c=0&&h+dh=0&&w+dw=0&&h+dh=0&&w+dw=0&&h+dh=0&&w+dw=0&&dw+w>=0){ 141 | for(int c=0;c=0&&h+dh=0&&w+dw 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define TensorAccessor5D torch::PackedTensorAccessor 8 | /* 9 | #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600 10 | #else 11 | static __inline__ __device__ double atomicAdd(double *address, double val) { 12 | unsigned long long int* address_as_ull = (unsigned long long int*)address; 13 | unsigned long long int old = *address_as_ull, assumed; 14 | if (val==0.0) 15 | return __longlong_as_double(old); 16 | do { 17 | assumed = old; 18 | old = atomicCAS(address_as_ull, assumed, __double_as_longlong(val +__longlong_as_double(assumed))); 19 | } while (assumed != old); 20 | return __longlong_as_double(old); 21 | } 22 | #endif 23 | */ 24 | template 25 | __global__ 26 | void sa_weight_forward_kernel( 27 | const TensorAccessor5D query, 28 | const TensorAccessor5D key, 29 | TensorAccessor5D weight,int B,int T,int C,int H,int W,int radius,int dilation){ 30 | int w = blockIdx.x * blockDim.x + threadIdx.x;//col 31 | int h = blockIdx.y * blockDim.y + threadIdx.y;//row 32 | int time = blockIdx.z;//time 33 | int diameter=2*radius+1; 34 | 35 | //query B*T*C*H*W 36 | //key B*T*C*H*W 37 | //weight B*T*9T*H*W 38 | if(w=0&&w+dw=0){ 45 | for(int c=0;c 60 | __global__ 61 | void sa_map_forward_kernel( 62 | const TensorAccessor5D weight, 63 | const TensorAccessor5D proj, 64 | TensorAccessor5D 
out,int B,int T,int C,int H,int W,int radius,int dilation){ 65 | int w = blockIdx.x * blockDim.x + threadIdx.x;//col 66 | int h = blockIdx.y * blockDim.y + threadIdx.y;//row 67 | int time = blockIdx.z;//time 68 | int diameter=2*radius+1; 69 | 70 | //weight B*T*9T*H*W 71 | //proj B*T*C*H*W 72 | //out B*T*C*H*W 73 | if(w=0&&w+dw=0){ 81 | scalar_t weight_temp=weight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; 82 | scalar_t proj_value=proj[batch][cal_time][c][h+dh][w+dw]; 83 | sum+=weight_temp*proj_value; 84 | } 85 | } 86 | } 87 | } 88 | out[batch][time][c][h][w]=sum; 89 | } 90 | } 91 | } 92 | } 93 | 94 | template 95 | __global__ 96 | void sa_weight_backward_kernel_query( 97 | const TensorAccessor5D dweight, 98 | const TensorAccessor5D key, 99 | TensorAccessor5D dquery,int B,int T,int C,int H,int W,int radius,int dilation){ 100 | int w = blockIdx.x * blockDim.x + threadIdx.x;//col 101 | int h = blockIdx.y * blockDim.y + threadIdx.y;//row 102 | int time = blockIdx.z;//time 103 | int diameter=2*radius+1; 104 | 105 | //weight B*T*9T*H*W 106 | //proj B*T*C*H*W 107 | //out B*T*C*H*W 108 | if(w=0&&w+dw=0){ 116 | scalar_t _dweight=dweight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; 117 | scalar_t _key=key[batch][cal_time][c][h+dh][w+dw]; 118 | sum+=_dweight*_key; 119 | } 120 | } 121 | } 122 | } 123 | dquery[batch][time][c][h][w]=sum; 124 | } 125 | } 126 | } 127 | } 128 | 129 | template 130 | __global__ 131 | void sa_weight_backward_kernel_key( 132 | const TensorAccessor5D dweight, 133 | const TensorAccessor5D query, 134 | TensorAccessor5D dkey,int B,int T,int C,int H,int W,int radius,int dilation){ 135 | int w = blockIdx.x * blockDim.x + threadIdx.x;//col 136 | int h = blockIdx.y * blockDim.y + threadIdx.y;//row 137 | int time = blockIdx.z;//time 138 | int diameter=2*radius+1; 139 | 140 | //weight B*T*9T*H*W 141 | //proj B*T*C*H*W 142 | //out B*T*C*H*W 143 | if(w=0&&w+dw=0){ 150 | scalar_t _dweight=dweight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; 151 | scalar_t _query=query[batch][time][c][h][w]; 152 | atomicAdd(&dkey[batch][cal_time][c][h+dh][w+dw],_dweight*_query); 153 | } 154 | } 155 | } 156 | } 157 | } 158 | } 159 | } 160 | } 161 | 162 | template 163 | __global__ 164 | void sa_map_backward_kernel_weight( 165 | const TensorAccessor5D dout, 166 | const TensorAccessor5D proj, 167 | TensorAccessor5D dweight,int B,int T,int C,int H,int W,int radius,int dilation){ 168 | int w = blockIdx.x * blockDim.x + threadIdx.x;//col 169 | int h = blockIdx.y * blockDim.y + threadIdx.y;//row 170 | int time = blockIdx.z;//time 171 | int diameter=2*radius+1; 172 | 173 | //weight B*T*9T*H*W 174 | //proj B*T*C*H*W 175 | //out B*T*C*H*W 176 | if(w=0&&w+dw=0){ 184 | scalar_t _proj=proj[batch][cal_time][c][h+dh][w+dw]; 185 | scalar_t _dout=dout[batch][time][c][h][w]; 186 | sum+=_dout*_proj; 187 | } 188 | } 189 | dweight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]=sum; 190 | } 191 | } 192 | } 193 | } 194 | } 195 | } 196 | 197 | template 198 | __global__ 199 | void sa_map_backward_kernel_proj( 200 | const TensorAccessor5D dout, 201 | const TensorAccessor5D weight, 202 | TensorAccessor5D dproj,int B,int T,int C,int H,int W,int radius,int dilation){ 203 | int w = blockIdx.x * blockDim.x + threadIdx.x;//col 204 | int h = blockIdx.y * blockDim.y + threadIdx.y;//row 205 | int time = 
blockIdx.z;//time 206 | int diameter=2*radius+1; 207 | //weight B*T*9T*H*W 208 | //proj B*T*C*H*W 209 | //out B*T*C*H*W 210 | if(w=0&&w+dw=0){ 217 | scalar_t weight_temp=weight[batch][time][cal_time*diameter*diameter+(dh/dilation+radius)*(2*radius+1)+(dw/dilation+radius)][h][w]; 218 | scalar_t _dout=dout[batch][time][c][h][w]; 219 | atomicAdd(&dproj[batch][cal_time][c][h+dh][w+dw],_dout*weight_temp); 220 | } 221 | } 222 | } 223 | } 224 | } 225 | } 226 | } 227 | } 228 | 229 | void _sa_weight_forward_cuda(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int B,int T,int C,int H,int W,int radius,int dilation){ 230 | dim3 threads(16,16); 231 | dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); 232 | 233 | AT_DISPATCH_FLOATING_TYPES(weight.scalar_type(), "sa_weight_forward_cuda", ([&] { 234 | sa_weight_forward_kernel<<>>( 235 | query.packed_accessor(), 236 | key.packed_accessor(), 237 | weight.packed_accessor(),B,T,C,H,W,radius,dilation); 238 | })); 239 | } 240 | 241 | void _sa_map_forward_cuda(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int B,int T,int C,int H,int W,int radius,int dilation){ 242 | dim3 threads(16,16); 243 | dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); 244 | AT_DISPATCH_FLOATING_TYPES(weight.scalar_type(), "sa_map_forward_cuda", ([&] { 245 | sa_map_forward_kernel<<>>( 246 | weight.packed_accessor(), 247 | proj.packed_accessor(), 248 | out.packed_accessor(),B,T,C,H,W,radius,dilation); 249 | })); 250 | } 251 | 252 | void _sa_weight_backward_cuda(const torch::Tensor& dw,const torch::Tensor& query, 253 | const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey, 254 | int B,int T,int C,int H,int W,int radius,int dilation){ 255 | dim3 threads(16,16); 256 | dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); 257 | AT_DISPATCH_FLOATING_TYPES(dw.scalar_type(), "sa_weight_backward_cuda", ([&] { 258 | const TensorAccessor5D dw_acc=dw.packed_accessor(); 259 | const TensorAccessor5D query_acc=query.packed_accessor(); 260 | const TensorAccessor5D key_acc=key.packed_accessor(); 261 | TensorAccessor5D dquery_acc=dquery.packed_accessor(); 262 | TensorAccessor5D dkey_acc=dkey.packed_accessor(); 263 | sa_weight_backward_kernel_query<<>>(dw_acc,key_acc,dquery_acc,B,T,C,H,W,radius,dilation); 264 | sa_weight_backward_kernel_key<<>>(dw_acc,query_acc,dkey_acc,B,T,C,H,W,radius,dilation); 265 | })); 266 | } 267 | 268 | void _sa_map_backward_cuda(const torch::Tensor& dout, const torch::Tensor& weight, 269 | const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj, 270 | int B,int T,int C,int H,int W,int radius,int dilation){ 271 | dim3 threads(16,16); 272 | dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); 273 | 274 | AT_DISPATCH_FLOATING_TYPES(dout.scalar_type(), "sa_map_backward_cuda", ([&] { 275 | const TensorAccessor5D dout_acc=dout.packed_accessor(); 276 | const TensorAccessor5D weight_acc=weight.packed_accessor(); 277 | const TensorAccessor5D proj_acc=proj.packed_accessor(); 278 | TensorAccessor5D dweight_acc=dweight.packed_accessor(); 279 | TensorAccessor5D dproj_acc=dproj.packed_accessor(); 280 | sa_map_backward_kernel_weight<<>>(dout_acc,proj_acc,dweight_acc,B,T,C,H,W,radius,dilation); 281 | sa_map_backward_kernel_proj<<>>(dout_acc,weight_acc,dproj_acc,B,T,C,H,W,radius,dilation); 282 | })); 283 | } 284 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/sa.cu.bak: 
-------------------------------------------------------------------------------- 1 | #include 2 | //#include 3 | #include 4 | #include 5 | #include 6 | 7 | #define TensorAccessor5D torch::PackedTensorAccessor 8 | 9 | template 10 | __global__ 11 | void sa_weight_forward_kernel( 12 | const TensorAccessor5D query, 13 | const TensorAccessor5D key, 14 | TensorAccessor5D weight,int B,int T,int C,int H,int W){ 15 | int col = blockIdx.x * blockDim.x + threadIdx.x;//col 16 | int row = blockIdx.y * blockDim.y + threadIdx.y;//row 17 | int time = blockIdx.z;//time 18 | //query B*T*C*H*W 19 | //key B*T*C*H*W 20 | //weight B*T*9T*H*W 21 | if(col=0&&col+w=0){ 28 | for(int channel=0;channel 43 | __global__ 44 | void sa_map_forward_kernel( 45 | const TensorAccessor5D weight, 46 | const TensorAccessor5D proj, 47 | TensorAccessor5D out,int B,int T,int C,int H,int W){ 48 | int col = blockIdx.x * blockDim.x + threadIdx.x;//col 49 | int row = blockIdx.y * blockDim.y + threadIdx.y;//row 50 | int time = blockIdx.z;//time 51 | //weight B*T*9T*H*W 52 | //proj B*T*C*H*W 53 | //out B*T*C*H*W 54 | if(col=0&&col+w=0){ 60 | float weight_temp=weight[batch][time][t*9+(h+1)*3+(w+1)][row][col]; 61 | for(int channel=0;channel 74 | __global__ 75 | void sa_weight_backward_kernel_query( 76 | const TensorAccessor5D dw, 77 | const TensorAccessor5D key, 78 | TensorAccessor5D dquery,int B,int T,int C,int H,int W){ 79 | int col = blockIdx.x * blockDim.x + threadIdx.x;//col 80 | int row = blockIdx.y * blockDim.y + threadIdx.y;//row 81 | int time = blockIdx.z;//time 82 | //weight B*T*9T*H*W 83 | //proj B*T*C*H*W 84 | //out B*T*C*H*W 85 | if(col=0&&col+w=0){ 91 | float _dw=dw[batch][time][t*9+(h+1)*3+(w+1)][row][col]; 92 | for(int channel=0;channel 105 | __global__ 106 | void sa_weight_backward_kernel_key( 107 | const TensorAccessor5D dw, 108 | const TensorAccessor5D query, 109 | TensorAccessor5D dkey,int B,int T,int C,int H,int W){ 110 | int col = blockIdx.x * blockDim.x + threadIdx.x;//col 111 | int row = blockIdx.y * blockDim.y + threadIdx.y;//row 112 | int time = blockIdx.z;//time 113 | //weight B*T*9T*H*W 114 | //proj B*T*C*H*W 115 | //out B*T*C*H*W 116 | if(col=0&&col+w=0){ 122 | float _dw=dw[batch][time][t*9+(h+1)*3+(w+1)][row][col]; 123 | for(int channel=0;channel 136 | __global__ 137 | void sa_map_backward_kernel_weight( 138 | const TensorAccessor5D dout, 139 | const TensorAccessor5D proj, 140 | TensorAccessor5D dweight,int B,int T,int C,int H,int W){ 141 | int col = blockIdx.x * blockDim.x + threadIdx.x;//col 142 | int row = blockIdx.y * blockDim.y + threadIdx.y;//row 143 | int time = blockIdx.z;//time 144 | //weight B*T*9T*H*W 145 | //proj B*T*C*H*W 146 | //out B*T*C*H*W 147 | if(col=0&&col+w=0){ 155 | float _proj=proj[batch][t][channel][row+h][col+w]; 156 | dweight[batch][time][t*9+(h+1)*3+(w+1)][row][col]+=_dout*_proj; 157 | } 158 | } 159 | } 160 | } 161 | } 162 | } 163 | } 164 | } 165 | 166 | template 167 | __global__ 168 | void sa_map_backward_kernel_proj( 169 | const TensorAccessor5D dout, 170 | const TensorAccessor5D weight, 171 | TensorAccessor5D dproj,int B,int T,int C,int H,int W){ 172 | int col = blockIdx.x * blockDim.x + threadIdx.x;//col 173 | int row = blockIdx.y * blockDim.y + threadIdx.y;//row 174 | int time = blockIdx.z;//time 175 | //weight B*T*9T*H*W 176 | //proj B*T*C*H*W 177 | //out B*T*C*H*W 178 | if(col=0&&col+w=0){ 184 | float weight_temp=weight[batch][time][t*9+(h+1)*3+(w+1)][row][col]; 185 | for(int channel=0;channel<<>>( 202 | query.packed_accessor(), 203 | key.packed_accessor(), 204 | 
weight.packed_accessor(),B,T,C,H,W); 205 | })); 206 | } 207 | 208 | void _sa_map_forward_cuda(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int B,int T,int C,int H,int W){ 209 | dim3 threads(32,32); 210 | dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); 211 | AT_DISPATCH_FLOATING_TYPES(weight.scalar_type(), "sa_weight_forward_cuda", ([&] { 212 | sa_map_forward_kernel<<>>( 213 | weight.packed_accessor(), 214 | proj.packed_accessor(), 215 | out.packed_accessor(),B,T,C,H,W); 216 | })); 217 | } 218 | 219 | void _sa_weight_backward_cuda(const torch::Tensor& dw,const torch::Tensor& query, 220 | const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey, 221 | int B,int T,int C,int H,int W){ 222 | dim3 threads(32,32); 223 | dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); 224 | AT_DISPATCH_FLOATING_TYPES(dw.scalar_type(), "sa_weight_forward_cuda", ([&] { 225 | const TensorAccessor5D dw_acc=dw.packed_accessor(); 226 | const TensorAccessor5D query_acc=query.packed_accessor(); 227 | const TensorAccessor5D key_acc=key.packed_accessor(); 228 | TensorAccessor5D dquery_acc=dquery.packed_accessor(); 229 | TensorAccessor5D dkey_acc=dkey.packed_accessor(); 230 | sa_weight_backward_kernel_query<<>>(dw_acc,key_acc,dquery_acc,B,T,C,H,W); 231 | sa_weight_backward_kernel_key<<>>(dw_acc,query_acc,dkey_acc,B,T,C,H,W); 232 | })); 233 | } 234 | 235 | void _sa_map_backward_cuda(const torch::Tensor& dout,const torch::Tensor& weight, 236 | const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj, 237 | int B,int T,int C,int H,int W){ 238 | dim3 threads(32,32); 239 | dim3 blocks((W+threads.x-1)/threads.x,(H+threads.y-1)/threads.y,T); 240 | AT_DISPATCH_FLOATING_TYPES(dout.scalar_type(), "sa_weight_forward_cuda", ([&] { 241 | const TensorAccessor5D dout_acc=dout.packed_accessor(); 242 | const TensorAccessor5D weight_acc=weight.packed_accessor(); 243 | const TensorAccessor5D proj_acc=proj.packed_accessor(); 244 | TensorAccessor5D dweight_acc=dweight.packed_accessor(); 245 | TensorAccessor5D dproj_acc=dproj.packed_accessor(); 246 | sa_map_backward_kernel_weight<<>>(dout_acc,proj_acc,dweight_acc,B,T,C,H,W); 247 | sa_map_backward_kernel_proj<<>>(dout_acc,weight_acc,dproj_acc,B,T,C,H,W); 248 | })); 249 | } 250 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/sa_ext.cpp: -------------------------------------------------------------------------------- 1 | //#include 2 | #include 3 | #include"utils.h" 4 | #include"timer.h" 5 | #include"reference.h" 6 | 7 | void get_sizes(const torch::Tensor& t,int *B,int *T,int *C,int *H,int *W){ 8 | *B=t.size(0); 9 | *T=t.size(1); 10 | *C=t.size(2); 11 | *H=t.size(3); 12 | *W=t.size(4); 13 | } 14 | 15 | void _sa_weight_forward_cuda(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int B,int T,int C,int H,int W,int radius,int dilation); 16 | void _sa_map_forward_cuda(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int B,int T,int C,int H,int W,int radius,int dilation); 17 | void _sa_weight_backward_cuda(const torch::Tensor& dw,const torch::Tensor& query, 18 | const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey, 19 | int B,int T,int C,int H,int W,int radius,int dilation); 20 | void _sa_map_backward_cuda(const torch::Tensor& dout,const torch::Tensor& weight, 21 | const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj, 22 | int B,int T,int C,int H,int W,int radius,int dilaiton); 
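// Shape conventions shared by the entry points below (they mirror the kernels in sa.cu):
//   query, key, proj, out : B x T x C x H x W
//   weight                : B x T x (T * (2*radius+1)^2) x H x W
// weight_forward takes channel-wise dot products between each query position and the
// key values inside a dilated (2*radius+1) x (2*radius+1) window of every frame;
// map_forward aggregates proj values with those weights over the same constrained
// neighbourhood. The *_backward functions return gradients with respect to both
// operands, and the *_ref variants call the serial CPU implementations from
// reference.cpp, used to check the CUDA kernels.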
23 | 24 | 25 | //forward declarations-------python pass information here 26 | void sa_weight_forward(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int radius,int dilation){ 27 | int B,T,C,H,W; 28 | get_sizes(query,&B,&T,&C,&H,&W); 29 | //GpuTimer timer; 30 | //timer.Start(); 31 | _sa_weight_forward_cuda(query,key,weight,B,T,C,H,W,radius,dilation); 32 | //timer.Stop(); 33 | //cudaDeviceSynchronize(); 34 | checkCudaErrors(cudaGetLastError()); 35 | //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 36 | } 37 | 38 | void sa_map_forward(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int radius,int dilation){ 39 | int B,T,C,H,W; 40 | get_sizes(proj,&B,&T,&C,&H,&W); 41 | //GpuTimer timer; 42 | //timer.Start(); 43 | _sa_map_forward_cuda(weight,proj,out,B,T,C,H,W,radius,dilation); 44 | //timer.Stop(); 45 | //cudaDeviceSynchronize(); 46 | checkCudaErrors(cudaGetLastError()); 47 | //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 48 | } 49 | 50 | void sa_weight_backward(const torch::Tensor& dw,const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey,int radius,int dilation){ 51 | int B,T,C,H,W; 52 | get_sizes(query,&B,&T,&C,&H,&W); 53 | //GpuTimer timer; 54 | //timer.Start(); 55 | _sa_weight_backward_cuda(dw,query,key,dquery,dkey,B,T,C,H,W,radius,dilation); 56 | //timer.Stop(); 57 | //cudaDeviceSynchronize(); 58 | checkCudaErrors(cudaGetLastError()); 59 | //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 60 | } 61 | 62 | void sa_map_backward(const torch::Tensor& dout,const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj,int radius,int dilation){ 63 | int B,T,C,H,W; 64 | get_sizes(proj,&B,&T,&C,&H,&W); 65 | //GpuTimer timer; 66 | //timer.Start(); 67 | _sa_map_backward_cuda(dout,weight,proj,dweight,dproj,B,T,C,H,W,radius,dilation); 68 | //timer.Stop(); 69 | //cudaDeviceSynchronize(); 70 | checkCudaErrors(cudaGetLastError()); 71 | //printf("Your code ran in: %f msecs.\n", timer.Elapsed()); 72 | } 73 | 74 | void sa_weight_forward_ref(const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& weight,int radius,int dilation){ 75 | int B,T,C,H,W; 76 | get_sizes(query,&B,&T,&C,&H,&W); 77 | sa_weight_forward_Ref(query,key,weight,B,T,C,H,W,radius,dilation); 78 | } 79 | 80 | void sa_weight_backward_ref(const torch::Tensor& dw,const torch::Tensor& query,const torch::Tensor& key,torch::Tensor& dquery,torch::Tensor& dkey,int radius,int dilation){ 81 | int B,T,C,H,W; 82 | get_sizes(query,&B,&T,&C,&H,&W); 83 | sa_weight_backward_query_Ref(dw,query,key,dquery,B,T,C,H,W,radius,dilation); 84 | sa_weight_backward_key_Ref(dw,query,key,dkey,B,T,C,H,W,radius,dilation); 85 | } 86 | 87 | void sa_map_forward_ref(const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& out,int radius,int dilation){ 88 | int B,T,C,H,W; 89 | get_sizes(proj,&B,&T,&C,&H,&W); 90 | sa_map_forward_Ref(weight,proj,out,B,T,C,H,W,radius,dilation); 91 | } 92 | 93 | void sa_map_backward_ref(const torch::Tensor& dout,const torch::Tensor& weight,const torch::Tensor& proj,torch::Tensor& dweight,torch::Tensor& dproj,int radius,int dilation){ 94 | int B,T,C,H,W; 95 | get_sizes(proj,&B,&T,&C,&H,&W); 96 | sa_map_backward_weight_Ref(dout,weight,proj,dweight,B,T,C,H,W,radius,dilation); 97 | sa_map_backward_proj_Ref(dout,weight,proj,dproj,B,T,C,H,W,radius,dilation); 98 | } 99 | 100 | 101 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 102 | m.def("weight_forward", &sa_weight_forward, 
"weight forward (CUDA)"); 103 | m.def("weight_backward", &sa_weight_backward, "weight backward (CUDA)"); 104 | m.def("map_forward", &sa_map_forward, "map forward (CUDA)"); 105 | m.def("map_backward", &sa_map_backward, "map backward (CUDA)"); 106 | m.def("weight_forward_ref", &sa_weight_forward_ref, "weight forward ref (CUDA)"); 107 | m.def("weight_backward_ref", &sa_weight_backward_ref, "weight backward ref (CUDA)"); 108 | m.def("map_forward_ref", &sa_map_forward_ref, "map forward ref (CUDA)"); 109 | m.def("map_backward_ref", &sa_map_backward_ref, "map backward ref (CUDA)"); 110 | } 111 | /* 112 | int main() { 113 | //torch::Tensor weight=torch::ones({2,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 114 | //torch::Tensor query=torch::ones({2,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 115 | //torch::Tensor key=torch::ones({2,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 116 | //sa_weight_forward(query,key,weight); 117 | /* 118 | torch::Tensor weight=torch::ones({1,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 119 | torch::Tensor proj=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 120 | torch::Tensor out=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 121 | sa_map_forward(weight,proj,out); 122 | 123 | /* 124 | torch::Tensor dw=torch::ones({1,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 125 | torch::Tensor query=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 126 | torch::Tensor key=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 127 | torch::Tensor dquery=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 128 | torch::Tensor dkey=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 129 | sa_weight_backward(dw,query,key,dquery,dkey); 130 | 131 | torch::Tensor dout=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 132 | 133 | torch::Tensor weight=torch::ones({1,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 134 | torch::Tensor proj=torch::ones({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 135 | torch::Tensor dweight=torch::zeros({1,5,5*9,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 136 | torch::Tensor dproj=torch::zeros({1,5,8,28,42}, at::kFloat).to(torch::Device(torch::kCUDA, 0)); 137 | sa_map_backward(dout,weight,proj,dweight,dproj); 138 | std::cout< 5 | 6 | struct GpuTimer 7 | { 8 | cudaEvent_t start; 9 | cudaEvent_t stop; 10 | 11 | GpuTimer() 12 | { 13 | cudaEventCreate(&start); 14 | cudaEventCreate(&stop); 15 | } 16 | 17 | ~GpuTimer() 18 | { 19 | cudaEventDestroy(start); 20 | cudaEventDestroy(stop); 21 | } 22 | 23 | void Start() 24 | { 25 | cudaEventRecord(start, 0); 26 | } 27 | 28 | void Stop() 29 | { 30 | cudaEventRecord(stop, 0); 31 | } 32 | 33 | float Elapsed() 34 | { 35 | float elapsed; 36 | cudaEventSynchronize(stop); 37 | cudaEventElapsedTime(&elapsed, start, stop); 38 | return elapsed; 39 | } 40 | }; 41 | 42 | #endif /* GPU_TIMER_H__ */ 43 | -------------------------------------------------------------------------------- /Models/PCSA/PCSA_Module/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef UTILS_H__ 2 | #define UTILS_H__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #define checkCudaErrors(val) check( (val), #val, __FILE__, __LINE__) 13 | 14 | template 15 | void 
check(T err, const char* const func, const char* const file, const int line) { 16 | if (err != cudaSuccess) { 17 | std::cerr << "CUDA error at: " << file << ":" << line << std::endl; 18 | std::cerr << cudaGetErrorString(err) << " " << func << std::endl; 19 | exit(1); 20 | } 21 | } 22 | 23 | template 24 | void checkResultsExact(const T* const ref, const T* const gpu, size_t numElem) { 25 | //check that the GPU result matches the CPU result 26 | for (size_t i = 0; i < numElem; ++i) { 27 | if (ref[i] != gpu[i]) { 28 | std::cerr << "Difference at pos " << i << std::endl; 29 | //the + is magic to convert char to int without messing 30 | //with other types 31 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] << 32 | "\nGPU : " << +gpu[i] << std::endl; 33 | exit(1); 34 | } 35 | } 36 | } 37 | 38 | template 39 | void checkResultsEps(const T* const ref, const T* const gpu, size_t numElem, double eps1, double eps2) { 40 | assert(eps1 >= 0 && eps2 >= 0); 41 | unsigned long long totalDiff = 0; 42 | unsigned numSmallDifferences = 0; 43 | for (size_t i = 0; i < numElem; ++i) { 44 | //subtract smaller from larger in case of unsigned types 45 | T smaller = std::min(ref[i], gpu[i]); 46 | T larger = std::max(ref[i], gpu[i]); 47 | T diff = larger - smaller; 48 | if (diff > 0 && diff <= eps1) { 49 | numSmallDifferences++; 50 | } 51 | else if (diff > eps1) { 52 | std::cerr << "Difference at pos " << +i << " exceeds tolerance of " << eps1 << std::endl; 53 | std::cerr << "Reference: " << std::setprecision(17) << +ref[i] << 54 | "\nGPU : " << +gpu[i] << std::endl; 55 | exit(1); 56 | } 57 | totalDiff += diff * diff; 58 | } 59 | double percentSmallDifferences = (double)numSmallDifferences / (double)numElem; 60 | if (percentSmallDifferences > eps2) { 61 | std::cerr << "Total percentage of non-zero pixel difference between the two images exceeds " << 100.0 * eps2 << "%" << std::endl; 62 | std::cerr << "Percentage of non-zero pixel differences: " << 100.0 * percentSmallDifferences << "%" << std::endl; 63 | exit(1); 64 | } 65 | } 66 | 67 | //Uses the autodesk method of image comparison 68 | //Note the the tolerance here is in PIXELS not a percentage of input pixels 69 | template 70 | void checkResultsAutodesk(const T* const ref, const T* const gpu, size_t numElem, double variance, size_t tolerance) 71 | { 72 | 73 | size_t numBadPixels = 0; 74 | for (size_t i = 0; i < numElem; ++i) { 75 | T smaller = std::min(ref[i], gpu[i]); 76 | T larger = std::max(ref[i], gpu[i]); 77 | T diff = larger - smaller; 78 | if (diff > variance) 79 | ++numBadPixels; 80 | } 81 | 82 | if (numBadPixels > tolerance) { 83 | std::cerr << "Too many bad pixels in the image." 
<< numBadPixels << "/" << tolerance << std::endl; 84 | exit(1); 85 | } 86 | } 87 | 88 | #endif 89 | -------------------------------------------------------------------------------- /Models/PCSA/build/lib.linux-x86_64-3.6/self_cuda_backend.cpython-36m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/build/lib.linux-x86_64-3.6/self_cuda_backend.cpython-36m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /Models/PCSA/build/temp.linux-x86_64-3.6/SelfAttention_Module/reference.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/build/temp.linux-x86_64-3.6/SelfAttention_Module/reference.o -------------------------------------------------------------------------------- /Models/PCSA/build/temp.linux-x86_64-3.6/SelfAttention_Module/sa.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/build/temp.linux-x86_64-3.6/SelfAttention_Module/sa.o -------------------------------------------------------------------------------- /Models/PCSA/build/temp.linux-x86_64-3.6/SelfAttention_Module/sa_ext.o: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/build/temp.linux-x86_64-3.6/SelfAttention_Module/sa_ext.o -------------------------------------------------------------------------------- /Models/PCSA/dist/self_cuda-0.0.0-py3.6-linux-x86_64.egg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/PCSA/dist/self_cuda-0.0.0-py3.6-linux-x86_64.egg -------------------------------------------------------------------------------- /Models/PCSA/self_cuda.egg-info/PKG-INFO: -------------------------------------------------------------------------------- 1 | Metadata-Version: 1.0 2 | Name: self-cuda 3 | Version: 0.0.0 4 | Summary: UNKNOWN 5 | Home-page: UNKNOWN 6 | Author: UNKNOWN 7 | Author-email: UNKNOWN 8 | License: UNKNOWN 9 | Description-Content-Type: UNKNOWN 10 | Description: UNKNOWN 11 | Platform: UNKNOWN 12 | -------------------------------------------------------------------------------- /Models/PCSA/self_cuda.egg-info/SOURCES.txt: -------------------------------------------------------------------------------- 1 | setup.py 2 | SelfAttention_Module/reference.cpp 3 | SelfAttention_Module/sa.cu 4 | SelfAttention_Module/sa_ext.cpp 5 | self_cuda.egg-info/PKG-INFO 6 | self_cuda.egg-info/SOURCES.txt 7 | self_cuda.egg-info/dependency_links.txt 8 | self_cuda.egg-info/top_level.txt -------------------------------------------------------------------------------- /Models/PCSA/self_cuda.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Models/PCSA/self_cuda.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | self_cuda_backend 2 | 
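The pybind bindings in sa_ext.cpp above expose weight_forward/weight_backward and map_forward/map_backward (plus the CPU reference versions) through the self_cuda_backend module named in top_level.txt. The autograd-aware Python wrapper ships as Models/PCSA/PCSA_Module/self_cuda/function.py, which is not reproduced in this listing; the sketch below shows one plausible shape of such a wrapper. The weight layout (B, T, T*(2*radius+1)^2, H, W) and the default radius are assumptions inferred from the commented-out test tensors in sa_ext.cpp, not taken from the shipped wrapper.

```python
import torch
from torch.autograd import Function
import self_cuda_backend as ext  # the extension built by Models/PCSA/setup.py


class SAWeight(Function):
    """Constrained temporal self-attention scores between query and key."""

    @staticmethod
    def forward(ctx, query, key, radius=1, dilation=1):
        # query/key: (B, T, C, H, W); weight holds one score per neighbour
        # position, assumed layout (B, T, T*(2*radius+1)**2, H, W).
        B, T, C, H, W = query.shape
        weight = query.new_zeros(B, T, T * (2 * radius + 1) ** 2, H, W)
        ext.weight_forward(query, key, weight, radius, dilation)
        ctx.save_for_backward(query, key)
        ctx.radius, ctx.dilation = radius, dilation
        return weight

    @staticmethod
    def backward(ctx, dw):
        query, key = ctx.saved_tensors
        dquery, dkey = torch.zeros_like(query), torch.zeros_like(key)
        ext.weight_backward(dw.contiguous(), query, key, dquery, dkey,
                            ctx.radius, ctx.dilation)
        # No gradients for the integer radius/dilation arguments.
        return dquery, dkey, None, None
```

A matching Function would wrap map_forward/map_backward in the same way, with a softmax over the neighbourhood dimension applied between the two calls.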
-------------------------------------------------------------------------------- /Models/PCSA/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 3 | from os.path import join 4 | 5 | project_root = 'PCSA_Module' 6 | sources = [join(project_root, file) for file in ['sa_ext.cpp', 7 | 'sa.cu','reference.cpp']] 8 | ''' 9 | with open("README.md", "r") as fh: 10 | long_description = fh.read() 11 | ''' 12 | 13 | nvcc_args = [ 14 | '-gencode', 'arch=compute_61,code=sm_61', 15 | '-gencode', 'arch=compute_70,code=sm_70', 16 | '-gencode', 'arch=compute_70,code=compute_70' 17 | ] 18 | cxx_args = ['-std=c++11'] 19 | 20 | setup( 21 | name='self_cuda', 22 | #version="0.1.0", 23 | #author="Clément Pinard", 24 | #author_email="clement.pinard@ensta-paristech.fr", 25 | #description="Correlation module for pytorch", 26 | #long_description=long_description, 27 | #long_description_content_type="text/markdown", 28 | #url="https://github.com/ClementPinard/Pytorch-Correlation-extension", 29 | #install_requires=['torch>=1.0.1','numpy'], 30 | ext_modules=[ 31 | CUDAExtension('self_cuda_backend', 32 | sources, extra_compile_args={'cxx': cxx_args,'nvcc': nvcc_args}) 33 | ], 34 | #package_dir={'': project_root}, 35 | #packages=['spatial_correlation_sampler'], 36 | cmdclass={ 37 | 'build_ext': BuildExtension 38 | }) 39 | -------------------------------------------------------------------------------- /Models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/__init__.py -------------------------------------------------------------------------------- /Models/lightrfb.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class h_sigmoid(nn.Module): 5 | def __init__(self, inplace=True): 6 | super(h_sigmoid, self).__init__() 7 | self.relu = nn.ReLU6(inplace=inplace) 8 | 9 | def forward(self, x): 10 | return self.relu(x + 3) / 6 11 | 12 | class SELayer(nn.Module): 13 | def __init__(self, channel, reduction=4): 14 | super(SELayer, self).__init__() 15 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 16 | self.fc = nn.Sequential( 17 | nn.Linear(channel, channel // reduction), 18 | nn.ReLU(inplace=True), 19 | nn.Linear(channel // reduction, channel), 20 | h_sigmoid() 21 | ) 22 | 23 | def forward(self, x): 24 | b, c, _, _ = x.size() 25 | y = self.avg_pool(x).view(b, c) 26 | y = self.fc(y).view(b, c, 1, 1) 27 | return x * y 28 | 29 | class BasicConv(nn.Module): 30 | 31 | def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False): 32 | super(BasicConv, self).__init__() 33 | self.out_channels = out_planes 34 | self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size, stride=stride, padding=padding, dilation=dilation, groups=groups, bias=bias) 35 | self.bn = nn.BatchNorm2d(out_planes,eps=1e-5, momentum=0.01, affine=True) if bn else None 36 | self.relu = nn.PReLU() if relu else None 37 | #self.relu = h_sigmoid() if relu else None 38 | 39 | def forward(self, x): 40 | x = self.conv(x) 41 | if self.bn is not None: 42 | x = self.bn(x) 43 | if self.relu is not None: 44 | x = self.relu(x) 45 | return x 46 | 47 | class LightRFB(nn.Module): 48 | def __init__(self, 
channels_in=160,channels_mid=128,channels_out=32): 49 | super(LightRFB, self).__init__() 50 | self.global_se=SELayer(channels_in) 51 | self.reduce=nn.Sequential(nn.Conv2d(channels_in,channels_mid,kernel_size=1,bias=False), 52 | nn.BatchNorm2d(channels_mid), 53 | nn.PReLU(channels_mid)) 54 | self.br0 = nn.Sequential( 55 | BasicConv(channels_mid, channels_mid, kernel_size=1,bias=False, 56 | bn=True, relu=True), 57 | BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=1, padding=1, groups=channels_mid, bias=False, 58 | relu=False), 59 | ) 60 | self.br1 = nn.Sequential( 61 | BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=1, padding=1, groups=channels_mid, bias=False,bn=True,relu=False), 62 | BasicConv(channels_mid, channels_mid, kernel_size=1, dilation=1, bias=False,bn=True,relu=True), 63 | 64 | BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=3, padding=3, groups=channels_mid, bias=False, 65 | relu=False), 66 | ) 67 | self.br2 = nn.Sequential( 68 | BasicConv(channels_mid, channels_mid, kernel_size=5, dilation=1, padding=2, groups=channels_mid, bias=False, 69 | bn=True, relu=False), 70 | BasicConv(channels_mid, channels_mid, kernel_size=1, dilation=1, bias=False, bn=True, relu=True), 71 | 72 | BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=5, padding=5, groups=channels_mid, bias=False, 73 | relu=False), 74 | ) 75 | self.br3 = nn.Sequential( 76 | BasicConv(channels_mid, channels_mid, kernel_size=7, dilation=1, padding=3, groups=channels_mid, bias=False, 77 | bn=True, relu=False), 78 | BasicConv(channels_mid, channels_mid, kernel_size=1, dilation=1, bias=False, bn=True, relu=True), 79 | 80 | BasicConv(channels_mid, channels_mid, kernel_size=3, dilation=7, padding=7, groups=channels_mid, bias=False, 81 | relu=False), 82 | ) 83 | self.point_global=BasicConv(channels_mid*4+channels_in,channels_out,kernel_size=1,bias=False,bn=True, relu=True) 84 | 85 | def forward(self, x): 86 | x_reduce=self.reduce(self.global_se(x)) 87 | x0=self.br0(x_reduce) 88 | x1=self.br1(x_reduce) 89 | x2=self.br2(x_reduce) 90 | x3=self.br3(x_reduce) 91 | out=self.point_global(torch.cat([x,x0,x1,x2,x3],dim=1)) 92 | return out 93 | 94 | 95 | 96 | 97 | if __name__=="__main__": 98 | m=GycModule(196,128,32) 99 | t=torch.zeros(1,196,14,14) 100 | print(m(t).shape) 101 | -------------------------------------------------------------------------------- /Models/mobilenetv3_pretrain.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | import math 5 | import torch.nn.functional as F 6 | from config import config 7 | from Models.lightrfb import LightRFB 8 | __all__ = ['mobilenetv3_large'] 9 | 10 | 11 | def _make_divisible(v, divisor, min_value=None): 12 | """ 13 | This function is taken from the original tf repo. 14 | It ensures that all layers have a channel number that is divisible by 8 15 | It can be seen here: 16 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 17 | :param v: 18 | :param divisor: 19 | :param min_value: 20 | :return: 21 | """ 22 | if min_value is None: 23 | min_value = divisor 24 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 25 | # Make sure that round down does not go down by more than 10%. 
26 | if new_v < 0.9 * v: 27 | new_v += divisor 28 | return new_v 29 | 30 | 31 | class h_sigmoid(nn.Module): 32 | def __init__(self, inplace=True): 33 | super(h_sigmoid, self).__init__() 34 | self.relu = nn.ReLU6(inplace=inplace) 35 | 36 | def forward(self, x): 37 | return self.relu(x + 3) / 6 38 | 39 | 40 | class h_swish(nn.Module): 41 | def __init__(self, inplace=True): 42 | super(h_swish, self).__init__() 43 | self.sigmoid = h_sigmoid(inplace=inplace) 44 | 45 | def forward(self, x): 46 | return x * self.sigmoid(x) 47 | 48 | 49 | class SELayer(nn.Module): 50 | def __init__(self, channel, reduction=4): 51 | super(SELayer, self).__init__() 52 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 53 | self.fc = nn.Sequential( 54 | nn.Linear(channel, channel // reduction), 55 | nn.ReLU(inplace=True), 56 | nn.Linear(channel // reduction, channel), 57 | h_sigmoid() 58 | ) 59 | 60 | def forward(self, x): 61 | b, c, _, _ = x.size() 62 | y = self.avg_pool(x).view(b, c) 63 | y = self.fc(y).view(b, c, 1, 1) 64 | return x * y 65 | 66 | 67 | def conv_3x3_bn(inp, oup, stride): 68 | return nn.Sequential( 69 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 70 | nn.BatchNorm2d(oup), 71 | h_swish() 72 | ) 73 | 74 | 75 | def conv_1x1_bn(inp, oup): 76 | return nn.Sequential( 77 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 78 | nn.BatchNorm2d(oup), 79 | h_swish() 80 | ) 81 | 82 | 83 | class InvertedResidual(nn.Module): 84 | def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs): 85 | super(InvertedResidual, self).__init__() 86 | assert stride in [1, 2] 87 | 88 | self.identity = stride == 1 and inp == oup 89 | 90 | if inp == hidden_dim: 91 | self.conv = nn.Sequential( 92 | # dw 93 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), 94 | nn.BatchNorm2d(hidden_dim), 95 | h_swish() if use_hs else nn.ReLU(inplace=True), 96 | # Squeeze-and-Excite 97 | SELayer(hidden_dim) if use_se else nn.Sequential(), 98 | # pw-linear 99 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 100 | nn.BatchNorm2d(oup), 101 | ) 102 | else: 103 | self.conv = nn.Sequential( 104 | # pw 105 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 106 | nn.BatchNorm2d(hidden_dim), 107 | h_swish() if use_hs else nn.ReLU(inplace=True), 108 | # dw 109 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), 110 | nn.BatchNorm2d(hidden_dim), 111 | # Squeeze-and-Excite 112 | SELayer(hidden_dim) if use_se else nn.Sequential(), 113 | h_swish() if use_hs else nn.ReLU(inplace=True), 114 | # pw-linear 115 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 116 | nn.BatchNorm2d(oup), 117 | ) 118 | 119 | def forward(self, x): 120 | if self.identity: 121 | return x + self.conv(x) 122 | else: 123 | return self.conv(x) 124 | 125 | 126 | class MobileNetV3(nn.Module): 127 | def __init__(self, cfgs, mode,width_mult=1.): 128 | super(MobileNetV3, self).__init__() 129 | # setting of inverted residual blocks 130 | self.cfgs = cfgs 131 | assert mode in ['large', 'small'] 132 | 133 | # building first layer 134 | input_channel = _make_divisible(16 * width_mult, 8) 135 | layers = [conv_3x3_bn(3, input_channel, 2)] 136 | # building inverted residual blocks 137 | block = InvertedResidual 138 | for k, exp_size, c, use_se, use_hs, s in self.cfgs: 139 | output_channel = _make_divisible(c * width_mult, 8) 140 | layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs)) 141 | input_channel = output_channel 142 | self.features = 
nn.Sequential(*layers) 143 | self.load_backbone( 144 | torch.load(config.backbone_imagenet_pretrain,map_location=torch.device('cpu'))) 145 | 146 | def forward(self, x): 147 | x = self.features[:4](x) 148 | low_level=x 149 | high_level = self.features[4:](x) 150 | return low_level,high_level 151 | 152 | def load_backbone(self,pretrained_dict): 153 | # step2: get model state_dict 154 | model_dict = self.state_dict() 155 | # step3: remove pretrained_dict params which is not in model_dict 156 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if (k in model_dict)} 157 | # step4: update model_dict using pretrained_dict 158 | model_dict.update(pretrained_dict) 159 | # step5: update model using model_dict 160 | self.load_state_dict(model_dict) 161 | 162 | def _initialize_weights(self): 163 | for m in self.modules(): 164 | if isinstance(m, nn.Conv2d): 165 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 166 | m.weight.data.normal_(0, math.sqrt(2. / n)) 167 | if m.bias is not None: 168 | m.bias.data.zero_() 169 | elif isinstance(m, nn.BatchNorm2d): 170 | m.weight.data.fill_(1) 171 | m.bias.data.zero_() 172 | elif isinstance(m, nn.Linear): 173 | n = m.weight.size(1) 174 | m.weight.data.normal_(0, 0.01) 175 | m.bias.data.zero_() 176 | 177 | class conbine_feature(nn.Module): 178 | def __init__(self): 179 | super(conbine_feature, self).__init__() 180 | self.up2_high = DilatedParallelConvBlockD2(32, 16) 181 | self.up2_low = nn.Conv2d(24, 16, 1, stride=1, padding=0,bias=False) 182 | self.up2_bn2 = nn.BatchNorm2d(16) 183 | self.up2_act = nn.PReLU(16) 184 | self.refine=nn.Sequential(nn.Conv2d(16,16,3,padding=1,bias=False),nn.BatchNorm2d(16),nn.PReLU()) 185 | 186 | def forward(self, low_fea,high_fea): 187 | high_fea = self.up2_high(high_fea) 188 | low_fea = self.up2_bn2(self.up2_low(low_fea)) 189 | refine_feature = self.refine(self.up2_act(high_fea+low_fea)) 190 | return refine_feature 191 | 192 | 193 | class DilatedParallelConvBlockD2(nn.Module): 194 | def __init__(self, nIn, nOut, add=False): 195 | super(DilatedParallelConvBlockD2, self).__init__() 196 | n = int(np.ceil(nOut / 2.)) 197 | n2 = nOut - n 198 | 199 | self.conv0 = nn.Conv2d(nIn, nOut, 1, stride=1, padding=0, dilation=1, bias=False) 200 | self.conv1 = nn.Conv2d(n, n, 3, stride=1, padding=1, dilation=1, bias=False) 201 | self.conv2 = nn.Conv2d(n2, n2, 3, stride=1, padding=2, dilation=2, bias=False) 202 | 203 | self.bn = nn.BatchNorm2d(nOut) 204 | #self.act = nn.PReLU(nOut) 205 | self.add = add 206 | 207 | def forward(self, input): 208 | in0 = self.conv0(input) 209 | in1, in2 = torch.chunk(in0, 2, dim=1) 210 | b1 = self.conv1(in1) 211 | b2 = self.conv2(in2) 212 | output = torch.cat([b1, b2], dim=1) 213 | 214 | if self.add: 215 | output = input + output 216 | output = self.bn(output) 217 | #output = self.act(output) 218 | 219 | return output 220 | 221 | class Fastnet(nn.Module): 222 | def __init__(self): 223 | super(Fastnet, self).__init__() 224 | self.feature_extractor=mobilenetv3_large() 225 | self.dilation_conv_group=LightRFB() 226 | self.combine=conbine_feature() 227 | self.SegNIN = nn.Sequential(nn.Dropout2d(0.1),nn.Conv2d(16, 1, kernel_size=1,bias=False)) 228 | 229 | def forward(self, x): 230 | if len(x.shape)==4: 231 | origin_shape=x.shape 232 | low_feature,high_feature=self.feature_extractor(x) 233 | high_feature=self.dilation_conv_group(high_feature) 234 | high_feature = F.interpolate(high_feature, size=(low_feature.shape[-2], low_feature.shape[-1]), 235 | mode="bilinear", 236 | align_corners=False) 237 | out = 
self.combine(low_feature, high_feature) 238 | out = torch.sigmoid(F.interpolate(self.SegNIN(out), size=(origin_shape[-2], origin_shape[-1]), mode="bilinear", 239 | align_corners=False)) 240 | else: 241 | origin_shape=x.shape 242 | x=x.view(-1,*origin_shape[2:]) 243 | low_feature,high_feature=self.feature_extractor(x) 244 | high_feature=self.dilation_conv_group(high_feature) 245 | high_feature = F.interpolate(high_feature, size=(low_feature.shape[-2], low_feature.shape[-1]), 246 | mode="bilinear", 247 | align_corners=False) 248 | out = self.combine(low_feature, high_feature) 249 | out = torch.sigmoid( 250 | F.interpolate(self.SegNIN(out), size=(origin_shape[-2], origin_shape[-1]), mode="bilinear", 251 | align_corners=False)) 252 | return out 253 | 254 | 255 | def mobilenetv3_large(**kwargs): 256 | """ 257 | Constructs a MobileNetV3-Large model 258 | """ 259 | cfgs = [ 260 | # k, t, c, SE, NL, s 261 | [3, 16, 16, 0, 0, 1], 262 | [3, 64, 24, 0, 0, 2], 263 | [3, 72, 24, 0, 0, 1], 264 | [5, 72, 40, 1, 0, 1], 265 | [5, 120, 40, 1, 0, 1], 266 | [5, 120, 40, 1, 0, 1], 267 | [3, 240, 80, 0, 1, 2], 268 | [3, 200, 80, 0, 1, 1], 269 | [3, 184, 80, 0, 1, 1], 270 | [3, 184, 80, 0, 1, 1], 271 | [3, 480, 112, 1, 1, 1], 272 | [3, 672, 112, 1, 1, 1], 273 | [5, 672, 160, 1, 1, 1], 274 | [5, 672, 160, 1, 1, 1], 275 | [5, 960, 160, 1, 1, 1] 276 | ] 277 | return MobileNetV3(cfgs, mode='large', **kwargs) 278 | 279 | if __name__=="__main__": 280 | from torchstat import stat 281 | a=torch.zeros(1,3,224,336) 282 | mobile=Fastnet() 283 | mobile(a) 284 | dsa 285 | 286 | 287 | print(mobile(a)[0].shape) 288 | total_paramters = sum([np.prod(p.size()) for p in mobile.parameters()]) 289 | print('Total network parameters: ' + str(total_paramters)) -------------------------------------------------------------------------------- /Models/mobilenetv3temporal_PCSA.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | import math 5 | import torch.nn.functional as F 6 | from config import config 7 | from Models.lightrfb import LightRFB 8 | from Models.PCSA import T_Moduel 9 | __all__ = ['mobilenetv3_large'] 10 | 11 | 12 | def _make_divisible(v, divisor, min_value=None): 13 | """ 14 | This function is taken from the original tf repo. 15 | It ensures that all layers have a channel number that is divisible by 8 16 | It can be seen here: 17 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 18 | :param v: 19 | :param divisor: 20 | :param min_value: 21 | :return: 22 | """ 23 | if min_value is None: 24 | min_value = divisor 25 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 26 | # Make sure that round down does not go down by more than 10%. 
27 | if new_v < 0.9 * v: 28 | new_v += divisor 29 | return new_v 30 | 31 | 32 | class h_sigmoid(nn.Module): 33 | def __init__(self, inplace=True): 34 | super(h_sigmoid, self).__init__() 35 | self.relu = nn.ReLU6(inplace=inplace) 36 | 37 | def forward(self, x): 38 | return self.relu(x + 3) / 6 39 | 40 | 41 | class h_swish(nn.Module): 42 | def __init__(self, inplace=True): 43 | super(h_swish, self).__init__() 44 | self.sigmoid = h_sigmoid(inplace=inplace) 45 | 46 | def forward(self, x): 47 | return x * self.sigmoid(x) 48 | 49 | 50 | class SELayer(nn.Module): 51 | def __init__(self, channel, reduction=4): 52 | super(SELayer, self).__init__() 53 | self.avg_pool = nn.AdaptiveAvgPool2d(1) 54 | self.fc = nn.Sequential( 55 | nn.Linear(channel, channel // reduction), 56 | nn.ReLU(inplace=True), 57 | nn.Linear(channel // reduction, channel), 58 | h_sigmoid() 59 | ) 60 | 61 | def forward(self, x): 62 | b, c, _, _ = x.size() 63 | y = self.avg_pool(x).view(b, c) 64 | y = self.fc(y).view(b, c, 1, 1) 65 | return x * y 66 | 67 | 68 | def conv_3x3_bn(inp, oup, stride): 69 | return nn.Sequential( 70 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 71 | nn.BatchNorm2d(oup), 72 | h_swish() 73 | ) 74 | 75 | 76 | def conv_1x1_bn(inp, oup): 77 | return nn.Sequential( 78 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 79 | nn.BatchNorm2d(oup), 80 | h_swish() 81 | ) 82 | 83 | 84 | class InvertedResidual(nn.Module): 85 | def __init__(self, inp, hidden_dim, oup, kernel_size, stride, use_se, use_hs): 86 | super(InvertedResidual, self).__init__() 87 | assert stride in [1, 2] 88 | 89 | self.identity = stride == 1 and inp == oup 90 | 91 | if inp == hidden_dim: 92 | self.conv = nn.Sequential( 93 | # dw 94 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), 95 | nn.BatchNorm2d(hidden_dim), 96 | h_swish() if use_hs else nn.ReLU(inplace=True), 97 | # Squeeze-and-Excite 98 | SELayer(hidden_dim) if use_se else nn.Sequential(), 99 | # pw-linear 100 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 101 | nn.BatchNorm2d(oup), 102 | ) 103 | else: 104 | self.conv = nn.Sequential( 105 | # pw 106 | nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False), 107 | nn.BatchNorm2d(hidden_dim), 108 | h_swish() if use_hs else nn.ReLU(inplace=True), 109 | # dw 110 | nn.Conv2d(hidden_dim, hidden_dim, kernel_size, stride, (kernel_size - 1) // 2, groups=hidden_dim, bias=False), 111 | nn.BatchNorm2d(hidden_dim), 112 | # Squeeze-and-Excite 113 | SELayer(hidden_dim) if use_se else nn.Sequential(), 114 | h_swish() if use_hs else nn.ReLU(inplace=True), 115 | # pw-linear 116 | nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False), 117 | nn.BatchNorm2d(oup), 118 | ) 119 | 120 | def forward(self, x): 121 | if self.identity: 122 | return x + self.conv(x) 123 | else: 124 | return self.conv(x) 125 | 126 | 127 | class MobileNetV3(nn.Module): 128 | def __init__(self, cfgs, mode,width_mult=1.): 129 | super(MobileNetV3, self).__init__() 130 | # setting of inverted residual blocks 131 | self.cfgs = cfgs 132 | assert mode in ['large', 'small'] 133 | 134 | # building first layer 135 | input_channel = _make_divisible(16 * width_mult, 8) 136 | layers = [conv_3x3_bn(3, input_channel, 2)] 137 | # building inverted residual blocks 138 | block = InvertedResidual 139 | for k, exp_size, c, use_se, use_hs, s in self.cfgs: 140 | output_channel = _make_divisible(c * width_mult, 8) 141 | layers.append(block(input_channel, exp_size, output_channel, k, s, use_se, use_hs)) 142 | input_channel = output_channel 143 | self.features = 
nn.Sequential(*layers) 144 | self.load_backbone( 145 | torch.load(config.backbone_imagenet_pretrain,map_location=torch.device('cpu'))) 146 | 147 | def forward(self, x): 148 | x = self.features[:4](x) 149 | low_level = x 150 | high_level = self.features[4:](x) 151 | return low_level, high_level 152 | 153 | def load_backbone(self,pretrained_dict): 154 | # step2: get model state_dict 155 | model_dict = self.state_dict() 156 | # step3: remove pretrained_dict params which is not in model_dict 157 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if (k in model_dict)} 158 | 159 | # step4: update model_dict using pretrained_dict 160 | model_dict.update(pretrained_dict) 161 | # step5: update model using model_dict 162 | self.load_state_dict(model_dict) 163 | 164 | def _initialize_weights(self): 165 | for m in self.modules(): 166 | if isinstance(m, nn.Conv2d): 167 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 168 | m.weight.data.normal_(0, math.sqrt(2. / n)) 169 | if m.bias is not None: 170 | m.bias.data.zero_() 171 | elif isinstance(m, nn.BatchNorm2d): 172 | m.weight.data.fill_(1) 173 | m.bias.data.zero_() 174 | elif isinstance(m, nn.Linear): 175 | n = m.weight.size(1) 176 | m.weight.data.normal_(0, 0.01) 177 | m.bias.data.zero_() 178 | 179 | class conbine_feature(nn.Module): 180 | def __init__(self): 181 | super(conbine_feature, self).__init__() 182 | self.up2_high = DilatedParallelConvBlockD2(32, 16) 183 | self.up2_low = nn.Conv2d(24, 16, 1, stride=1, padding=0,bias=False) 184 | self.up2_bn2 = nn.BatchNorm2d(16) 185 | self.up2_act = nn.PReLU(16) 186 | self.refine=nn.Sequential(nn.Conv2d(16,16,3,padding=1,bias=False),nn.BatchNorm2d(16),nn.PReLU()) 187 | 188 | def forward(self, low_fea,high_fea): 189 | high_fea = self.up2_high(high_fea) 190 | low_fea = self.up2_bn2(self.up2_low(low_fea)) 191 | refine_feature = self.refine(self.up2_act(high_fea+low_fea)) 192 | return refine_feature 193 | 194 | 195 | class DilatedParallelConvBlockD2(nn.Module): 196 | def __init__(self, nIn, nOut, add=False): 197 | super(DilatedParallelConvBlockD2, self).__init__() 198 | n = int(np.ceil(nOut / 2.)) 199 | n2 = nOut - n 200 | 201 | self.conv0 = nn.Conv2d(nIn, nOut, 1, stride=1, padding=0, dilation=1, bias=False) 202 | self.conv1 = nn.Conv2d(n, n, 3, stride=1, padding=1, dilation=1, bias=False) 203 | self.conv2 = nn.Conv2d(n2, n2, 3, stride=1, padding=2, dilation=2, bias=False) 204 | 205 | self.bn = nn.BatchNorm2d(nOut) 206 | #self.act = nn.PReLU(nOut) 207 | self.add = add 208 | 209 | def forward(self, input): 210 | in0 = self.conv0(input) 211 | in1, in2 = torch.chunk(in0, 2, dim=1) 212 | b1 = self.conv1(in1) 213 | b2 = self.conv2(in2) 214 | output = torch.cat([b1, b2], dim=1) 215 | 216 | if self.add: 217 | output = input + output 218 | output = self.bn(output) 219 | #output = self.act(output) 220 | 221 | return output 222 | 223 | class Fastnet(nn.Module): 224 | def __init__(self): 225 | super(Fastnet, self).__init__() 226 | self.feature_extractor=mobilenetv3_large() 227 | self.dilation_conv_group=LightRFB() 228 | self.combine=conbine_feature() 229 | self.SegNIN = nn.Sequential(nn.Dropout2d(0.1),nn.Conv2d(16, 1, kernel_size=1,bias=False)) 230 | self.temporal_high=T_Moduel(32) 231 | 232 | def load_backbone(self,pretrained_dict,logger): 233 | # step2: get model state_dict 234 | model_dict = self.state_dict() 235 | # step3: remove pretrained_dict params which is not in model_dict 236 | logger.info("load_state_dict!!!") 237 | for k, v in pretrained_dict.items(): 238 | if (k in model_dict): 239 | 
logger.info("load:%s"%k) 240 | else: 241 | logger.info("jump over:%s"%k) 242 | 243 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if (k in model_dict)} 244 | 245 | # step4: update model_dict using pretrained_dict 246 | model_dict.update(pretrained_dict) 247 | # step5: update model using model_dict 248 | self.load_state_dict(model_dict) 249 | 250 | def freezen_feature_extractor(self,logger): 251 | logger.info("freeze feature extractor!!!!!!") 252 | for params in self.feature_extractor.parameters(): 253 | params.requires_grad=False 254 | 255 | def forward(self, x): 256 | if len(x.shape)==4: 257 | origin_shape=x.shape 258 | low_feature, high_feature = self.feature_extractor(x) 259 | high_feature=self.dilation_conv_group(high_feature) 260 | high_feature = F.interpolate(high_feature, size=(low_feature.shape[-2], low_feature.shape[-1]), 261 | mode="bilinear", 262 | align_corners=False) 263 | out = self.combine(low_feature, high_feature) 264 | out = torch.sigmoid(F.interpolate(self.SegNIN(out), size=(origin_shape[-2], origin_shape[-1]), mode="bilinear", 265 | align_corners=False)) 266 | else: 267 | origin_shape = x.shape 268 | x = x.view(-1, *origin_shape[2:]) 269 | low_feature, high_feature = self.feature_extractor(x) 270 | high_feature = self.dilation_conv_group(high_feature) 271 | high_feature=high_feature.view(*origin_shape[:2],*high_feature.shape[1:]) 272 | high_feature=self.temporal_high(high_feature) 273 | high_feature = high_feature.view(-1, *high_feature.shape[2:]) 274 | 275 | high_feature = F.interpolate(high_feature, size=(low_feature.shape[-2], low_feature.shape[-1]), 276 | mode="bilinear", 277 | align_corners=False) 278 | 279 | out = self.combine(low_feature, high_feature) 280 | out = torch.sigmoid( 281 | F.interpolate(self.SegNIN(out), size=(origin_shape[-2], origin_shape[-1]), mode="bilinear", 282 | align_corners=False)) 283 | return out 284 | 285 | 286 | def mobilenetv3_large(**kwargs): 287 | """ 288 | Constructs a MobileNetV3-Large model 289 | """ 290 | cfgs = [ 291 | # k, t, c, SE, NL, s 292 | [3, 16, 16, 0, 0, 1], 293 | [3, 64, 24, 0, 0, 2], 294 | [3, 72, 24, 0, 0, 1], 295 | [5, 72, 40, 1, 0, 1], 296 | [5, 120, 40, 1, 0, 1], 297 | [5, 120, 40, 1, 0, 1], 298 | [3, 240, 80, 0, 1, 2], 299 | [3, 200, 80, 0, 1, 1], 300 | [3, 184, 80, 0, 1, 1], 301 | [3, 184, 80, 0, 1, 1], 302 | [3, 480, 112, 1, 1, 1], 303 | [3, 672, 112, 1, 1, 1], 304 | [5, 672, 160, 1, 1, 1], 305 | [5, 672, 160, 1, 1, 1], 306 | [5, 960, 160, 1, 1, 1] 307 | ] 308 | return MobileNetV3(cfgs, mode='large', **kwargs) 309 | 310 | if __name__=="__main__": 311 | a=torch.zeros(1,5,3,224,336).cuda() 312 | mobile=Fastnet().cuda() 313 | print(mobile(a)[0].shape) 314 | -------------------------------------------------------------------------------- /Models/statedict/mobilenetv3-large.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/guyuchao/PyramidCSA/45025dbfb9e95b832be8a82de281eadf9a2c2e5c/Models/statedict/mobilenetv3-large.pth -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyramidCSA 2 | 3 | Code for "Pyramid Constrained Self-Attention Network for Fast Video Salient Object Detection" (AAAI 2020) 4 | 5 | ## Build 6 | 7 | ```bash 8 | conda create -n PCSA python=3.6 9 | conda activate PCSA 10 | conda install pytorch=1.1.0 torchvision -c pytorch 11 | pip install tensorboardX tqdm Pillow==6.2.2 12 | pip install 
git+https://github.com/pytorch/tnt.git@master 13 | cd Models/PCSA 14 | python setup.py build develop 15 | ``` 16 | 17 | ## Training 18 | 19 | ### Pretrain phase 20 | ```bash 21 | bash pretrain.sh 22 | ``` 23 | ### Finetune phase 24 | ```bash 25 | bash finetune.sh 26 | ``` 27 | 28 | ## Results 29 | The resulting saliency maps and the trained model can be downloaded from [baidu pan](https://pan.baidu.com/s/1bktiBwBUprIpfstK9fDehg) (password t781) or [google drive](https://drive.google.com/drive/folders/1Xe2Eob173M6e6dcapThrOD0rmnuDA2MN?usp=sharing). 30 | 31 | ## Evaluation 32 | For VSOD, we use the evaluation code provided by [DAVSOD](https://github.com/DengPingFan/DAVSOD). 33 | 34 | For UVOS, we use the evaluation code provided by [Davis16](https://github.com/fperazzi/davis). 35 | 36 | ## Speed Evaluation 37 | ```bash 38 | python speed.py 39 | ``` 40 | 41 | ## Cite 42 | If you find this work helpful, please cite: 43 | ```latex 44 | @inproceedings{gu2020PCSA, 45 | title={Pyramid Constrained Self-Attention Network for Fast Video Salient Object Detection}, 46 | author={Gu, Yuchao and Wang, Lijuan and Wang, Ziqin and Liu, Yun and Cheng, Ming-Ming and Lu, Shao-Ping}, 47 | booktitle={Proceedings of the AAAI Conference on Artificial Intelligence}, 48 | year={2020}, 49 | } 50 | ``` 51 | 52 | ## License 53 | This project is licensed under the [Creative Commons NonCommercial (CC BY-NC 3.0)](https://creativecommons.org/licenses/by-nc/3.0/) license, which allows only 54 | non-commercial usage. For commercial usage, please contact us. 55 | 56 | ## Related Project 57 | The feature extraction backbone is borrowed from [d-li14/mobilenetv3.pytorch](https://github.com/d-li14/mobilenetv3.pytorch). 58 | 59 | ## Contact 60 | For any questions or suggestions, please email [ycgu@mail.nankai.edu.cn](mailto:ycgu@mail.nankai.edu.cn).
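For a quick sanity check outside the provided shell scripts, the snippet below sketches single-clip inference with the finetuned model. The checkpoint path is a placeholder; the "net" key and the (256, 448) input size are assumptions taken from finetune_temporal_distribute.py and config.py, and real frames should additionally be normalized with the DUTS-TR statistics used during training.

```python
import torch
from Models.mobilenetv3temporal_PCSA import Fastnet

model = Fastnet().cuda().eval()
ckpt = torch.load("checkpoint.pth", map_location="cpu")  # path to the released finetuned weights (placeholder)
model.load_state_dict(ckpt["net"])                       # assumes the checkpoint layout of finetune_temporal_distribute.py

clip = torch.zeros(1, 5, 3, 256, 448).cuda()             # (batch, time, C, H, W); zeros only check shapes
with torch.no_grad():
    saliency = model(clip)                               # (batch*time, 1, H, W), sigmoid outputs in [0, 1]
print(saliency.shape)
```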
61 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | conda create -n PCSA python=3.6 2 | conda activate PCSA 3 | conda install pytorch=1.1.0 torchvision -c pytorch 4 | pip install tensorboardX tqdm Pillow==6.2.2 5 | pip install git+https://github.com/pytorch/tnt.git@master 6 | cd Models/PCSA 7 | python setup.py build develop 8 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | parser = argparse.ArgumentParser() 3 | #optimizer 4 | parser.add_argument('--lr_mode', type=str, default="poly") 5 | parser.add_argument('--base_lr', type=float, default=1e-4) 6 | parser.add_argument('--finetune_lr', type=float, default=1e-6) 7 | 8 | #train schedule 9 | parser.add_argument('--pretrain_epoches', type=int, default=15) 10 | parser.add_argument('--finetune_epoches', type=int, default=15) 11 | parser.add_argument('--log_inteval', type=int, default=50) 12 | 13 | 14 | ##data 15 | parser.add_argument('--data_statistics', type=str, default="Data/['DUTS-TR']_statistics.pth") 16 | parser.add_argument('--img_dataset_list', type=str, default=["DUTS-TR"]) 17 | parser.add_argument('--video_dataset_list', type=str, default=["DAVIS","DAVSOD"]) 18 | parser.add_argument('--img_dataset_root', type=str,default="/media/data/guyuchao/dataset/fastsaliency") 19 | parser.add_argument('--video_dataset_root', type=str,default="/media/data/guyuchao/dataset/saliency/trainDataset") 20 | parser.add_argument('--size', type=tuple,default=(256,448)) 21 | parser.add_argument('--pretrain_batchsize', type=int, default=24) 22 | parser.add_argument('--video_batchsize', type=int, default=12) 23 | parser.add_argument('--video_time_clips', type=int, default=5) 24 | parser.add_argument('--video_testset_root', type=str,default="/media/data/guyuchao/dataset/saliency/testDataset") 25 | parser.add_argument('--parallel', type=bool, default=True) 26 | parser.add_argument('--device_idxs', type=list, default=[0,1,2,3]) 27 | parser.add_argument('--local_rank', type=int,default=0) 28 | 29 | #pretrain 30 | parser.add_argument('--pretrain_state_dict', type=str, default="/media/data/guyuchao/project/released/videofastsal/checkpoints/tensorboard/pretrain_baseline/epoch_15_batch_0/checkpoint.pth") 31 | parser.add_argument('--backbone_imagenet_pretrain', type=str, default="Models/statedict/mobilenetv3-large.pth") 32 | 33 | config = parser.parse_args() 34 | -------------------------------------------------------------------------------- /finetune.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source /home/ycgu/anaconda3/bin/activate pt10py36 4 | 5 | GPU_IDS=0,1,2,3 6 | NUM_THREADS=2 7 | CUDA_VISIBLE_DEVICES=${GPU_IDS} OMP_NUM_THREADS=${NUM_THREADS} python -m torch.distributed.launch --nproc_per_node=4 --master_port=1234 finetune_temporal_distribute.py 8 | -------------------------------------------------------------------------------- /finetune_temporal_distribute.py: -------------------------------------------------------------------------------- 1 | from torch.optim import SGD,Adam 2 | import torch 3 | from Data.dataloader import get_video_dataset 4 | from analysis.evaluate import AutoEvaluate 5 | from analysis.onlinetest import AutoTest 6 | import torch.nn.functional as F 7 | from utils.utils import 
get_Logger_and_SummaryWriter 8 | import os 9 | from utils.Distribute.engine import Engine 10 | from config import config 11 | from Models.mobilenetv3temporal_PCSA import Fastnet 12 | import torch.nn as nn 13 | from utils.SalEval import SalEval 14 | from torchnet.meter import AverageValueMeter 15 | import numpy as np 16 | class CrossEntropyLoss(nn.Module): 17 | def __init__(self): 18 | super(CrossEntropyLoss, self).__init__() 19 | 20 | def forward(self, *inputs): 21 | pred, target = tuple(inputs) 22 | total_loss = F.binary_cross_entropy(pred, target.float()) 23 | return total_loss 24 | 25 | 26 | class TrainSchedule(object): 27 | def __init__(self, batches_per_epoch): 28 | self.cur_epoch = 0 29 | self.total_epoch = config.finetune_epoches 30 | self.cur_batches = 0 31 | self.batches_per_epoch = batches_per_epoch 32 | 33 | def update(self): 34 | self.cur_batches += 1 35 | if not self.cur_batches < self.batches_per_epoch: 36 | self.cur_batches = 0 37 | self.cur_epoch += 1 38 | 39 | def state_dict(self): 40 | state_dict = {"cur_batches": self.cur_batches, 41 | "cur_epoch": self.cur_epoch, 42 | "total_epoch": self.total_epoch, 43 | "batches_per_epoch": self.batches_per_epoch} 44 | return state_dict 45 | 46 | def load_state_dict(self, state_dict): 47 | self.cur_batches = state_dict["cur_batches"] 48 | self.cur_epoch = state_dict["cur_epoch"] 49 | self.total_epoch = state_dict["total_epoch"] 50 | self.batches_per_epoch = state_dict["batches_per_epoch"] 51 | 52 | 53 | class Train(object): 54 | 55 | def __init__(self): 56 | self.logger, self.writer, self.tag_dir = get_Logger_and_SummaryWriter() 57 | self.engine=Engine(self.logger) 58 | self.device = torch.device("cuda") 59 | 60 | self.network=Fastnet() 61 | self.load_backbone(self.logger) 62 | self.network=self.network.cuda() 63 | self.network = self.engine.data_parallel(self.network) 64 | 65 | self.criterion = CrossEntropyLoss().to(self.device) 66 | 67 | base_params = [params for name, params in self.network.named_parameters() if ("temporal_high" in name)] 68 | finetune_params = [params for name, params in self.network.named_parameters() if ("temporal_high" not in name)] 69 | 70 | self.optim = Adam([ 71 | {'params': base_params, 'lr': config.base_lr,'weight_decay':1e-4, 'name': "base_params"}, 72 | {'params': finetune_params, 'lr': config.finetune_lr,'weight_decay':1e-4, 'name': 'finetune_params'}]) 73 | 74 | self.train_dataset, self.train_multiscale_dataset, statistics = get_video_dataset() 75 | 76 | self.train_multiscale_loader = [] 77 | self.train_multiscale_smapler = [] 78 | for dst in self.train_multiscale_dataset: 79 | ld, sp = self.engine.get_train_loader(dst,config.video_batchsize) 80 | self.train_multiscale_loader.append(ld) 81 | self.train_multiscale_smapler.append(sp) 82 | self.train_loader,self.train_sampler=self.engine.get_train_loader(self.train_dataset,config.video_batchsize) 83 | 84 | batches_per_epoch = 0 85 | batches_per_epoch += len(self.train_loader) 86 | for loader in self.train_multiscale_loader: 87 | batches_per_epoch += len(loader) 88 | 89 | self.sche = TrainSchedule(batches_per_epoch) 90 | if self.engine.local_rank==0: 91 | self.logger.info(config) 92 | self.logger.info(self.network) 93 | total_paramters = sum([np.prod(p.size()) for p in self.network.parameters()]) 94 | self.logger.info('Total network parameters: ' + str(total_paramters)) 95 | 96 | def save_checkpoint(self): 97 | os.makedirs(os.path.join(self.tag_dir, "epoch_%d_batch_%d" % ( 98 | self.sche.cur_epoch, self.sche.cur_batches)), exist_ok=True) 99 | save_root = 
os.path.join(self.tag_dir, "epoch_%d_batch_%d" % ( 100 | self.sche.cur_epoch, self.sche.cur_batches)) 101 | torch.save(self.state_dict(), os.path.join(save_root, "checkpoint.pth")) 102 | 103 | def adjust_learning_rate(self): 104 | if config.lr_mode == 'poly': 105 | cur_iter = self.sche.batches_per_epoch * self.sche.cur_epoch + self.sche.cur_batches 106 | max_iter = self.sche.batches_per_epoch * self.sche.total_epoch 107 | base_lr = config.base_lr * (1 - cur_iter * 1.0 / max_iter) ** 0.9 108 | finetune_lr = config.finetune_lr * (1 - cur_iter * 1.0 / max_iter) ** 0.9 109 | 110 | for param_group in self.optim.param_groups: 111 | if param_group["name"] == "base_params": 112 | param_group['lr'] = base_lr 113 | if param_group["name"] == "finetune_params": 114 | param_group['lr'] = finetune_lr 115 | 116 | return base_lr, finetune_lr 117 | 118 | def train_per_loader(self, trainloader): 119 | self.network.train() 120 | loss_meter = AverageValueMeter() 121 | for idx, (img, label) in enumerate(trainloader): 122 | baselr,finetunelr = self.adjust_learning_rate() 123 | img = img.to(self.device) 124 | label = label.to(self.device) 125 | if len(label.shape) == 5: 126 | label = label.view(-1, *(label.shape[2:])) 127 | output = self.network(img) 128 | loss = self.criterion(output, label) 129 | self.optim.zero_grad() 130 | loss.backward() 131 | self.optim.step() 132 | loss_meter.add(float(loss)) 133 | 134 | if self.engine.local_rank == 0: 135 | if self.sche.cur_batches % config.log_inteval == 0: 136 | self.logger.info("%s-epoch:%d/%d batch:%d/%d loss:%.4f base_lr:%e finetune_lr:%e" % ( 137 | self.tag_dir.split("/")[-1], self.sche.cur_epoch, self.sche.total_epoch, self.sche.cur_batches, 138 | self.sche.batches_per_epoch, loss_meter.value()[0], baselr,finetunelr)) 139 | self.sche.update() 140 | 141 | return loss_meter.value()[0] 142 | 143 | def train_per_epoch(self): 144 | for idx,loader in enumerate(self.train_multiscale_loader): 145 | self.train_per_loader(loader) 146 | loss_train= self.train_per_loader(self.train_loader) 147 | if self.engine.local_rank == 0: 148 | self.logger.info("train_img_loss:%.4f" % (loss_train)) 149 | 150 | def train(self): 151 | while self.sche.cur_epoch < self.sche.total_epoch: 152 | self.train_sampler.set_epoch(self.sche.cur_epoch) 153 | for sp in self.train_multiscale_smapler: 154 | sp.set_epoch(self.sche.cur_epoch) 155 | self.train_per_epoch() 156 | if self.engine.local_rank == 0: 157 | self.save_checkpoint() 158 | 159 | def state_dict(self): 160 | if config.parallel is True: 161 | state_dict = {"net": self.network.module.state_dict(), 162 | 'optimizer': self.optim.state_dict(), 163 | 'sche': self.sche.state_dict()} 164 | else: 165 | state_dict = {"net": self.network.state_dict(), 166 | 'optimizer': self.optim.state_dict(), 167 | 'sche': self.sche.state_dict()} 168 | return state_dict 169 | 170 | def load_state_dict(self, state_dict): 171 | if config.parallel is True: 172 | #self.sche.load_state_dict(state_dict["sche"]) 173 | #self.optim.load_state_dict(state_dict["optimizer"]) 174 | self.network.module.load_state_dict(state_dict["net"]) 175 | else: 176 | self.sche.load_state_dict(state_dict["sche"]) 177 | self.optim.load_state_dict(state_dict["optimizer"]) 178 | self.network.load_state_dict(state_dict["net"]) 179 | 180 | def load_backbone(self,logger): 181 | assert config.pretrain_state_dict is not None,"error" 182 | self.network.load_backbone(torch.load(config.pretrain_state_dict,map_location=torch.device('cpu'))["net"],logger) 183 | 184 | if __name__ == "__main__": 185 | 
torch.backends.cudnn.benchmark = True 186 | trainer = Train() 187 | trainer.train() 188 | 189 | -------------------------------------------------------------------------------- /pretrain.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | source /home/ycgu/anaconda3/bin/activate pt10py36 4 | 5 | GPU_IDS=0,1,2,3 6 | NUM_THREADS=2 7 | CUDA_VISIBLE_DEVICES=${GPU_IDS} OMP_NUM_THREADS=${NUM_THREADS} python -m torch.distributed.launch --nproc_per_node=4 --master_port=1235 pretrain_distribute.py 8 | -------------------------------------------------------------------------------- /pretrain_distribute.py: -------------------------------------------------------------------------------- 1 | from torch.optim import Adam 2 | import torch 3 | from utils.Distribute.engine import Engine 4 | from Data.dataloader import get_pretrain_dataset 5 | import torch.nn.functional as F 6 | from utils.utils import get_Logger_and_SummaryWriter 7 | import os 8 | from config import config 9 | from Models.mobilenetv3_pretrain import Fastnet 10 | import torch.nn as nn 11 | from utils.SalEval import SalEval 12 | from torchnet.meter import AverageValueMeter 13 | import numpy as np 14 | import random 15 | 16 | def setup_seed(seed): 17 | torch.manual_seed(seed) 18 | torch.cuda.manual_seed_all(seed) 19 | np.random.seed(seed) 20 | random.seed(seed) 21 | torch.backends.cudnn.deterministic = True 22 | 23 | class CrossEntropyLoss(nn.Module): 24 | def __init__(self): 25 | super(CrossEntropyLoss, self).__init__() 26 | 27 | def forward(self, *inputs): 28 | pred,target = tuple(inputs) 29 | total_loss = F.binary_cross_entropy(pred, target.float()) 30 | return total_loss 31 | 32 | class TrainSchedule(object): 33 | def __init__(self,batches_per_epoch): 34 | self.cur_epoch = 0 35 | self.total_epoch=config.pretrain_epoches 36 | self.cur_batches = 0 37 | self.batches_per_epoch=batches_per_epoch 38 | 39 | def update(self): 40 | self.cur_batches+=1 41 | if not self.cur_batches 10: 22 | time_spent.append(time.time() - start_time) 23 | print('Avg execution time (ms): %.4f, FPS:%d'%(np.mean(time_spent),1*1//np.mean(time_spent))) 24 | return 1*1//np.mean(time_spent) 25 | 26 | if __name__=="__main__": 27 | 28 | torch.backends.cudnn.benchmark = True 29 | 30 | from Models import mobilenetv3temporal_PCSA as net 31 | model = net.Fastnet() 32 | 33 | computeTime(model) 34 | -------------------------------------------------------------------------------- /utils/Distribute/engine.py: -------------------------------------------------------------------------------- 1 | import os 2 | from config import config 3 | import torch 4 | import torch.distributed as dist 5 | from torch.utils.data import DistributedSampler,DataLoader 6 | import torch.nn as nn 7 | 8 | def all_reduce_tensor(tensor, op=dist.ReduceOp.SUM, world_size=1, norm=True): 9 | tensor = tensor.clone() 10 | dist.all_reduce(tensor, op) 11 | if norm: 12 | tensor.div_(world_size) 13 | return tensor 14 | 15 | class Engine(object): 16 | 17 | ## distribute init 18 | def __init__(self, logger=None): 19 | self.distributed = False 20 | self.logger=logger 21 | if 'WORLD_SIZE' in os.environ: 22 | self.distributed = int(os.environ['WORLD_SIZE']) > 1 23 | else: 24 | raise NotImplementedError 25 | 26 | if self.distributed: 27 | self.local_rank = config.local_rank 28 | self.world_size = int(os.environ['WORLD_SIZE']) 29 | torch.cuda.set_device(self.local_rank) 30 | dist.init_process_group(backend="nccl", init_method='env://') 31 | else: 32 | raise 
NotImplementedError 33 | 34 | ## convert model 35 | 36 | def data_parallel(self, model): 37 | if self.distributed: 38 | #model = nn.SyncBatchNorm.convert_sync_batchnorm(model) 39 | model = nn.parallel.DistributedDataParallel(model,device_ids=[self.local_rank],output_device=self.local_rank) 40 | else: 41 | raise NotImplementedError 42 | return model 43 | 44 | def get_train_loader(self, train_dataset,batchsize): 45 | if self.distributed: 46 | train_sampler = DistributedSampler( 47 | train_dataset) 48 | local_bs = batchsize // self.world_size 49 | is_shuffle = False 50 | train_loader = DataLoader(train_dataset, 51 | batch_size=local_bs, 52 | num_workers=2, 53 | drop_last=False, 54 | shuffle=is_shuffle, 55 | pin_memory=False, 56 | sampler=train_sampler) 57 | 58 | else: 59 | raise NotImplementedError 60 | 61 | return train_loader, train_sampler 62 | 63 | def all_reduce_tensor(self, tensor, norm=True): 64 | if self.distributed: 65 | return all_reduce_tensor(tensor, world_size=self.world_size, norm=norm) 66 | else: 67 | raise NotImplementedError 68 | 69 | 70 | def __enter__(self): 71 | return self 72 | 73 | def __exit__(self, type, value, tb): 74 | torch.cuda.empty_cache() 75 | if type is not None: 76 | self.logger.warning( 77 | "A exception occurred during Engine initialization, " 78 | "give up running process") 79 | return False 80 | -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- 1 | from tensorboardX import SummaryWriter 2 | import logging 3 | import os 4 | import torch 5 | from PIL import Image 6 | from math import ceil 7 | 8 | def get_Logger_and_SummaryWriter(): 9 | for i in range(1000): 10 | tag_dir = 'checkpoints/tensorboard/try_{}'.format(i) 11 | if not os.path.exists(tag_dir): 12 | os.makedirs(tag_dir, exist_ok=True) 13 | logger = logging.getLogger("PGGAN") 14 | file_handler = logging.FileHandler(os.path.join(tag_dir, 'log.txt'), "w") 15 | stdout_handler = logging.StreamHandler() 16 | logger.addHandler(file_handler) 17 | logger.addHandler(stdout_handler) 18 | stdout_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) 19 | file_handler.setFormatter(logging.Formatter('%(asctime)s %(levelname)s %(message)s')) 20 | logger.setLevel(logging.INFO) 21 | return logger, SummaryWriter(tag_dir),tag_dir 22 | 23 | def safe_save(img,save_path): 24 | os.makedirs(save_path.replace(save_path.split('/')[-1],""),exist_ok=True) 25 | img.save(save_path) 26 | --------------------------------------------------------------------------------
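The two helpers in utils/utils.py above are shared by the pretraining and finetuning scripts; a minimal usage sketch (the output paths are illustrative):

```python
from PIL import Image
from utils.utils import get_Logger_and_SummaryWriter, safe_save

# Creates checkpoints/tensorboard/try_N (first unused N) and logs to both stdout and log.txt.
logger, writer, tag_dir = get_Logger_and_SummaryWriter()
logger.info("writing logs and tensorboard events to %s" % tag_dir)
writer.add_scalar("train/loss", 0.123, global_step=0)

# safe_save creates the parent directories before writing the prediction image.
pred = Image.new("L", (448, 256))
safe_save(pred, "results/DAVIS/bear/00000.png")
```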