├── loss_formula.png ├── C3DHash_structure.png ├── network_structure.png ├── .idea ├── vcs.xml ├── inspectionProfiles │ └── profiles_settings.xml ├── modules.xml ├── misc.xml ├── video_src0.iml ├── deployment.xml └── workspace.xml ├── DataProcess ├── HDMB_label_split.py ├── video2frames.py └── CCV_download_train.py ├── data_loader.py ├── triplet_loss.py ├── utils.py ├── README.md ├── train.py └── model.py /loss_formula.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongzhh8/Video-Retrieval-C3D/HEAD/loss_formula.png -------------------------------------------------------------------------------- /C3DHash_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongzhh8/Video-Retrieval-C3D/HEAD/C3DHash_structure.png -------------------------------------------------------------------------------- /network_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zhongzhh8/Video-Retrieval-C3D/HEAD/network_structure.png -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /.idea/video_src0.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/deployment.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 15 | -------------------------------------------------------------------------------- /DataProcess/HDMB_label_split.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, division 2 | import os 3 | 4 | 5 | if __name__=='__main__': 6 | split_dir_path='/home/disk3/a_zhongzhanhui/data/HMDB-51/testTrainMulti_7030_splits' 7 | labels_dir_path='/home/disk3/a_zhongzhanhui/data/HMDB-51/testTrainMulti_7030_splits/labels' 8 | classid=0 9 | train_file=open(os.path.join(labels_dir_path,'train1.txt'),'w') 10 | test_file = open(os.path.join(labels_dir_path, 'test1.txt'), 'w') 11 | classID_file = open(os.path.join(labels_dir_path, 'classID.txt'), 'w') 12 | 13 | txt_list=os.listdir(split_dir_path) 14 | txt_list.sort(key=lambda x: str(x[:-4])) 15 | for txt_name in txt_list: #txt_name=brush_hair_test_split1.txt 16 | if 'split1' not in txt_name: 17 | continue 18 | 19 | test_str_index=txt_name.index('_test') 20 | label_name=txt_name[0:test_str_index] 21 | 22 | classID_file.write(label_name+' '+str(classid)+'\n') 23 | 24 | txt_file = open( os.path.join(split_dir_path,txt_name)) 25 | trainsample_cnt= 0 26 | testsample_cnt =0 27 | for line in txt_file: 28 | # video_name = line.strip().split()[0].split('.')[0] # Depending on your fpath_label file 29 | video_name = line.strip().split()[0] 30 | split_id = line.strip().split()[1] # default for single label, while [1:] for single label 31 | split_id = int(split_id) 32 | if split_id==1: 33 | trainsample_cnt+=1 34 | train_file.write(label_name+'/'+video_name+' '+str(classid)+'\n') 35 | elif split_id==2: 36 | testsample_cnt+=1 37 | test_file.write(label_name+'/'+video_name+' '+str(classid)+'\n') 38 | 39 | # print(str(trainsample_cnt)+' vs '+str(testsample_cnt)) 40 | txt_file.close() 41 | 42 | classid+=1 43 | 44 | classID_file.close() -------------------------------------------------------------------------------- /data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | import torch 5 | import torch.utils.data as data 6 | 7 | from PIL import Image 8 | 9 | 10 | class CustomDataset(data.Dataset): 11 | 12 | def __init__(self, root_folder, fpath_label, transform=None,num_frames=32): # fpath_label.txt: frames_dir video_label 13 | 14 | f = open(fpath_label) 15 | l = f.readlines() 16 | f.close() 17 | # print l 18 | fpaths = list() 19 | labels = list() 20 | for item in l: 21 | path = item.strip().split()[0].split('.')[0] # Depending on your fpath_label file 22 | label = item.strip().split()[1] # default for single label, while [1:] for single label 23 | label = int(label) 24 | fpaths.append(path) 25 | labels.append(label) 26 | 27 | self.root_folder = root_folder 28 | self.fpaths = fpaths 29 | self.labels = labels 30 | self.label_size = len(self.labels) 31 | self.transform = transform 32 | self.num_frames=num_frames 33 | 34 | 35 | def __getitem__(self, index): 36 | 37 | label = self.labels[index] 38 | ########## can use cv2 to process frames...######### 39 | frames_dir = self.root_folder + self.fpaths[index] 40 | l_ = os.listdir(frames_dir) 41 | l_.sort(key=lambda x: str(x[:-4])) 42 | 43 | frames_length = self.num_frames 44 | 45 | l = [l_[int(round(i * len(l_) / float(frames_length)))] for i in range(frames_length)] 46 | 47 | assert len(l) == self.num_frames 48 | frames_array = np.zeros((frames_length, 3, 112, 112), dtype=np.float32) 49 | 50 | for i in range(frames_length): 51 | # frame=cv2.imread(frames_dir+"/"+l[i]) 52 | # frame=cv2.resize(frame,(171,128))k 53 | frame = Image.open(frames_dir + "/" + l[i]).convert("RGB") 54 | # cv2.imshow("training frames",frame) 55 | # cv2.waitKey(1) 56 | if not self.transform == None: 57 | frame = self.transform(frame) 58 | frame = frame.numpy() 59 | frames_array[i, :, :, :] = frame 60 | # print frames_array[i,:,:,:].sum() 61 | frames_array = frames_array.transpose((1, 0, 2, 3)) 62 | # print frames_array 63 | ########################################################## 64 | 65 | label = torch.tensor(label) 66 | frames = torch.tensor(frames_array) 67 | return frames, label 68 | 69 | 70 | def __len__(self): 71 | return len(self.fpaths) 72 | -------------------------------------------------------------------------------- /triplet_loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import time 5 | import numpy as np 6 | import itertools 7 | 8 | class TripletLoss(nn.Module): 9 | #pass 10 | def __init__(self,margin,device): 11 | super(TripletLoss, self).__init__() 12 | self.margin=margin 13 | self.device=device 14 | def similarity(self,label1,label2): 15 | return label1==label2 # default with singe label 16 | 17 | def forward(self,x,labels): 18 | self.batch_size=x.size()[0] 19 | self.feature_size=x.size()[1] 20 | triplet_loss=torch.tensor(0.0).to(self.device) 21 | # semihard_triplet_loss=torch.tensor(0.0).to(self.device) 22 | #start=time.clock() 23 | labels_=labels.cpu().data.numpy() 24 | triplets=[] 25 | for label in labels_: 26 | label_mask=(labels_==label) 27 | label_indices=np.where(label_mask)[0] 28 | if len(label_indices)<2: 29 | continue 30 | negative_indices=np.where(np.logical_not(label_mask))[0] 31 | if len(negative_indices)<1: 32 | continue 33 | anchor_positives=list(itertools.combinations(label_indices, 2)) 34 | temp=[[anchor_positive[0], anchor_positive[1], neg_ind] for anchor_positive in anchor_positives 35 | for neg_ind in negative_indices] 36 | triplets+=temp 37 | #end=time.clock() 38 | #print ("triplets mining time: %s Seconds"%(end-start)) 39 | 40 | #上面是得到了所有的triplet三元组，下面是计算triplet loss 41 | if triplets: 42 | triplets=np.array(triplets) 43 | #print triplets 44 | sq_ap=(x[triplets[:, 0]]-x[triplets[:, 1]]).pow(2).sum(1) 45 | sq_an=(x[triplets[:, 0]]-x[triplets[:, 2]]).pow(2).sum(1) 46 | losses=F.relu(self.margin+sq_ap-sq_an) 47 | triplet_count=torch.tensor(losses.size()[0]).float().to(self.device) 48 | # semihard_triplet_count=(losses!=0).sum().float().to(self.device) 49 | if triplet_count>0: 50 | triplet_loss=losses.sum()/triplet_count 51 | # if semihard_triplet_count>0: 52 | # semihard_triplet_loss=losses.sum()/semihard_triplet_count 53 | # print ("triplet_count", triplet_count) 54 | # print ("semihard_triplet_count", semihard_triplet_count) 55 | # print ("triplet_loss:",triplet_loss.item()) 56 | # print ("semihard_triplet_loss",semihard_triplet_loss.item()) 57 | 58 | return triplet_loss 59 | # return semihard_triplet_loss 60 | 61 | 62 | -------------------------------------------------------------------------------- /DataProcess/video2frames.py: -------------------------------------------------------------------------------- 1 | import os 2 | import cv2 3 | import numpy as np 4 | 5 | 6 | def video2frames(video, newdir): 7 | cap = cv2.VideoCapture(video) 8 | count = 0 9 | cnt = 0 10 | # 29.97/30FPS 11 | while (cap.isOpened()): 12 | ret, frame = cap.read() 13 | if True:#cnt % 1 == 0: # 30 FPS 14 | if ret == True: 15 | # cv2.imshow("frame",frame) 16 | cv2.imwrite(newdir + '/' + str(count).zfill(4) + ".jpg", frame) 17 | # cv2.waitKey(0) 18 | count += 1 19 | else: 20 | break 21 | cnt += 1 22 | return count 23 | 24 | 25 | # th14 dataset 26 | # train_fpath = '/home/disk1/wangshaoying/my_video_retrieval/th14/data/val.txt' 27 | # test_fpath = '/home/disk1/wangshaoying/my_video_retrieval/th14/data/test.txt' 28 | # db_fpath = '/home/disk1/wangshaoying/my_video_retrieval/th14/data/db.txt' 29 | 30 | # ucf101 20 31 | # train_fpath = '/home/disk1/wangshaoying/my_video_retrieval/ucf101/train1_20.txt' 32 | # test_fpath = '/home/disk1/wangshaoying/my_video_retrieval/ucf101/test1_20.txt' 33 | # db_fpath = '/home/disk1/wangshaoying/my_video_retrieval/ucf101/db1_20.txt' 34 | 35 | # ucf101 101 36 | # train_fpath = '/home/disk1/wangshaoying/my_video_retrieval/ucf101/train1_101.txt' 37 | # test_fpath = '/home/disk1/wangshaoying/my_video_retrieval/ucf101/test1_101.txt' 38 | # db_fpath = '/home/disk1/wangshaoying/my_video_retrieval/ucf101/db1_101.txt' 39 | 40 | # JHMDB 41 | train_fpath = '/home/disk3/a_zhongzhanhui/data/JointHMDB/Label_Split/train_10_210.txt' 42 | test_fpath = '/home/disk3/a_zhongzhanhui/data/JointHMDB/Label_Split/test_10_210.txt' 43 | db_fpath = '/home/disk3/a_zhongzhanhui/data/JointHMDB/Label_Split/db_20_420.txt' 44 | 45 | 46 | fpath = [train_fpath, test_fpath] 47 | for i in range(len(fpath)): 48 | video_num = 0 49 | print(fpath[i]) 50 | f = open(fpath[i]) 51 | l = f.readlines() 52 | f.close() 53 | # th14 dataset 54 | # root_dir = '/home/disk1/wangshaoying/my_video_retrieval/th14_5FPS/' # th14 55 | root_dir = '/home/disk3/a_zhongzhanhui/data/JointHMDB/Frames/' # JHMDB 56 | # root_dir = '/home/disk1/wangshaoying/data/UCF101/' # UCF101 57 | 58 | for item in l: 59 | # video_dir = '/home/disk1/wangshaoying/my_video_retrieval/th14/' # th14 60 | video_dir = '/home/disk3/a_zhongzhanhui/data/JointHMDB/JHMDB/' # JHMDB 61 | # video_dir = '/home/disk1/wangshaoying/my_video_retrieval/ucf101/UCF101/' # ucf101 62 | 63 | # second_dir = root_dir + item.strip().split('/')[0] + '/' #+ item.strip().split('/')[1] + '/' #UCF101 64 | second_dir = root_dir + item.strip().split('/')[1] + '/' # JHMDB 65 | print('second:', second_dir) 66 | if not os.path.exists(second_dir): 67 | os.mkdir(second_dir) 68 | 69 | video = video_dir + item.strip().split()[0] 70 | # newdir = os.path.join(root_dir, item.strip().split()[0].split('.')[0]) 71 | newdir = os.path.join(second_dir, item.strip().split('/')[2].split()[0].split('.')[0]) # JHMDB 72 | # newdir = os.path.join(second_dir, item.strip().split('/')[1].split()[0].split('.')[0]) 73 | print('newdir:', newdir) 74 | # if os.path.exists(newdir) == True: 75 | # os.system("rm -rf " + newdir) 76 | os.mkdir(newdir) 77 | frames_num = video2frames(video, newdir) 78 | video_num += 1 79 | print(f'the {video_num}th video: {frames_num}frames') 80 | 81 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from tqdm import tqdm 4 | 5 | 6 | def attention_inference(dataloader, backbone, attention, pool, hash_layer, hash_length, device): 7 | hashcodes = list() 8 | labels = list() 9 | backbone.eval() 10 | hash_layer.eval() 11 | attention.eval() 12 | threshold = np.array([0.0] * hash_length) # 0.5 13 | with torch.no_grad(): 14 | for imgs, labels_ in dataloader: 15 | labels.append(labels_.view(labels_.size()[0], ).numpy()) 16 | # print('imgs:', imgs.size()) 17 | features = backbone(imgs.to(device)) 18 | features_atten = features * attention(features) 19 | features_pool = pool(features_atten) 20 | # features_pool = pool(features) 21 | h, _ = hash_layer(features_pool) 22 | hashcodes.append(h.cpu().numpy()) 23 | # print hashcodes-threshold 24 | return (np.sign(np.concatenate(hashcodes) - threshold)).astype(np.int8), np.concatenate(labels) 25 | 26 | def inference(dataloader, net, hash_length, device): 27 | hashcodes = list() 28 | labels = list() 29 | net.eval() 30 | # hash_layer.eval() 31 | threshold = np.array([0.0] * hash_length) # 0.5 32 | with torch.no_grad(): 33 | for imgs, labels_ in tqdm(dataloader): 34 | labels.append(labels_.view(labels_.size()[0], ).numpy()) 35 | # print('imgs:', imgs.size()) 36 | hash_features = net(imgs.to(device)) 37 | hashcodes.append(hash_features.cpu().numpy()) 38 | # print hashcodes-threshold 39 | return (np.sign(np.concatenate(hashcodes) - threshold)).astype(np.int8), np.concatenate(labels) 40 | 41 | 42 | 43 | def compute_MAP(db_binary, db_label, test_binary, test_label): 44 | AP = [] 45 | Ns = np.array(range(1, db_binary.shape[0] + 1)).astype(np.float32) 46 | for i in range(test_binary.shape[0]): 47 | query_binary = test_binary[i] 48 | query_label = test_label[i] 49 | query_result = np.argsort(np.sum((query_binary != db_binary), axis=1)) 50 | correct = (query_label == db_label[query_result]) 51 | P = np.cumsum(correct, axis=0) / Ns 52 | AP.append(np.sum(P * correct) / np.sum(correct)) 53 | MAP = np.mean(np.array(AP)) 54 | # return round(MAP,5) 55 | return MAP 56 | 57 | 58 | def compute_topk_mAP(db_binary, db_label, test_binary, test_label, k): 59 | AP = [] 60 | Ns = np.array(range(1, k + 1)).astype(np.float32) 61 | for i in range(test_binary.shape[0]): 62 | query_binary = test_binary[i] 63 | query_label = test_label[i] 64 | query_result = np.argsort(np.sum((query_binary != db_binary), axis=1)) 65 | correct = (query_label == db_label[query_result[0:k]]) 66 | P = np.cumsum(correct, axis=0) / Ns 67 | if np.sum(correct) == 0: 68 | AP.append(0.) 69 | else: 70 | AP.append(np.sum(P * correct) / np.sum(correct)) 71 | topk_MAP = np.mean(np.array(AP)) 72 | # return round(MAP,5) 73 | return topk_MAP 74 | 75 | 76 | def compute_MAP_mutli(db_binary, db_label, test_binary, test_label): 77 | AP = [] 78 | Ns = np.array(range(1, db_binary.shape[0] + 1)).astype(np.float32) 79 | for i in range(test_binary.shape[0]): 80 | query_binary = test_binary[i] 81 | query_label = test_label[i] 82 | query_result = np.argsort(np.sum((query_binary != db_binary), axis=1)) 83 | # correct=(query_label==db_label[query_result]) 84 | correct = (np.dot(db_label[query_result, query_label]) > 0) 85 | P = np.cumsum(correct, axis=0) / Ns 86 | AP.append(np.sum(P * correct) / np.sum(correct)) 87 | MAP = np.mean(np.array(AP)) 88 | # return round(MAP,5) 89 | return MAP 90 | -------------------------------------------------------------------------------- /DataProcess/CCV_download_train.py: -------------------------------------------------------------------------------- 1 | import re, os, requests 2 | 3 | # url = r"https://www.youtube.com/playlist?list=PLXO45tsB95cK7G-raBeTVjAoZHtJpiKh3" #youtube播放列表 4 | # headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36'} 5 | # html = requests.get(url, headers=headers).text 6 | # # videoIds = re.findall('"videoId":"([A-Za-z0-9_-]{11})","thumbnail"', html) 7 | # # for videoId in videoIds: 8 | # # print(videoId) 9 | # # download_link = "https://youtu.be/" + videoId # 构造下载地址 10 | # # os.chdir(r"D:\DataSet数据集整理\CCV\videos") 11 | # # os.system("youtube-dl " + download_link) # 用youtube-dl下载视频 12 | 13 | # res = requests.get('https://www.ldoceonline.com/',headers={"User-Agent":"Mozilla/5.0"}) 14 | # download_link = "https://youtu.be/nymfb7yldZg" # 构造下载地址 15 | # os.chdir(r"D:\DataSet数据集整理\CCV\videos") 16 | # os.system("youtube-dl " + download_link) # 用youtube-dl下载视频 17 | import time 18 | 19 | 20 | def GetClassid(label_line): 21 | index=0 22 | for char in label_line: 23 | if char=='0': 24 | index+=1 25 | elif char=='1': 26 | return index 27 | return -1 28 | 29 | # 30 | # trainid_path=r'D:\DataSet数据集整理\CCV\trainVidID.txt' 31 | # label_path=r'D:\DataSet数据集整理\CCV\trainLabel.txt' 32 | # train_file=open(trainid_path,'r') 33 | # label_file=open(label_path,'r') 34 | # cnt=0 35 | # class_limit=[] 36 | # for i in range(20): 37 | # class_limit.append(150) #其中只用到120个，30个是用来防止下载错误的 38 | # print(class_limit) 39 | # for (line,label_line )in zip(train_file,label_file): 40 | # # start=time.time() 41 | # # time.sleep(0) 42 | # # end = time.time() 43 | # # print('sleep '+str(end-start)+'s') 44 | # line = line[:-1] 45 | # label_line = label_line[:-1] 46 | # print(line,label_line) 47 | # 48 | # classid=GetClassid(label_line) 49 | # print(classid) 50 | # class_limit[int(classid)]-=1 51 | # print(class_limit[int(classid)]) 52 | # if(class_limit[int(classid)]<=0): 53 | # continue 54 | # try: 55 | # download_link = "https://youtu.be/"+line # 构造下载地址 56 | # # print("youtube-dl " + download_link + ' -o ' + str(cnt) + '_' + line+'.mp4') 57 | # print("youtube-dl " + download_link+' -o '+line+'.mp4') 58 | # download_path="D:/DataSet数据集整理/CCV/videos/trainset/"+str(classid) 59 | # if not os.path.exists(download_path): 60 | # os.mkdir(download_path) 61 | # os.chdir(download_path) 62 | # print('download_path=',download_path) 63 | # os.system("youtube-dl " + download_link+' -o '+line+'.mp4') #str(cnt)+'_' 64 | # except BaseException: 65 | # print('Error') 66 | # else: 67 | # print('Success') 68 | # cnt+=1 69 | 70 | 71 | 72 | trainid_path=r'D:\DataSet数据集整理\CCV\testVidID.txt' 73 | label_path=r'D:\DataSet数据集整理\CCV\testLabel.txt' 74 | train_file=open(trainid_path,'r') 75 | label_file=open(label_path,'r') 76 | cnt=0 77 | class_limit=[] 78 | for i in range(20): 79 | class_limit.append(40) #其中只用到25个，15个是用来防止下载错误的 80 | print(class_limit) 81 | 82 | for (line,label_line )in zip(train_file,label_file): 83 | start=time.time() 84 | time.sleep(5) 85 | end = time.time() 86 | # print('sleep '+str(end-start)+'s') 87 | line = line[:-1] 88 | label_line = label_line[:-1] 89 | print(line,label_line) 90 | 91 | classid=GetClassid(label_line) 92 | print(classid) 93 | if classid==-1: 94 | continue 95 | class_limit[int(classid)]-=1 96 | print(class_limit[int(classid)]) 97 | if(class_limit[int(classid)]<=0): 98 | continue 99 | try: 100 | download_link = "https://youtu.be/"+line # 构造下载地址 101 | # print("youtube-dl " + download_link + ' -o ' + str(cnt) + '_' + line+'.mp4') 102 | print("youtube-dl " + download_link+' -o '+line+'.mp4') 103 | download_path="D:/DataSet数据集整理/CCV/videos/testset/"+str(classid) 104 | if not os.path.exists(download_path): 105 | os.mkdir(download_path) 106 | os.chdir(download_path) 107 | print('download_path=',download_path) 108 | os.system("youtube-dl " + download_link+' -o '+line+'.mp4') #str(cnt)+'_' 109 | except BaseException: 110 | print('Error') 111 | else: 112 | print('Success') 113 | cnt+=1 114 | 115 | 116 | 117 | # testid_path=r'D:\DataSet数据集整理\CCV\testVidID.txt' 118 | # test_file = open(testid_path, 'r') 119 | # cnt=0 120 | # for line in test_file: 121 | # download_link = "https://youtu.be/"+line # 构造下载地址 122 | # os.chdir(r"D:\DataSet数据集整理\CCV\videos\testset") 123 | # os.system("youtube-dl " + download_link+' -o '+str(cnt)+'_'+line) # 用youtube-dl下载视频 124 | # cnt+=1 125 | 126 | # download_link = "https://youtu.be/1DwOlDzZwW4" # 构造下载地址 127 | # os.chdir(r"D:") 128 | # os.system("youtube-dl " + download_link) # 用youtube-dl下载视频 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Video Retrieval C3D 2 | 3 | Pytorch implementation of a video retrieval model on datasets UCF101 and HMDB-51(Human Motion Data Base). 4 | 5 | ## Approach 6 | 7 | ### Network Structure 8 | 9 | I mainly used the idea proposed in paper [*Simultaneous Feature Learning and Hash Coding with Deep Neural Networks*](https://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Lai_Simultaneous_Feature_Learning_2015_CVPR_paper.html) for reference. And its network is proposed as fellow: 10 | 11 | ![](./network_structure.png) 12 | 13 | In order to adapt to the video retrieval task as well as enhance performance , I change the network structure as fellow : 14 | 15 | ![](./C3DHash_structure.png) 16 | 17 | 1. Model input should be triplets of videos rather than images. That is to say, I get all possilble triplets of videos in a batch as model input. 18 | 2. The shared CNN sub-network is replaced with a pre-trained 3D ResNet-18. 19 | 3. Divide-and-encode module is simplified as a fully connected layer. It projects the 512-dim extracted feature into a feature whose dimension can be 16, 32, 48, 64 and so on. I call this feature with lower dimension as approximate hash code. During training phase, it is a real number and is imported into the triplet ranking loss function to calculate loss for model optimization. And during inference phase, it is quantized as binary code for retrieval. 20 | 21 | ### what is hashing? 22 | 23 | With the ever-growing large-scale video data on the Web, much attention has been devoted to nearest neighbor search via hashing methods. Learning-based hashing is an emerging stream of hash methods 24 | that learn similarity-preserving hash functions to encode input data points (e.g., images,videos) into binary codes. A good hashing function is able to reduce the required memory and improve the retrieval speed by a large margin. 25 | 26 | ### Triplet Loss 27 | 28 | Triplet ranking loss is designed to characterize that one video is more similar to the second video than 29 | to the third one. 30 | 31 | ![](./loss_formula.png) 32 | 33 | F(I), F(I+) ,F(I-) denote the embeddings of the query video, similar video and dissimilar video respectively. 34 | 35 | 36 | 37 | ## Experiment 38 | 39 | 1. Train the network with triplet loss on the training set for 100 epochs. 40 | 2. Input the training set and testing set into the network to get embeddings and then turn the embeddings into binary hash codes with a simple quantization function. 41 | 3. Use testing sample as querie to retrieve videos from training samples. Calculate distance between binary codes of testing sample and training samples with Hamming distance. Use mAP to estimate the model's performance. 42 | 43 | ### Prerequisites 44 | 45 | In order to run this code you will need to install: 46 | 47 | 1. Python3 48 | 2. Pytorch 0.4 49 | 50 | ### Usage 51 | 52 | 1. Firstly download and unzip the two datasets. 53 | 2. Use video2frames.py to turn each video into a set of frames. 54 | 3. Generate two files train.txt and test.txt, each line of which is in the format of [video_name,classid]. 55 | 4. Change the datapath arguments in train.py to indicate the file path. 56 | 5. Run the command bellow. 57 | 58 | ```Bash 59 | python train.py --dataset_name 'UCF' --hash_length 48 --margin 14 --num_frames 32 --lr 0.0001 60 | ``` 61 | And you can change the parameters if you want. I recommand to set margin as 20 when hash_length is 64 and 14 when binary_bits is 48. It is suggested to set the value of margin as a bit bigger than a quarter of the value of binary_bits. 62 | 63 | ### Result 64 | 65 | Common parameters： 66 | 67 | | lr | batch size | optimizer | num_epochs | 68 | | ----- | ---------- | --------- | ---------- | 69 | | 0.001 | 120 | SGD | 160 | 70 | 71 | #### UCF101 72 | 73 | | binary bits | margin | num_frames | mAP | 74 | | ----------- | :----- | ---------- | ------ | 75 | | 48 | 14 | 16 | 0.7116 | 76 | | 48 | 14 | 32 | 0.7204 | 77 | | 64 | 20 | 32 | 0.7724 | 78 | 79 | #### HMDB-51 80 | 81 | | binary bits | margin | num_frames | mAP | 82 | | ----------- | :----- | ---------- | ------ | 83 | | 48 | 8 | 32 | 0.4687 | 84 | | 48 | 14 | 16 | 0.4627 | 85 | | 48 | 14 | 32 | 0.4763 | 86 | 87 | 88 | 89 | #### JHMDB 90 | 91 | | binary bits | margin | num_frames | mAP | 92 | | ----------- | :----- | ---------- | ------ | 93 | | 16 | 8 | 10 | 0.4338 | 94 | | 48 | 14 | 5 | 0.3859 | 95 | | 48 | 14 | 10 | 0.4883 | 96 | | 48 | 14 | 15 | 0.5487 | 97 | 98 | ### Reference 99 | 100 | [1] Hanjiang Lai, Yan Pan, Ye Liu, Shuicheng Yan [*Simultaneous Feature Learning and Hash Coding with Deep Neural Networks*](https://www.cv-foundation.org/openaccess/content_cvpr_2015/html/Lai_Simultaneous_Feature_Learning_2015_CVPR_paper.html) 101 | 102 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | import torchvision.transforms as transforms 3 | import argparse 4 | from data_loader import CustomDataset 5 | from model import C3D_Hash_Model 6 | from triplet_loss import TripletLoss 7 | import time 8 | import os 9 | from utils import * 10 | 11 | 12 | 13 | 14 | def load_data(root_folder, fpath_label, batch_size, shuffle=True, num_workers=16, train=False,num_frames=32): 15 | if train: 16 | transform = transforms.Compose([ 17 | # transforms.ToPILImage(),#Converts a Tensor or a numpy of shape H x W x C to a PIL Image C x H x W 18 | transforms.Resize((128, 171)), 19 | transforms.CenterCrop((112, 112)), # Center 20 | transforms.RandomHorizontalFlip(), # 训练集才需要做这一步处理，获得更多的随机化数据 21 | transforms.ToTensor(), 22 | transforms.Normalize(mean=(0.434, 0.405, 0.378), std=(0.152, 0.149, 0.157))]) 23 | else: 24 | transform = transforms.Compose([ 25 | # transforms.ToPILImage(),#Converts a Tensor or a numpy of shape H x W x C to a PIL Image C x H x W 26 | transforms.Resize((128, 171)), 27 | transforms.CenterCrop((112, 112)), # Center 28 | transforms.ToTensor(), 29 | transforms.Normalize(mean=(0.434, 0.405, 0.378), std=(0.152, 0.149, 0.157))]) 30 | 31 | data_ = CustomDataset(root_folder=root_folder, 32 | fpath_label=fpath_label, 33 | transform=transform, 34 | num_frames=num_frames) 35 | 36 | # torch.utils.data.DataLoader 37 | loader_ = data.DataLoader( 38 | dataset=data_, # torch TensorDataset format 39 | batch_size=batch_size, # mini batch size 40 | shuffle=shuffle, # shuffle 41 | num_workers=num_workers) # multi thread 42 | 43 | return loader_ 44 | 45 | 46 | def cycle(iterable): 47 | while True: 48 | for x in iterable: 49 | yield x 50 | 51 | def get_parser(): 52 | parser = argparse.ArgumentParser(description='train C3DHash') 53 | 54 | parser.add_argument('--dataset_name', default='JHMDB', help='HMDB or UCF or JHMDB') 55 | parser.add_argument('--hash_length', type=int, default=48, help='length of hashing binary') 56 | parser.add_argument('--margin', type=float, default=14, help='取bit的四分之一多一点，margin影响很大') 57 | parser.add_argument('--num_frames', type=int, default=10, help='number of frames taken form a video') 58 | 59 | parser.add_argument('--lr', type=float, default=0.0001, help='lr=0.001') 60 | parser.add_argument('--batch_size', type=int, default=120, help='input batch size') 61 | parser.add_argument('--num_epochs', type=int, default=160, help='number of epochs to train for') 62 | parser.add_argument('--step_lr', type=int, default=40, help='change lr per strp_lr epoch') 63 | parser.add_argument('--checkpoint_step', type=int, default=5, help='checkpointing after batches') 64 | 65 | parser.add_argument('--load_model', default=False, help='wether load model checkpoints or not') 66 | parser.add_argument('--load_model_path',default='/home/disk3/a_zhongzhanhui/PycharmProject/video_retrieval_C3D/checkpoints/HMDB_48bits_14margin_/net_epoch50_mAP0.476344.pth',help='location to load model') 67 | 68 | return parser 69 | 70 | if __name__ == "__main__": 71 | parser = get_parser() 72 | opt = parser.parse_args() 73 | device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu") # device configuration 74 | 75 | print('===start setting network and optimizer===') 76 | net = C3D_Hash_Model(opt.hash_length) 77 | net.to(device) 78 | net = torch.nn.DataParallel(net, device_ids=[1,2,3]) # for multi gpu 79 | 80 | if opt.load_model: 81 | net.load_state_dict(torch.load(opt.load_model_path)) 82 | print('loaded model from '+opt.load_model_path) 83 | 84 | triplet_loss = TripletLoss(opt.margin, device).to(device) 85 | 86 | optimizer = torch.optim.SGD(net.parameters(), lr=opt.lr, momentum=0.9, weight_decay=0.0005) 87 | scheduler = torch.optim.lr_scheduler.StepLR(optimizer, opt.step_lr) 88 | print('===finish setting network and optimizer===') 89 | 90 | print('===setting data loader===') 91 | if opt.dataset_name=='UCF': 92 | root_folder = "/home/disk3/a_zhongzhanhui/data/UCF-101/" 93 | train_fpath_label = "/home/disk3/a_zhongzhanhui/data/UCF-101/TrainTestlist/train1.txt" 94 | test_fpath_label = "/home/disk3/a_zhongzhanhui/data/UCF-101/TrainTestlist/test1.txt" 95 | elif opt.dataset_name=='HMDB': 96 | root_folder = "/home/disk3/a_zhongzhanhui/data/HMDB-51/HMDB51/" 97 | train_fpath_label = "/home/disk3/a_zhongzhanhui/data/HMDB-51/TrainTestlist/labels/train1.txt" 98 | test_fpath_label = "/home/disk3/a_zhongzhanhui/data/HMDB-51/TrainTestlist/labels/test1.txt" 99 | elif opt.dataset_name=='JHMDB': 100 | root_folder = "/home/disk3/a_zhongzhanhui/data/JointHMDB/Frames" 101 | train_fpath_label = "/home/disk3/a_zhongzhanhui/data/JointHMDB/Label_Split/train_10_210.txt" 102 | test_fpath_label = "/home/disk3/a_zhongzhanhui/data/JointHMDB/Label_Split/test_10_210.txt" 103 | db_fpath_label='/home/disk3/a_zhongzhanhui/data/JointHMDB/Label_Split/db_20_420.txt' 104 | else: 105 | print('dataset_name error') 106 | exit(0) 107 | train_loader = load_data(root_folder, train_fpath_label, opt.batch_size, shuffle=True, num_workers=16,train=False,num_frames=opt.num_frames) 108 | test_loader = load_data(root_folder, test_fpath_label, opt.batch_size, shuffle= False, num_workers=8,train=False,num_frames=opt.num_frames) 109 | if opt.dataset_name=='UCF' or opt.dataset_name=='HMDB': 110 | db_loader = train_loader 111 | elif opt.dataset_name=='JHMDB': 112 | db_loader=load_data(root_folder, db_fpath_label, opt.batch_size, shuffle=True, num_workers=16,train=False,num_frames=opt.num_frames) 113 | 114 | train_loader_iter = iter(cycle(train_loader)) #iter(dataloader)返回的是一个迭代器，然后可以使用next访问 115 | print('===finish setting data loader===') 116 | 117 | 118 | checkpoint_path = './checkpoints/' + opt.dataset_name+'_' + str(opt.hash_length) + 'bits_' + str(opt.margin) + 'margin_' + str(opt.num_frames) + 'frames' 119 | os.makedirs(checkpoint_path, exist_ok=True) 120 | 121 | print('===start training===') 122 | maxMAP=0 123 | total_step = len(train_loader) #batch数量 124 | for epoch in range(opt.num_epochs): 125 | net.train() 126 | start_time = time.time() 127 | scheduler.step() 128 | epoch_loss = 0. 129 | for i in range(total_step): #逐个batch地遍历整个训练集 130 | frames, labels = next(train_loader_iter) 131 | frames = frames.to(device) 132 | labels = labels.to(device) 133 | hash_features = net(frames) 134 | loss = triplet_loss(hash_features, labels) 135 | print(f'[epoch{epoch}-batch{i}] loss:{loss:0.4}') 136 | if loss == 0: 137 | continue 138 | ### Backward and optimize 139 | optimizer.zero_grad() 140 | loss.backward() 141 | optimizer.step() 142 | epoch_loss += loss.item() 143 | 144 | avg_loss = epoch_loss / total_step 145 | end_time = time.time() 146 | elapsed_time = end_time - start_time 147 | 148 | print(f'[{epoch}/{opt.num_epochs}] loss:{avg_loss:0.5f} ' 149 | f' time:{elapsed_time:0.2f} s') 150 | 151 | if epoch % opt.checkpoint_step == 0: #(epoch + 1) % 2 == 0: 152 | map_start_time=time.time() 153 | print('getting binary code and label') 154 | db_binary, db_label = inference(db_loader, net, opt.hash_length, device) 155 | test_binary, test_label = inference(test_loader, net, opt.hash_length, device) 156 | print('calculating mAP') 157 | MAP_ = compute_MAP(db_binary, db_label, test_binary, test_label) 158 | print("MAP_: %s" % MAP_) 159 | 160 | 161 | f = open(os.path.join(checkpoint_path, "MAP.log"), "a+") 162 | f.write('epoch:'+str(epoch) + " loss:"+str(avg_loss) +' mAP:'+ str(MAP_) + '\n') 163 | f.close() 164 | 165 | if MAP_ > maxMAP: 166 | maxMAP = MAP_ 167 | save_pth_path = os.path.join(checkpoint_path, f'net_epoch{epoch}_mAP{MAP_:04f}.pth') 168 | torch.save(net.state_dict(), save_pth_path) 169 | 170 | map_end_time = time.time() 171 | print('calcualteing mAP used ', map_end_time - map_start_time, 's') 172 | 173 | 174 | 175 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | import math 6 | from functools import partial 7 | 8 | __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101'] 9 | 10 | 11 | def conv3x3x3(in_planes, out_planes, stride=1): 12 | # 3x3x3 convolution with padding 13 | return nn.Conv3d(in_planes, out_planes, kernel_size=3, 14 | stride=stride, padding=1, bias=False) 15 | 16 | 17 | def downsample_basic_block(x, planes, stride): 18 | out = F.avg_pool3d(x, kernel_size=1, stride=stride) 19 | zero_pads = torch.Tensor(out.size(0), planes - out.size(1), 20 | out.size(2), out.size(3), 21 | out.size(4)).zero_() 22 | if isinstance(out.data, torch.cuda.FloatTensor): 23 | zero_pads = zero_pads.cuda() 24 | 25 | out = Variable(torch.cat([out.data, zero_pads], dim=1)) 26 | 27 | return out 28 | 29 | 30 | class BasicBlock(nn.Module): 31 | expansion = 1 32 | 33 | def __init__(self, inplanes, planes, stride=1, downsample=None): 34 | super(BasicBlock, self).__init__() 35 | self.conv1 = conv3x3x3(inplanes, planes, stride) 36 | self.bn1 = nn.BatchNorm3d(planes) 37 | self.relu = nn.ReLU(inplace=True) 38 | self.conv2 = conv3x3x3(planes, planes) 39 | self.bn2 = nn.BatchNorm3d(planes) 40 | self.downsample = downsample 41 | self.stride = stride 42 | 43 | def forward(self, x): 44 | residual = x 45 | 46 | out = self.conv1(x) 47 | out = self.bn1(out) 48 | out = self.relu(out) 49 | 50 | out = self.conv2(out) 51 | out = self.bn2(out) 52 | 53 | if self.downsample is not None: 54 | residual = self.downsample(x) 55 | 56 | out += residual 57 | out = self.relu(out) 58 | 59 | return out 60 | 61 | 62 | class Bottleneck(nn.Module): 63 | expansion = 4 64 | 65 | def __init__(self, inplanes, planes, stride=1, downsample=None): 66 | super(Bottleneck, self).__init__() 67 | self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False) 68 | self.bn1 = nn.BatchNorm3d(planes) 69 | self.conv2 = nn.Conv3d(planes, planes, kernel_size=3, stride=stride, 70 | padding=1, bias=False) 71 | self.bn2 = nn.BatchNorm3d(planes) 72 | self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False) 73 | self.bn3 = nn.BatchNorm3d(planes * 4) 74 | self.relu = nn.ReLU(inplace=True) 75 | self.downsample = downsample 76 | self.stride = stride 77 | 78 | def forward(self, x): 79 | residual = x 80 | 81 | out = self.conv1(x) 82 | out = self.bn1(out) 83 | out = self.relu(out) 84 | 85 | out = self.conv2(out) 86 | out = self.bn2(out) 87 | out = self.relu(out) 88 | 89 | out = self.conv3(out) 90 | out = self.bn3(out) 91 | 92 | if self.downsample is not None: 93 | residual = self.downsample(x) 94 | 95 | out += residual 96 | out = self.relu(out) 97 | 98 | return out 99 | 100 | 101 | class ResNet(nn.Module): 102 | 103 | def __init__(self, block, layers, shortcut_type='A'): #A layers=[2,2,2,2] 104 | self.inplanes = 64 105 | super(ResNet, self).__init__() 106 | self.conv1 = nn.Conv3d(3, 64, kernel_size=7, stride=(1, 2, 2), 107 | padding=(3, 3, 3), bias=False) 108 | self.bn1 = nn.BatchNorm3d(64) 109 | self.relu = nn.ReLU(inplace=True) 110 | self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1) 111 | self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type) 112 | self.layer2 = self._make_layer(block, 128, layers[1], shortcut_type, stride=2) 113 | self.layer3 = self._make_layer(block, 256, layers[2], shortcut_type, stride=2) 114 | self.layer4 = self._make_layer(block, 512, layers[3], shortcut_type, stride=2) 115 | self.avgpool = nn.AvgPool3d((1, 4, 4), stride=1) 116 | #self.fc = nn.Linear(512 * block.expansion, num_classes) 117 | 118 | ### above new 119 | #self.classifier=nn.Linear(512,num_classes) # 120 | #self.hashcoder=nn.Sequential(nn.Linear(512,hash_length),nn.Tanh()) # 121 | 122 | 123 | for m in self.modules(): 124 | if isinstance(m, nn.Conv3d): 125 | n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels 126 | m.weight.data.normal_(0, math.sqrt(2. / n)) 127 | elif isinstance(m, nn.BatchNorm3d): 128 | m.weight.data.fill_(1) 129 | m.bias.data.zero_() 130 | 131 | 132 | 133 | def _make_layer(self, block, planes, blocks, shortcut_type, stride=1): 134 | downsample = None 135 | if stride != 1 or self.inplanes != planes * block.expansion: 136 | if shortcut_type == 'A': 137 | downsample = partial(downsample_basic_block, 138 | planes=planes * block.expansion, 139 | stride=stride) 140 | else: 141 | downsample = nn.Sequential( 142 | nn.Conv3d(self.inplanes, planes * block.expansion, 143 | kernel_size=1, stride=stride, bias=False), 144 | nn.BatchNorm3d(planes * block.expansion) 145 | ) 146 | 147 | layers = [] 148 | layers.append(block(self.inplanes, planes, stride, downsample)) 149 | self.inplanes = planes * block.expansion 150 | for i in range(1, blocks): 151 | layers.append(block(self.inplanes, planes)) 152 | 153 | return nn.Sequential(*layers) 154 | 155 | def forward(self, x): 156 | x = self.conv1(x) 157 | x = self.bn1(x) 158 | x = self.relu(x) 159 | x = self.maxpool(x) 160 | #print x.size() 161 | x = self.layer1(x) 162 | x = self.layer2(x) 163 | x = self.layer3(x) 164 | x = self.layer4(x) 165 | 166 | x = self.avgpool(x) 167 | 168 | #out = x.view(x.size(0), -1) 169 | # out = torch.squeeze(x) 170 | out = x.squeeze(-1).squeeze(-1) 171 | #x = self.fc(x) 172 | 173 | ### above new 174 | #c=self.classifier(x) 175 | #h=self.hashcoder(x) 176 | return out 177 | 178 | 179 | def resnet18(**kwargs): 180 | """Constructs a ResNet-18 model. 181 | """ 182 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 183 | return model 184 | 185 | def resnet34(**kwargs): 186 | """Constructs a ResNet-34 model. 187 | """ 188 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 189 | return model 190 | 191 | 192 | def resnet50(**kwargs): 193 | """Constructs a ResNet-50 model. 194 | """ 195 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 196 | return model 197 | 198 | def resnet101(**kwargs): 199 | """Constructs a ResNet-101 model. 200 | """ 201 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 202 | return model 203 | 204 | 205 | class TemporalAvgPool(nn.Module): 206 | def __init__(self): 207 | super(TemporalAvgPool, self).__init__() 208 | self.filter=nn.AdaptiveAvgPool1d(1) 209 | 210 | def forward(self, x): 211 | out=self.filter(x) 212 | out=torch.squeeze(out) 213 | return out 214 | 215 | 216 | class HashLayer(nn.Module): 217 | def __init__(self,hash_length): 218 | super(HashLayer, self).__init__() 219 | self.hashcoder=nn.Sequential(nn.Linear(512,hash_length), nn.Tanh()) 220 | 221 | def forward(self,x): 222 | if x.size()==5: 223 | x=x.view() 224 | h=self.hashcoder(x) 225 | return h 226 | 227 | 228 | 229 | def load_state(model, model_path): 230 | model_dict = model.state_dict() 231 | pretrained_dict = torch.load(model_path, map_location="cpu")["state_dict"] 232 | key = list(pretrained_dict.keys())[0] 233 | # 1. filter out unnecessary keys 234 | # 1.1 multi-GPU ->CPU 235 | if (str(key).startswith("module.")): 236 | pretrained_dict = {k[7:]: v for k, v in pretrained_dict.items() if 237 | k[7:] in model_dict and v.size() == model_dict[k[7:]].size()} 238 | else: 239 | pretrained_dict = {k: v for k, v in pretrained_dict.items() if 240 | k in model_dict and v.size() == model_dict[k].size()} 241 | # 2. overwrite entries in the existing state dict 242 | model_dict.update(pretrained_dict) 243 | # 3. load the new state dict 244 | model.load_state_dict(model_dict) 245 | 246 | 247 | class C3D_Hash_Model(nn.Module): 248 | """Constructs a (ResNet-18+Avg Pooling+Hashing ) model. 249 | """ 250 | def __init__(self, hash_length): 251 | super(C3D_Hash_Model, self).__init__() 252 | self.resnet=resnet18() 253 | load_state(self.resnet, "./pretrain/resnet-18-kinetics.pth") # 加载保存好的模型 254 | self.avgpooling=TemporalAvgPool() 255 | self.hash_layer =HashLayer(hash_length) 256 | 257 | def forward(self, x): 258 | resnet_feature=self.resnet(x) 259 | avgpooling_feature=self.avgpooling(resnet_feature) 260 | # squeeze_feature=torch.squeeze(resnet_feature) 261 | hash_feature = self.hash_layer(avgpooling_feature) 262 | return hash_feature -------------------------------------------------------------------------------- /.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 13 | 14 | 19 | 20 | 21 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 63 | 64 | 65 | 66 | 67 | 87 | 88 | 89 | 109 | 110 | 111 | 131 | 132 | 133 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 1566701195750 167 | 175 | 176 | 177 | 178 | 180 | 181 | 192 | 193 | 194 | 196 | 197 | 198 | 199 | 200 | file://$PROJECT_DIR$/DataProcess/video2frames.py 201 | 46 202 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | --------------------------------------------------------------------------------