├── asset └── main.png ├── metrics.py ├── utils.py ├── README.md ├── loader ├── ucf_loader.py ├── hmdb_loader.py ├── msrvtt_loader.py ├── crosstask_loader.py ├── youcook_loader.py └── howto100m_loader.py ├── loss.py ├── args.py ├── src ├── eval_youcook.py ├── eval_msrvtt.py ├── eval_crosstask.py ├── eval_ucf.py ├── eval_hmdb.py └── train.py ├── s3dg.py ├── soft_dtw.py └── data └── msrvtt_test.csv /asset/main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KoDohwan/VT-TWINS/HEAD/asset/main.png -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import unicode_literals 4 | from __future__ import print_function 5 | 6 | import numpy as np 7 | 8 | def retrieval(x): 9 | sx = np.sort(-x, axis=1) 10 | d = np.diag(-x) 11 | d = d[:, np.newaxis] 12 | ind = sx - d 13 | ind = np.where(ind == 0) 14 | ind = ind[1] 15 | metrics = {} 16 | metrics['R1'] = float(np.sum(ind == 0)) / len(ind) * 100 17 | metrics['R5'] = float(np.sum(ind < 5)) / len(ind) * 100 18 | metrics['R10'] = float(np.sum(ind < 10)) / len(ind) * 100 19 | metrics['MR'] = np.median(ind) + 1 20 | return metrics 21 | 22 | def ctr(x): 23 | sx = np.sort(-x, axis=1) 24 | d = np.diag(-x) 25 | d = d[:, np.newaxis] 26 | ind = sx - d 27 | # ind = np.where(ind == 0) 28 | # ind = [(i, j) for i, j in zip(ind[0], ind[1])] 29 | 30 | # new_ind = [] 31 | # for i in ind: 32 | # ind_set = set([j[0] for j in new_ind]) 33 | # if i[0] not in ind_set: 34 | # new_ind.append(i) 35 | # ind = np.array([i[1] for i in new_ind]) 36 | 37 | num = 0. 38 | count = 0. 39 | for i in ind: 40 | if i[0] == 0: 41 | num += 1 42 | count += 1 43 | 44 | metrics = {} 45 | # metrics['CTR'] = float(np.sum(ind == 0)) / len(ind) * 100 46 | metrics['CTR'] = num / count * 100 47 | return metrics['CTR'] -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.distributed as dist 5 | from torch.optim.lr_scheduler import LambdaLR 6 | 7 | 8 | class AllGather(torch.autograd.Function): 9 | """An autograd function that performs allgather on a tensor.""" 10 | 11 | @staticmethod 12 | def forward(ctx, tensor, args): 13 | output = [torch.empty_like(tensor) for _ in range(args.world_size)] 14 | dist.all_gather(output, tensor) 15 | ctx.rank = args.rank 16 | ctx.batch_size = tensor.shape[0] 17 | return torch.cat(output, 0) 18 | 19 | @staticmethod 20 | def backward(ctx, grad_output): 21 | return ( 22 | grad_output[ctx.batch_size * ctx.rank : ctx.batch_size * (ctx.rank + 1)], 23 | None, 24 | ) 25 | 26 | def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_cycles=0.5, last_epoch=-1): 27 | """ Create a schedule with a learning rate that decreases following the 28 | values of the cosine function between 0 and `pi * cycles` after a warmup 29 | period during which it increases linearly between 0 and 1. 30 | """ 31 | 32 | def lr_lambda(current_step): 33 | if current_step < num_warmup_steps: 34 | return float(current_step) / float(max(1, num_warmup_steps)) 35 | progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps)) 36 | return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) 37 | 38 | return LambdaLR(optimizer, lr_lambda, last_epoch) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # VT-TWINS 2 | 3 | This repositoriy is the implementation of "[Video-Text Representation Learning via Differentiable Weak Temporal Alignment](https://openaccess.thecvf.com/content/CVPR2022/papers/Ko_Video-Text_Representation_Learning_via_Differentiable_Weak_Temporal_Alignment_CVPR_2022_paper.pdf) (CVPR 2022)". 4 | 5 |
6 | 7 |
8 | 9 | ## Preparation 10 | 11 | ### Requirements 12 | - Python 3 13 | - PyTorch (>= 1.0) 14 | - [python-ffmpeg](https://github.com/kkroening/ffmpeg-python) with ffmpeg 15 | - pandas 16 | - numpy 17 | - tqdm 18 | - scikit-learn 19 | - numba 0.53.1 20 | 21 | ### Dataset 22 | The annotation files (.csv) of all datasets are in './data'. If you download the downstream datasets, place the files as follows: 23 | ``` 24 | data 25 | |─ downstream 26 | │ |─ ucf 27 | │ │ └─ ucf101 28 | | │ |─ label1 29 | | │ |─ video1.mp4 30 | | │ : 31 | | │ : 32 | | |─ hmdb 33 | | │ |─ label1 34 | | │ │ |─ video1.avi 35 | | │ │ : 36 | | │ : 37 | | |─ youcook 38 | | │ |─ task1 39 | | │ │ |─ video1.mp4 40 | | │ │ : 41 | | │ : 42 | | |─ msrvtt 43 | | │ └─ TestVideo 44 | | │ |─ video1.mp4 45 | | │ : 46 | | └─ crosstask 47 | | └─ videos 48 | | |─ 105222 49 | | │ |─ 4K4PnQ66LQ8.mp4 50 | | │ : 51 | | : 52 | ``` 53 | 54 | ### Pretrained Weight 55 | The pretrained weight of our model, word2vec, and the tokenizer can be found in [here](https://drive.google.com/drive/folders/16QH4C6Sr6ptGp-wEnVsAwZem-kp-uLkt?usp=sharing). Place the pretrained weight of our model in the './checkpoint', and word2vec and the tokenizer in the './data'. 56 | 57 | ## Evaluation 58 | 59 | ### Action Recognition on UCF101 60 | ``` 61 | python src/eval_ucf.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar 62 | ``` 63 | 64 | ### Action Recognition on HMDB 65 | ``` 66 | python src/eval_hmdb.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar 67 | ``` 68 | 69 | ### Text-to-Video Retrieval on YouCook2 70 | ``` 71 | python src/eval_youcook.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar 72 | ``` 73 | 74 | ### Text-to-Video Retrieval on MSRVTT 75 | ``` 76 | python src/eval_msrvtt.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar 77 | ``` 78 | 79 | ### Action Step Localization on CrossTask 80 | ``` 81 | python src/eval_crosstask.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar 82 | ``` 83 | 84 | ## Citation 85 | ``` 86 | @inproceedings{ko2022video, 87 | title={Video-Text Representation Learning via Differentiable Weak Temporal Alignment}, 88 | author={Ko, Dohwan and Choi, Joonmyung and Ko, Juyeon and Noh, Shinyeong and On, Kyoung-Woon and Kim, Eun-Sol and Kim, Hyunwoo J}, 89 | booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition}, 90 | year={2022} 91 | } 92 | ``` -------------------------------------------------------------------------------- /loader/ucf_loader.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from torch.utils.data import Dataset 3 | import pandas as pd 4 | import os 5 | import numpy as np 6 | import random 7 | import ffmpeg 8 | import time 9 | import re 10 | import glob 11 | 12 | 13 | class UCF_DataLoader(Dataset): 14 | """HMDB Video-Text loader.""" 15 | 16 | def __init__( 17 | self, 18 | data, 19 | video_root='', 20 | num_clip=4, 21 | num_frames=32, 22 | size=224, 23 | with_flip=True, 24 | crop_only=False, 25 | center_crop=True, 26 | ): 27 | """ 28 | Args: 29 | """ 30 | assert isinstance(size, int) 31 | self.data = pd.read_csv(data) 32 | self.video_root = video_root 33 | self.size = size 34 | self.num_frames = num_frames 35 | self.num_clip = num_clip 36 | self.crop_only = crop_only 37 | self.center_crop = center_crop 38 | self.with_flip = with_flip 39 | 40 | def __len__(self): 41 | return len(self.data) 42 | 43 | def _get_video(self, video_path, num_clip, flip=False): 44 | cmd = ( 45 | ffmpeg 46 | .input(video_path) 47 | ) 48 | if self.center_crop: 49 | aw, ah = 0.5, 0.5 50 | else: 51 | aw, ah = random.uniform(0, 1), random.uniform(0, 1) 52 | if self.crop_only: 53 | cmd = ( 54 | cmd.crop('(iw - {})*{}'.format(self.size, aw), 55 | '(ih - {})*{}'.format(self.size, ah), 56 | str(self.size), str(self.size)) 57 | ) 58 | else: 59 | cmd = ( 60 | cmd.crop('(iw - min(iw,ih))*{}'.format(aw), 61 | '(ih - min(iw,ih))*{}'.format(ah), 62 | 'min(iw,ih)', 63 | 'min(iw,ih)') 64 | .filter('scale', self.size, self.size) 65 | ) 66 | out, _ = ( 67 | cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24') 68 | .run(capture_stdout=True, quiet=True) 69 | ) 70 | video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3]) 71 | video = th.from_numpy(video) 72 | video = video.permute(3, 0, 1, 2) 73 | if video.shape[1] < self.num_frames: 74 | zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8) 75 | video = th.cat((video, zeros), axis=1) 76 | output = th.zeros(num_clip, 3, self.num_frames, self.size, self.size) 77 | start_ind = np.linspace(0, video.shape[1] - self.num_frames, num_clip, dtype=int) 78 | for i, s in enumerate(start_ind): 79 | output[i] = video[:, s:s+self.num_frames] 80 | if flip: 81 | video = th.cat((output, th.flip(output, [4])), dim=0) 82 | return output 83 | 84 | def __getitem__(self, idx): 85 | video_id = self.data['video_id'].values[idx] 86 | label = self.data['label'].values[idx] 87 | split1 = self.data['split1'].values[idx] 88 | split2 = self.data['split2'].values[idx] 89 | split3 = self.data['split3'].values[idx] 90 | video_path = os.path.join(self.video_root, 'ucf', 'ucf101', video_id) 91 | if not(os.path.isfile(video_path)): 92 | print(video_path) 93 | raise ValueError 94 | video = self._get_video(video_path, self.num_clip, flip=self.with_flip) 95 | return {'video': video, 'label': label, 'split1': split1, 'split2': split2, 'split3': split3} 96 | 97 | -------------------------------------------------------------------------------- /loss.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from soft_dtw import SoftDTW 4 | import numpy as np 5 | from itertools import permutations 6 | 7 | class S2DTW(torch.nn.Module): 8 | def __init__(self, args): 9 | super(S2DTW, self).__init__() 10 | self.args = args 11 | self.sdtw = SoftDTW(use_cuda=True, gamma=1e-1, dist_func='negative_dot') 12 | self.tda = TDA(self.args) 13 | 14 | def video_text(self, video_embd, text_embd): 15 | b, n, d = video_embd.shape 16 | pos = -self.sdtw(video_embd, text_embd) 17 | video_embd_row = video_embd.unsqueeze(0).expand(b, b, n ,d).reshape(-1, n ,d) 18 | text_embd_col = text_embd.unsqueeze(1).expand(b, b, n ,d).reshape(-1, n, d) 19 | neg = -self.sdtw(video_embd_row, text_embd_col).reshape(b, b) 20 | neg = torch.logsumexp(neg, 1) 21 | loss = torch.mean(neg - pos) 22 | return loss 23 | 24 | def forward(self, video_embd, text_embd): 25 | # video_embd, text_embd = self.tda(video_embd, text_embd) 26 | loss = self.video_text(video_embd, text_embd) 27 | return loss 28 | 29 | class TDA(torch.nn.Module): 30 | def __init__(self, args): 31 | super(TDA, self).__init__() 32 | self.args = args 33 | self.num_clip = args.num_clip 34 | self.n = self.num_clip * self.num_clip 35 | self.perm = self.generate_permutations(self.num_clip).cuda() 36 | self.num_perm = self.perm.shape[0] 37 | self.softmin = nn.Softmin(dim=1) 38 | 39 | def negative_dot_product(self, x, y): 40 | z = torch.matmul(x, y.transpose(1, 2)) 41 | return -z 42 | 43 | def check_temporal_condition(self, p): 44 | for i in range(len(p)): 45 | if abs(p[i] - i) > 2: 46 | return False 47 | return True 48 | 49 | def generate_permutations(self, num_clip): 50 | perm = permutations([i for i in range(num_clip)]) 51 | temporal_condition_perm = [] 52 | for p in perm: 53 | if self.check_temporal_condition(p): 54 | temporal_condition_perm.append(p) 55 | temporal_condition_perm = torch.tensor(temporal_condition_perm) 56 | return temporal_condition_perm 57 | 58 | def generate_distribution(self, embd): 59 | b = embd.shape[0] 60 | self_similarity = self.negative_dot_product(embd, embd).detach() 61 | self_similarity = self_similarity.unsqueeze(1) 62 | self.perm_ = self.perm.unsqueeze(0).unsqueeze(3).repeat(b, 1, 1, self.num_clip) 63 | perm_similarity = self_similarity.repeat(1, self.num_perm, 1, 1) 64 | perm_similarity = torch.gather(torch.gather(perm_similarity, 2, self.perm_), 3, self.perm_.transpose(2, 3)) 65 | distribution = torch.norm(self_similarity - perm_similarity, p=2, dim=(2, 3)) 66 | distribution = self.softmin(distribution * 50) 67 | distribution = torch.distributions.Categorical(distribution) 68 | return distribution 69 | 70 | 71 | def forward(self, video_embd, text_embd): 72 | b, d = video_embd.shape[0], video_embd.shape[2] 73 | distribution_video = self.generate_distribution(video_embd) 74 | distribution_text = self.generate_distribution(text_embd) 75 | self.perm_ = self.perm.unsqueeze(0).repeat(b, 1, 1) 76 | perm_video = distribution_video.sample().unsqueeze(1).unsqueeze(2).repeat(1, 1, self.num_clip) 77 | perm_video = torch.gather(self.perm_, 1, perm_video).squeeze(1) 78 | perm_text = distribution_text.sample().unsqueeze(1).unsqueeze(2).repeat(1, 1, self.num_clip) 79 | perm_text = torch.gather(self.perm_, 1, perm_text).squeeze(1) 80 | video_embd = torch.gather(video_embd, 1, perm_video.unsqueeze(2).repeat(1, 1, d)) 81 | text_embd = torch.gather(text_embd, 1, perm_text.unsqueeze(2).repeat(1, 1, d)) 82 | return video_embd, text_embd -------------------------------------------------------------------------------- /loader/hmdb_loader.py: -------------------------------------------------------------------------------- 1 | 2 | import torch as th 3 | from torch.utils.data import Dataset 4 | import pandas as pd 5 | import os 6 | import numpy as np 7 | import random 8 | import ffmpeg 9 | import time 10 | import re 11 | import glob 12 | 13 | 14 | class HMDB_DataLoader(Dataset): 15 | """HMDB Video-Text loader.""" 16 | 17 | def __init__( 18 | self, 19 | data, 20 | video_root='', 21 | num_clip=4, 22 | num_frames=32, 23 | size=224, 24 | with_flip=True, 25 | crop_only=False, 26 | center_crop=True, 27 | ): 28 | """ 29 | Args: 30 | """ 31 | assert isinstance(size, int) 32 | self.data = pd.read_csv(data) 33 | self.video_root = video_root 34 | self.size = size 35 | self.num_frames = num_frames 36 | self.num_clip = num_clip 37 | self.crop_only = crop_only 38 | self.center_crop = center_crop 39 | self.with_flip = with_flip 40 | self.label_dict = {'brush_hair': 0, 'cartwheel': 1, 'catch': 2, 'chew': 3, 'clap': 4, 'climb': 5, 'climb_stairs': 6, 'dive': 7, 'draw_sword': 8, 41 | 'dribble': 9, 'drink': 10, 'eat': 11, 'fall_floor': 12, 'fencing': 13, 'flic_flac': 14, 'golf': 15, 'handstand': 16, 'hit': 17, 42 | 'hug': 18, 'jump': 19, 'kick': 20, 'kick_ball': 21, 'kiss': 22, 'laugh': 23, 'pick': 24, 'pour': 25, 'pullup': 26, 'punch': 27, 43 | 'push': 28, 'pushup': 29, 'ride_bike': 30, 'ride_horse': 31, 'run': 32, 'shake_hands': 33, 'shoot_ball': 34, 'shoot_bow': 35, 44 | 'shoot_gun': 36, 'sit': 37, 'situp': 38, 'smile': 39, 'smoke': 40, 'somersault': 41, 'stand': 42, 'swing_baseball': 43, 45 | 'sword': 44, 'sword_exercise': 45, 'talk': 46, 'throw': 47, 'turn': 48, 'walk': 49, 'wave': 50} 46 | 47 | 48 | def __len__(self): 49 | return len(self.data) 50 | 51 | def _get_video(self, video_path, num_clip, flip=False): 52 | cmd = ( 53 | ffmpeg 54 | .input(video_path) 55 | ) 56 | if self.center_crop: 57 | aw, ah = 0.5, 0.5 58 | else: 59 | aw, ah = random.uniform(0, 1), random.uniform(0, 1) 60 | if self.crop_only: 61 | cmd = ( 62 | cmd.crop('(iw - {})*{}'.format(self.size, aw), 63 | '(ih - {})*{}'.format(self.size, ah), 64 | str(self.size), str(self.size)) 65 | ) 66 | else: 67 | cmd = ( 68 | cmd.crop('(iw - min(iw,ih))*{}'.format(aw), 69 | '(ih - min(iw,ih))*{}'.format(ah), 70 | 'min(iw,ih)', 71 | 'min(iw,ih)') 72 | .filter('scale', self.size, self.size) 73 | ) 74 | out, _ = ( 75 | cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24') 76 | .run(capture_stdout=True, quiet=True) 77 | ) 78 | video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3]) 79 | video = th.from_numpy(video) 80 | video = video.permute(3, 0, 1, 2) 81 | if video.shape[1] < self.num_frames: 82 | zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8) 83 | video = th.cat((video, zeros), axis=1) 84 | output = th.zeros(num_clip, 3, self.num_frames, self.size, self.size) 85 | start_ind = np.linspace(0, video.shape[1] - self.num_frames, num_clip, dtype=int) 86 | for i, s in enumerate(start_ind): 87 | output[i] = video[:, s:s+self.num_frames] 88 | if flip: 89 | video = th.cat((output, th.flip(output, [4])), dim=0) 90 | return output 91 | 92 | def __getitem__(self, idx): 93 | video_id = self.data['video_id'].values[idx] 94 | label = self.data['label'].values[idx] 95 | split1 = self.data['split1'].values[idx] 96 | split2 = self.data['split2'].values[idx] 97 | split3 = self.data['split3'].values[idx] 98 | video_path = os.path.join(self.video_root, 'hmdb', label[:-5], video_id) 99 | if not(os.path.isfile(video_path)): 100 | raise ValueError 101 | video = self._get_video(video_path, self.num_clip, flip=self.with_flip) 102 | return {'video': video, 'label': self.label_dict[label[:-5]], 'split1': split1, 'split2': split2, 'split3': split3} 103 | 104 | -------------------------------------------------------------------------------- /args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | def get_args(description='VT-TWINS'): 4 | parser = argparse.ArgumentParser(description=description) 5 | parser.add_argument('--train_csv', type=str, default='./data/all_videos.csv', help='train csv') 6 | parser.add_argument('--video_path', type=str, default='./data/videos', help='video_path') 7 | parser.add_argument('--caption_root', type=str, default='./data/caption_json', help='video_path') 8 | parser.add_argument('--word2vec_path', type=str, default='./data/word2vec.pth', help='') 9 | parser.add_argument('--eval_video_root', type=str, default='./data/downstream', help='root folder for the video at for evaluation') 10 | parser.add_argument('--checkpoint_root', type=str, default='checkpoint', help='checkpoint dir root') 11 | parser.add_argument('--log_root', type=str, default='log', help='log dir root') 12 | parser.add_argument('--checkpoint_dir', type=str, default='', help='checkpoint model folder') 13 | parser.add_argument('--optimizer', type=str, default='adam', help='opt algorithm') 14 | parser.add_argument('--weight_init', type=str, default='uniform', help='CNN weights inits') 15 | parser.add_argument('--num_thread_reader', type=int, default=4, help='') 16 | parser.add_argument('--num_class', type=int, default=512, help='upper epoch limit') 17 | parser.add_argument('--num_clip', type=int, default=8, help='num clips') 18 | parser.add_argument('--batch_size', type=int, default=16, help='batch size') 19 | parser.add_argument('--num_windows_test', type=int, default=10, help='number of testing windows') 20 | parser.add_argument('--batch_size_val', type=int, default=10, help='batch size eval') 21 | parser.add_argument('--momemtum', type=float, default=0.9, help='SGD momemtum') 22 | parser.add_argument('--n_display', type=int, default=400, help='Information display frequence') 23 | parser.add_argument('--num_frames', type=int, default=32, help='random seed') 24 | parser.add_argument('--video_size', type=int, default=224, help='random seed') 25 | parser.add_argument('--crop_only', type=int, default=1, help='random seed') 26 | parser.add_argument('--centercrop', type=int, default=0, help='random seed') 27 | parser.add_argument('--random_flip', type=int, default=1, help='random seed') 28 | parser.add_argument('--verbose', type=int, default=1, help='') 29 | parser.add_argument('--warmup_steps', type=int, default=100000, help='') 30 | parser.add_argument('--min_time', type=float, default=5.0, help='') 31 | parser.add_argument('--pretrain_cnn_path', type=str, default='', help='') 32 | parser.add_argument('--fps', type=int, default=10, help='') 33 | parser.add_argument('--cudnn_benchmark', type=int, default=0, help='') 34 | parser.add_argument('--epochs', default=300, type=int, metavar='N', help='number of total epochs to run') 35 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)') 36 | parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr') 37 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum') 38 | parser.add_argument('--resume', dest='resume', action='store_true', help='resume training from last checkpoint') 39 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set') 40 | parser.add_argument('--pretrained', dest='pretrained', action='store_true', help='use pre-trained model') 41 | parser.add_argument('--pin_memory', dest='pin_memory', action='store_true', help='use pin_memory') 42 | parser.add_argument('--world-size', default=-1, type=int, help='number of nodes for distributed training') 43 | parser.add_argument('--rank', default=-1, type=int, help='node rank for distributed training') 44 | parser.add_argument('--dist-file', default='dist-file', type=str, help='url used to set up distributed training') 45 | parser.add_argument('--dist-url', default='tcp://111.111.111.111:12345', type=str, help='url used to set up distributed training') 46 | parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend') 47 | parser.add_argument('--seed', default=1, type=int, help='seed for initializing training. ') 48 | parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.') 49 | parser.add_argument('--multiprocessing-distributed', action='store_true', help='Use multi-processing distributed training to launch N processes per node, ' 50 | 'which has N GPUs. This is the fastest way to use PyTorch for either single node or multi node data parallel training') 51 | args = parser.parse_args() 52 | return args 53 | -------------------------------------------------------------------------------- /src/eval_youcook.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter("ignore", UserWarning) 3 | import os 4 | import random 5 | import socket 6 | import time 7 | import sys 8 | 9 | root_path = os.getcwd() 10 | sys.path.append(root_path) 11 | import torch 12 | import torch.optim as optim 13 | import torch.nn.functional as F 14 | import torch.utils.data 15 | import torch.multiprocessing as mp 16 | import torch.distributed as dist 17 | import torch.backends.cudnn as cudnn 18 | 19 | from metrics import retrieval 20 | from args import get_args 21 | from loader.youcook_loader import Youcook_DataLoader 22 | from s3dg import S3D 23 | from tqdm import tqdm 24 | import numpy as np 25 | import time 26 | from utils import AllGather 27 | allgather = AllGather.apply 28 | 29 | def main(args): 30 | model = deploy_model(args) 31 | test_dataset = Youcook_DataLoader(data='./data/validation_youcook.csv', num_clip=args.num_windows_test, 32 | video_root=args.eval_video_root, fps=args.fps, num_frames=args.num_frames, size=args.video_size, crop_only=False, 33 | center_crop=True,) 34 | test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) 35 | test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 36 | num_workers=args.num_thread_reader, sampler=test_sampler) 37 | 38 | all_video_embd, all_text_embd = test(test_loader, model, args) 39 | if args.gpu == 0: 40 | t2v = retrieval(np.dot(all_text_embd, all_video_embd.T)) 41 | v2t = retrieval(np.dot(all_video_embd, all_text_embd.T)) 42 | print('YouCook2') 43 | print(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}") 44 | print(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}") 45 | with open('result.txt', 'a') as f: 46 | f.write('YouCook2\n') 47 | f.write(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}\n") 48 | f.write(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}\n") 49 | 50 | def test(test_loader, model, args): 51 | all_text_embd = [] 52 | all_video_embd = [] 53 | with torch.no_grad(): 54 | for i_batch, data in enumerate(tqdm(test_loader)): 55 | text = data['text'].cuda() 56 | video = data['video'].float().cuda() 57 | video = video / 255.0 58 | video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5]) 59 | video_embd, text_embd = model(video, text) 60 | video_embd = video_embd.view(text_embd.shape[0], args.num_windows_test, text_embd.shape[1]) 61 | video_embd = video_embd.mean(dim=1) 62 | all_text_embd.append(text_embd) 63 | all_video_embd.append(video_embd) 64 | all_text_embd = torch.cat(all_text_embd, dim=0) 65 | all_video_embd = torch.cat(all_video_embd, dim=0) 66 | all_video_embd = allgather(all_video_embd, args) 67 | all_text_embd = allgather(all_text_embd, args) 68 | return all_video_embd.cpu().numpy(), all_text_embd.cpu().numpy() 69 | 70 | 71 | def deploy_model(args): 72 | checkpoint_path = args.pretrain_cnn_path 73 | print("=> loading checkpoint '{}'".format(checkpoint_path)) 74 | checkpoint = torch.load(checkpoint_path, map_location='cpu') 75 | torch.cuda.set_device(args.gpu) 76 | model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path) 77 | model.cuda(args.gpu) 78 | checkpoint_module = {k[7:]:v for k,v in checkpoint.items()} 79 | model.load_state_dict(checkpoint_module) 80 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True) 81 | model.eval() 82 | 83 | print(f'Model Loaded on GPU {args.gpu}') 84 | return model 85 | 86 | def main_worker(gpu, ngpus_per_node, main, args): 87 | cudnn.benchmark = True 88 | args.gpu = gpu 89 | args.rank = gpu 90 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 91 | s.connect(("8.8.8.8", 80)) 92 | ip = s.getsockname()[0] 93 | args.dist_url = f'tcp://{ip}:12345' 94 | dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu) 95 | main(args) 96 | 97 | def spawn_workers(main, args): 98 | ngpus_per_node = 8 99 | args.world_size = 8 100 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, main, args)) 101 | 102 | if __name__ == "__main__": 103 | args = get_args() 104 | 105 | 106 | assert args.eval_video_root != '' 107 | spawn_workers(main, args) -------------------------------------------------------------------------------- /src/eval_msrvtt.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter("ignore", UserWarning) 3 | import os 4 | import random 5 | import socket 6 | import time 7 | import sys 8 | 9 | root_path = os.getcwd() 10 | sys.path.append(root_path) 11 | import torch 12 | import torch.optim as optim 13 | import torch.nn.functional as F 14 | import torch.utils.data 15 | import torch.multiprocessing as mp 16 | import torch.distributed as dist 17 | import torch.backends.cudnn as cudnn 18 | 19 | from metrics import retrieval 20 | from args import get_args 21 | from loader.msrvtt_loader import MSRVTT_DataLoader 22 | from s3dg import S3D 23 | from tqdm import tqdm 24 | import numpy as np 25 | import time 26 | from utils import AllGather 27 | allgather = AllGather.apply 28 | 29 | def main(args): 30 | model = deploy_model(args) 31 | test_dataset = MSRVTT_DataLoader(data='./data/msrvtt_test.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root, 32 | fps=args.fps, num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True,) 33 | test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) 34 | test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 35 | num_workers=args.num_thread_reader, sampler=test_sampler) 36 | 37 | all_video_embd, all_text_embd = test(test_loader, model, args) 38 | if args.gpu == 0: 39 | t2v = retrieval(np.dot(all_text_embd, all_video_embd.T)) 40 | v2t = retrieval(np.dot(all_video_embd, all_text_embd.T)) 41 | print('MSRVTT') 42 | print(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}") 43 | print(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}") 44 | with open('result.txt', 'a') as f: 45 | f.write('MSRVTT\n') 46 | f.write(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}\n") 47 | f.write(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}\n") 48 | 49 | def test(test_loader, model, args): 50 | all_text_embd = [] 51 | all_video_embd = [] 52 | with torch.no_grad(): 53 | for i_batch, data in enumerate(tqdm(test_loader)): 54 | text = data['text'].cuda() 55 | video = data['video'].float().cuda() 56 | video = video / 255.0 57 | video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5]) 58 | video_embd, text_embd = model(video, text) 59 | video_embd = video_embd.view(text_embd.shape[0], args.num_windows_test, text_embd.shape[1]) 60 | video_embd = video_embd.mean(dim=1) 61 | all_text_embd.append(text_embd) 62 | all_video_embd.append(video_embd) 63 | all_text_embd = torch.cat(all_text_embd, dim=0) 64 | all_video_embd = torch.cat(all_video_embd, dim=0) 65 | all_video_embd = allgather(all_video_embd, args) 66 | all_text_embd = allgather(all_text_embd, args) 67 | return all_video_embd.cpu().numpy(), all_text_embd.cpu().numpy() 68 | 69 | 70 | def deploy_model(args): 71 | checkpoint_path = args.pretrain_cnn_path 72 | print("=> loading checkpoint '{}'".format(checkpoint_path)) 73 | checkpoint = torch.load(checkpoint_path, map_location='cpu') 74 | torch.cuda.set_device(args.gpu) 75 | model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path) 76 | model.cuda(args.gpu) 77 | checkpoint_module = {k[7:]:v for k,v in checkpoint.items()} 78 | model.load_state_dict(checkpoint_module) 79 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True) 80 | model.eval() 81 | print(f'Model Loaded on GPU {args.gpu}') 82 | return model 83 | 84 | def main_worker(gpu, ngpus_per_node, main, args): 85 | cudnn.benchmark = True 86 | args.gpu = gpu 87 | args.rank = gpu 88 | args.world_size = 8 89 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 90 | s.connect(("8.8.8.8", 80)) 91 | ip = s.getsockname()[0] 92 | args.dist_url = f'tcp://{ip}:12345' 93 | dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu) 94 | main(args) 95 | 96 | def spawn_workers(main, args): 97 | ngpus_per_node = 8 98 | args.world_size = 8 99 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, main, args)) 100 | 101 | if __name__ == "__main__": 102 | args = get_args() 103 | args.fps = 20 104 | args.num_windows_test = 8 105 | 106 | assert args.eval_video_root != '' 107 | spawn_workers(main, args) -------------------------------------------------------------------------------- /loader/msrvtt_loader.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from torch.utils.data import Dataset 3 | import pandas as pd 4 | import os 5 | import numpy as np 6 | import random 7 | import ffmpeg 8 | import time 9 | import re 10 | import pickle 11 | 12 | 13 | class MSRVTT_DataLoader(Dataset): 14 | """MSRVTT Video-Text loader.""" 15 | 16 | def __init__( 17 | self, 18 | data, 19 | video_root='', 20 | num_clip=4, 21 | fps=16, 22 | num_frames=32, 23 | size=224, 24 | crop_only=False, 25 | center_crop=True, 26 | token_to_word_path='../data/dict.npy', 27 | max_words=30, 28 | ): 29 | """ 30 | Args: 31 | """ 32 | assert isinstance(size, int) 33 | self.data = pd.read_csv(data) 34 | self.video_root = video_root 35 | self.size = size 36 | self.num_frames = num_frames 37 | self.fps = fps 38 | self.num_clip = num_clip 39 | self.num_sec = self.num_frames / float(self.fps) 40 | self.crop_only = crop_only 41 | self.center_crop = center_crop 42 | self.max_words = max_words 43 | self.word_to_token = {} 44 | token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path)) 45 | for i, t in enumerate(token_to_word): 46 | self.word_to_token[t] = i + 1 47 | 48 | def __len__(self): 49 | return len(self.data) 50 | 51 | def _get_video(self, video_path, start, end, num_clip): 52 | video = th.zeros(num_clip, 3, self.num_frames, self.size, self.size) 53 | start_ind = np.linspace(start, max(start, end-self.num_sec - 0.4), num_clip) 54 | for i, s in enumerate(start_ind): 55 | video[i] = self._get_video_start(video_path, s) 56 | return video 57 | 58 | def _get_video_start(self, video_path, start): 59 | start_seek = start 60 | cmd = ( 61 | ffmpeg 62 | .input(video_path, ss=start_seek, t=self.num_sec + 0.1) 63 | .filter('fps', fps=self.fps) 64 | ) 65 | if self.center_crop: 66 | aw, ah = 0.5, 0.5 67 | else: 68 | aw, ah = random.uniform(0, 1), random.uniform(0, 1) 69 | if self.crop_only: 70 | cmd = ( 71 | cmd.crop('(iw - {})*{}'.format(self.size, aw), 72 | '(ih - {})*{}'.format(self.size, ah), 73 | str(self.size), str(self.size)) 74 | ) 75 | else: 76 | cmd = ( 77 | cmd.crop('(iw - min(iw,ih))*{}'.format(aw), 78 | '(ih - min(iw,ih))*{}'.format(ah), 79 | 'min(iw,ih)', 80 | 'min(iw,ih)') 81 | .filter('scale', self.size, self.size) 82 | ) 83 | out, _ = ( 84 | cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24') 85 | .run(capture_stdout=True, quiet=True) 86 | ) 87 | video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3]) 88 | video = th.from_numpy(video) 89 | video = video.permute(3, 0, 1, 2) 90 | if video.shape[1] < self.num_frames: 91 | zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8) 92 | video = th.cat((video, zeros), axis=1) 93 | return video[:, :self.num_frames] 94 | 95 | def _split_text(self, sentence): 96 | w = re.findall(r"[\w']+", str(sentence)) 97 | return w 98 | 99 | def _words_to_token(self, words): 100 | words = [self.word_to_token[word] for word in words if word in self.word_to_token] 101 | if words: 102 | we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words) 103 | return we 104 | else: 105 | return th.zeros(self.max_words).long() 106 | 107 | def _zero_pad_tensor_token(self, tensor, size): 108 | if len(tensor) >= size: 109 | return tensor[:size] 110 | else: 111 | zero = th.zeros(size - len(tensor)).long() 112 | return th.cat((tensor, zero), dim=0) 113 | 114 | def words_to_ids(self, x): 115 | return self._words_to_token(self._split_text(x)) 116 | 117 | def _get_duration(self, video_path): 118 | probe = ffmpeg.probe(video_path) 119 | return probe['format']['duration'] 120 | 121 | def __getitem__(self, idx): 122 | video_id = self.data['video_id'].values[idx] 123 | cap = self.data['sentence'].values[idx] 124 | video_path = os.path.join(self.video_root, 'msrvtt', 'TestVideo', video_id + '.mp4') 125 | duration = self._get_duration(video_path) 126 | text = self.words_to_ids(cap) 127 | video = self._get_video(video_path, 0, float(duration), self.num_clip) 128 | return {'video': video, 'text': text} 129 | 130 | -------------------------------------------------------------------------------- /loader/crosstask_loader.py: -------------------------------------------------------------------------------- 1 | 2 | import torch as th 3 | from torch.utils.data import Dataset 4 | import pandas as pd 5 | import os 6 | import numpy as np 7 | import random 8 | import ffmpeg 9 | import time 10 | import re 11 | import pickle 12 | 13 | 14 | class CrossTask_DataLoader(Dataset): 15 | """CrossTask Video-Text loader.""" 16 | 17 | def __init__( 18 | self, 19 | data, 20 | video_root='./data/downstream/', 21 | num_clip=4, 22 | fps=16, 23 | num_frames=32, 24 | size=224, 25 | crop_only=False, 26 | center_crop=True, 27 | token_to_word_path='../data/dict.npy', 28 | max_words=30, 29 | ): 30 | """ 31 | Args: 32 | """ 33 | assert isinstance(size, int) 34 | self.data = pd.read_csv(data) 35 | self.video_root = video_root 36 | self.size = size 37 | self.num_frames = num_frames 38 | self.fps = fps 39 | self.num_clip = num_clip 40 | self.num_sec = self.num_frames / float(self.fps) 41 | self.crop_only = crop_only 42 | self.center_crop = center_crop 43 | self.max_words = max_words 44 | token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path)) 45 | self.word_to_token = {} 46 | for i, t in enumerate(token_to_word): 47 | self.word_to_token[t] = i + 1 48 | 49 | def __len__(self): 50 | return len(self.data) 51 | 52 | def _get_video(self, video_path, start, end, num_clip): 53 | video = th.zeros(num_clip, 3, self.num_frames, self.size, self.size) 54 | start_ind = np.linspace(start, max(start, end-self.num_sec - 0.4), num_clip) 55 | for i, s in enumerate(start_ind): 56 | video[i] = self._get_video_start(video_path, s) 57 | return video 58 | 59 | def _get_video_start(self, video_path, start): 60 | start_seek = start 61 | cmd = ( 62 | ffmpeg 63 | .input(video_path, ss=start_seek, t=self.num_sec + 0.1) 64 | .filter('fps', fps=self.fps) 65 | ) 66 | if self.center_crop: 67 | aw, ah = 0.5, 0.5 68 | else: 69 | aw, ah = random.uniform(0, 1), random.uniform(0, 1) 70 | if self.crop_only: 71 | cmd = ( 72 | cmd.crop('(iw - {})*{}'.format(self.size, aw), 73 | '(ih - {})*{}'.format(self.size, ah), 74 | str(self.size), str(self.size)) 75 | ) 76 | else: 77 | cmd = ( 78 | cmd.crop('(iw - min(iw,ih))*{}'.format(aw), 79 | '(ih - min(iw,ih))*{}'.format(ah), 80 | 'min(iw,ih)', 81 | 'min(iw,ih)') 82 | .filter('scale', self.size, self.size) 83 | ) 84 | out, _ = ( 85 | cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24') 86 | .run(capture_stdout=True, quiet=True) 87 | ) 88 | video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3]) 89 | video = th.from_numpy(video) 90 | video = video.permute(3, 0, 1, 2) 91 | if video.shape[1] < self.num_frames: 92 | zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8) 93 | video = th.cat((video, zeros), axis=1) 94 | return video[:, :self.num_frames] 95 | 96 | def _split_text(self, sentence): 97 | w = re.findall(r"[\w']+", str(sentence)) 98 | return w 99 | 100 | def _words_to_token(self, words): 101 | words = [self.word_to_token[word] for word in words if word in self.word_to_token] 102 | if words: 103 | we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words) 104 | return we 105 | else: 106 | return th.zeros(self.max_words).long() 107 | 108 | def _zero_pad_tensor_token(self, tensor, size): 109 | if len(tensor) >= size: 110 | return tensor[:size] 111 | else: 112 | zero = th.zeros(size - len(tensor)).long() 113 | return th.cat((tensor, zero), dim=0) 114 | 115 | def words_to_ids(self, x): 116 | return self._words_to_token(self._split_text(x)) 117 | 118 | def __getitem__(self, idx): 119 | video_id = self.data['video_id'].values[idx] 120 | task = self.data['task_id'].values[idx] 121 | start = self.data['start'].values[idx] 122 | end = self.data['end'].values[idx] 123 | cap = self.data['text'].values[idx] 124 | if os.path.isfile(os.path.join(self.video_root+ '/crosstask/videos/'+ str(task) + '/' + video_id + '.mp4')): 125 | video_path = os.path.join(self.video_root+ '/crosstask/videos/'+ str(task) + '/' + video_id + '.mp4') 126 | else: 127 | raise ValueError 128 | text = self.words_to_ids(cap) 129 | video = self._get_video(video_path, start, end, self.num_clip) 130 | return {'video_id': video_id, 'task_id': task, 'video': video, 'text': text} -------------------------------------------------------------------------------- /loader/youcook_loader.py: -------------------------------------------------------------------------------- 1 | 2 | import torch as th 3 | from torch.utils.data import Dataset 4 | import pandas as pd 5 | import os 6 | import numpy as np 7 | import random 8 | import ffmpeg 9 | import time 10 | import re 11 | import pickle 12 | 13 | 14 | class Youcook_DataLoader(Dataset): 15 | """Youcook Video-Text loader.""" 16 | 17 | def __init__( 18 | self, 19 | data, 20 | video_root='', 21 | num_clip=4, 22 | fps=16, 23 | num_frames=32, 24 | size=224, 25 | crop_only=False, 26 | center_crop=True, 27 | token_to_word_path='../data/dict.npy', 28 | max_words=30, 29 | ): 30 | """ 31 | Args: 32 | """ 33 | assert isinstance(size, int) 34 | self.data = pd.read_csv(data) 35 | self.video_root = video_root 36 | self.size = size 37 | self.num_frames = num_frames 38 | self.fps = fps 39 | self.num_clip = num_clip 40 | self.num_sec = self.num_frames / float(self.fps) 41 | self.crop_only = crop_only 42 | self.center_crop = center_crop 43 | self.max_words = max_words 44 | token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path)) 45 | self.word_to_token = {} 46 | for i, t in enumerate(token_to_word): 47 | self.word_to_token[t] = i + 1 48 | 49 | def __len__(self): 50 | return len(self.data) 51 | 52 | def _get_video(self, video_path, start, end, num_clip): 53 | video = th.zeros(num_clip, 3, self.num_frames, self.size, self.size) 54 | start_ind = np.linspace(start, max(start, end-self.num_sec - 0.4), num_clip) 55 | for i, s in enumerate(start_ind): 56 | video[i] = self._get_video_start(video_path, s) 57 | return video 58 | 59 | def _get_video_start(self, video_path, start): 60 | start_seek = start 61 | cmd = ( 62 | ffmpeg 63 | .input(video_path, ss=start_seek, t=self.num_sec + 0.1) 64 | .filter('fps', fps=self.fps) 65 | ) 66 | if self.center_crop: 67 | aw, ah = 0.5, 0.5 68 | else: 69 | aw, ah = random.uniform(0, 1), random.uniform(0, 1) 70 | if self.crop_only: 71 | cmd = ( 72 | cmd.crop('(iw - {})*{}'.format(self.size, aw), 73 | '(ih - {})*{}'.format(self.size, ah), 74 | str(self.size), str(self.size)) 75 | ) 76 | else: 77 | cmd = ( 78 | cmd.crop('(iw - min(iw,ih))*{}'.format(aw), 79 | '(ih - min(iw,ih))*{}'.format(ah), 80 | 'min(iw,ih)', 81 | 'min(iw,ih)') 82 | .filter('scale', self.size, self.size) 83 | ) 84 | out, _ = ( 85 | cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24') 86 | .run(capture_stdout=True, quiet=True) 87 | ) 88 | video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3]) 89 | video = th.from_numpy(video) 90 | video = video.permute(3, 0, 1, 2) 91 | if video.shape[1] < self.num_frames: 92 | zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8) 93 | video = th.cat((video, zeros), axis=1) 94 | return video[:, :self.num_frames] 95 | 96 | def _split_text(self, sentence): 97 | w = re.findall(r"[\w']+", str(sentence)) 98 | return w 99 | 100 | def _words_to_token(self, words): 101 | words = [self.word_to_token[word] for word in words if word in self.word_to_token] 102 | if words: 103 | we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words) 104 | return we 105 | else: 106 | return th.zeros(self.max_words).long() 107 | 108 | def _zero_pad_tensor_token(self, tensor, size): 109 | if len(tensor) >= size: 110 | return tensor[:size] 111 | else: 112 | zero = th.zeros(size - len(tensor)).long() 113 | return th.cat((tensor, zero), dim=0) 114 | 115 | def words_to_ids(self, x): 116 | return self._words_to_token(self._split_text(x)) 117 | 118 | def __getitem__(self, idx): 119 | video_id = self.data['video_id'].values[idx] 120 | task = self.data['task'].values[idx] 121 | start = self.data['start'].values[idx] 122 | end = self.data['end'].values[idx] 123 | cap = self.data['text'].values[idx] 124 | if os.path.isfile(os.path.join(self.video_root, 'youcook', str(task), video_id + '.mp4')): 125 | video_path = os.path.join(self.video_root, 'youcook', str(task), video_id + '.mp4') 126 | elif os.path.isfile(os.path.join(self.video_root, 'youcook', str(task), video_id + '.mkv')): 127 | video_path = os.path.join(self.video_root, 'youcook', str(task), video_id + '.mkv') 128 | elif os.path.isfile(os.path.join(self.video_root, 'youcook', str(task), video_id + '.webm')): 129 | video_path = os.path.join(self.video_root, 'youcook', str(task), video_id + '.webm') 130 | else: 131 | raise ValueError 132 | text = self.words_to_ids(cap) 133 | video = self._get_video(video_path, start, end, self.num_clip) 134 | return {'video': video, 'text': text} 135 | 136 | -------------------------------------------------------------------------------- /src/eval_crosstask.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter("ignore", UserWarning) 3 | import os 4 | import random 5 | import socket 6 | import time 7 | import sys 8 | 9 | root_path = os.getcwd() 10 | sys.path.append(root_path) 11 | import torch 12 | import torch.optim as optim 13 | import torch.nn.functional as F 14 | import torch.utils.data 15 | import torch.multiprocessing as mp 16 | import torch.distributed as dist 17 | import torch.backends.cudnn as cudnn 18 | 19 | from metrics import ctr 20 | from args import get_args 21 | from loader.crosstask_loader import CrossTask_DataLoader 22 | from s3dg import S3D 23 | from tqdm import tqdm 24 | import numpy as np 25 | import time 26 | from utils import AllGather 27 | allgather = AllGather.apply 28 | 29 | def main(args): 30 | model = deploy_model(args) 31 | test_dataset = CrossTask_DataLoader(data='./data/crosstask.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root, fps=args.fps, 32 | num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True, ) 33 | test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) 34 | test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 35 | num_workers=args.num_thread_reader, sampler=test_sampler) 36 | 37 | all_video_embd, all_text_embd, task_id = test(test_loader, model, args) 38 | if args.gpu == 0: 39 | video_dict = {} 40 | for i in range(len(task_id)): 41 | if task_id[i].item() not in video_dict.keys(): 42 | video_dict[task_id[i].item()] = {} 43 | video_dict[task_id[i].item()]['video_embd'] = [] 44 | video_dict[task_id[i].item()]['text_embd'] = [] 45 | video_dict[task_id[i].item()]['video_embd'].append(all_video_embd[i]) 46 | video_dict[task_id[i].item()]['text_embd'].append(all_text_embd[i]) 47 | 48 | recall_list = [] 49 | for task_id, videos in video_dict.items(): 50 | all_video_embd = [] 51 | all_text_embd = [] 52 | for v, t in zip(videos['video_embd'], videos['text_embd']): 53 | all_video_embd.append(np.expand_dims(v, 0)) 54 | all_text_embd.append(np.expand_dims(t, 0)) 55 | all_video_embd = np.concatenate(all_video_embd, axis=0) 56 | all_text_embd = np.concatenate(all_text_embd, axis=0) 57 | similarity = np.dot(all_video_embd, all_text_embd.T) 58 | recall = ctr(similarity) 59 | recall_list.append(recall) 60 | 61 | print('CrossTask') 62 | print(f'CTR: {np.mean(recall_list):.2f}') 63 | with open('result.txt', 'a') as f: 64 | f.write('CrossTask\n') 65 | f.write(f'CTR: {np.mean(recall_list):.2f}\n') 66 | 67 | def test(test_loader, model, args): 68 | all_text_embd = [] 69 | all_video_embd = [] 70 | video_id = [] 71 | task_id = [] 72 | with torch.no_grad(): 73 | for i_batch, data in enumerate(tqdm(test_loader)): 74 | text = data['text'].cuda() 75 | video = data['video'].float().cuda() 76 | # video_id.append(data['video_id'].cuda()) 77 | task_id.append(data['task_id'].cuda()) 78 | 79 | video = video / 255.0 80 | video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5]) 81 | video_embd, text_embd = model(video, text) 82 | video_embd = F.normalize(video_embd).view(text_embd.shape[0], args.num_windows_test, text_embd.shape[1]) 83 | video_embd = video_embd.mean(dim=1) 84 | text_embd = F.normalize(text_embd) 85 | all_video_embd.append(video_embd) 86 | all_text_embd.append(text_embd) 87 | 88 | all_video_embd, all_text_embd = torch.cat(all_video_embd, dim=0), torch.cat(all_text_embd, dim=0) 89 | all_video_embd, all_text_embd = allgather(all_video_embd, args), allgather(all_text_embd, args) 90 | task_id = torch.cat(task_id, dim=0) 91 | task_id = allgather(task_id, args) 92 | return all_video_embd.cpu().numpy(), all_text_embd.cpu().numpy(), task_id.cpu().numpy() 93 | 94 | 95 | def deploy_model(args): 96 | checkpoint_path = args.pretrain_cnn_path 97 | print("=> loading checkpoint '{}'".format(checkpoint_path)) 98 | checkpoint = torch.load(checkpoint_path, map_location='cpu') 99 | torch.cuda.set_device(args.gpu) 100 | model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path) 101 | model.cuda(args.gpu) 102 | checkpoint_module = {k[7:]:v for k,v in checkpoint.items()} 103 | model.load_state_dict(checkpoint_module) 104 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True) 105 | model.eval() 106 | 107 | print(f'Model Loaded on GPU {args.gpu}') 108 | return model 109 | 110 | def main_worker(gpu, ngpus_per_node, main, args): 111 | cudnn.benchmark = True 112 | args.gpu = gpu 113 | args.rank = gpu 114 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 115 | s.connect(("8.8.8.8", 80)) 116 | ip = s.getsockname()[0] 117 | args.dist_url = f'tcp://{ip}:12345' 118 | dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu) 119 | main(args) 120 | 121 | def spawn_workers(main, args): 122 | ngpus_per_node = 8 123 | args.world_size = 8 124 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, main, args)) 125 | 126 | if __name__ == "__main__": 127 | args = get_args() 128 | args.num_windows_test = 1 129 | 130 | assert args.eval_video_root != '' 131 | spawn_workers(main, args) -------------------------------------------------------------------------------- /loader/howto100m_loader.py: -------------------------------------------------------------------------------- 1 | import torch as th 2 | from torch.utils.data import Dataset 3 | import pandas as pd 4 | import os 5 | import numpy as np 6 | import random 7 | import ffmpeg 8 | import time 9 | import re 10 | import json 11 | 12 | class HT100M_DataLoader(Dataset): 13 | """HowTo100M Video-Text loader.""" 14 | 15 | def __init__(self, csv, video_root='', caption_root='', min_time=4.0, fps=16, num_frames=16, size=224, crop_only=False, center_crop=True, 16 | benchmark=False, token_to_word_path='./data/dict.npy', max_words=20, num_candidates=1, num_clip=8, random_left_right_flip=False,): 17 | """ 18 | Args: 19 | """ 20 | assert isinstance(size, int) 21 | self.csv = pd.read_csv(os.path.join(os.path.dirname(__file__), csv)) 22 | self.video_root = video_root 23 | self.caption_root = caption_root 24 | self.min_time = min_time 25 | self.size = size 26 | self.num_frames = num_frames 27 | self.fps = fps 28 | self.num_sec = self.num_frames / float(self.fps) 29 | self.crop_only = crop_only 30 | self.center_crop = center_crop 31 | self.benchmark = benchmark 32 | self.max_words = max_words 33 | token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path)) 34 | self.word_to_token = {} 35 | for i, t in enumerate(token_to_word): 36 | self.word_to_token[t] = i + 1 37 | self.num_candidates = num_candidates 38 | self.random_flip = random_left_right_flip 39 | self.num_clip = num_clip 40 | 41 | def __len__(self): 42 | return len(self.csv) 43 | 44 | def _get_video(self, video_path, start, end): 45 | videos = th.zeros(self.num_clip, 3, self.num_frames, self.size, self.size) 46 | for i, (s, e) in enumerate(zip(start, end)): 47 | start_seek = random.randint(s, int(max(s, e - self.num_sec))) 48 | cmd = ( 49 | ffmpeg 50 | .input(video_path, ss=start_seek, t=self.num_sec + 0.1) 51 | .filter('fps', fps=self.fps) 52 | ) 53 | if self.center_crop: 54 | aw, ah = 0.5, 0.5 55 | else: 56 | aw, ah = random.uniform(0, 1), random.uniform(0, 1) 57 | if self.crop_only: 58 | cmd = ( 59 | cmd.crop('(iw - {})*{}'.format(self.size, aw), 60 | '(ih - {})*{}'.format(self.size, ah), 61 | str(self.size), str(self.size)) 62 | ) 63 | else: 64 | cmd = ( 65 | cmd.crop('(iw - min(iw,ih))*{}'.format(aw), 66 | '(ih - min(iw,ih))*{}'.format(ah), 67 | 'min(iw,ih)', 68 | 'min(iw,ih)') 69 | .filter('scale', self.size, self.size) 70 | ) 71 | if self.random_flip and random.uniform(0, 1) > 0.5: 72 | cmd = cmd.hflip() 73 | out, _ = ( 74 | cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24') 75 | .run(capture_stdout=True, quiet=True) 76 | ) 77 | video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3]) 78 | video = th.from_numpy(video) 79 | video = video.permute(3, 0, 1, 2) 80 | if video.shape[1] < self.num_frames: 81 | zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8) 82 | video = th.cat((video, zeros), axis=1) 83 | videos[i] = video[:, :self.num_frames] 84 | return videos 85 | 86 | def _split_text(self, sentence): 87 | w = re.findall(r"[\w']+", str(sentence)) 88 | return w 89 | 90 | def _words_to_token(self, words): 91 | words = [self.word_to_token[word] for word in words if word in self.word_to_token] 92 | if words: 93 | we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words) 94 | return we 95 | else: 96 | return th.zeros(self.max_words, dtype=th.long) 97 | 98 | def _zero_pad_tensor_token(self, tensor, size): 99 | if len(tensor) >= size: 100 | return tensor[:size] 101 | else: 102 | zero = th.zeros(size - len(tensor)).long() 103 | return th.cat((tensor, zero), dim=0) 104 | 105 | def words_to_ids(self, x): 106 | return self._words_to_token(self._split_text(x)) 107 | 108 | def _get_text(self, caption): 109 | caption_json = open(caption, 'r') 110 | cap = pd.DataFrame(json.load(caption_json)) 111 | start, end = [], [] 112 | words = th.zeros(self.num_clip, self.max_words, dtype=th.long) 113 | if len(cap) < self.num_clip: 114 | for i in range(self.num_clip): 115 | start.append(int(cap['start'].values[min(i, len(cap)-1)])) 116 | end.append(int(cap['end'].values[min(i, len(cap)-1)])) 117 | words[i] = self.words_to_ids(cap['text'].values[min(i, len(cap)-1)]) 118 | else: 119 | ind = random.randint(0, len(cap) - self.num_clip) 120 | for i in range(self.num_clip): 121 | start.append(int(cap['start'].values[ind + i])) 122 | end.append(int(cap['end'].values[ind + i])) 123 | words[i] = self.words_to_ids(cap['text'].values[ind + i]) 124 | return words, start, end 125 | 126 | def __getitem__(self, idx): 127 | video_file = self.csv['video_path'][idx] 128 | video_id = video_file.split('.')[0] 129 | video_path = os.path.join(self.video_root, video_file) 130 | text, start, end = self._get_text(os.path.join(self.caption_root, video_id + '.json')) 131 | videos = self._get_video(video_path, start, end) 132 | return {'video': videos, 'text': text, 'start': th.tensor(start), 'end': th.tensor(end)} 133 | -------------------------------------------------------------------------------- /src/eval_ucf.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter("ignore", UserWarning) 3 | import os 4 | import random 5 | import socket 6 | import time 7 | import sys 8 | 9 | root_path = os.getcwd() 10 | sys.path.append(root_path) 11 | import torch 12 | import torch.optim as optim 13 | import torch.nn.functional as F 14 | import torch.utils.data 15 | import torch.multiprocessing as mp 16 | import torch.distributed as dist 17 | import torch.backends.cudnn as cudnn 18 | 19 | from metrics import retrieval 20 | from args import get_args 21 | from loader.ucf_loader import UCF_DataLoader 22 | from s3dg import S3D 23 | from tqdm import tqdm 24 | import numpy as np 25 | import time 26 | from utils import AllGather 27 | from sklearn import preprocessing 28 | from sklearn.svm import LinearSVC 29 | 30 | allgather = AllGather.apply 31 | 32 | def main(args): 33 | model = deploy_model(args) 34 | test_dataset = UCF_DataLoader(data='./data/ucf.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root, 35 | num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True, with_flip=True, ) 36 | test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) 37 | test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 38 | num_workers=args.num_thread_reader, sampler=test_sampler) 39 | 40 | all_video_embd, labels, split1, split2, split3 = test(test_loader, model, args) 41 | if args.gpu == 0: 42 | le = preprocessing.LabelEncoder() 43 | labels = le.fit_transform(labels) 44 | acc_list = [] 45 | for reg in [100.0]: 46 | c = LinearSVC(C=reg) 47 | for split in range(3): 48 | if split == 0: 49 | s = split1 50 | elif split == 1: 51 | s = split2 52 | else: 53 | s = split3 54 | X_train, X_test = all_video_embd[np.where(s == 1)[0]].reshape((-1, 1024)), all_video_embd[np.where(s == 2)[0]].reshape((-1, 1024)) 55 | label_train, label_test = labels[np.where(s == 1)[0]].repeat(args.num_windows_test), labels[np.where(s == 2)[0]] 56 | print('Fitting SVM for split {} and C: {}'.format(split + 1, reg)) 57 | c.fit(X_train, label_train) 58 | X_pred = c.decision_function(X_test) 59 | X_pred = np.reshape(X_pred, (len(label_test), args.num_windows_test, -1)) 60 | X_pred = X_pred.sum(axis=1) 61 | X_pred = np.argmax(X_pred, axis=1) 62 | acc = np.sum(X_pred == label_test) / float(len(X_pred)) 63 | print("Top 1 accuracy split {} and C {} : {}".format(split + 1, reg, acc)) 64 | acc_list.append(acc * 100) 65 | 66 | print('HMDB') 67 | print(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}') 68 | with open('result.txt', 'a') as f: 69 | f.write('UCF\n') 70 | f.write(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}\n') 71 | 72 | def test(test_loader, model, args): 73 | all_video_embd = [] 74 | labels = [] 75 | split1 = [] 76 | split2 = [] 77 | split3 = [] 78 | with torch.no_grad(): 79 | for i_batch, data in enumerate(tqdm(test_loader)): 80 | split1.append(data['split1'].cuda()) 81 | split2.append(data['split2'].cuda()) 82 | split3.append(data['split3'].cuda()) 83 | labels.append(data['label'].cuda()) 84 | video = data['video'].float().cuda() 85 | video = video / 255.0 86 | video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5]) 87 | video_embd = model(video, None, mode='video', mixed5c=True) 88 | video_embd = video_embd.view(len(data['label']), -1, video_embd.shape[1]) 89 | all_video_embd.append(video_embd) 90 | 91 | all_video_embd = torch.cat(all_video_embd, dim=0) 92 | all_video_embd = allgather(all_video_embd, args) 93 | labels = torch.cat(labels, dim=0) 94 | labels = allgather(labels, args) 95 | split1, split2, split3 = torch.cat(split1, dim=0), torch.cat(split2, dim=0), torch.cat(split3, dim=0) 96 | split1, split2, split3 = allgather(split1, args), allgather(split2, args), allgather(split3, args) 97 | return all_video_embd.cpu().numpy(), labels.cpu().numpy(), split1.cpu().numpy(), split2.cpu().numpy(), split3.cpu().numpy() 98 | 99 | 100 | def deploy_model(args): 101 | checkpoint_path = args.pretrain_cnn_path 102 | print("=> loading checkpoint '{}'".format(checkpoint_path)) 103 | checkpoint = torch.load(checkpoint_path, map_location='cpu') 104 | torch.cuda.set_device(args.gpu) 105 | model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path) 106 | model.cuda(args.gpu) 107 | checkpoint_module = {k[7:]:v for k,v in checkpoint.items()} 108 | model.load_state_dict(checkpoint_module) 109 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True) 110 | model.eval() 111 | 112 | print(f'Model Loaded on GPU {args.gpu}') 113 | return model 114 | 115 | def main_worker(gpu, ngpus_per_node, main, args): 116 | cudnn.benchmark = True 117 | args.gpu = gpu 118 | args.rank = gpu 119 | args.world_size = 8 120 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 121 | s.connect(("8.8.8.8", 80)) 122 | ip = s.getsockname()[0] 123 | args.dist_url = f'tcp://{ip}:12345' 124 | dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu) 125 | main(args) 126 | 127 | def spawn_workers(main, args): 128 | ngpus_per_node = 8 129 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, main, args)) 130 | 131 | if __name__ == "__main__": 132 | args = get_args() 133 | assert args.eval_video_root != '' 134 | spawn_workers(main, args) -------------------------------------------------------------------------------- /src/eval_hmdb.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter("ignore", UserWarning) 3 | import os 4 | import random 5 | import socket 6 | import time 7 | import sys 8 | 9 | root_path = os.getcwd() 10 | sys.path.append(root_path) 11 | import torch 12 | import torch.optim as optim 13 | import torch.nn.functional as F 14 | import torch.utils.data 15 | import torch.multiprocessing as mp 16 | import torch.distributed as dist 17 | import torch.backends.cudnn as cudnn 18 | 19 | from metrics import retrieval 20 | from args import get_args 21 | from loader.hmdb_loader import HMDB_DataLoader 22 | from s3dg import S3D 23 | from tqdm import tqdm 24 | import numpy as np 25 | import time 26 | from utils import AllGather 27 | from sklearn import preprocessing 28 | from sklearn.svm import LinearSVC 29 | 30 | allgather = AllGather.apply 31 | 32 | def main(args): 33 | model = deploy_model(args) 34 | test_dataset = HMDB_DataLoader(data='./data/hmdb51.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root, 35 | num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True, with_flip=True, ) 36 | test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset) 37 | test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 38 | num_workers=args.num_thread_reader, sampler=test_sampler) 39 | 40 | all_video_embd, labels, split1, split2, split3 = test(test_loader, model, args) 41 | if args.gpu == 0: 42 | le = preprocessing.LabelEncoder() 43 | labels = le.fit_transform(labels) 44 | acc_list = [] 45 | for reg in [100.0]: 46 | c = LinearSVC(C=reg) 47 | for split in range(3): 48 | if split == 0: 49 | s = split1 50 | elif split == 1: 51 | s = split2 52 | else: 53 | s = split3 54 | X_train, X_test = all_video_embd[np.where(s == 1)[0]].reshape((-1, 1024)), all_video_embd[np.where(s == 2)[0]].reshape((-1, 1024)) 55 | label_train, label_test = labels[np.where(s == 1)[0]].repeat(args.num_windows_test), labels[np.where(s == 2)[0]] 56 | print('Fitting SVM for split {} and C: {}'.format(split + 1, reg)) 57 | c.fit(X_train, label_train) 58 | X_pred = c.decision_function(X_test) 59 | X_pred = np.reshape(X_pred, (len(label_test), args.num_windows_test, -1)) 60 | X_pred = X_pred.sum(axis=1) 61 | X_pred = np.argmax(X_pred, axis=1) 62 | acc = np.sum(X_pred == label_test) / float(len(X_pred)) 63 | print("Top 1 accuracy split {} and C {} : {}".format(split + 1, reg, acc)) 64 | acc_list.append(acc * 100) 65 | 66 | print('HMDB') 67 | print(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}') 68 | with open('result.txt', 'a') as f: 69 | f.write('\nHMDB\n') 70 | f.write(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}\n') 71 | 72 | def test(test_loader, model, args): 73 | all_video_embd = [] 74 | labels = [] 75 | split1 = [] 76 | split2 = [] 77 | split3 = [] 78 | with torch.no_grad(): 79 | for i_batch, data in enumerate(tqdm(test_loader)): 80 | split1.append(data['split1'].cuda()) 81 | split2.append(data['split2'].cuda()) 82 | split3.append(data['split3'].cuda()) 83 | labels.append(data['label'].cuda()) 84 | video = data['video'].float().cuda() 85 | video = video / 255.0 86 | video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5]) 87 | video_embd = model(video, None, mode='video', mixed5c=True) 88 | video_embd = video_embd.view(len(data['label']), -1, video_embd.shape[1]) 89 | all_video_embd.append(video_embd) 90 | 91 | all_video_embd = torch.cat(all_video_embd, dim=0) 92 | all_video_embd = allgather(all_video_embd, args) 93 | labels = torch.cat(labels, dim=0) 94 | labels = allgather(labels, args) 95 | split1, split2, split3 = torch.cat(split1, dim=0), torch.cat(split2, dim=0), torch.cat(split3, dim=0) 96 | split1, split2, split3 = allgather(split1, args), allgather(split2, args), allgather(split3, args) 97 | return all_video_embd.cpu().numpy(), labels.cpu().numpy(), split1.cpu().numpy(), split2.cpu().numpy(), split3.cpu().numpy() 98 | 99 | 100 | def deploy_model(args): 101 | checkpoint_path = args.pretrain_cnn_path 102 | print("=> loading checkpoint '{}'".format(checkpoint_path)) 103 | checkpoint = torch.load(checkpoint_path, map_location='cpu') 104 | torch.cuda.set_device(args.gpu) 105 | model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path) 106 | model.cuda(args.gpu) 107 | checkpoint_module = {k[7:]:v for k,v in checkpoint.items()} 108 | model.load_state_dict(checkpoint_module) 109 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True) 110 | model.eval() 111 | 112 | print(f'Model Loaded on GPU {args.gpu}') 113 | return model 114 | 115 | def main_worker(gpu, ngpus_per_node, main, args): 116 | cudnn.benchmark = True 117 | args.gpu = gpu 118 | args.rank = gpu 119 | args.world_size = 8 120 | s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) 121 | s.connect(("8.8.8.8", 80)) 122 | ip = s.getsockname()[0] 123 | args.dist_url = f'tcp://{ip}:12345' 124 | dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu) 125 | main(args) 126 | 127 | def spawn_workers(main, args): 128 | ngpus_per_node = 8 129 | mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, main, args)) 130 | 131 | if __name__ == "__main__": 132 | args = get_args() 133 | assert args.eval_video_root != '' 134 | spawn_workers(main, args) -------------------------------------------------------------------------------- /src/train.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | warnings.simplefilter("ignore", UserWarning) 3 | import os 4 | import random 5 | import time 6 | import glob 7 | import sys 8 | from tqdm import tqdm 9 | 10 | root_path = os.getcwd() 11 | sys.path.append(root_path) 12 | import numpy as np 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | import torch.nn.parallel 17 | import torch.backends.cudnn as cudnn 18 | import torch.distributed as dist 19 | import torch.optim 20 | import torch.multiprocessing as mp 21 | import torch.utils.data 22 | import torch.utils.data.distributed 23 | 24 | from s3dg import S3D 25 | from args import get_args 26 | from loader.howto100m_loader import HT100M_DataLoader 27 | from loss import S2DTW 28 | from utils import AllGather, get_cosine_schedule_with_warmup 29 | 30 | allgather = AllGather.apply 31 | 32 | 33 | def main(): 34 | args = get_args() 35 | if args.verbose: 36 | print(args) 37 | assert args.eval_video_root != '' or not(args.evaluate) 38 | assert args.video_path != '' 39 | assert args.caption_root != '' 40 | if args.seed is not None: 41 | random.seed(args.seed) 42 | torch.manual_seed(args.seed) 43 | 44 | args.multiprocessing_distributed = True 45 | args.evaluate = False 46 | 47 | args.distributed = args.world_size > 1 or args.multiprocessing_distributed 48 | args.world_size = torch.cuda.device_count() 49 | if args.multiprocessing_distributed: 50 | mp.spawn(main_worker, nprocs=args.world_size, args=(args.world_size, args)) 51 | else: 52 | main_worker(args.gpu, args.world_size, args) 53 | 54 | 55 | 56 | def main_worker(gpu, ngpus_per_node, args): 57 | args.gpu = gpu 58 | if args.distributed: 59 | dist.init_process_group( 60 | backend=args.dist_backend, 61 | init_method=args.dist_url, 62 | world_size=args.world_size, 63 | rank=args.gpu, 64 | ) 65 | # create model 66 | model = S3D(args.num_class, space_to_depth=True, word2vec_path=args.word2vec_path, init=args.weight_init,) 67 | 68 | if args.distributed: 69 | if args.gpu is not None: 70 | torch.cuda.set_device(args.gpu) 71 | model.cuda(args.gpu) 72 | args.batch_size = int(args.batch_size / args.world_size) 73 | args.batch_size_val = int(args.batch_size_val / args.world_size) 74 | args.num_thread_reader = int(args.num_thread_reader / args.world_size) 75 | model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu]) 76 | else: 77 | model.cuda() 78 | model = torch.nn.parallel.DistributedDataParallel(model) 79 | elif args.gpu is not None: 80 | torch.cuda.set_device(args.gpu) 81 | model = model.cuda(args.gpu) 82 | else: 83 | model = torch.nn.DataParallel(model).cuda() 84 | 85 | # Data loading code 86 | train_dataset = HT100M_DataLoader( 87 | csv=args.train_csv, 88 | video_root=args.video_path, 89 | caption_root=args.caption_root, 90 | min_time=args.min_time, 91 | fps=args.fps, 92 | num_frames=args.num_frames, 93 | size=args.video_size, 94 | crop_only=args.crop_only, 95 | center_crop=args.centercrop, 96 | random_left_right_flip=args.random_flip, 97 | num_candidates=args.num_candidates, 98 | num_clip = args.num_clip, 99 | ) 100 | 101 | if args.distributed: 102 | train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset) 103 | else: 104 | train_sampler = None 105 | 106 | train_loader = torch.utils.data.DataLoader( 107 | train_dataset, 108 | batch_size=args.batch_size, 109 | shuffle=(train_sampler is None), 110 | drop_last=True, 111 | num_workers=args.num_thread_reader, 112 | pin_memory=args.pin_memory, 113 | sampler=train_sampler, 114 | ) 115 | 116 | criterion = S2DTW(args) 117 | 118 | if args.optimizer == 'adam': 119 | optimizer = torch.optim.Adam(model.parameters(), args.lr) 120 | elif args.optimizer == 'sgd': 121 | optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momemtum) 122 | 123 | scheduler = get_cosine_schedule_with_warmup(optimizer, args.warmup_steps, len(train_loader) * args.epochs) 124 | checkpoint_dir = os.path.join(os.path.dirname(__file__), 'checkpoint', args.checkpoint_dir) 125 | if args.checkpoint_dir != '' and not(os.path.isdir(checkpoint_dir)) and args.rank == 0: 126 | os.mkdir(checkpoint_dir) 127 | 128 | if args.cudnn_benchmark: 129 | cudnn.benchmark = True 130 | total_batch_size = args.world_size * args.batch_size 131 | log("Starting training loop for rank: {}, total batch size: {}".format(args.gpu, total_batch_size), args) 132 | for epoch in tqdm(range(args.start_epoch, args.epochs)): 133 | if args.distributed: 134 | train_sampler.set_epoch(epoch) 135 | train(train_loader, model, criterion, optimizer, scheduler, epoch, train_dataset, args) 136 | if args.rank == 0: 137 | save_checkpoint( 138 | { 139 | "epoch": epoch + 1, 140 | "state_dict": model.state_dict(), 141 | "optimizer": optimizer.state_dict(), 142 | "scheduler": scheduler.state_dict(), 143 | }, checkpoint_dir, epoch + 1 144 | ) 145 | 146 | 147 | def train(train_loader, model, criterion, optimizer, scheduler, epoch, dataset, args): 148 | running_loss = 0.0 149 | s = time.time() 150 | for i_batch, sample_batch in enumerate(train_loader): 151 | s_step = time.time() 152 | batch_loss = TrainOneBatch(model, optimizer, scheduler, sample_batch, criterion, epoch, args) 153 | d_step = time.time() - s_step 154 | running_loss += batch_loss 155 | if (i_batch + 1) % args.n_display == 0 and args.verbose and args.rank == 0: 156 | d = time.time() - s 157 | log(f"Epoch {epoch+1:d}, Elapsed Time: {d:.3f}, Epoch status: {args.batch_size * args.world_size * float(i_batch) / len(dataset):.4f}, \ 158 | Training loss: {running_loss / args.n_display:.4f}, Learning rates: {optimizer.param_groups[0]['lr']:.6f}", args) 159 | running_loss = 0.0 160 | s = time.time() 161 | 162 | def TrainOneBatch(model, opt, scheduler, data, loss_fun, epoch, args): 163 | video = data["video"].float().cuda(args.gpu, non_blocking=args.pin_memory) 164 | text = data["text"].cuda(args.gpu, non_blocking=args.pin_memory) 165 | text = text.view(-1, text.shape[-1]) 166 | video = video / 255.0 167 | video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5]) 168 | opt.zero_grad() 169 | with torch.set_grad_enabled(True): 170 | video_embd, text_embd = model(video, text) 171 | video_embd = F.normalize(video_embd).view(-1, args.num_clip, video_embd.shape[1]) 172 | text_embd = F.normalize(text_embd).view(-1, args.num_clip, text_embd.shape[1]) 173 | if args.distributed: 174 | video_embd = allgather(video_embd, args) 175 | text_embd = allgather(text_embd, args) 176 | loss= loss_fun(video_embd, text_embd) 177 | loss.backward() 178 | opt.step() 179 | scheduler.step() 180 | return loss 181 | 182 | def save_checkpoint(state, checkpoint_dir, epoch, n_ckpt=10): 183 | torch.save(state, os.path.join(checkpoint_dir, "epoch{:0>4d}.pth.tar".format(epoch))) 184 | 185 | def log(output, args): 186 | with open(os.path.join(os.path.dirname(__file__), 'log' , './log.txt'), "a") as f: 187 | f.write(output + '\n') 188 | 189 | if __name__ == "__main__": 190 | main() 191 | -------------------------------------------------------------------------------- /s3dg.py: -------------------------------------------------------------------------------- 1 | """Contains the definition for Gated Separable 3D network (S3D-G). 2 | """ 3 | 4 | import torch as th 5 | import torch.nn.functional as F 6 | import torch.nn as nn 7 | import os 8 | import numpy as np 9 | import re 10 | 11 | class InceptionBlock(nn.Module): 12 | 13 | def __init__(self, input_dim, num_outputs_0_0a, num_outputs_1_0a, num_outputs_1_0b, num_outputs_2_0a, num_outputs_2_0b, num_outputs_3_0b, gating=True): 14 | super(InceptionBlock, self).__init__() 15 | self.conv_b0 = STConv3D(input_dim, num_outputs_0_0a, [1, 1, 1]) 16 | self.conv_b1_a = STConv3D(input_dim, num_outputs_1_0a, [1, 1, 1]) 17 | self.conv_b1_b = STConv3D(num_outputs_1_0a, num_outputs_1_0b, [3, 3, 3], padding=1, separable=True) 18 | self.conv_b2_a = STConv3D(input_dim, num_outputs_2_0a, [1, 1, 1]) 19 | self.conv_b2_b = STConv3D(num_outputs_2_0a, num_outputs_2_0b, [3, 3, 3], padding=1, separable=True) 20 | self.maxpool_b3 = th.nn.MaxPool3d((3, 3, 3), stride=1, padding=1) 21 | self.conv_b3_b = STConv3D(input_dim, num_outputs_3_0b, [1, 1, 1]) 22 | self.gating = gating 23 | self.output_dim = num_outputs_0_0a + num_outputs_1_0b + num_outputs_2_0b + num_outputs_3_0b 24 | if gating: 25 | self.gating_b0 = SelfGating(num_outputs_0_0a) 26 | self.gating_b1 = SelfGating(num_outputs_1_0b) 27 | self.gating_b2 = SelfGating(num_outputs_2_0b) 28 | self.gating_b3 = SelfGating(num_outputs_3_0b) 29 | 30 | def forward(self, input): 31 | """Inception block 32 | """ 33 | b0 = self.conv_b0(input) 34 | b1 = self.conv_b1_a(input) 35 | b1 = self.conv_b1_b(b1) 36 | b2 = self.conv_b2_a(input) 37 | b2 = self.conv_b2_b(b2) 38 | b3 = self.maxpool_b3(input) 39 | b3 = self.conv_b3_b(b3) 40 | if self.gating: 41 | b0 = self.gating_b0(b0) 42 | b1 = self.gating_b1(b1) 43 | b2 = self.gating_b2(b2) 44 | b3 = self.gating_b3(b3) 45 | return th.cat((b0, b1, b2, b3), dim=1) 46 | 47 | class SelfGating(nn.Module): 48 | 49 | def __init__(self, input_dim): 50 | super(SelfGating, self).__init__() 51 | self.fc = nn.Linear(input_dim, input_dim) 52 | 53 | def forward(self, input_tensor): 54 | """Feature gating as used in S3D-G. 55 | """ 56 | spatiotemporal_average = th.mean(input_tensor, dim=[2, 3, 4]) 57 | weights = self.fc(spatiotemporal_average) 58 | weights = th.sigmoid(weights) 59 | return weights[:, :, None, None, None] * input_tensor 60 | 61 | class STConv3D(nn.Module): 62 | 63 | def __init__(self, 64 | input_dim, 65 | output_dim, 66 | kernel_size, 67 | stride=1, 68 | padding=0, 69 | separable=False): 70 | super(STConv3D, self).__init__() 71 | self.separable = separable 72 | self.relu = nn.ReLU(inplace=True) 73 | assert len(kernel_size) == 3 74 | if separable and kernel_size[0] != 1: 75 | spatial_kernel_size = [1, kernel_size[1], kernel_size[2]] 76 | temporal_kernel_size = [kernel_size[0], 1, 1] 77 | if isinstance(stride, list) and len(stride) == 3: 78 | spatial_stride = [1, stride[1], stride[2]] 79 | temporal_stride = [stride[0], 1, 1] 80 | else: 81 | spatial_stride = [1, stride, stride] 82 | temporal_stride = [stride, 1, 1] 83 | if isinstance(padding, list) and len(padding) == 3: 84 | spatial_padding = [0, padding[1], padding[2]] 85 | temporal_padding = [padding[0], 0, 0] 86 | else: 87 | spatial_padding = [0, padding, padding] 88 | temporal_padding = [padding, 0, 0] 89 | if separable: 90 | self.conv1 = nn.Conv3d(input_dim, output_dim, 91 | kernel_size=spatial_kernel_size, 92 | stride=spatial_stride, 93 | padding=spatial_padding, bias=False) 94 | self.bn1 = nn.BatchNorm3d(output_dim) 95 | self.conv2 = nn.Conv3d(output_dim, output_dim, 96 | kernel_size=temporal_kernel_size, 97 | stride=temporal_stride, 98 | padding=temporal_padding, bias=False) 99 | self.bn2 = nn.BatchNorm3d(output_dim) 100 | else: 101 | self.conv1 = nn.Conv3d(input_dim, output_dim, 102 | kernel_size=kernel_size, stride=stride, 103 | padding=padding, bias=False) 104 | self.bn1 = nn.BatchNorm3d(output_dim) 105 | 106 | 107 | def forward(self, input): 108 | out = self.relu(self.bn1(self.conv1(input))) 109 | if self.separable: 110 | out = self.relu(self.bn2(self.conv2(out))) 111 | return out 112 | 113 | 114 | def get_padding_shape(filter_shape, stride): 115 | def _pad_top_bottom(filter_dim, stride_val): 116 | pad_along = max(filter_dim - stride_val, 0) 117 | pad_top = pad_along // 2 118 | pad_bottom = pad_along - pad_top 119 | return pad_top, pad_bottom 120 | 121 | padding_shape = [] 122 | for filter_dim, stride_val in zip(filter_shape, stride): 123 | pad_top, pad_bottom = _pad_top_bottom(filter_dim, stride_val) 124 | padding_shape.append(pad_top) 125 | padding_shape.append(pad_bottom) 126 | depth_top = padding_shape.pop(0) 127 | depth_bottom = padding_shape.pop(0) 128 | padding_shape.append(depth_top) 129 | padding_shape.append(depth_bottom) 130 | 131 | return tuple(padding_shape) 132 | 133 | 134 | class MaxPool3dTFPadding(th.nn.Module): 135 | def __init__(self, kernel_size, stride=None, padding='SAME'): 136 | super(MaxPool3dTFPadding, self).__init__() 137 | if padding == 'SAME': 138 | padding_shape = get_padding_shape(kernel_size, stride) 139 | self.padding_shape = padding_shape 140 | self.pad = th.nn.ConstantPad3d(padding_shape, 0) 141 | self.pool = th.nn.MaxPool3d(kernel_size, stride, ceil_mode=True) 142 | 143 | def forward(self, inp): 144 | inp = self.pad(inp) 145 | out = self.pool(inp) 146 | return out 147 | 148 | class Sentence_Embedding(nn.Module): 149 | def __init__(self, 150 | embd_dim, 151 | token_to_word_path, 152 | num_embeddings=66250, 153 | word_embedding_dim=300, 154 | word2vec_path='', 155 | max_words=16, 156 | output_dim=2048): 157 | super(Sentence_Embedding, self).__init__() 158 | if word2vec_path: 159 | self.word_embd = nn.Embedding.from_pretrained(th.load(word2vec_path)) 160 | else: 161 | self.word_embd = nn.Embedding(num_embeddings, word_embedding_dim) 162 | self.fc1 = nn.Linear(word_embedding_dim, output_dim) 163 | self.fc2 = nn.Linear(output_dim, embd_dim) 164 | self.word_to_token = {} 165 | self.max_words = max_words 166 | token_to_word = np.load(token_to_word_path) 167 | for i, t in enumerate(token_to_word): 168 | self.word_to_token[t] = i + 1 169 | 170 | def _zero_pad_tensor_token(self, tensor, size): 171 | if len(tensor) >= size: 172 | return tensor[:size] 173 | else: 174 | zero = th.zeros(size - len(tensor)).long() 175 | return th.cat((tensor, zero), dim=0) 176 | 177 | def is_cuda(self): 178 | return self.fc1.bias.is_cuda 179 | 180 | def _split_text(self, sentence): 181 | w = re.findall(r"[\w']+", str(sentence)) 182 | return w 183 | 184 | def _words_to_token(self, words): 185 | words = [self.word_to_token[word] for word in words if word in self.word_to_token] 186 | if words: 187 | we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words) 188 | return we 189 | else: 190 | return th.zeros(self.max_words).long() 191 | 192 | def words_to_ids(self, x): 193 | split_x = [self._words_to_token(self._split_text(sent)) for sent in x] 194 | return th.stack(split_x, dim=0) 195 | 196 | def forward(self, x, raw_text=False): 197 | if raw_text: 198 | x = self.words_to_ids(x) 199 | with th.no_grad(): 200 | x = self.word_embd(x) 201 | x = F.relu(self.fc1(x), inplace=True) 202 | x = th.max(x, dim=1)[0] 203 | x = self.fc2(x) 204 | return x 205 | 206 | 207 | class S3D(nn.Module): 208 | 209 | def __init__(self, num_classes=512, gating=True, space_to_depth=False, word2vec_path='', init='uniform', token_to_word_path='./data/dict.npy'): 210 | super(S3D, self).__init__() 211 | self.num_classes = num_classes 212 | self.gating = gating 213 | self.space_to_depth = space_to_depth 214 | if space_to_depth: 215 | self.conv1 = STConv3D(24, 64, [2, 4, 4], stride=1, padding=(1, 2, 2), separable=False) 216 | else: 217 | self.conv1 = STConv3D(3, 64, [3, 7, 7], stride=2, padding=(1, 3, 3), separable=False) 218 | self.conv_2b = STConv3D(64, 64, [1, 1, 1], separable=False) 219 | self.conv_2c = STConv3D(64, 192, [3, 3, 3], padding=1, separable=True) 220 | self.gating = SelfGating(192) 221 | self.maxpool_2a = MaxPool3dTFPadding(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding='SAME') 222 | self.maxpool_3a = MaxPool3dTFPadding(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding='SAME') 223 | self.mixed_3b = InceptionBlock(192, 64, 96, 128, 16, 32, 32) 224 | self.mixed_3c = InceptionBlock(self.mixed_3b.output_dim, 128, 128, 192, 32, 96, 64) 225 | self.maxpool_4a = MaxPool3dTFPadding(kernel_size=(3, 3, 3), stride=(2, 2, 2), padding='SAME') 226 | self.mixed_4b = InceptionBlock(self.mixed_3c.output_dim, 192, 96, 208, 16, 48, 64) 227 | self.mixed_4c = InceptionBlock(self.mixed_4b.output_dim, 160, 112, 224, 24, 64, 64) 228 | self.mixed_4d = InceptionBlock(self.mixed_4c.output_dim, 128, 128, 256, 24, 64, 64) 229 | self.mixed_4e = InceptionBlock(self.mixed_4d.output_dim, 112, 144, 288, 32, 64, 64) 230 | self.mixed_4f = InceptionBlock(self.mixed_4e.output_dim, 256, 160, 320, 32, 128, 128) 231 | self.maxpool_5a = self.maxPool3d_5a_2x2 = MaxPool3dTFPadding(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding='SAME') 232 | self.mixed_5b = InceptionBlock(self.mixed_4f.output_dim, 256, 160, 320, 32, 128, 128) 233 | self.mixed_5c = InceptionBlock(self.mixed_5b.output_dim, 384, 192, 384, 48, 128, 128) 234 | self.fc = nn.Linear(self.mixed_5c.output_dim, num_classes) 235 | self.text_module = Sentence_Embedding( 236 | num_classes, 237 | os.path.join(os.path.dirname(__file__), token_to_word_path), 238 | word2vec_path=os.path.join(os.path.dirname(__file__), word2vec_path)) 239 | 240 | if init == 'kaiming_normal': 241 | for m in self.modules(): 242 | if isinstance(m, nn.Conv3d): 243 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') 244 | elif isinstance(m, nn.BatchNorm3d): 245 | nn.init.constant_(m.weight, 1) 246 | nn.init.constant_(m.bias, 0) 247 | 248 | def _space_to_depth(self, input): 249 | B, C, T, H, W = input.shape 250 | input = input.view(B, C, T // 2, 2, H // 2, 2, W // 2, 2) 251 | input = input.permute(0, 3, 5, 7, 1, 2, 4, 6) 252 | input = input.contiguous().view(B, 8 * C, T // 2, H // 2, W // 2) 253 | return input 254 | 255 | def forward(self, video, text, mode='all', mixed5c=False): 256 | if mode == 'all': 257 | return self.forward_video(video), self.text_module(text) 258 | elif mode == 'video': 259 | return self.forward_video(video, mixed5c=mixed5c) 260 | elif mode == 'text': 261 | return self.text_module(text) 262 | else: 263 | raise NotImplementedError 264 | 265 | def forward_video(self, inputs, mixed5c=False): 266 | #out = {} 267 | if self.space_to_depth: 268 | inputs = self._space_to_depth(inputs) 269 | # 'Conv2d_1a_7x7' 270 | net = self.conv1(inputs) 271 | if self.space_to_depth: 272 | net = net[:, :, 1:, 1:, 1:] 273 | #out['Conv2d_1a_7x7'] = net 274 | # 'MaxPool_2a_3x3' 275 | net = self.maxpool_2a(net) 276 | #out['MaxPool_2a_3x3'] = net 277 | #'Conv2d_2b_1x1' 278 | net = self.conv_2b(net) 279 | #out['Conv2d_2b_1x1'] = net 280 | # 'Conv2d_2c_3x3' 281 | net = self.conv_2c(net) 282 | #out['Conv2d_2c_3x3'] = net 283 | if self.gating: 284 | net = self.gating(net) 285 | #out['gating_1'] = net 286 | # 'MaxPool_3a_3x3' 287 | net = self.maxpool_3a(net) 288 | #out['MaxPool_3a_3x3'] = net 289 | # end_point = 'Mixed_3b' 290 | net = self.mixed_3b(net) 291 | #out['Mixed_3b'] = net 292 | # end_point = 'Mixed_3c' 293 | net = self.mixed_3c(net) 294 | #out['Mixed_3c'] = net 295 | # end_point = 'MaxPool_4a_3x3' 296 | net = self.maxpool_4a(net) 297 | #out['MaxPool_4a_3x3'] = net 298 | # end_point = 'Mixed_4b' 299 | net = self.mixed_4b(net) 300 | #out['Mixed_4b'] = net 301 | # end_point = 'Mixed_4c' 302 | net = self.mixed_4c(net) 303 | #out['Mixed_4c'] = net 304 | # end_point = 'Mixed_4d' 305 | net = self.mixed_4d(net) 306 | #out['Mixed_4d'] = net 307 | # end_point = 'Mixed_4e' 308 | net = self.mixed_4e(net) 309 | #out['Mixed_4e'] = net 310 | # end_point = 'Mixed_4f' 311 | net = self.mixed_4f(net) 312 | #out['Mixed_4f'] = net 313 | #end_point = 'MaxPool_5a_2x2' 314 | net = self.maxpool_5a(net) 315 | #out['MaxPool_5a_2x2'] = net 316 | # end_point = 'Mixed_5b' 317 | net = self.mixed_5b(net) 318 | #out['Mixed_5b'] = net 319 | # end_point = 'Mixed_5c' 320 | net = self.mixed_5c(net) 321 | #out['Mixed_5c'] = net 322 | #out['Avgpool'] = net 323 | net = th.mean(net, dim=[2, 3, 4]) 324 | if mixed5c: 325 | return net 326 | net = self.fc(net) 327 | #out['final'] = net 328 | return net 329 | -------------------------------------------------------------------------------- /soft_dtw.py: -------------------------------------------------------------------------------- 1 | # MIT License 2 | # 3 | # Copyright (c) 2020 Mehran Maghoumi 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in all 13 | # copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | # SOFTWARE. 22 | # ---------------------------------------------------------------------------------------------------------------------- 23 | 24 | import numpy as np 25 | import torch 26 | import torch.cuda 27 | import torch.nn as nn 28 | from numba import jit 29 | from torch.autograd import Function 30 | from numba import cuda 31 | import math 32 | 33 | # ---------------------------------------------------------------------------------------------------------------------- 34 | @cuda.jit 35 | def compute_softdtw_cuda(D, gamma, bandwidth, max_i, max_j, n_passes, R): 36 | """ 37 | :param seq_len: The length of the sequence (both inputs are assumed to be of the same size) 38 | :param n_passes: 2 * seq_len - 1 (The number of anti-diagonals) 39 | """ 40 | # Each block processes one pair of examples 41 | b = cuda.blockIdx.x 42 | # We have as many threads as seq_len, because the most number of threads we need 43 | # is equal to the number of elements on the largest anti-diagonal 44 | tid = cuda.threadIdx.x 45 | 46 | # Compute I, J, the indices from [0, seq_len) 47 | 48 | # The row index is always the same as tid 49 | I = tid 50 | 51 | inv_gamma = 1.0 / gamma 52 | 53 | # Go over each anti-diagonal. Only process threads that fall on the current on the anti-diagonal 54 | for p in range(n_passes): 55 | 56 | # The index is actually 'p - tid' but need to force it in-bounds 57 | J = max(0, min(p - tid, max_j - 1)) 58 | 59 | # For simplicity, we define i, j which start from 1 (offset from I, J) 60 | i = I + 1 61 | j = J + 1 62 | 63 | # Only compute if element[i, j] is on the current anti-diagonal, and also is within bounds 64 | if I + J == p and (I < max_i and J < max_j): 65 | # Don't compute if outside bandwidth 66 | if not (abs(i - j) > bandwidth > 0): 67 | r0 = -R[b, i - 1, j - 1] * inv_gamma 68 | r1 = -R[b, i - 1, j] * inv_gamma 69 | r2 = -R[b, i, j - 1] * inv_gamma 70 | rmax = max(max(r0, r1), r2) 71 | rsum = math.exp(r0 - rmax) + math.exp(r1 - rmax) + math.exp(r2 - rmax) 72 | softmin = -gamma * (math.log(rsum) + rmax) 73 | R[b, i, j] = D[b, i - 1, j - 1] + softmin 74 | 75 | # Wait for other threads in this block 76 | cuda.syncthreads() 77 | 78 | # ---------------------------------------------------------------------------------------------------------------------- 79 | @cuda.jit 80 | def compute_softdtw_backward_cuda(D, R, inv_gamma, bandwidth, max_i, max_j, n_passes, E): 81 | k = cuda.blockIdx.x 82 | tid = cuda.threadIdx.x 83 | 84 | # Indexing logic is the same as above, however, the anti-diagonal needs to 85 | # progress backwards 86 | I = tid 87 | 88 | for p in range(n_passes): 89 | # Reverse the order to make the loop go backward 90 | rev_p = n_passes - p - 1 91 | 92 | # convert tid to I, J, then i, j 93 | J = max(0, min(rev_p - tid, max_j - 1)) 94 | 95 | i = I + 1 96 | j = J + 1 97 | 98 | # Only compute if element[i, j] is on the current anti-diagonal, and also is within bounds 99 | if I + J == rev_p and (I < max_i and J < max_j): 100 | 101 | if math.isinf(R[k, i, j]): 102 | R[k, i, j] = -math.inf 103 | 104 | # Don't compute if outside bandwidth 105 | if not (abs(i - j) > bandwidth > 0): 106 | a = math.exp((R[k, i + 1, j] - R[k, i, j] - D[k, i + 1, j]) * inv_gamma) 107 | b = math.exp((R[k, i, j + 1] - R[k, i, j] - D[k, i, j + 1]) * inv_gamma) 108 | c = math.exp((R[k, i + 1, j + 1] - R[k, i, j] - D[k, i + 1, j + 1]) * inv_gamma) 109 | E[k, i, j] = E[k, i + 1, j] * a + E[k, i, j + 1] * b + E[k, i + 1, j + 1] * c 110 | 111 | # Wait for other threads in this block 112 | cuda.syncthreads() 113 | 114 | # ---------------------------------------------------------------------------------------------------------------------- 115 | class _SoftDTWCUDA(Function): 116 | """ 117 | CUDA implementation is inspired by the diagonal one proposed in https://ieeexplore.ieee.org/document/8400444: 118 | "Developing a pattern discovery method in time series data and its GPU acceleration" 119 | """ 120 | 121 | @staticmethod 122 | def forward(ctx, D, gamma, bandwidth): 123 | dev = D.device 124 | dtype = D.dtype 125 | gamma = torch.cuda.FloatTensor([gamma]) 126 | bandwidth = torch.cuda.FloatTensor([bandwidth]) 127 | 128 | B = D.shape[0] 129 | N = D.shape[1] 130 | M = D.shape[2] 131 | threads_per_block = max(N, M) 132 | n_passes = 2 * threads_per_block - 1 133 | 134 | # Prepare the output array 135 | R = torch.ones((B, N + 2, M + 2), device=dev, dtype=dtype) * math.inf 136 | R[:, 0, 0] = 0 137 | 138 | # Run the CUDA kernel. 139 | # Set CUDA's grid size to be equal to the batch size (every CUDA block processes one sample pair) 140 | # Set the CUDA block size to be equal to the length of the longer sequence (equal to the size of the largest diagonal) 141 | compute_softdtw_cuda[B, threads_per_block](cuda.as_cuda_array(D.detach()), 142 | gamma.item(), bandwidth.item(), N, M, n_passes, 143 | cuda.as_cuda_array(R)) 144 | ctx.save_for_backward(D, R, gamma, bandwidth) 145 | return R[:, -2, -2] 146 | 147 | @staticmethod 148 | def backward(ctx, grad_output): 149 | dev = grad_output.device 150 | dtype = grad_output.dtype 151 | D, R, gamma, bandwidth = ctx.saved_tensors 152 | 153 | B = D.shape[0] 154 | N = D.shape[1] 155 | M = D.shape[2] 156 | threads_per_block = max(N, M) 157 | n_passes = 2 * threads_per_block - 1 158 | 159 | D_ = torch.zeros((B, N + 2, M + 2), dtype=dtype, device=dev) 160 | D_[:, 1:N + 1, 1:M + 1] = D 161 | 162 | R[:, :, -1] = -math.inf 163 | R[:, -1, :] = -math.inf 164 | R[:, -1, -1] = R[:, -2, -2] 165 | 166 | E = torch.zeros((B, N + 2, M + 2), dtype=dtype, device=dev) 167 | E[:, -1, -1] = 1 168 | 169 | # Grid and block sizes are set same as done above for the forward() call 170 | compute_softdtw_backward_cuda[B, threads_per_block](cuda.as_cuda_array(D_), 171 | cuda.as_cuda_array(R), 172 | 1.0 / gamma.item(), bandwidth.item(), N, M, n_passes, 173 | cuda.as_cuda_array(E)) 174 | E = E[:, 1:N + 1, 1:M + 1] 175 | return grad_output.view(-1, 1, 1).expand_as(E) * E, None, None 176 | 177 | 178 | # ---------------------------------------------------------------------------------------------------------------------- 179 | # 180 | # The following is the CPU implementation based on https://github.com/Sleepwalking/pytorch-softdtw 181 | # Credit goes to Kanru Hua. 182 | # I've added support for batching and pruning. 183 | # 184 | # ---------------------------------------------------------------------------------------------------------------------- 185 | @jit(nopython=True) 186 | def compute_softdtw(D, gamma, bandwidth): 187 | B = D.shape[0] 188 | N = D.shape[1] 189 | M = D.shape[2] 190 | R = np.ones((B, N + 2, M + 2)) * np.inf 191 | R[:, 0, 0] = 0 192 | for b in range(B): 193 | for j in range(1, M + 1): 194 | for i in range(1, N + 1): 195 | 196 | # Check the pruning condition 197 | if 0 < bandwidth < np.abs(i - j): 198 | continue 199 | 200 | r0 = -R[b, i - 1, j - 1] / gamma 201 | r1 = -R[b, i - 1, j] / gamma 202 | r2 = -R[b, i, j - 1] / gamma 203 | rmax = max(max(r0, r1), r2) 204 | rsum = np.exp(r0 - rmax) + np.exp(r1 - rmax) + np.exp(r2 - rmax) 205 | softmin = - gamma * (np.log(rsum) + rmax) 206 | R[b, i, j] = D[b, i - 1, j - 1] + softmin 207 | return R 208 | 209 | # ---------------------------------------------------------------------------------------------------------------------- 210 | @jit(nopython=True) 211 | def compute_softdtw_backward(D_, R, gamma, bandwidth): 212 | B = D_.shape[0] 213 | N = D_.shape[1] 214 | M = D_.shape[2] 215 | D = np.zeros((B, N + 2, M + 2)) 216 | E = np.zeros((B, N + 2, M + 2)) 217 | D[:, 1:N + 1, 1:M + 1] = D_ 218 | E[:, -1, -1] = 1 219 | R[:, :, -1] = -np.inf 220 | R[:, -1, :] = -np.inf 221 | R[:, -1, -1] = R[:, -2, -2] 222 | for k in range(B): 223 | for j in range(M, 0, -1): 224 | for i in range(N, 0, -1): 225 | 226 | if np.isinf(R[k, i, j]): 227 | R[k, i, j] = -np.inf 228 | 229 | # Check the pruning condition 230 | if 0 < bandwidth < np.abs(i - j): 231 | continue 232 | 233 | a0 = (R[k, i + 1, j] - R[k, i, j] - D[k, i + 1, j]) / gamma 234 | b0 = (R[k, i, j + 1] - R[k, i, j] - D[k, i, j + 1]) / gamma 235 | c0 = (R[k, i + 1, j + 1] - R[k, i, j] - D[k, i + 1, j + 1]) / gamma 236 | a = np.exp(a0) 237 | b = np.exp(b0) 238 | c = np.exp(c0) 239 | E[k, i, j] = E[k, i + 1, j] * a + E[k, i, j + 1] * b + E[k, i + 1, j + 1] * c 240 | return E[:, 1:N + 1, 1:M + 1] 241 | 242 | # ---------------------------------------------------------------------------------------------------------------------- 243 | class _SoftDTW(Function): 244 | """ 245 | CPU implementation based on https://github.com/Sleepwalking/pytorch-softdtw 246 | """ 247 | 248 | @staticmethod 249 | def forward(ctx, D, gamma, bandwidth): 250 | dev = D.device 251 | dtype = D.dtype 252 | gamma = torch.Tensor([gamma]).to(dev).type(dtype) # dtype fixed 253 | bandwidth = torch.Tensor([bandwidth]).to(dev).type(dtype) 254 | D_ = D.detach().cpu().numpy() 255 | g_ = gamma.item() 256 | b_ = bandwidth.item() 257 | R = torch.Tensor(compute_softdtw(D_, g_, b_)).to(dev).type(dtype) 258 | ctx.save_for_backward(D, R, gamma, bandwidth) 259 | return R[:, -2, -2] 260 | 261 | @staticmethod 262 | def backward(ctx, grad_output): 263 | dev = grad_output.device 264 | dtype = grad_output.dtype 265 | D, R, gamma, bandwidth = ctx.saved_tensors 266 | D_ = D.detach().cpu().numpy() 267 | R_ = R.detach().cpu().numpy() 268 | g_ = gamma.item() 269 | b_ = bandwidth.item() 270 | E = torch.Tensor(compute_softdtw_backward(D_, R_, g_, b_)).to(dev).type(dtype) 271 | return grad_output.view(-1, 1, 1).expand_as(E) * E, None, None 272 | 273 | # ---------------------------------------------------------------------------------------------------------------------- 274 | class SoftDTW(torch.nn.Module): 275 | """ 276 | The soft DTW implementation that optionally supports CUDA 277 | """ 278 | 279 | def __init__(self, use_cuda, gamma=1.0, normalize=False, bandwidth=None, dist_func=None): 280 | """ 281 | Initializes a new instance using the supplied parameters 282 | :param use_cuda: Flag indicating whether the CUDA implementation should be used 283 | :param gamma: sDTW's gamma parameter 284 | :param normalize: Flag indicating whether to perform normalization 285 | (as discussed in https://github.com/mblondel/soft-dtw/issues/10#issuecomment-383564790) 286 | :param bandwidth: Sakoe-Chiba bandwidth for pruning. Passing 'None' will disable pruning. 287 | :param dist_func: Optional point-wise distance function to use. If 'None', then a default Euclidean distance function will be used. 288 | """ 289 | super(SoftDTW, self).__init__() 290 | self.normalize = normalize 291 | self.gamma = gamma 292 | self.bandwidth = 0 if bandwidth is None else float(bandwidth) 293 | self.use_cuda = use_cuda 294 | 295 | # Set the distance function 296 | # Set the distance function 297 | if dist_func == 'cosine': 298 | self.dist_func = SoftDTW._cosine_dist_func 299 | elif dist_func == 'negative_cosine': 300 | self.dist_func = SoftDTW._negative_cosine_dist_func 301 | elif dist_func == 'negative_dot': 302 | self.dist_func = SoftDTW._negative_dot_product 303 | elif dist_func == 'euclidean': 304 | self.dist_func = SoftDTW._euclidean_dist_func 305 | 306 | def _get_func_dtw(self, x, y): 307 | """ 308 | Checks the inputs and selects the proper implementation to use. 309 | """ 310 | bx, lx, dx = x.shape 311 | by, ly, dy = y.shape 312 | # Make sure the dimensions match 313 | assert bx == by # Equal batch sizes 314 | assert dx == dy # Equal feature dimensions 315 | 316 | use_cuda = self.use_cuda 317 | 318 | if use_cuda and (lx > 1024 or ly > 1024): # We should be able to spawn enough threads in CUDA 319 | print("SoftDTW: Cannot use CUDA because the sequence length > 1024 (the maximum block size supported by CUDA)") 320 | use_cuda = False 321 | 322 | # Finally, return the correct function 323 | return _SoftDTWCUDA.apply if use_cuda else _SoftDTW.apply 324 | 325 | @staticmethod 326 | def _euclidean_dist_func(x, y): 327 | """ 328 | Calculates the Euclidean distance between each element in x and y per timestep 329 | """ 330 | n = x.size(1) 331 | m = y.size(1) 332 | d = x.size(2) 333 | x = x.unsqueeze(2).expand(-1, n, m, d) 334 | y = y.unsqueeze(1).expand(-1, n, m, d) 335 | return torch.exp(torch.sqrt(torch.pow(x - y, 2).sum(3))) 336 | 337 | @staticmethod 338 | def _cosine_dist_func(x, y): 339 | """ 340 | Calculates the Cosine distance between each element in x and y per timestep 341 | """ 342 | n = x.size(1) 343 | m = y.size(1) 344 | d = x.size(2) 345 | x = x.unsqueeze(2).expand(-1, n, m, d) 346 | y = y.unsqueeze(1).expand(-1, n, m, d) 347 | distance = 1 - torch.nn.functional.cosine_similarity(x, y, dim=3) 348 | return torch.exp(distance) 349 | 350 | @staticmethod 351 | def _negative_dot_product(x, y): 352 | z = torch.matmul(x, y.transpose(1, 2)) 353 | 354 | n = z.shape[2] 355 | a1 = torch.ones((z.shape[0], z.shape[1] + 1, z.shape[2] + 1)).cuda() * math.inf 356 | a2 = torch.ones((z.shape[0], z.shape[1] + 1, z.shape[2] + 1)).cuda() * math.inf 357 | a3 = torch.ones((z.shape[0], z.shape[1] + 1, z.shape[2] + 1)).cuda() * math.inf 358 | a1[:, :n, 1:n+1] = -z 359 | a2[:, 1:n+1, :n] = -z 360 | a3[:, 1:n+1, 1:n+1] = -z 361 | a1[:, 0, 0] = 0 362 | a2[:, 0, 0] = 0 363 | a3[:, 0, 0] = 0 364 | gamma = 1e-1 365 | D = -z - gamma * torch.log(torch.exp(-a1 / gamma) + torch.exp(-a2 / gamma) + torch.exp(-a3 / gamma))[:, :n, :n] 366 | 367 | threshold = 0.5 368 | b, m, n = z.shape[0], z.shape[1], z.shape[2] 369 | D = torch.cat((D, torch.ones_like(z) * threshold), dim=2) 370 | D = D.reshape(b, 2 * m, n) 371 | D = torch.cat((torch.ones(b, 1, n, dtype=z.dtype).to(z.device) * threshold, D), dim=1) 372 | D = torch.cat((D, torch.ones_like(D) * threshold), dim=1) 373 | D = D.transpose(1, 2).reshape(b, 2 * m, 2 * n + 1).transpose(1, 2) 374 | D = torch.cat((torch.ones(b, 2 * m + 1, 1, dtype=z.dtype).to(z.device) * threshold, D), dim=2) 375 | return D 376 | 377 | def forward(self, X, Y): 378 | """ 379 | Compute the soft-DTW value between X and Y 380 | :param X: One batch of examples, batch_size x seq_len x dims 381 | :param Y: The other batch of examples, batch_size x seq_len x dims 382 | :return: The computed results 383 | """ 384 | 385 | # Check the inputs and get the correct implementation 386 | func_dtw = self._get_func_dtw(X, Y) 387 | 388 | if self.normalize: 389 | # Stack everything up and run 390 | x = torch.cat([X, X, Y]) 391 | y = torch.cat([Y, X, Y]) 392 | D = self.dist_func(x, y) 393 | out = func_dtw(D, self.gamma, self.bandwidth) 394 | out_xy, out_xx, out_yy = torch.split(out, X.shape[0]) 395 | return out_xy - 1 / 2 * (out_xx + out_yy) 396 | else: 397 | D_xy = self.dist_func(X, Y) 398 | return func_dtw(D_xy, self.gamma, self.bandwidth) 399 | 400 | # ---------------------------------------------------------------------------------------------------------------------- 401 | def timed_run(a, b, sdtw): 402 | """ 403 | Runs a and b through sdtw, and times the forward and backward passes. 404 | Assumes that a requires gradients. 405 | :return: timing, forward result, backward result 406 | """ 407 | from timeit import default_timer as timer 408 | 409 | # Forward pass 410 | start = timer() 411 | forward = sdtw(a, b) 412 | end = timer() 413 | t = end - start 414 | 415 | grad_outputs = torch.ones_like(forward) 416 | 417 | # Backward 418 | start = timer() 419 | grads = torch.autograd.grad(forward, a, grad_outputs=grad_outputs)[0] 420 | end = timer() 421 | 422 | # Total time 423 | t += end - start 424 | 425 | return t, forward, grads 426 | 427 | # ---------------------------------------------------------------------------------------------------------------------- 428 | def profile(batch_size, seq_len_a, seq_len_b, dims, tol_backward): 429 | sdtw = SoftDTW(False, gamma=1.0, normalize=False) 430 | sdtw_cuda = SoftDTW(True, gamma=1.0, normalize=False) 431 | n_iters = 6 432 | 433 | print("Profiling forward() + backward() times for batch_size={}, seq_len_a={}, seq_len_b={}, dims={}...".format(batch_size, seq_len_a, seq_len_b, dims)) 434 | 435 | times_cpu = [] 436 | times_gpu = [] 437 | 438 | for i in range(n_iters): 439 | a_cpu = torch.rand((batch_size, seq_len_a, dims), requires_grad=True) 440 | b_cpu = torch.rand((batch_size, seq_len_b, dims)) 441 | a_gpu = a_cpu.cuda() 442 | b_gpu = b_cpu.cuda() 443 | 444 | # GPU 445 | t_gpu, forward_gpu, backward_gpu = timed_run(a_gpu, b_gpu, sdtw_cuda) 446 | 447 | # CPU 448 | t_cpu, forward_cpu, backward_cpu = timed_run(a_cpu, b_cpu, sdtw) 449 | 450 | # Verify the results. 451 | assert torch.allclose(forward_cpu, forward_gpu.cpu()) 452 | assert torch.allclose(backward_cpu, backward_gpu.cpu(), atol=tol_backward) 453 | print(backward_gpu.shape) 454 | if i > 0: # Ignore the first time we run, in case this is a cold start (because timings are off at a cold start of the script) 455 | times_cpu += [t_cpu] 456 | times_gpu += [t_gpu] 457 | 458 | # Average and log 459 | avg_cpu = np.mean(times_cpu) 460 | avg_gpu = np.mean(times_gpu) 461 | print("\tCPU: ", avg_cpu) 462 | print("\tGPU: ", avg_gpu) 463 | print("\tSpeedup: ", avg_cpu / avg_gpu) 464 | print() 465 | 466 | # ---------------------------------------------------------------------------------------------------------------------- 467 | if __name__ == "__main__": 468 | from timeit import default_timer as timer 469 | 470 | torch.manual_seed(1234) 471 | 472 | # profile(128, 17, 15, 2, tol_backward=1e-6) 473 | # profile(512, 64, 64, 2, tol_backward=1e-4) 474 | # profile(512, 256, 256, 2, tol_backward=1e-3) 475 | profile(32, 256, 256, 512, tol_backward=1e-1) 476 | -------------------------------------------------------------------------------- /data/msrvtt_test.csv: -------------------------------------------------------------------------------- 1 | key,vid_key,video_id,sentence 2 | ret0,msr9770,video9770,a person is connecting something to system 3 | ret1,msr9771,video9771,a little girl does gymnastics 4 | ret2,msr7020,video7020,a woman creating a fondant baby and flower 5 | ret3,msr9773,video9773,a boy plays grand theft auto 5 6 | ret4,msr7026,video7026,a man is giving a review on a vehicle 7 | ret5,msr9775,video9775,a man speaks to children in a classroom 8 | ret6,msr9776,video9776,one micky mouse is talking to other 9 | ret7,msr7025,video7025,a naked child runs through a field 10 | ret8,msr9778,video9778,a little boy singing in front of judges and crowd 11 | ret9,msr9779,video9779,fireworks are being lit and exploding in a night sky 12 | ret10,msr7028,video7028,a man is singing and standing in the road 13 | ret11,msr7029,video7029,cartoon show for kids 14 | ret12,msr9772,video9772,some cartoon characters are moving around an area 15 | ret13,msr7021,video7021,baseball player hits ball 16 | ret14,msr9774,video9774,a rocket is lauching into a blue sky smoke is emerging from the base of the rocket 17 | ret15,msr7027,video7027,the man in the video is showing a brief viewing of how the movie is starting 18 | ret16,msr9731,video9731,a woman is mixing food in a mixing bowl 19 | ret17,msr7024,video7024,little pet shop cat getting a bath and washed with little brush 20 | ret18,msr9777,video9777,a student explains to his teacher about the sheep of another student 21 | ret19,msr8913,video8913,a video about different sports 22 | ret20,msr8912,video8912,a family is having coversation 23 | ret21,msr8911,video8911,a computer generated cartoon figure operates a control panel while another character sleeps in the background 24 | ret22,msr8910,video8910,adding ingredients to a pizza 25 | ret23,msr8917,video8917,two men discuss social issues 26 | ret24,msr8916,video8916,cartoons of a sponge a squid and a starfish 27 | ret25,msr8915,video8915,person cooking up somefood 28 | ret26,msr8914,video8914,models are walking down a short runway 29 | ret27,msr8919,video8919,a man is talking on stage 30 | ret28,msr8918,video8918,a hairdresser and client speak to each other with kid voices 31 | ret29,msr9545,video9545,some one talking about top ten movies of the year 32 | ret30,msr7704,video7704,a man with a very red nose 33 | ret31,msr7118,video7118,a young girl in a horror movie is haunted 34 | ret32,msr7119,video7119,news reporters talk about a strange sight in part of san diego 35 | ret33,msr7116,video7116,a group of people are walking a woman is talking about their culture 36 | ret34,msr7117,video7117,people are singing on the beach 37 | ret35,msr7114,video7114,a girl and a man are talking to each other 38 | ret36,msr7115,video7115,a man is showing off a new vehicle 39 | ret37,msr7112,video7112,while other friends too try and hitting the basket another is eager to achieve his fourth successful basket in basketball 40 | ret38,msr7113,video7113,a child in pink watches a white bird in an open box 41 | ret39,msr7110,video7110,there is a man shooting other people in a corridor 42 | ret40,msr9542,video9542,sports vine clips of football 43 | ret41,msr9679,video9679,the demonstration of mobile rack 44 | ret42,msr8978,video8978,band performing a hard rock song about diamonds in the sky 45 | ret43,msr8464,video8464,the ground rules of the republican presidential debate are introduced 46 | ret44,msr7701,video7701,cartoon cars smiling talking and driving down a city road 47 | ret45,msr7438,video7438,the articles of a shop is being shown 48 | ret46,msr8899,video8899,some people cross a street in a busy road 49 | ret47,msr8895,video8895,a man and a woman are singing on the beach 50 | ret48,msr7431,video7431,a man talks about the publication of a nasa technical report 51 | ret49,msr9549,video9549,video of a computer program while someone navigates through its menus 52 | ret50,msr8829,video8829,two men talk about stealing land and the bible 53 | ret51,msr8828,video8828,tv show presenters speak about will smith and other actors and their respective characters 54 | ret52,msr8827,video8827,cartoon girl is talking 55 | ret53,msr8826,video8826,lady speaking on her show 56 | ret54,msr8825,video8825,a woman holds up a leather jacket 57 | ret55,msr9548,video9548,a reporter asks questions at a gun range 58 | ret56,msr8823,video8823,men are doing wrestling 59 | ret57,msr8822,video8822,newscasters speak about a school shooting on the news program info wars 60 | ret58,msr8821,video8821,two men and a women are sitting on the chair 61 | ret59,msr8820,video8820,a person comes up in the hill on a orange motor bike and falls down 62 | ret60,msr9518,video9518,people perform on the voice 63 | ret61,msr9519,video9519,boy band performs for a crowd 64 | ret62,msr7432,video7432,a person on the computer looking at data 65 | ret63,msr9503,video9503,a woman is talking about how jeans with patches or rips is trendy 66 | ret64,msr9510,video9510,pokemon video game play 67 | ret65,msr7751,video7751,two women rare sitting speaking on sites in los angeles 68 | ret66,msr7752,video7752,a man speaks on driving and a car model 69 | ret67,msr9513,video9513,a woman is smiling 70 | ret68,msr7754,video7754,this is a video of a live tv show 71 | ret69,msr9515,video9515,a boy is singing 72 | ret70,msr9516,video9516,people are cheering at a stadium 73 | ret71,msr9517,video9517,a news program about overweight people 74 | ret72,msr9677,video9677,a guy wearing a red shirt drives a car while talking 75 | ret73,msr7436,video7436,a man gets shot in the face 76 | ret74,msr7822,video7822,many ladies and men are walking upstairs inside of a caged fence 77 | ret75,msr9204,video9204,a news reporter talks about a shooting 78 | ret76,msr9205,video9205,a woman interviewing about her part in a protest happening in brazil 79 | ret77,msr9206,video9206,a man punches a faucet to show how much better bottled water is 80 | ret78,msr9207,video9207,a lady is put makeup especially around her eyes 81 | ret79,msr9200,video9200,animals are communicating with one another through thought bubbles in three different scenes from a video game 82 | ret80,msr9201,video9201,a trailer for an upcoming movie with people on a beach 83 | ret81,msr9202,video9202,a girl singing song on stage 84 | ret82,msr9203,video9203,the man is giving an informational speech to a group of people about telling someone something 85 | ret83,msr8690,video8690,a blonde woman is speaking and then a radio button to subscribe to her webcasts appears 86 | ret84,msr9208,video9208,a man is showing the foot well under the door jam area of the car 87 | ret85,msr9209,video9209,the girl shoots her nerf bow 88 | ret86,msr7753,video7753,a man is giving his commentary on a current event television show 89 | ret87,msr9647,video9647,sports are being played 90 | ret88,msr7669,video7669,three woman doing a fashion show to music 91 | ret89,msr9349,video9349,guy showing how to make items 92 | ret90,msr9348,video9348,women walking on the stage with different styles of dressing in a fashion show 93 | ret91,msr9341,video9341,the press would rather interview the most ignorant person they can find than beautiful women 94 | ret92,msr9340,video9340,one sad one trying to comfort 95 | ret93,msr9343,video9343,woman is slicing the vegetables and decorating the meal 96 | ret94,msr9342,video9342,the water safety teams arrives with the safety devices and water bike to save a person who had been drifted away 97 | ret95,msr9345,video9345,cartoon birds are flying 98 | ret96,msr9344,video9344,gameplay footage of someone playing a game 99 | ret97,msr9347,video9347,a woman in red dress explaining about cushion seat 100 | ret98,msr9346,video9346,a woman giving a photoshop tutorial 101 | ret99,msr7826,video7826,a girl is preparing potato ball and explains the recipe 102 | ret100,msr9885,video9885,a movie director talking to the media men in press conference regarding his movie and hero also 103 | ret101,msr9882,video9882,a news reader is reading the news and asking question to some people 104 | ret102,msr8578,video8578,a man is commentating while playing minecraft 105 | ret103,msr7827,video7827,people are walking down a street holding signs 106 | ret104,msr7035,video7035,there is someone playing a game in a computer 107 | ret105,msr9581,video9581,serene music plays over scenery of mountains while a woman speaks 108 | ret106,msr7034,video7034,man in black shirt is holding a baby upside down and talking about universal studios 109 | ret107,msr9580,video9580,the animals are having nice time together and eating food 110 | ret108,msr8573,video8573,a video of a rock group performing one of their songs 111 | ret109,msr8572,video8572,a man grabs at snakes and throws them around the room 112 | ret110,msr8346,video8346,a man talks about a war between two generals one of which became king 113 | ret111,msr8347,video8347,a video of someone talking about some girls 114 | ret112,msr8344,video8344,a man is driving a car through the countryside 115 | ret113,msr8345,video8345,a woman feeds another 116 | ret114,msr8342,video8342,girl in pink dress fashion model walking in ramp 117 | ret115,msr8343,video8343,a man is holding a coffee mug 118 | ret116,msr8340,video8340,a man and a woman stand in a bedroom 119 | ret117,msr8341,video8341,cartoon characters are talking to a pokemon 120 | ret118,msr7825,video7825,sitting and converstion 2 lady and 2 gents 121 | ret119,msr8348,video8348,a woman looks after abandoned children for free in her home 122 | ret120,msr8349,video8349,friends enjoy eating 123 | ret121,msr9589,video9589,a woman plays instruments in a field 124 | ret122,msr9837,video9837,video game footage of a killing spree during call of duty black ops 125 | ret123,msr9836,video9836,a woman is stirring food 126 | ret124,msr9835,video9835,the women sit at the lap top and talk to one another 127 | ret125,msr8851,video8851,a group of indian guys meeting on a bridge while it s raining 128 | ret126,msr9833,video9833,airport security figure explains requirements for transporting medicines 129 | ret127,msr9832,video9832,a video game character rides around on a motorcycle 130 | ret128,msr9831,video9831,a group of people are swimming in a boat a monkey is walking on the tree 131 | ret129,msr9830,video9830,some people was swimming under the waterit is very good job 132 | ret130,msr9839,video9839,an animated horse is in a barn and the maker asks for comments 133 | ret131,msr9838,video9838,a cartoon clip of pokemon dancing 134 | ret132,msr7111,video7111,it is the animation cartoon 135 | ret133,msr8089,video8089,a police officer drives his white car onto a grassy field and then back on to the street 136 | ret134,msr9728,video9728,a man talking about kuru disease 137 | ret135,msr8122,video8122,a fearful animation scene 138 | ret136,msr8123,video8123,a woman is giving demo for baby trolley 139 | ret137,msr8120,video8120,a guest is speaking with a television show host 140 | ret138,msr8121,video8121,there was a resistor in the back 141 | ret139,msr7839,video7839,this is a vine sports compilation 142 | ret140,msr7838,video7838,a guy is talking over loud music at the end of a youtube video 143 | ret141,msr8124,video8124,basketball players making a shot in the last seven seconds 144 | ret142,msr8125,video8125,a man in a flying contraption crashes in a field 145 | ret143,msr7835,video7835,two cartoon characters walking underwater 146 | ret144,msr7834,video7834,a man in striped collared shirt discusses jobs in news room of bloomberg 147 | ret145,msr8128,video8128,a cartoon character prepares to ride a bicycle 148 | ret146,msr7836,video7836,a girl with a hat on and dancing 149 | ret147,msr7831,video7831,a young girl is abusing a young man 150 | ret148,msr7830,video7830,this is about a young boy s photo shoot 151 | ret149,msr7833,video7833,a man is talking about business 152 | ret150,msr7832,video7832,a man and woman looking at each other on the subway 153 | ret151,msr7506,video7506,a video game car is driving recklessly 154 | ret152,msr7501,video7501,will smith has starred in a lot of movies including the movie ali 155 | ret153,msr8423,video8423,a female inside a white themed bathroom while someone else makes her makeup 156 | ret154,msr7500,video7500,a soccer team walking out on the field 157 | ret155,msr7233,video7233,views of two persons working on the super computer with the head phones on 158 | ret156,msr7232,video7232,the sky roads game on the computer 159 | ret157,msr7231,video7231,a woman introducing someone 160 | ret158,msr7230,video7230,a woman is talking about a baby stroller 161 | ret159,msr7237,video7237,there is a man repairing a product on the table 162 | ret160,msr7236,video7236,person walking around building 163 | ret161,msr7235,video7235,cartoon of a squid on a bike looking up at a treehouse 164 | ret162,msr7234,video7234,a woman flanked by two men are with a discussion 165 | ret163,msr8650,video8650,a news convention is held 166 | ret164,msr8651,video8651,a man talks about cars 167 | ret165,msr8652,video8652,a man runs into the crowd when trying to catch a basketball 168 | ret166,msr7502,video7502,a narrator explains where to find a rare vehicle in grand theft auto 169 | ret167,msr8654,video8654,a woman cooking an orange substance 170 | ret168,msr8655,video8655,an animal is throwing a piece of junk 171 | ret169,msr8656,video8656,she used an electric blender 172 | ret170,msr8657,video8657,a man rides his motorcycle to a building 173 | ret171,msr7898,video7898,a salad in a bowl is being filmed on a table 174 | ret172,msr9901,video9901,a man with glasses and a goatee talking about his former job 175 | ret173,msr8459,video8459,mario and friends play in a video game together 176 | ret174,msr8514,video8514,a person is putting the vegetable in to the water and boil it 177 | ret175,msr7909,video7909,a woman holding a ribbon 178 | ret176,msr8456,video8456,a man talks to someone and also the camera 179 | ret177,msr8018,video8018,a young girl shopping 180 | ret178,msr8457,video8457,a man rowing a kayak is shown in slow motion 181 | ret179,msr8016,video8016,a dog and a cat are in a standoff 182 | ret180,msr7900,video7900,a man talks about dna force 183 | ret181,msr8014,video8014,a group of people are singing while holding coke in their hands 184 | ret182,msr7902,video7902,a baby playing with a cats tail 185 | ret183,msr8012,video8012,a woman jumps over a bar and attacks a man 186 | ret184,msr8013,video8013,a video showing footage from sporting events 187 | ret185,msr8010,video8010,lego stormtroppers are in a facility 188 | ret186,msr8011,video8011,a girl talks about photos and her life 189 | ret187,msr7890,video7890,person is talking about the big growth in the cities 190 | ret188,msr7589,video7589,a man getting interviewed by a beach 191 | ret189,msr7588,video7588,a man showing his finished product of a wood floor in his home 192 | ret190,msr9745,video9745,a man is wearing a cap 193 | ret191,msr9744,video9744,this is a live tv show 194 | ret192,msr7587,video7587,two men in a wrestling competition 195 | ret193,msr7586,video7586,woman playing instruments in a field for a music video 196 | ret194,msr7581,video7581,lyrics are written for the song 197 | ret195,msr7580,video7580,a woman speaking about scifi and fantasy disasters 198 | ret196,msr7583,video7583,a princess tries not to cry in front of malificent 199 | ret197,msr9742,video9742,the actor playing thor talking about the new movie 200 | ret198,msr9909,video9909,a crowd of people sitting next to each other as one man plays a video game 201 | ret199,msr7349,video7349,a grey haired man interviews someone else 202 | ret200,msr7348,video7348,a girl is talking about relationships 203 | ret201,msr7347,video7347,a woman is explaining how to do something on a computer 204 | ret202,msr7346,video7346,some people video conferencing as they watch a movie 205 | ret203,msr7345,video7345,a man is talking about making it easier for kids to learn while scenes of a school are shown 206 | ret204,msr9908,video9908,a woman talking about education 207 | ret205,msr7343,video7343,a guy fixing up another car 208 | ret206,msr7342,video7342,opening of a nest a rate is coming out and searching something it eats something on a human hand 209 | ret207,msr7341,video7341,in an interview a person in advocating education among the populace 210 | ret208,msr7340,video7340,a man is talking about his car s features while inside his car 211 | ret209,msr9403,video9403,i see a prince trying to get a girl and i see peter pan play around 212 | ret210,msr8426,video8426,cabins on a sandy beach have walkways going up to their porches 213 | ret211,msr9622,video9622,dogs are walking across the road in a video 214 | ret212,msr8900,video8900,a woman bakes and decorates a cake 215 | ret213,msr8901,video8901,men pushing a car down assembly line 216 | ret214,msr8902,video8902,a man is looking out a window to look at another man who jumped from the window to his death 217 | ret215,msr8903,video8903,there are crocodiles about to eat the group of people walking across the water 218 | ret216,msr8904,video8904,a women is in an make up room telling about here itenary time wise 219 | ret217,msr8905,video8905,two man s are talking to each other 220 | ret218,msr8906,video8906,bbc news story about military crackdown in an unknown asian country 221 | ret219,msr8907,video8907,scene of thor from the avengers 222 | ret220,msr8908,video8908,a white man in a suit talking in front of a tv about logic and law 223 | ret221,msr8909,video8909,list of people s involved in making the documentary 224 | ret222,msr8835,video8835,a woman serves a bowl of soup woth stuff in it 225 | ret223,msr7699,video7699,a man with head band is demonstrating how to play ping pong 226 | ret224,msr7698,video7698,two women are walking in a parking lot 227 | ret225,msr7169,video7169,a female nurse washes her hands and then cleans off a surface 228 | ret226,msr7168,video7168,he is playing with ball 229 | ret227,msr7163,video7163,a young man is touching a young girls back 230 | ret228,msr7162,video7162,woman talking to a man in an interview 231 | ret229,msr7693,video7693,this is a jigsaw puzzle video 232 | ret230,msr7160,video7160,a cartoon character falls asleep on a couch 233 | ret231,msr7695,video7695,a man is stirring something in a pot 234 | ret232,msr7166,video7166,a person is explaining something 235 | ret233,msr7165,video7165,a cook prepares food items in a metal bowl 236 | ret234,msr7164,video7164,a computer animation using the source engine 237 | ret235,msr9699,video9699,a man explains how to do a experiment 238 | ret236,msr9698,video9698,a women is talking about the books she likes and the second favourite one is the amc the walking dead 239 | ret237,msr9693,video9693,the chef adds fish sauce and fish paste to a large stainless steel cooking pot 240 | ret238,msr9692,video9692,a man in a suit is talking on a television economy program 241 | ret239,msr9691,video9691,a commercial for the mazda 3 the card sliding around a corner 242 | ret240,msr9690,video9690,a man explains the condition of someone in the hospital to the press outside of a building 243 | ret241,msr9697,video9697,the woman has a baby monitor 244 | ret242,msr9696,video9696,a man folds up a stroller 245 | ret243,msr9695,video9695,text explains about a pokemon expisode that caused seizures 246 | ret244,msr9694,video9694,a guy barbequeing potatoes 247 | ret245,msr7765,video7765,a person is discussing a car 248 | ret246,msr9524,video9524,a bunch of cartoon faces are chomping their teeth and making eating gestures 249 | ret247,msr7767,video7767,a slideshow with captions 250 | ret248,msr7766,video7766,a video gamer is seen as he plays a video game 251 | ret249,msr9521,video9521,a person is passing by a jolly group of men 252 | ret250,msr8839,video8839,spongebob is showing memories of him with mr 253 | ret251,msr7763,video7763,people are playing basketball 254 | ret252,msr9522,video9522,a man is giving a presentation on stage 255 | ret253,msr8834,video8834,a man is cooking in the kitchen he states he will return later to add to the dish 256 | ret254,msr8056,video8056,fried potatoes are being eaten 257 | ret255,msr8836,video8836,a woman is cooking food and a man is setting a table 258 | ret256,msr8837,video8837,a young girl petting a dog that is laying on a couch 259 | ret257,msr8830,video8830,this is a video of a confrence 260 | ret258,msr7768,video7768,a 3d animation of a cabinet with plates 261 | ret259,msr8832,video8832,a girl messaging her friend 262 | ret260,msr8057,video8057,a women preparing a duck to roast 263 | ret261,msr7567,video7567,a man checks out detail on a car 264 | ret262,msr7566,video7566,a man explain why he uses edible blooms for gifts 265 | ret263,msr7565,video7565,in a kitchen a woman is chopping tomatoes in a food processor 266 | ret264,msr7564,video7564,a person juggling sticks on the summit of a snowy mountain 267 | ret265,msr7563,video7563,a man is talking about appliances 268 | ret266,msr7562,video7562,article about nasa s johnson space center being broadcasted on news 269 | ret267,msr7561,video7561,playing with toy kitchen accessories 270 | ret268,msr7560,video7560,person straightening out string of magnets and wrapping them around his finger 271 | ret269,msr7569,video7569,a woman is singing 272 | ret270,msr7568,video7568,the queen of england is seen walking with an entourage including a few islamic women a woman narrates how the queen created an outfit similar to the women in islam 273 | ret271,msr9404,video9404,a lady talks into a megaphone 274 | ret272,msr9428,video9428,presentation of the machine and its functions while working 275 | ret273,msr7947,video7947,a news program with a woman interviewing a man about merchant market currencies 276 | ret274,msr8831,video8831,many persons walking inside shoping mall hall and computer shop selling displaying on screen 277 | ret275,msr7946,video7946,the man sitting in the black chair and wearing brown suit is talking 278 | ret276,msr9131,video9131,two people wear and touch their masks made to resemble a camera cube with centered black lens one of which is covered in colorful flowers 279 | ret277,msr9405,video9405,red balloons float in the sky and have packages tied to them 280 | ret278,msr9231,video9231,few barbie dolls are playing one doll puts shoe to other barbie these are used to play by a kid 281 | ret279,msr9230,video9230,a man talks to a little boy about not letting people convince him that anything he wants to do in life isn t possible 282 | ret280,msr9233,video9233,a man is giving a speech 283 | ret281,msr9232,video9232,a video game woman character runs down a hill followed by a dog running down the hill they both then run up the hill together 284 | ret282,msr9235,video9235,men are loading their guns with ammunition 285 | ret283,msr9234,video9234,two men walking and talking about the road 286 | ret284,msr9237,video9237,preview for the movie insidious 287 | ret285,msr9236,video9236,a man examining the length of a person s arm when opening a glove compartment 288 | ret286,msr9239,video9239,a man kicks at a bull 289 | ret287,msr9238,video9238,a man is singing and dancing in an elevator while people watch 290 | ret288,msr8948,video8948,there is a man is talking with a commando 291 | ret289,msr8119,video8119,guy explaining what stiff person syndrome is 292 | ret290,msr9657,video9657,a monkey and a man feeding monkey with hand displaying on screen 293 | ret291,msr8118,video8118,women of a foreign nation comb their hair and perform in traditional costumes 294 | ret292,msr8949,video8949,woman with swimming suit is plunging into the water and gets rid of the suit 295 | ret293,msr9975,video9975,a yellow sports car with a guy speaking about the car 296 | ret294,msr9356,video9356,someone is arranging utensilslarge saucepan and spoons as a preparation to cook something 297 | ret295,msr9357,video9357,a woman laughs until she chokes 298 | ret296,msr9354,video9354,a red truck is burning while three men talk about a car 299 | ret297,msr9355,video9355,a person describing a recording of a video game 300 | ret298,msr9352,video9352,a man is talking to a crowd 301 | ret299,msr9353,video9353,two women are outside and are discussing something in a foreign language 302 | ret300,msr9350,video9350,screen showing some people talking 303 | ret301,msr9351,video9351,a man is singing on stage to a huge audience he is holding a microphone 304 | ret302,msr8111,video8111,people are using a computer software tool 305 | ret303,msr9358,video9358,someone is talking about a car 306 | ret304,msr9359,video9359,pictures of a beach while a voice talks about a shark attack 307 | ret305,msr8110,video8110,a cartoon woman cries at a bench while a woman in blue appears 308 | ret306,msr7741,video7741,a cartoon man in sunglasses waves at the crowd and smiles 309 | ret307,msr8116,video8116,a dad plays video games with his son 310 | ret308,msr7837,video7837,a person walks down a staircase surrounded by greenery and other foliage 311 | ret309,msr8620,video8620,a woman is making a hair accessory 312 | ret310,msr7468,video7468,a man jumps onto a ledge of a building 313 | ret311,msr9409,video9409,a bowl of shrimp green onions and other assorted ingredients in a soup broth 314 | ret312,msr9032,video9032,a hospital mortuary room and a doctor treat the special case 315 | ret313,msr8622,video8622,a man has tried to solve a rubik s cube 316 | ret314,msr7747,video7747,this is a scene from a disney cartoon set in ancient times 317 | ret315,msr8824,video8824,a talk show with dr 318 | ret316,msr8626,video8626,numbers are displayed on the screen 319 | ret317,msr7462,video7462,he drew a beautiful picture 320 | ret318,msr9623,video9623,a man is on a cell phone while people are fighting 321 | ret319,msr9620,video9620,outer space pictures that have parts of equipments in them with water droplets on it 322 | ret320,msr8420,video8420,a man sits in a large black truck adjusting mirror 323 | ret321,msr8945,video8945,two men stand on a platform suspended high above the city 324 | ret322,msr9451,video9451,kids feeding and playing with the horse 325 | ret323,msr7461,video7461,a man playing video games 326 | ret324,msr9450,video9450,a foreign military themed show 327 | ret325,msr7466,video7466,a group of men carry a body covered in a sheet 328 | ret326,msr7613,video7613,a female journalist wearing a purple shirt and white blazer is talking on a news show next to the image of a male in a dark suit 329 | ret327,msr7467,video7467,disney movie video clips 330 | ret328,msr9452,video9452,an animated grey shark in the middle of a blue water simulation background rotating in a circle on the screen of a monitor 331 | ret329,msr7464,video7464,the video shows gameplay of a car racing video game 332 | ret330,msr7615,video7615,a man is riding on horseback 333 | ret331,msr9625,video9625,its a cooking recipe show with chicken vegetables 334 | ret332,msr7614,video7614,a man picking out a vehicle from the trailer 335 | ret333,msr8333,video8333,a woman is talking in the tv channel 336 | ret334,msr8332,video8332,someone is assembling a small hello kitty oven toy 337 | ret335,msr8331,video8331,a man teaching students in class 338 | ret336,msr8330,video8330,a man and a woman are talking at a bus stop 339 | ret337,msr8337,video8337,people act in a comedy program 340 | ret338,msr8336,video8336,video of a church chior 341 | ret339,msr8335,video8335,a beautiful waterfall is flowing into the pool of water below it as the camera pans around the area 342 | ret340,msr8334,video8334,a cartoon of a dog running and howling 343 | ret341,msr8339,video8339,kids in a circle play with beach ball and surf boards 344 | ret342,msr7616,video7616,an orange sports car accelerates quickly 345 | ret343,msr7619,video7619,cartoons are talking to each otehr 346 | ret344,msr7491,video7491,a person explaining a concept in a show 347 | ret345,msr9458,video9458,a person playing a video game 348 | ret346,msr7744,video7744,a chef stirs up some ingredients inside of a pan 349 | ret347,msr9512,video9512,a woman with blonde hair and a black shirt is talking 350 | ret348,msr9824,video9824,two ladies sitting down and talking in an office room 351 | ret349,msr9825,video9825,an intelligent man with glasses talk about certain phrenologists 352 | ret350,msr9826,video9826,an animated micky is driving a car 353 | ret351,msr9827,video9827,lady gaga sings in a music video 354 | ret352,msr9820,video9820,a girl playing the game the sims 355 | ret353,msr9821,video9821,a man giving a presentation and showing the planet earth 356 | ret354,msr9822,video9822,a woman is talking 357 | ret355,msr9823,video9823,various rugby scenes from different games 358 | ret356,msr9514,video9514,inside of a toyota car with large space and safety 359 | ret357,msr9828,video9828,assorted people are shown holding cute pets 360 | ret358,msr9829,video9829,rusty houser talks about how hitler is loved from his results 361 | ret359,msr7755,video7755,someone speaking about a violent act regarding the police 362 | ret360,msr9520,video9520,a fat guy with a tie is looking at a man 363 | ret361,msr7756,video7756,a group of women are singing 364 | ret362,msr7757,video7757,a figure in camouflage clothing walks with bent knees through large cement rooms toward a door as people are being shot 365 | ret363,msr7828,video7828,a man gives a lecture with a microphone in front of a laptop with a red glow behind him 366 | ret364,msr7829,video7829,someone is putting a skeleton slide on a projector 367 | ret365,msr8489,video8489,a tv channel named how to cook great foodcom is telling how to prepare a dish 368 | ret366,msr8488,video8488,a woman is talking about facial care products 369 | ret367,msr8485,video8485,foreign language music video 370 | ret368,msr7823,video7823,an asian woman is talking about food 371 | ret369,msr7820,video7820,a guy talks about how a car s alternator wasn t working 372 | ret370,msr8486,video8486,a cartoon on a young guy cursing 373 | ret371,msr8481,video8481,a woman is reporting on keds commercials 374 | ret372,msr8480,video8480,impoverished children are eating and a man is talking 375 | ret373,msr8483,video8483,someone is mixing ingredients in a bowl 376 | ret374,msr8482,video8482,late night sneak peek preview 377 | ret375,msr8249,video8249,batman is beating up bane in a scene from a batman movie 378 | ret376,msr8248,video8248,the view of a boat and the water is blocked by clouds and fog 379 | ret377,msr8247,video8247,a man is talking about cooking and presenting a thermometer and way of preparation 380 | ret378,msr8246,video8246,there is a man working on a car 381 | ret379,msr8245,video8245,a warrior is fighting a battle 382 | ret380,msr8244,video8244,two men walk behind a couple of rhinoceroses one of which attacks a man 383 | ret381,msr8243,video8243,man in black suit is having meeting with group of people 384 | ret382,msr8242,video8242,miniature donkeys walking around and making noises 385 | ret383,msr8241,video8241,several women in pink outfits and various other styles are standing and smiling 386 | ret384,msr8240,video8240,a middle aged woman giving another woman a message 387 | ret385,msr7220,video7220,threee kids sing together on the voice 388 | ret386,msr8466,video8466,men are playing instruments in a band 389 | ret387,msr7222,video7222,a cartoon man dances for his girl 390 | ret388,msr7223,video7223,the small and large needles moving fast in a clock 391 | ret389,msr8463,video8463,the beautiful scene on the screen 392 | ret390,msr8462,video8462,guys play minecraft 393 | ret391,msr8461,video8461,two kids talking to the camera and then a woman talking to the camera 394 | ret392,msr7227,video7227,a man is singing 395 | ret393,msr7228,video7228,a man is sitting and playing guitar 396 | ret394,msr7229,video7229,a woman is talking about movies 397 | ret395,msr8469,video8469,two parrots in a bird cage one white chick and on green adult 398 | ret396,msr8468,video8468,someone is driving around the city in grand theft auto v 399 | ret397,msr7916,video7916,girl is checking twitter 400 | ret398,msr7917,video7917,shania twain does a closeup for her video 401 | ret399,msr7914,video7914,a flower and other natural scenes are displaying 402 | ret400,msr7915,video7915,a man in sunglasses and a blue shirt beat boxes 403 | ret401,msr7912,video7912,peter is driving in the car 404 | ret402,msr7913,video7913,a man describing how to do something in windows 405 | ret403,msr7910,video7910,a man looks at the battery of a computer 406 | ret404,msr7911,video7911,a woman is making a recipe in a sauce pan 407 | ret405,msr8507,video8507,a girl doing gymnastics in the front yard 408 | ret406,msr8628,video8628,a man is talking with his friends on a video game 409 | ret407,msr7919,video7919,a character is jumping and floating in the air in a video game 410 | ret408,msr8799,video8799,different letters are coming out and sounding out the way they sound 411 | ret409,msr8798,video8798,the lighting work is going on the building 412 | ret410,msr8069,video8069,an astronaut is looking at a flag 413 | ret411,msr8068,video8068,a woman on a couch talks to a a man 414 | ret412,msr9919,video9919,a man walks towards a woman and the woman breaks a bottle to defend her belongings 415 | ret413,msr8791,video8791,a woman is demonstrating a nail painting technique 416 | ret414,msr8790,video8790,two women are embracing 417 | ret415,msr8061,video8061,comedy skit with dc comics character vane 418 | ret416,msr8792,video8792,a girl is on the voice 419 | ret417,msr8067,video8067,guys holding cups and talking 420 | ret418,msr8794,video8794,a cartoon girl and animal jumping on body of male guy girl image still shown displaying on screen 421 | ret419,msr8797,video8797,guys playing minecraft 422 | ret420,msr8796,video8796,man talking about hiking 423 | ret421,msr7598,video7598,a person is singing in a studio 424 | ret422,msr7599,video7599,people compete in a backyard competition 425 | ret423,msr9752,video9752,a man is talking and more peoples are in the round 426 | ret424,msr7593,video7593,a group of friends are in a room talking with each other 427 | ret425,msr9750,video9750,a man points his gun towards a woman in a room 428 | ret426,msr7591,video7591,a woman is giving a demonstration about pancake presentation 429 | ret427,msr7596,video7596,a man and a woman are singing a song in a stage 430 | ret428,msr7597,video7597,a man drives while discussing his car 431 | ret429,msr7594,video7594,water is being filtered in a tank with rocks 432 | ret430,msr9755,video9755,advertisement of seat basket 433 | ret431,msr7358,video7358,it is a vine compilation 434 | ret432,msr7359,video7359,two people are preparing for sports 435 | ret433,msr7354,video7354,a man is showing the interior of a car 436 | ret434,msr7355,video7355,a hand print is outlined within the silhouette of a green dinosaur shape for a movie promotion 437 | ret435,msr7356,video7356,a vehicle with details on what comes with it by carfax 438 | ret436,msr7357,video7357,tennis players are involved in matches in large stadiums in front of large crowds 439 | ret437,msr7350,video7350,two snakes are shown in containers 440 | ret438,msr7351,video7351,a man is acting out a scene 441 | ret439,msr7352,video7352,a movie scene starring morgan freeman and men in armor running 442 | ret440,msr7353,video7353,a minecraft video game is being played 443 | ret441,msr8027,video8027,a video game is played 444 | ret442,msr8979,video8979,sports vine clips of basketball 445 | ret443,msr8025,video8025,the opeing credit to gullah grub authentic cooking 446 | ret444,msr8665,video8665,the tennis players wearing blue and red t shirts and play the tennis in the tennis court at the night time 447 | ret445,msr8664,video8664,customers wait in line at an ice cream shop while employees tend to them 448 | ret446,msr8667,video8667,music is playing and advertisements was showing 449 | ret447,msr8666,video8666,a heart is shown 450 | ret448,msr8661,video8661,a soccer player shoots a goal during a soccer game 451 | ret449,msr8660,video8660,two teams play at the olympics 452 | ret450,msr8663,video8663,gameplay footage of someone playing minecraft 453 | ret451,msr8662,video8662,a man a woman cooking on a cooking show 454 | ret452,msr8751,video8751,a person is doing cooking show and telling the ingredients 455 | ret453,msr8669,video8669,anchor talking about a shows 456 | ret454,msr8668,video8668,a man is highlighted playing basketball 457 | ret455,msr8022,video8022,the intro to world news focus on africa plays and we are greeted by the bbc reporter based in johannesburg south africa 458 | ret456,msr7686,video7686,a man looks up towards a cathedrals organ pipes and talks to a priest in a confessional 459 | ret457,msr7687,video7687,a girl using her smartphone 460 | ret458,msr7684,video7684,a woman talks about a skin care treatment she takes with her everwhere 461 | ret459,msr7685,video7685,couples describing the logic behind movie scenes 462 | ret460,msr7682,video7682,man and woman are showing affection 463 | ret461,msr8938,video8938,a man is dodging bombs 464 | ret462,msr7680,video7680,an ethiopian woman asks a child what she is good at 465 | ret463,msr7681,video7681,a group of people are riding on a raft in a body of water 466 | ret464,msr8935,video8935,man is crossing the street with big lion and friend 467 | ret465,msr8934,video8934,cameras filming near accidents between cars in traffic 468 | ret466,msr8937,video8937,this little light of mine song with different photos 469 | ret467,msr8936,video8936,some girls are practicing gymnastics 470 | ret468,msr8931,video8931,a man playing a video game character that is carrying a sword and killing animals with it 471 | ret469,msr8930,video8930,a man speaks on a a news panel 472 | ret470,msr7688,video7688,one of two guys walking on a carton of eggs with bare feet 473 | ret471,msr8932,video8932,a man is giving an interview in a tv show 474 | ret472,msr7178,video7178,an animated video game song 475 | ret473,msr7179,video7179,two animated woman s are talking to eachother 476 | ret474,msr7170,video7170,a girl practising her boy and arrow tricks 477 | ret475,msr7171,video7171,the woman wearing the white top talks to the people in the audience 478 | ret476,msr7172,video7172,the girl shows the boys her medal in this cartoon 479 | ret477,msr7173,video7173,a woman is mixing nailpolish and putting an egg into it 480 | ret478,msr7174,video7174,a man rides a lft to the top of a mountain 481 | ret479,msr7175,video7175,the band member takes a seat 482 | ret480,msr7176,video7176,a man is talking about space project adam 483 | ret481,msr7177,video7177,girl is dancing in the garden 484 | ret482,msr9688,video9688,a person looks at a celebrity on the computer 485 | ret483,msr9689,video9689,spices being combined in a stainless steel bowl 486 | ret484,msr8453,video8453,a man points a gun at another persons face 487 | ret485,msr9680,video9680,on google earth is a man who is talking about the flights 488 | ret486,msr9681,video9681,sports people are fighting on field 489 | ret487,msr9682,video9682,a girl being surprised with a stuffed animal by male friend 490 | ret488,msr9683,video9683,a man and woman performing in front of judes 491 | ret489,msr9684,video9684,somebody slices white onion with sharp knife on the table 492 | ret490,msr9685,video9685,people practising marshal arts 493 | ret491,msr9686,video9686,a woman in lots of pain 494 | ret492,msr9687,video9687,a man chopping lobster and taking off the shell 495 | ret493,msr8712,video8712,a woman is showing nail polish 496 | ret494,msr8450,video8450,a elecopter moving in air and red and yellow dress man hand touching speaking in snow land wearing helmet displaying on screen 497 | ret495,msr9416,video9416,person is recording the brown horse which is having fun 498 | ret496,msr7772,video7772,a gospel band playing 499 | ret497,msr8808,video8808,a man talks back to a cop and handcuffs him to the table 500 | ret498,msr9530,video9530,jeremy is describing a car 501 | ret499,msr7771,video7771,a golf player is trying to hit the ball into the pit 502 | ret500,msr7776,video7776,three soccer balls are laying in a field and then three men in black athletic cloths attempt to shoot a goal 503 | ret501,msr9537,video9537,animated comic scene of guy cutting up food for dinner 504 | ret502,msr9534,video9534,some men play a game of kickball 505 | ret503,msr9535,video9535,a man is talking about a sports car he is driving 506 | ret504,msr8801,video8801,three men talking about their youtube channel and thanking their viewers 507 | ret505,msr8800,video8800,people are talking to each other 508 | ret506,msr8803,video8803,a man is sweeping dust off the floor 509 | ret507,msr9539,video9539,the men eat the mexican food 510 | ret508,msr8805,video8805,a soldier is speaking to a superior person in a movie 511 | ret509,msr8804,video8804,two men are in the classroom and having conversation 512 | ret510,msr8807,video8807,a couple dancing doing salsa 513 | ret511,msr8806,video8806,someone looking at a japanese book 514 | ret512,msr7574,video7574,minecraft zombie kills player and takes heart 515 | ret513,msr7575,video7575,a man speaking in a microphone 516 | ret514,msr7576,video7576,men are being filmed in the darkness 517 | ret515,msr7577,video7577,a girl explains about some studies showing some hands actions 518 | ret516,msr7570,video7570,a very young baby is wearing a disguise and laughing 519 | ret517,msr7571,video7571,a woman in black puts on blush while looking in a mirror 520 | ret518,msr7572,video7572,foreign language cooking show 521 | ret519,msr7573,video7573,a girl in white night wear dancing very sexy 522 | ret520,msr7578,video7578,people are on stage talking 523 | ret521,msr7579,video7579,a girl wearing red top and black trouser is putting a sweater on a dog 524 | ret522,msr9969,video9969,two girls are sitting in the bed with a cat and talking 525 | ret523,msr9228,video9228,guy in purple tshirt playing guitar as they drive through 526 | ret524,msr9229,video9229,guys trying out ice cream 527 | ret525,msr9226,video9226,a girl dresses up in a bright wig 528 | ret526,msr9227,video9227,man shows how to prepare potatoes 529 | ret527,msr9224,video9224,someone is playing a game 530 | ret528,msr9225,video9225,a man discusses a bollywood celebrity 531 | ret529,msr9222,video9222,a boy is trying out for a part on the voice kids 532 | ret530,msr9223,video9223,a man and a woman are sitting in front of a television and addressing and audience 533 | ret531,msr9220,video9220,a girl is painting easter designs on nails 534 | ret532,msr9221,video9221,a cartoon clip is being played 535 | ret533,msr7419,video7419,a man playing guitar and singing on the road side 536 | ret534,msr7418,video7418,an indian man talking about iphones and a new type of clothing 537 | ret535,msr9323,video9323,it is the video of military men 538 | ret536,msr9322,video9322,a woman gets trapped in a burning trailer 539 | ret537,msr9321,video9321,penguins wander around on ice 540 | ret538,msr9320,video9320,a blonde man lies on a bed with a little baby 541 | ret539,msr9327,video9327,men talking about and eating hot dogs 542 | ret540,msr9326,video9326,two boys sneak up to a girls performing choir 543 | ret541,msr9325,video9325,a man is driving a black car 544 | ret542,msr9324,video9324,animated video showing a bottle rolling across an empty hallway 545 | ret543,msr9329,video9329,a couple is shown 546 | ret544,msr9328,video9328,a car is racing on road 547 | ret545,msr7415,video7415,in the kitchen the chef is interviewed by a lady and the ingredients are kept on the table 548 | ret546,msr7414,video7414,a female in a space age outfit crawling and dancing on the floor 549 | ret547,msr7413,video7413,there is a guy talking to his father 550 | ret548,msr7412,video7412,a man talks about visiting visiting a specific place to buy some things 551 | ret549,msr7411,video7411,jolly good music troop delivering a program and the lady is in good spirit 552 | ret550,msr7610,video7610,the olympics wight lifting photo is illustrated to explain gravitational force 553 | ret551,msr7410,video7410,vladmir putin talks on the news about the fight against terrorism 554 | ret552,msr8451,video8451,a group of young athletes race around a track 555 | ret553,msr9508,video9508,men in a garage talk about a car they are going to restore 556 | ret554,msr9957,video9957,a group of people are dancing in a room 557 | ret555,msr8653,video8653,a man and woman are talking in a car 558 | ret556,msr9039,video9039,two girls in design dress wearing cloth standing holding mic in hand on street and person walking beside discusing on topic 559 | ret557,msr9038,video9038,a person is preparing some food 560 | ret558,msr9037,video9037,a women is sitting with her baby and two people talking about that 561 | ret559,msr9036,video9036,young children in red fire chief hats are guided to a fire engine and up its metal textured stairs 562 | ret560,msr9035,video9035,a fatality from mortal kombat is shown 563 | ret561,msr9034,video9034,several dogs playing dead 564 | ret562,msr9033,video9033,man standing on the ledge of a vary tall building jumps off 565 | ret563,msr8741,video8741,a tour through chinese architecture 566 | ret564,msr9031,video9031,a lady is play the sims while talking about it 567 | ret565,msr9030,video9030,animated pirates sing on a ship 568 | ret566,msr7962,video7962,a pirate man tries to lift a lantern with his sword while on a boat 569 | ret567,msr9419,video9419,a girl talking with long hair 570 | ret568,msr8320,video8320,a young girl is showing everybody how to apply make up 571 | ret569,msr8321,video8321,a man is talking about something 572 | ret570,msr8322,video8322,a lady tries to ride on bicycle but fails 573 | ret571,msr8323,video8323,a cat is layingon a couch and another islaying in a basket 574 | ret572,msr8324,video8324,a rock band performs on stage 575 | ret573,msr8325,video8325,a man cooks some food in a kitchen 576 | ret574,msr8326,video8326,video showing helping attitude of human beings 577 | ret575,msr8327,video8327,multi colored horses in a barn and outside in the snow 578 | ret576,msr8328,video8328,a man discusses and shows traits of some computer functions 579 | ret577,msr8329,video8329,a woman talks about horse racing 580 | ret578,msr9418,video9418,a man in black suit is talking about deforestation and about climate change 581 | ret579,msr9811,video9811,a group of actors sit in a control room and think about their next move together 582 | ret580,msr9810,video9810,some women models pictures are shown as a slide show presentation and a women is talking 583 | ret581,msr9813,video9813,a man drives a motorcycle in a video game 584 | ret582,msr9812,video9812,a woman giving a photoshop tutorial 585 | ret583,msr9815,video9815,a mashup of music videos is being played 586 | ret584,msr9814,video9814,a scene from spongebob squarepants where the townspeople are carrying torches and chasing a giant squidward 587 | ret585,msr9817,video9817,vines of sports are being played 588 | ret586,msr9816,video9816,a little girl talking to her and is scared 589 | ret587,msr9819,video9819,a reporter talks about a police incident 590 | ret588,msr9818,video9818,a woman applies makeip to her eyes in double speed 591 | ret589,msr9605,video9605,a special songs for the game 592 | ret590,msr8556,video8556,a group of people are stamp dancing on stage in front of a crowd 593 | ret591,msr8498,video8498,delicious and colorful food is in the bowl 594 | ret592,msr8499,video8499,men working in surveillance room 595 | ret593,msr8492,video8492,a man is very excited 596 | ret594,msr8493,video8493,polar bear jumps into water then plays around while people watch 597 | ret595,msr8490,video8490,the song hero by skillet with lyrics 598 | ret596,msr8491,video8491,a police officer pulls a gun at a swimming pool 599 | ret597,msr8496,video8496,a cute girl with nice headgear standing in a room is talking through a microphone 600 | ret598,msr8497,video8497,a bus crashes into a car 601 | ret599,msr8494,video8494,a girl is talking about a celebrity 602 | ret600,msr8495,video8495,food in a refrigerator is displayed on shelves in containers and on a lazy suzan 603 | ret601,msr9063,video9063,people on stage performing 604 | ret602,msr8258,video8258,woman is using a baby stroller 605 | ret603,msr8259,video8259,an interview is conducted 606 | ret604,msr8254,video8254,a woman in a purple dress is talking on a video 607 | ret605,msr8255,video8255,a man is playing piano 608 | ret606,msr8256,video8256,someone playing the game dark souls 609 | ret607,msr8257,video8257,a man is discussing some functions for a science expirement 610 | ret608,msr8250,video8250,instructional video on home improvement subjects 611 | ret609,msr8251,video8251,guy in tshirt playing guitar and singing song 612 | ret610,msr8252,video8252,bride standing with a old man in formal wear 613 | ret611,msr8253,video8253,vest of sports vines 614 | ret612,msr8474,video8474,men and women sing in a choir on stage with a piano 615 | ret613,msr8475,video8475,scrolling the the menu of movieclips with different movie trailers 616 | ret614,msr8476,video8476,women are celebrating a soccer victory 617 | ret615,msr8477,video8477,city limits photograph taken from high point in day time 618 | ret616,msr8470,video8470,high school wrestling match 619 | ret617,msr7218,video7218,episode from spongebob cartoon 620 | ret618,msr8472,video8472,persons are attending a class with laptops 621 | ret619,msr8473,video8473,a animated car going on the roads with blue mark on either sides 622 | ret620,msr7215,video7215,person playing a game 623 | ret621,msr7214,video7214,race cars of different colors lined up on a dark track 624 | ret622,msr7217,video7217,a woman is advertising a stroller 625 | ret623,msr7216,video7216,a woman giving skin care tips 626 | ret624,msr8478,video8478,a cat is licking a baby 627 | ret625,msr8479,video8479,a woman demonstrates how to cook chitlins 628 | ret626,msr7213,video7213,a woman is making lasagna 629 | ret627,msr7212,video7212,a man and a woman are walking a dog on a beach 630 | ret628,msr8786,video8786,a group of people talking about stuff 631 | ret629,msr8787,video8787,minecraft gamer puts on iron armor 632 | ret630,msr7961,video7961,a band of singers and guitarists are performing on stage 633 | ret631,msr7960,video7960,a man is running around and playing a guitar 634 | ret632,msr7967,video7967,a news anchor is interviewing a person on screen 635 | ret633,msr7966,video7966,cheese is being sliced 636 | ret634,msr7965,video7965,man driving in a car an talking about the car 637 | ret635,msr7964,video7964,the couples are kissing in the game of throne 638 | ret636,msr7590,video7590,an emotional scene of two persons where they are crying on meeting 639 | ret637,msr7969,video7969,person driving in car 640 | ret638,msr7968,video7968,a foul mouthed chef demonstrates and describes a vegetable recipe 641 | ret639,msr8788,video8788,a man plays a video game where the player has a first person perspective and shoots other characters 642 | ret640,msr8789,video8789,a girl in blue color dress wearing siting speaking and television screen with black shirt man beside still image displaying on screen 643 | ret641,msr8078,video8078,a man in his backyard talks tot he camera and is going to skin a snake 644 | ret642,msr8079,video8079,several groups of people are kayaking on a waterway 645 | ret643,msr8879,video8879,a tamil movie scene is being shown they travel through a auto riksha and speak about the importance of work 646 | ret644,msr8070,video8070,man driving a car in a video game 647 | ret645,msr8071,video8071,a man is talking about the making of the movie avatar 648 | ret646,msr8072,video8072,a man talking about human relationships 649 | ret647,msr8073,video8073,a pop singer singing while she standing on a step the video is shown double 650 | ret648,msr8074,video8074,a girl sitting in a restaurant 651 | ret649,msr8075,video8075,scenes of romantic film 652 | ret650,msr8076,video8076,a web animation of a businessman 653 | ret651,msr8077,video8077,a man playing a video game 654 | ret652,msr7854,video7854,a guy talks outside in the snow at a ski resort 655 | ret653,msr8392,video8392,a girl in black color dress wearing cloth sleeping and smoking and peeping into hole displaying on screen 656 | ret654,msr7369,video7369,a man is talking about opening a laptop case 657 | ret655,msr7368,video7368,a martial arts cartoon 658 | ret656,msr7361,video7361,a woman with a camel 659 | ret657,msr7360,video7360,a woman is showing shrimp and bunch of different other ingredients in order to cook a dish 660 | ret658,msr7363,video7363,a man loads a clip into a pistol 661 | ret659,msr7362,video7362,a person is cooking on stage 662 | ret660,msr7365,video7365,cartoon people are eating at a restaurant 663 | ret661,msr7364,video7364,a busy city street in far east is seen with people holding baskets on heads 664 | ret662,msr7367,video7367,this is a rock band music video 665 | ret663,msr7366,video7366,someone giving demo for some game and talking about that 666 | ret664,msr7134,video7134,a class is being introduced to a digital reading device 667 | ret665,msr7135,video7135,a man in a music video screams shut up a bunch of times 668 | ret666,msr9400,video9400,someone is playing a game 669 | ret667,msr8672,video8672,dog is drinking milk with baby nibble bottle 670 | ret668,msr8673,video8673,a man discusses spongebob 671 | ret669,msr8670,video8670,a black t shirted man with a hat talking about an event 672 | ret670,msr7137,video7137,bill murray is covered in frosting 673 | ret671,msr8676,video8676,a girl walking down a path 674 | ret672,msr8677,video8677,two mermaid with red hair is sitting on a rock 675 | ret673,msr8674,video8674,space explorers in red and silver suits float about in a black abyss filled with blue crystals 676 | ret674,msr8675,video8675,a man kicks a ball 677 | ret675,msr9406,video9406,a person giving his opinion on how crowded the world is 678 | ret676,msr8678,video8678,a lady is walking in the beach 679 | ret677,msr8679,video8679,a man discussed a few products 680 | ret678,msr7131,video7131,athletes are getting ready and start running for an event 681 | ret679,msr7145,video7145,a critic about wine speaks about the french wines 682 | ret680,msr7144,video7144,a group of women are rubbing oil and milk all over a woman 683 | ret681,msr8928,video8928,a woman walking along side a river in a bikini 684 | ret682,msr7146,video7146,a puppy is crawling down some stairs 685 | ret683,msr7141,video7141,a cartoon that is blurryb on a tv screen of a fashion run way 686 | ret684,msr7140,video7140,someone is adding ingredients for a batter 687 | ret685,msr7143,video7143,someone giving demo for some game 688 | ret686,msr7142,video7142,an oriental femal is dressed in a pink wig with girlish clothing and is carrying a stuffed animal 689 | ret687,msr8922,video8922,an old man shakes hands with another man and then they hug each other 690 | ret688,msr8923,video8923,women are modeling clothes 691 | ret689,msr8920,video8920,a kid unwrapping his presents 692 | ret690,msr8921,video8921,someone ends their tutorial on their computer 693 | ret691,msr8926,video8926,a man is playing a guitar with a band in a live concert 694 | ret692,msr7148,video7148,a cheif is preparing a treat 695 | ret693,msr8924,video8924,a man works on a computer s motherboard 696 | ret694,msr8925,video8925,a band plays on a stage 697 | ret695,msr7219,video7219,a man drives a red indianapolis 500 type race car around an asphalt track 698 | ret696,msr7592,video7592,a dark skinned couple make love in bed the man is on top and the woman s blouse is partly transparent 699 | ret697,msr8973,video8973,commercial for a service called eva which will help people find a vehicle 700 | ret698,msr8471,video8471,a woman in a yellow top is holding a red drink 701 | ret699,msr7649,video7649,there is a vehicle riding dangerously through forest 702 | ret700,msr7138,video7138,a man talking about the two cars he test drove 703 | ret701,msr8974,video8974,a pretty young girl talks to a man with a mustache 704 | ret702,msr8618,video8618,an exploration about the title of professional video advertisement 705 | ret703,msr8772,video8772,the man is making a sauce in the kitchen 706 | ret704,msr8816,video8816,animated cartoon character is catching the ball 707 | ret705,msr8817,video8817,a football video game is shown 708 | ret706,msr8814,video8814,the judges make a decision 709 | ret707,msr8815,video8815,a man walking into a room with two kids with red helmets 710 | ret708,msr8812,video8812,shows a globe and a bunch of people 711 | ret709,msr8813,video8813,a woman is interviewed on a tv talk show 712 | ret710,msr8810,video8810,a woman plays guitar and sings for a televised competition 713 | ret711,msr8811,video8811,the boy in karate dresswho is remembering the memorable hours with his father 714 | ret712,msr7211,video7211,people share their thoughts on a boxing match 715 | ret713,msr8818,video8818,several young girls are singing on stage 716 | ret714,msr8819,video8819,a video is shown showing different cars 717 | ret715,msr7723,video7723,a man plays an online multiplayer game and talks about how the game works 718 | ret716,msr7528,video7528,a girl singing a song and her group were playing music 719 | ret717,msr7789,video7789,a compilation of vine videos is shown 720 | ret718,msr7788,video7788,two guys are wrestling in a competition 721 | ret719,msr7787,video7787,various sports clips with music are shown 722 | ret720,msr7786,video7786,a song with skeletons dancing 723 | ret721,msr7785,video7785,person lighting a kettle 724 | ret722,msr8774,video8774,there is a guy filling a toy with cotton to play with it 725 | ret723,msr7783,video7783,long legged woman with black clothes is walking on the scene 726 | ret724,msr7782,video7782,a girl talking to her dead mom 727 | ret725,msr8727,video8727,a picture of what could be the moon or mars is on the screen 728 | ret726,msr7780,video7780,a man giving information about the mormon game 729 | ret727,msr9781,video9781,a women is explaining about the information of responders and the non responders for precision medicine 730 | ret728,msr9780,video9780,an older woman who is blind is talking to a girl named lilly 731 | ret729,msr7543,video7543,a video on how to make knots for fishing 732 | ret730,msr9782,video9782,a man talking about a womans genital problems 733 | ret731,msr7545,video7545,a person is playing a violin 734 | ret732,msr9784,video9784,sygornie weaver pointing a gun and then running 735 | ret733,msr7547,video7547,there is a woman in her roomshe is a net idol show her new vedio 736 | ret734,msr7546,video7546,a man and a woman are talking about something 737 | ret735,msr7549,video7549,a transvestite shows what she bought for her dog including shampoo and conditioner made by martha stuart 738 | ret736,msr9788,video9788,a woman with long white hair and dressed all in white is sailing through the sky over snow covered ground 739 | ret737,msr7725,video7725,young people sit around the edges of a room clapping and raising their arms while others dance in the center during a party 740 | ret738,msr8721,video8721,man in grey shirt is having an interview in the building 741 | ret739,msr9259,video9259,a car of 1970 is on the screen 742 | ret740,msr9258,video9258,a person is using a phone 743 | ret741,msr9253,video9253,people talking about their trip and how they are taken care of 744 | ret742,msr9252,video9252,garage opening for a old bug to pull out to drive away 745 | ret743,msr9251,video9251,sleeping beauty play promotion 746 | ret744,msr9250,video9250,a woman talking about a white tank top 747 | ret745,msr9257,video9257,a man puts his phone down to be charged 748 | ret746,msr9256,video9256,a woman is making playdoh 749 | ret747,msr9255,video9255,a soldier is laying down 750 | ret748,msr9254,video9254,band playing music and people dancing 751 | ret749,msr7469,video7469,a video of a young man in a white shirt inviting his colleagues to join him 752 | ret750,msr7724,video7724,a lady describes about workout and exercises for women 753 | ret751,msr7963,video7963,a machine drills holes in a section of metal piping 754 | ret752,msr7558,video7558,two boys introducing a young lady who plays the cups and sings 755 | ret753,msr8784,video8784,a woman makes crafts 756 | ret754,msr9330,video9330,a man with a guitar sings on a farm 757 | ret755,msr9331,video9331,a man drives his car down the road 758 | ret756,msr9332,video9332,a girl plays a videogame 759 | ret757,msr9333,video9333,barbecued cheese bacon burgers showcased 760 | ret758,msr9334,video9334,in game footage of a mine craft character walking up stairs 761 | ret759,msr9335,video9335,the person is frying the prawns and fish 762 | ret760,msr9336,video9336,video game of a truck driving through desert obstacles 763 | ret761,msr9337,video9337,some people are inside of a room 764 | ret762,msr9338,video9338,a girl digging in the sand 765 | ret763,msr9339,video9339,com long sheeps are in the big mountains 766 | ret764,msr8783,video8783,a lady named lizzy is speaking about movies she is wearing a very nice outfit 767 | ret765,msr8780,video8780,some people are shooting outside 768 | ret766,msr7726,video7726,a woman is singing and pouring drinks 769 | ret767,msr8781,video8781,a song from the movie beauty and the beast 770 | ret768,msr7728,video7728,a woman swings her hair in front of a large sing on a brick wall 771 | ret769,msr9028,video9028,news of marijuana business having trouble growing 772 | ret770,msr9029,video9029,a player is putting a basket ball into the basket from distance 773 | ret771,msr9024,video9024,some yellow text is on a purple and white screen 774 | ret772,msr9025,video9025,an instructional video on painting your nails 775 | ret773,msr9026,video9026,still photos from the 2002 movie the pianist are shown 776 | ret774,msr9027,video9027,a group of young people are playing 777 | ret775,msr9020,video9020,a person is looking at a camera during a wrestling event 778 | ret776,msr9021,video9021,a pitcher throws a fastball 779 | ret777,msr9022,video9022,a woman stirs up some soup sprinkles a spice in and drops a shot of liquid into it 780 | ret778,msr9023,video9023,several shots of tv shows combined were shown here 781 | ret779,msr8175,video8175,a man in a suit and a woman wearing brown giving the news 782 | ret780,msr7849,video7849,a red haired woman holds a green parrot near shelves filled with bird food 783 | ret781,msr9277,video9277,models are walking the runway as part of a fashion show 784 | ret782,msr7844,video7844,bbc talking to guys with very expensive red sports car 785 | ret783,msr8416,video8416,a video of different racially motivated protests is playing while the song everyone s a little bit racist plays 786 | ret784,msr7845,video7845,a person chops up lettuce and a strainer of kidney beans is shown 787 | ret785,msr8606,video8606,a man eats rice and a woman goes to the hospital 788 | ret786,msr8756,video8756,a woman is dressed up in face paint 789 | ret787,msr9119,video9119,a man talks about molecules and certain types of antibodies 790 | ret788,msr8319,video8319,there is a man in black is walking in to the bridge 791 | ret789,msr8318,video8318,a group discusses a man s outfit 792 | ret790,msr8315,video8315,three men were discussing the national football league 793 | ret791,msr8314,video8314,a woman dances around for a music video 794 | ret792,msr8317,video8317,an indian woman is applying makeup between her hair 795 | ret793,msr8316,video8316,a man is talking to an athlete 796 | ret794,msr8311,video8311,a man is filming as he and a woman watch the news where it shows an area filled with smoke 797 | ret795,msr8310,video8310,two wrestlers are fighting on a mat 798 | ret796,msr8313,video8313,a woman is discussing a new video game 799 | ret797,msr8312,video8312,the mountain views are from a boat on the center of a lake 800 | ret798,msr8605,video8605,all womans singing and dancing 801 | ret799,msr7843,video7843,a woman discusses how we can help children in a classroom 802 | ret800,msr9639,video9639,women poring sauce to cooking vegetables 803 | ret801,msr9600,video9600,handsome man plays guitar and sings 804 | ret802,msr9117,video9117,a person in white color dress wearing cloth speaking on topic white board explayning and many persons sitting displaying on screen 805 | ret803,msr9808,video9808,a man playing video games 806 | ret804,msr9809,video9809,man talks in front of a green bicycle 807 | ret805,msr9806,video9806,person is driving his black car fast on the street 808 | ret806,msr9807,video9807,a planet is being filmed from space outside 809 | ret807,msr9804,video9804,a display of clips of the movie there will be blood 810 | ret808,msr9805,video9805,characters from video games are dancing to old mc donald had a farm 811 | ret809,msr9802,video9802,a woman dances in the background while a guy doesn t move 812 | ret810,msr9803,video9803,two guys wrestling at an event 813 | ret811,msr9800,video9800,a car is in a wreck 814 | ret812,msr9801,video9801,selena gomez clips of her videos and her dancing with a man in a tux while she wears pink 815 | ret813,msr9603,video9603,a woman on her way out the door gets a call from a man standing in a store 816 | ret814,msr8604,video8604,a cnn report is talking about their dogs 817 | ret815,msr7444,video7444,a group of people watching a screen 818 | ret816,msr7445,video7445,a man is talking and playing a video game 819 | ret817,msr8601,video8601,a man is folding pieces of paper 820 | ret818,msr9607,video9607,a commercial for the website called eharmony 821 | ret819,msr8129,video8129,behind the scenes in a professional kitchen as the chefs work and the waiters run food can be a very noisy experience 822 | ret820,msr8269,video8269,the man thought students should be given the freedom to learn 823 | ret821,msr8268,video8268,cartoon play for kids 824 | ret822,msr9609,video9609,two astronauts experiencing a tense situation before relaxing afterwards 825 | ret823,msr8261,video8261,blonde woman with black nails is recording herself in a room 826 | ret824,msr8260,video8260,man in white hoodie turns a page to tell you about an online store the url is available in the description 827 | ret825,msr8263,video8263,a man drives aroud curves through a wooden mountainside 828 | ret826,msr8262,video8262,wrestlers are involved in a large wrestling meet 829 | ret827,msr8265,video8265,someone plays a guitar and sings on stage of a tv show 830 | ret828,msr8264,video8264,women athletes taking their positions for a running race 831 | ret829,msr8267,video8267,video of gymasts practicing to roll 832 | ret830,msr8266,video8266,a robot is seen in a movie preview 833 | ret831,msr8441,video8441,scene from a popular party 834 | ret832,msr8440,video8440,a man hits another man while wrestling 835 | ret833,msr8443,video8443,valencia vesus hokit in a wrestling match 836 | ret834,msr8442,video8442,a person covers a popular song 837 | ret835,msr8445,video8445,people enjoy the performance of singer 838 | ret836,msr8444,video8444,two ladies in a cookery show explain how to marinate chicken already cleaned and ready with salt and cilantro sprigs 839 | ret837,msr8447,video8447,a woman singing on the voice 840 | ret838,msr8446,video8446,spongebob is talking to patrick while holding a butterfly net 841 | ret839,msr7202,video7202,a group of people are dancing in a room 842 | ret840,msr7203,video7203,there is no sound while the screen shows a person playing a computer game 843 | ret841,msr7200,video7200,a female soccer player accepts a reward while being cheered on by the crowd 844 | ret842,msr7201,video7201,lady and her dogs 845 | ret843,msr7206,video7206,a helicopter is shown flying in what seems to be a war zone in syria 846 | ret844,msr7207,video7207,young men discuss and demonstrate a video game 847 | ret845,msr7204,video7204,a small boy is crying and a car is showed 848 | ret846,msr7205,video7205,hands rubbing together in the dark and band members singing and playing the guitar 849 | ret847,msr9474,video9474,a man talking about finances 850 | ret848,msr8999,video8999,showing anushka sharma bollywood actress 851 | ret849,msr9511,video9511,a person is riding red car on road 852 | ret850,msr9472,video9472,a girl wearing a dress stands to the side of the screen while lyrics to a song playing in the background appear on the other side 853 | ret851,msr7970,video7970,two child playing in the house 854 | ret852,msr8044,video8044,a goat attacks a man and the man fights back 855 | ret853,msr7972,video7972,a man shows how a video game works 856 | ret854,msr7973,video7973,someone is frying food 857 | ret855,msr7974,video7974,a woman is ripping off a man clothes 858 | ret856,msr9578,video9578,a diver goes underwater 859 | ret857,msr7976,video7976,a young boy rocks out on a guitar 860 | ret858,msr7977,video7977,someone is drawing pictures 861 | ret859,msr7978,video7978,a team with blue uniforms are playing badmitten with a team in white 862 | ret860,msr7979,video7979,people talking about a fight 863 | ret861,msr9579,video9579,two men talk with children s voices 864 | ret862,msr8049,video8049,a guy is spinning around with a bat 865 | ret863,msr8048,video8048,george lopez s family sits down for dinner 866 | ret864,msr9576,video9576,a movie scene little boys inside water flowing inside forest displaying on screen 867 | ret865,msr9577,video9577,cartoon one women in horse and speak to that calmly 868 | ret866,msr9574,video9574,in the ocean a man on a surfboard rides a wave 869 | ret867,msr7376,video7376,broth is being added to a soup pot and stirred with a rubber spatula 870 | ret868,msr7377,video7377,a guy reports on complex news 871 | ret869,msr7374,video7374,video game clip showing here different charcters 872 | ret870,msr7375,video7375,a man talks about the school system 873 | ret871,msr7372,video7372,someone is showing some drink 874 | ret872,msr7373,video7373,a solider gives a speech 875 | ret873,msr7370,video7370,boys and girls dancing and singing on beach 876 | ret874,msr7371,video7371,a tv shows review program hosts discuss about the performance and staying on air of star trek 877 | ret875,msr7732,video7732,colored lights and pictures of people fade in and out of view 878 | ret876,msr7378,video7378,a clip of soccer plays 879 | ret877,msr7733,video7733,a child is singing on stage 880 | ret878,msr7730,video7730,different women in colorful clothing are walking down a runway for a fashion show 881 | ret879,msr7939,video7939,a man and woman stand together and cook in the kitchen 882 | ret880,msr7731,video7731,fox newscasters discuss chris christie and his poll numbers 883 | ret881,msr8689,video8689,a multiplayer game of mario party is played 884 | ret882,msr8688,video8688,a person folds a paper airplane 885 | ret883,msr8687,video8687,fast moving time is shown here 886 | ret884,msr8686,video8686,a women is doing craft and talking about that 887 | ret885,msr8685,video8685,a man prepares some food in the kitchen 888 | ret886,msr8684,video8684,a person points out certain figures on a paper 889 | ret887,msr8683,video8683,two people welcome people to an episode of their show 890 | ret888,msr8682,video8682,a man talking something about the new smart phones 891 | ret889,msr8681,video8681,a young man and his dad are rubbing each other s hair it s a commercial for vodafone 892 | ret890,msr8680,video8680,a woman talking to a man in a hood 893 | ret891,msr7152,video7152,a person is swimming in some white water rapids 894 | ret892,msr7153,video7153,there is a wrestling match going on between two people 895 | ret893,msr7150,video7150,a picture of the batsman is shown and he is ready for the batting and the audience are watching the show 896 | ret894,msr7151,video7151,a man is sitting on a chair 897 | ret895,msr7156,video7156,tom jones performing live on a television show 898 | ret896,msr7157,video7157,an anime cartoon character speaks to another character 899 | ret897,msr7154,video7154,demonstration on how to prepare something using a microwave 900 | ret898,msr7155,video7155,there are two men swimming in a pond 901 | ret899,msr7158,video7158,fox news presidential debate recapping the gop debate with donald trump and ted cruz 902 | ret900,msr7159,video7159,bill murray is being interviewed by david letterman while talking about bill s past roles 903 | ret901,msr9368,video9368,someone demonstrates about the small motor uses to the video 904 | ret902,msr8863,video8863,an animated girl talks to a baby and plays with it 905 | ret903,msr8862,video8862,a woman s background voice describes a virtual scene where a dog enters a bathroom 906 | ret904,msr8861,video8861,a white male raps while another plays guitar 907 | ret905,msr8860,video8860,explainin about the scene in the net 908 | ret906,msr8867,video8867,the house has at least three small pets 909 | ret907,msr8866,video8866,a news scene of the dog s exercise and diet 910 | ret908,msr8865,video8865,a man in a kitchen is preparing pancakes he s wearing a white shirt and has black hair 911 | ret909,msr8864,video8864,a clip from fox news on the shelby north carolina shooting 912 | ret910,msr8869,video8869,a girl shows a pack of toy building blocks 913 | ret911,msr8868,video8868,a young girl sits in a room and looks into a bag 914 | ret912,msr7798,video7798,dogs and cats playing in a park 915 | ret913,msr7799,video7799,a guy chops up garlic and pours it over chicken frying in a pan 916 | ret914,msr7794,video7794,a famous tv talk show 917 | ret915,msr7795,video7795,a documentary on how to windsurf and in particular recover from a wipeout 918 | ret916,msr7796,video7796,boy playing with a dump truck 919 | ret917,msr7797,video7797,a man is swimming in the swimming pool 920 | ret918,msr7790,video7790,a guy with glasses is standing next to some signs 921 | ret919,msr7791,video7791,three kids are performing a song 922 | ret920,msr7792,video7792,screen cast of mine craft oneline 923 | ret921,msr7793,video7793,flight is shaken and the pilots trying to land the flight while they opened the air 924 | ret922,msr7333,video7333,there are pictures and quotes from george bernard shaw and vincent van gogh while a voice over artist talks about making mistakes 925 | ret923,msr9798,video9798,a woman peeling vegetables in her kitchen 926 | ret924,msr7559,video7559,a man is trying some sushi 927 | ret925,msr7556,video7556,people are playing baseball 928 | ret926,msr9797,video9797,a football player with a football 929 | ret927,msr7554,video7554,a person pointing to food on a plate 930 | ret928,msr9795,video9795,a person playing a video game and commentating 931 | ret929,msr7552,video7552,it is about a cartoon film 932 | ret930,msr9793,video9793,interview with artist shanai twain 933 | ret931,msr9790,video9790,squidward scenes playing with a lil wayne song 934 | ret932,msr9791,video9791,a woman talking about different pictures next to her 935 | ret933,msr8671,video8671,a man is yelling on the phone 936 | ret934,msr7542,video7542,a boy gets out of a play police car and talks to a girl 937 | ret935,msr9248,video9248,people are riding horses in grassland 938 | ret936,msr9249,video9249,a man is discussing oxiders in bulk form 939 | ret937,msr9240,video9240,young men in a middle of the bush almos naked and scratching themselves 940 | ret938,msr9241,video9241,japanese people laughing and dancing 941 | ret939,msr9242,video9242,a person drawling people on a canvas 942 | ret940,msr9243,video9243,they are singing a song and playing a guitar in the stage 943 | ret941,msr9244,video9244,documentary about museum in peninsula 944 | ret942,msr9245,video9245,man interviews bill murray 945 | ret943,msr9246,video9246,a beautiful sceneary is shown and the sight seeing through the train is amazing and place is so lovely to watch 946 | ret944,msr9247,video9247,in an animated scene two characters are outside under a menacing sky glaring at each other 947 | ret945,msr7710,video7710,a man playing video games 948 | ret946,msr9489,video9489,a man with brown hair is singing a song 949 | ret947,msr7147,video7147,a man cooks burgers and bacon on a grill 950 | ret948,msr8929,video8929,a person is playing a video game 951 | ret949,msr9309,video9309,there is someone serving a crab dish 952 | ret950,msr9308,video9308,the models walk the catwalk 953 | ret951,msr7544,video7544,a car goes racing down the road 954 | ret952,msr9305,video9305,a football match between usa and japan 955 | ret953,msr9304,video9304,three young people sing on stage 956 | ret954,msr9307,video9307,the lady came their room go to the kitchen try to make their food 957 | ret955,msr9306,video9306,a cartoon with violence 958 | ret956,msr9301,video9301,how to make a galaxy fighter 959 | ret957,msr9300,video9300,a squid is talking 960 | ret958,msr9303,video9303,children singing a song as a group on a stage 961 | ret959,msr9302,video9302,a guy trying to climb on a rope while another guy timing him 962 | ret960,msr7209,video7209,someone is showing a car features 963 | ret961,msr7149,video7149,a trailer for a film with words over the top 964 | ret962,msr8927,video8927,a news story about hillary clinton 965 | ret963,msr7632,video7632,a man walks between two brick buildings a dusk 966 | ret964,msr9019,video9019,two people playing basketball and the one with a hat makes every shot 967 | ret965,msr9018,video9018,people scoring in sports videos 968 | ret966,msr7478,video7478,a football video game is being played 969 | ret967,msr9011,video9011,a woman in black dress and a man in a black suit sits together 970 | ret968,msr9010,video9010,a group of zombis walking towards a very tall building with a globe in front of it 971 | ret969,msr9013,video9013,basketball highlights of players scoring 972 | ret970,msr9012,video9012,a trailer for a movie with a girl knocking on the wall 973 | ret971,msr9015,video9015,several enormous juicy burgers are all stacked together 974 | ret972,msr9014,video9014,a cartoon plane picking up fruit and putting it in trucks 975 | ret973,msr9017,video9017,the man is driving his motorbike fast and having problems on the race 976 | ret974,msr9016,video9016,all persons are wearing bikini dresses and playing in sea 977 | ret975,msr9509,video9509,a women in blue shows two pink lipsticks 978 | ret976,msr7548,video7548,people are stoppped by military in the street 979 | ret977,msr9734,video9734,a cartoon shows two dogs talking to a bird 980 | ret978,msr9834,video9834,old black and white films are shown and the history of motion pictures is being dealt with 981 | ret979,msr9735,video9735,many potatoes get washed and move through machinery 982 | ret980,msr8752,video8752,a news reader describing about a news 983 | ret981,msr7064,video7064,a guy wearing a black shirt talks and shows a chart on the tv screen 984 | ret982,msr8308,video8308,a man is playing baseball 985 | ret983,msr8309,video8309,there is a wrestling match 986 | ret984,msr8302,video8302,a man talks about kim chi to a camera 987 | ret985,msr8303,video8303,a cartoon announcing the next event in a fake show 988 | ret986,msr8300,video8300,a woman pours water into her pot of meat then tomato sauce and stirs it all around while talking 989 | ret987,msr8301,video8301,man pretends to be two different people 990 | ret988,msr8306,video8306,tourists walking around the mount fuji visitor center 991 | ret989,msr8307,video8307,calm pond with lush green hills lining the background is shown 992 | ret990,msr8304,video8304,a man dressed as a woman in a spanish language tv program 993 | ret991,msr8305,video8305,in a music video a man is laying with women while singing 994 | ret992,msr7060,video7060,a man extinguishes a fire outside 995 | ret993,msr7061,video7061,goldfish chase each other around a blue tank to music 996 | ret994,msr9575,video9575,the woman in the purple blouse talk as the shelves are behind her 997 | ret995,msr9879,video9879,the young performer impressed his audience 998 | ret996,msr9878,video9878,some peole are sitting in hall 999 | ret997,msr9873,video9873,man shows how to prepare pizza 1000 | ret998,msr9872,video9872,someone is playing a first person shooter game and is making jokes 1001 | ret999,msr9871,video9871,bearded guy in grey tshirt talking to the camera 1002 | --------------------------------------------------------------------------------