├── asset
    └── main.png
├── metrics.py
├── utils.py
├── README.md
├── loader
    ├── ucf_loader.py
    ├── hmdb_loader.py
    ├── msrvtt_loader.py
    ├── crosstask_loader.py
    ├── youcook_loader.py
    └── howto100m_loader.py
├── loss.py
├── args.py
├── src
    ├── eval_youcook.py
    ├── eval_msrvtt.py
    ├── eval_crosstask.py
    ├── eval_ucf.py
    ├── eval_hmdb.py
    └── train.py
├── s3dg.py
├── soft_dtw.py
└── data
    └── msrvtt_test.csv


/asset/main.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KoDohwan/VT-TWINS/HEAD/asset/main.png


--------------------------------------------------------------------------------
/metrics.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import
 2 | from __future__ import division
 3 | from __future__ import unicode_literals
 4 | from __future__ import print_function
 5 | 
 6 | import numpy as np
 7 | 
 8 | def retrieval(x):
 9 |     sx = np.sort(-x, axis=1)
10 |     d = np.diag(-x)
11 |     d = d[:, np.newaxis]
12 |     ind = sx - d
13 |     ind = np.where(ind == 0)
14 |     ind = ind[1]
15 |     metrics = {}
16 |     metrics['R1'] = float(np.sum(ind == 0)) / len(ind) * 100
17 |     metrics['R5'] = float(np.sum(ind < 5)) / len(ind) * 100
18 |     metrics['R10'] = float(np.sum(ind < 10)) / len(ind) * 100
19 |     metrics['MR'] = np.median(ind) + 1
20 |     return metrics
21 | 
def ctr(x):
    """Return the percentage of rows whose diagonal entry is the row maximum.

    Each row of ``-x`` is sorted in ascending order; a row counts as a hit
    when its smallest negated value equals the negated diagonal entry, i.e.
    when no entry in the row is larger than the diagonal one.

    Args:
        x: 2-D numpy array of scores; the diagonal holds the reference score
           of each row.

    Returns:
        The hit rate as a Python float in percent.
    """
    negated = -x
    ascending = np.sort(negated, axis=1)
    reference = np.diag(negated)[:, np.newaxis]
    offsets = ascending - reference

    # Only the first (best) sorted position of each row matters.
    hits = float(sum(1 for row in offsets if row[0] == 0))
    total = float(len(offsets))
    return hits / total * 100


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | import torch
 4 | import torch.distributed as dist
 5 | from torch.optim.lr_scheduler import LambdaLR
 6 | 
 7 | 
 8 | class AllGather(torch.autograd.Function):
 9 |     """An autograd function that performs allgather on a tensor."""
10 | 
11 |     @staticmethod
12 |     def forward(ctx, tensor, args):
13 |         output = [torch.empty_like(tensor) for _ in range(args.world_size)]
14 |         dist.all_gather(output, tensor)
15 |         ctx.rank = args.rank
16 |         ctx.batch_size = tensor.shape[0]
17 |         return torch.cat(output, 0)
18 | 
19 |     @staticmethod
20 |     def backward(ctx, grad_output):
21 |         return (
22 |             grad_output[ctx.batch_size * ctx.rank : ctx.batch_size * (ctx.rank + 1)],
23 |             None,
24 |         )
25 | 
def get_cosine_schedule_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_cycles=0.5, last_epoch=-1):
    """Build a ``LambdaLR`` with linear warmup followed by cosine decay.

    The learning-rate multiplier rises linearly from 0 to 1 over the first
    ``num_warmup_steps`` steps, then follows
    ``0.5 * (1 + cos(pi * num_cycles * 2 * progress))`` clipped at 0, where
    ``progress`` is the fraction of the post-warmup steps already done.

    Args:
        optimizer: optimizer whose learning rate is scheduled.
        num_warmup_steps: number of linear warmup steps.
        num_training_steps: total number of steps (warmup included).
        num_cycles: number of cosine cycles; 0.5 decays from 1 to 0 once.
        last_epoch: forwarded to ``LambdaLR``.

    Returns:
        The configured ``LambdaLR`` scheduler.
    """
    warmup_span = float(max(1, num_warmup_steps))
    decay_span = float(max(1, num_training_steps - num_warmup_steps))

    def lr_lambda(current_step):
        if current_step >= num_warmup_steps:
            progress = float(current_step - num_warmup_steps) / decay_span
            cosine = math.cos(math.pi * float(num_cycles) * 2.0 * progress)
            return max(0.0, 0.5 * (1.0 + cosine))
        return float(current_step) / warmup_span

    return LambdaLR(optimizer, lr_lambda, last_epoch)
39 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # VT-TWINS
 2 | 
 3 | This repository is the implementation of "[Video-Text Representation Learning via Differentiable Weak Temporal Alignment](https://openaccess.thecvf.com/content/CVPR2022/papers/Ko_Video-Text_Representation_Learning_via_Differentiable_Weak_Temporal_Alignment_CVPR_2022_paper.pdf) (CVPR 2022)".
 4 | 
 5 | <div align="center">
 6 |   <img src="asset/main.png" width="900px" />
 7 | </div>
 8 | 
 9 | ## Preparation
10 | 
11 | ### Requirements
12 | - Python 3
13 | - PyTorch (>= 1.0)
14 | - [ffmpeg-python](https://github.com/kkroening/ffmpeg-python) with ffmpeg
15 | - pandas
16 | - numpy
17 | - tqdm
18 | - scikit-learn
19 | - numba 0.53.1
20 | 
21 | ### Dataset
22 | The annotation files (.csv) of all datasets are in './data'. If you download the downstream datasets, place the files as follows:
23 | ```
24 | data
25 |  |─ downstream
26 |  │   |─ ucf
27 |  │   │   └─ ucf101
28 |  |   │       |─ label1
29 |  |   │           |─ video1.mp4
30 |  |   │           :
31 |  |   │       :
32 |  |   |─ hmdb
33 |  |   │   |─ label1
34 |  |   │   │   |─ video1.avi
35 |  |   │   │   :
36 |  |   │   :
37 |  |   |─ youcook
38 |  |   │   |─ task1
39 |  |   │   │   |─ video1.mp4
40 |  |   │   │   :
41 |  |   │   :
42 |  |   |─ msrvtt
43 |  |   │   └─ TestVideo
44 |  |   │       |─ video1.mp4
45 |  |   │       :
46 |  |   └─ crosstask
47 |  |       └─ videos
48 |  |           |─ 105222
49 |  |           │   |─ 4K4PnQ66LQ8.mp4
50 |  |           │   :
51 |  |           :
52 |  ```
53 | 
54 | ### Pretrained Weight
55 | The pretrained weights of our model, word2vec, and the tokenizer can be found [here](https://drive.google.com/drive/folders/16QH4C6Sr6ptGp-wEnVsAwZem-kp-uLkt?usp=sharing). Place the pretrained weights of our model in './checkpoint', and word2vec and the tokenizer in './data'.
56 | 
57 | ## Evaluation
58 | 
59 | ### Action Recognition on UCF101
60 | ```
61 | python src/eval_ucf.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar
62 | ```
63 | 
64 | ### Action Recognition on HMDB
65 | ```
66 | python src/eval_hmdb.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar
67 | ```
68 | 
69 | ### Text-to-Video Retrieval on YouCook2
70 | ```
71 | python src/eval_youcook.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar
72 | ```
73 | 
74 | ### Text-to-Video Retrieval on MSRVTT
75 | ```
76 | python src/eval_msrvtt.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar
77 | ```
78 | 
79 | ### Action Step Localization on CrossTask
80 | ```
81 | python src/eval_crosstask.py --pretrain_cnn_path ./checkpoint/pretrained.pth.tar
82 | ```
83 | 
84 | ## Citation
85 | ```
86 | @inproceedings{ko2022video,
87 |   title={Video-Text Representation Learning via Differentiable Weak Temporal Alignment},
88 |   author={Ko, Dohwan and Choi, Joonmyung and Ko, Juyeon and Noh, Shinyeong and On, Kyoung-Woon and Kim, Eun-Sol and Kim, Hyunwoo J},
89 |   booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
90 |   year={2022}
91 | }
92 | ```


--------------------------------------------------------------------------------
/loader/ucf_loader.py:
--------------------------------------------------------------------------------
 1 | import torch as th
 2 | from torch.utils.data import Dataset
 3 | import pandas as pd
 4 | import os
 5 | import numpy as np
 6 | import random
 7 | import ffmpeg
 8 | import time
 9 | import re
10 | import glob
11 | 
12 | 
class UCF_DataLoader(Dataset):
    """UCF101 video loader.

    Every item is decoded with ffmpeg, cropped/resized to ``size`` x ``size``
    and cut into ``num_clip`` evenly spaced clips of ``num_frames`` frames.
    """

    def __init__(
            self,
            data,
            video_root='',
            num_clip=4,
            num_frames=32,
            size=224,
            with_flip=True,
            crop_only=False,
            center_crop=True,
    ):
        """
        Args:
            data: annotation CSV handed to ``pandas.read_csv``; the columns
                ``video_id``, ``label``, ``split1``, ``split2`` and ``split3``
                are read.
            video_root: root folder; videos are looked up under
                ``<video_root>/ucf/ucf101/<video_id>``.
            num_clip: number of clips cut from each video.
            num_frames: number of frames per clip.
            size: side length (pixels) of the square output frames.
            with_flip: forwarded to ``_get_video`` as ``flip`` (see the note
                there: it currently does not change the returned tensor).
            crop_only: if true, crop a ``size`` x ``size`` window without
                rescaling; otherwise crop the largest square and rescale it.
            center_crop: if true, crop at the center; otherwise at a random
                position.
        """
        assert isinstance(size, int)
        self.data = pd.read_csv(data)
        self.video_root = video_root
        self.size = size
        self.num_frames = num_frames
        self.num_clip = num_clip
        self.crop_only = crop_only
        self.center_crop = center_crop
        self.with_flip = with_flip

    def __len__(self):
        return len(self.data)

    def _sample_clips(self, video, num_clip):
        """Cut ``num_clip`` evenly spaced clips out of a (3, T, H, W) uint8 video.

        Videos shorter than ``self.num_frames`` are zero-padded at the end.
        Returns a float tensor of shape
        ``(num_clip, 3, num_frames, size, size)``.
        """
        if video.shape[1] < self.num_frames:
            zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8)
            video = th.cat((video, zeros), dim=1)
        output = th.zeros(num_clip, 3, self.num_frames, self.size, self.size)
        # Clip start frames spread from the first to the last possible start.
        start_ind = np.linspace(0, video.shape[1] - self.num_frames, num_clip, dtype=int)
        for i, s in enumerate(start_ind):
            output[i] = video[:, s:s + self.num_frames]
        return output

    def _get_video(self, video_path, num_clip, flip=False):
        """Decode ``video_path`` and return its clips.

        Returns a float tensor of shape
        ``(num_clip, 3, num_frames, size, size)`` holding raw 0-255 pixel
        values.

        NOTE(review): ``flip`` has never affected the result. The original
        code built a flipped copy but returned the un-flipped clips. That
        unused computation is removed here and the returned shape is kept,
        since callers may depend on it. Confirm whether flip augmentation
        was intended.
        """
        if self.center_crop:
            aw, ah = 0.5, 0.5
        else:
            aw, ah = random.uniform(0, 1), random.uniform(0, 1)

        cmd = ffmpeg.input(video_path)
        if self.crop_only:
            # Fixed-size window, no rescaling.
            cmd = cmd.crop('(iw - {})*{}'.format(self.size, aw),
                           '(ih - {})*{}'.format(self.size, ah),
                           str(self.size), str(self.size))
        else:
            # Largest square window, then rescale to the target size.
            cmd = (
                cmd.crop('(iw - min(iw,ih))*{}'.format(aw),
                         '(ih - min(iw,ih))*{}'.format(ah),
                         'min(iw,ih)',
                         'min(iw,ih)')
                .filter('scale', self.size, self.size)
            )
        out, _ = (
            cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
            .run(capture_stdout=True, quiet=True)
        )
        video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3])
        # (T, H, W, C) -> (C, T, H, W)
        video = th.from_numpy(video).permute(3, 0, 1, 2)
        return self._sample_clips(video, num_clip)

    def __getitem__(self, idx):
        """Return the clips, label and split flags of sample ``idx``.

        Raises:
            ValueError: if the video file does not exist.
        """
        video_id = self.data['video_id'].values[idx]
        label = self.data['label'].values[idx]
        split1 = self.data['split1'].values[idx]
        split2 = self.data['split2'].values[idx]
        split3 = self.data['split3'].values[idx]
        video_path = os.path.join(self.video_root, 'ucf', 'ucf101', video_id)
        if not os.path.isfile(video_path):
            raise ValueError('Video file not found: {}'.format(video_path))
        video = self._get_video(video_path, self.num_clip, flip=self.with_flip)
        return {'video': video, 'label': label, 'split1': split1, 'split2': split2, 'split3': split3}
96 | 
97 | 


--------------------------------------------------------------------------------
/loss.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | from soft_dtw import SoftDTW
 4 | import numpy as np
 5 | from itertools import permutations
 6 | 
 7 | class S2DTW(torch.nn.Module):
 8 |     def __init__(self, args):
 9 |         super(S2DTW, self).__init__()
10 |         self.args = args
11 |         self.sdtw = SoftDTW(use_cuda=True, gamma=1e-1, dist_func='negative_dot')
12 |         self.tda = TDA(self.args)
13 |         
14 |     def video_text(self, video_embd, text_embd):
15 |         b, n, d = video_embd.shape
16 |         pos = -self.sdtw(video_embd, text_embd)
17 |         video_embd_row = video_embd.unsqueeze(0).expand(b, b, n ,d).reshape(-1, n ,d)
18 |         text_embd_col = text_embd.unsqueeze(1).expand(b, b, n ,d).reshape(-1, n, d)
19 |         neg = -self.sdtw(video_embd_row, text_embd_col).reshape(b, b)
20 |         neg = torch.logsumexp(neg, 1)
21 |         loss = torch.mean(neg - pos)
22 |         return loss
23 | 
24 |     def forward(self, video_embd, text_embd):
25 |         # video_embd, text_embd = self.tda(video_embd, text_embd)
26 |         loss = self.video_text(video_embd, text_embd)
27 |         return loss
28 |     
class TDA(torch.nn.Module):
    """Sample a temporally constrained clip reordering for video and text.

    A fixed table of permutations of ``num_clip`` positions is built once; it
    contains only permutations that move no element more than two positions.
    For each sequence a permutation is sampled, favouring those that change
    the sequence's self-similarity matrix least.

    The permutation table is placed on the GPU when one is available (as
    before) and is moved to the device of the incoming embeddings on use, so
    the module also works on CPU.
    """

    def __init__(self, args):
        super(TDA, self).__init__()
        self.args = args
        self.num_clip = args.num_clip
        self.n = self.num_clip * self.num_clip
        perm = self.generate_permutations(self.num_clip)
        self.perm = perm.cuda() if torch.cuda.is_available() else perm
        self.num_perm = self.perm.shape[0]
        self.softmin = nn.Softmin(dim=1)

    def negative_dot_product(self, x, y):
        """Return ``-(x @ y^T)`` for batched 3-D inputs."""
        return -torch.matmul(x, y.transpose(1, 2))

    def check_temporal_condition(self, p):
        """Return True when no element of ``p`` is displaced by more than 2."""
        return all(abs(target - position) <= 2 for position, target in enumerate(p))

    def generate_permutations(self, num_clip):
        """Return a (P, num_clip) tensor of the admissible permutations."""
        admissible = [
            p for p in permutations(range(num_clip))
            if self.check_temporal_condition(p)
        ]
        return torch.tensor(admissible)

    def generate_distribution(self, embd):
        """Build a categorical distribution over the permutation table.

        Args:
            embd: tensor indexed as (batch, clip, feature); the clip axis must
                have ``num_clip`` entries to match the permutation table.

        Returns:
            ``torch.distributions.Categorical`` with one row of probabilities
            per batch element and one column per admissible permutation.
        """
        perm = self.perm.to(embd.device)
        self_similarity = self.negative_dot_product(embd, embd).detach()  # (b, n, n)
        # permuted[b, p, i, j] = self_similarity[b, perm[p, i], perm[p, j]]
        permuted = self_similarity[:, perm.unsqueeze(2), perm.unsqueeze(1)]  # (b, P, n, n)
        distance = torch.norm(self_similarity.unsqueeze(1) - permuted, p=2, dim=(2, 3))
        # Softmin: a smaller change of the self-similarity gives a higher probability.
        probabilities = self.softmin(distance * 50)
        return torch.distributions.Categorical(probabilities)

    def forward(self, video_embd, text_embd):
        """Reorder the clips of each video and text with sampled permutations.

        Returns:
            ``(video_embd, text_embd)`` with the clip axis (dim 1) permuted
            independently for every batch element and modality.
        """
        d = video_embd.shape[2]
        perm = self.perm.to(video_embd.device)
        distribution_video = self.generate_distribution(video_embd)
        distribution_text = self.generate_distribution(text_embd)
        # One permutation (row of the table) per batch element: (b, n).
        perm_video = perm[distribution_video.sample()]
        perm_text = perm[distribution_text.sample()]
        video_embd = torch.gather(video_embd, 1, perm_video.unsqueeze(2).repeat(1, 1, d))
        text_embd = torch.gather(text_embd, 1, perm_text.unsqueeze(2).repeat(1, 1, d))
        return video_embd, text_embd


--------------------------------------------------------------------------------
/loader/hmdb_loader.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch as th
  3 | from torch.utils.data import Dataset
  4 | import pandas as pd
  5 | import os
  6 | import numpy as np
  7 | import random
  8 | import ffmpeg
  9 | import time
 10 | import re
 11 | import glob
 12 | 
 13 | 
 14 | class HMDB_DataLoader(Dataset):
 15 |     """HMDB Video-Text loader."""
 16 | 
 17 |     def __init__(
 18 |             self,
 19 |             data,
 20 |             video_root='',
 21 |             num_clip=4,
 22 |             num_frames=32,
 23 |             size=224,
 24 |             with_flip=True,
 25 |             crop_only=False,
 26 |             center_crop=True,
 27 |     ):
 28 |         """
 29 |         Args:
 30 |         """
 31 |         assert isinstance(size, int)
 32 |         self.data = pd.read_csv(data)
 33 |         self.video_root = video_root
 34 |         self.size = size
 35 |         self.num_frames = num_frames
 36 |         self.num_clip = num_clip
 37 |         self.crop_only = crop_only
 38 |         self.center_crop = center_crop
 39 |         self.with_flip = with_flip
 40 |         self.label_dict = {'brush_hair': 0, 'cartwheel': 1, 'catch': 2, 'chew': 3, 'clap': 4, 'climb': 5, 'climb_stairs': 6, 'dive': 7, 'draw_sword': 8, 
 41 |                            'dribble': 9, 'drink': 10, 'eat': 11, 'fall_floor': 12, 'fencing': 13, 'flic_flac': 14, 'golf': 15, 'handstand': 16, 'hit': 17, 
 42 |                            'hug': 18, 'jump': 19, 'kick': 20, 'kick_ball': 21, 'kiss': 22, 'laugh': 23, 'pick': 24, 'pour': 25, 'pullup': 26, 'punch': 27, 
 43 |                            'push': 28, 'pushup': 29, 'ride_bike': 30, 'ride_horse': 31, 'run': 32, 'shake_hands': 33, 'shoot_ball': 34, 'shoot_bow': 35, 
 44 |                            'shoot_gun': 36, 'sit': 37, 'situp': 38, 'smile': 39, 'smoke': 40, 'somersault': 41, 'stand': 42, 'swing_baseball': 43, 
 45 |                            'sword': 44, 'sword_exercise': 45, 'talk': 46, 'throw': 47, 'turn': 48, 'walk': 49, 'wave': 50}
 46 | 
 47 | 
 48 |     def __len__(self):
 49 |         return len(self.data)
 50 | 
 51 |     def _get_video(self, video_path, num_clip, flip=False):
 52 |         cmd = (
 53 |             ffmpeg
 54 |             .input(video_path)
 55 |         )
 56 |         if self.center_crop:
 57 |             aw, ah = 0.5, 0.5
 58 |         else:
 59 |             aw, ah = random.uniform(0, 1), random.uniform(0, 1)
 60 |         if self.crop_only:
 61 |             cmd = (
 62 |                 cmd.crop('(iw - {})*{}'.format(self.size, aw),
 63 |                          '(ih - {})*{}'.format(self.size, ah),
 64 |                          str(self.size), str(self.size))
 65 |             )
 66 |         else:
 67 |             cmd = (
 68 |                 cmd.crop('(iw - min(iw,ih))*{}'.format(aw),
 69 |                          '(ih - min(iw,ih))*{}'.format(ah),
 70 |                          'min(iw,ih)',
 71 |                          'min(iw,ih)')
 72 |                 .filter('scale', self.size, self.size)
 73 |             )
 74 |         out, _ = (
 75 |             cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
 76 |             .run(capture_stdout=True, quiet=True)
 77 |         )
 78 |         video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3])
 79 |         video = th.from_numpy(video)
 80 |         video = video.permute(3, 0, 1, 2)
 81 |         if video.shape[1] < self.num_frames:
 82 |             zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8)
 83 |             video = th.cat((video, zeros), axis=1)
 84 |         output = th.zeros(num_clip, 3, self.num_frames, self.size, self.size)
 85 |         start_ind = np.linspace(0, video.shape[1] - self.num_frames, num_clip, dtype=int) 
 86 |         for i, s in enumerate(start_ind):
 87 |             output[i] = video[:, s:s+self.num_frames] 
 88 |         if flip:
 89 |             video = th.cat((output, th.flip(output, [4])), dim=0) 
 90 |         return output
 91 | 
 92 |     def __getitem__(self, idx):
 93 |         video_id = self.data['video_id'].values[idx]
 94 |         label = self.data['label'].values[idx]
 95 |         split1 = self.data['split1'].values[idx]
 96 |         split2 = self.data['split2'].values[idx]
 97 |         split3 = self.data['split3'].values[idx]
 98 |         video_path = os.path.join(self.video_root, 'hmdb', label[:-5], video_id)
 99 |         if not(os.path.isfile(video_path)):
100 |             raise ValueError
101 |         video = self._get_video(video_path, self.num_clip, flip=self.with_flip)
102 |         return {'video': video, 'label': self.label_dict[label[:-5]], 'split1': split1, 'split2': split2, 'split3': split3}
103 | 
104 | 


--------------------------------------------------------------------------------
/args.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | def get_args(description='VT-TWINS'):
 4 |     parser = argparse.ArgumentParser(description=description)
 5 |     parser.add_argument('--train_csv', type=str, default='./data/all_videos.csv', help='train csv')
 6 |     parser.add_argument('--video_path', type=str, default='./data/videos', help='video_path')
 7 |     parser.add_argument('--caption_root', type=str, default='./data/caption_json', help='video_path')
 8 |     parser.add_argument('--word2vec_path', type=str, default='./data/word2vec.pth', help='')
 9 |     parser.add_argument('--eval_video_root', type=str, default='./data/downstream', help='root folder for the video at for evaluation')
10 |     parser.add_argument('--checkpoint_root', type=str, default='checkpoint', help='checkpoint dir root')
11 |     parser.add_argument('--log_root', type=str, default='log', help='log dir root')
12 |     parser.add_argument('--checkpoint_dir', type=str, default='', help='checkpoint model folder')
13 |     parser.add_argument('--optimizer', type=str, default='adam', help='opt algorithm')
14 |     parser.add_argument('--weight_init', type=str, default='uniform', help='CNN weights inits')
15 |     parser.add_argument('--num_thread_reader', type=int, default=4, help='')
16 |     parser.add_argument('--num_class', type=int, default=512, help='upper epoch limit')
17 |     parser.add_argument('--num_clip', type=int, default=8, help='num clips')
18 |     parser.add_argument('--batch_size', type=int, default=16, help='batch size')
19 |     parser.add_argument('--num_windows_test', type=int, default=10, help='number of testing windows')
20 |     parser.add_argument('--batch_size_val', type=int, default=10, help='batch size eval')
21 |     parser.add_argument('--momemtum', type=float, default=0.9, help='SGD momemtum')
22 |     parser.add_argument('--n_display', type=int, default=400, help='Information display frequence')
23 |     parser.add_argument('--num_frames', type=int, default=32, help='random seed')
24 |     parser.add_argument('--video_size', type=int, default=224, help='random seed')
25 |     parser.add_argument('--crop_only', type=int, default=1, help='random seed')
26 |     parser.add_argument('--centercrop', type=int, default=0, help='random seed')
27 |     parser.add_argument('--random_flip', type=int, default=1, help='random seed')
28 |     parser.add_argument('--verbose', type=int, default=1, help='')
29 |     parser.add_argument('--warmup_steps', type=int, default=100000, help='')
30 |     parser.add_argument('--min_time', type=float, default=5.0, help='')
31 |     parser.add_argument('--pretrain_cnn_path', type=str, default='', help='')
32 |     parser.add_argument('--fps', type=int, default=10, help='')
33 |     parser.add_argument('--cudnn_benchmark', type=int, default=0, help='')
34 |     parser.add_argument('--epochs', default=300, type=int, metavar='N', help='number of total epochs to run')
35 |     parser.add_argument('--start-epoch', default=0, type=int, metavar='N', help='manual epoch number (useful on restarts)')
36 |     parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, metavar='LR', help='initial learning rate', dest='lr')
37 |     parser.add_argument('--momentum', default=0.9, type=float, metavar='M', help='momentum')
38 |     parser.add_argument('--resume', dest='resume', action='store_true', help='resume training from last checkpoint')
39 |     parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', help='evaluate model on validation set')
40 |     parser.add_argument('--pretrained', dest='pretrained', action='store_true', help='use pre-trained model')
41 |     parser.add_argument('--pin_memory', dest='pin_memory', action='store_true', help='use pin_memory')
42 |     parser.add_argument('--world-size', default=-1, type=int, help='number of nodes for distributed training')
43 |     parser.add_argument('--rank', default=-1, type=int, help='node rank for distributed training')
44 |     parser.add_argument('--dist-file', default='dist-file', type=str, help='url used to set up distributed training')
45 |     parser.add_argument('--dist-url', default='tcp://111.111.111.111:12345', type=str, help='url used to set up distributed training')
46 |     parser.add_argument('--dist-backend', default='nccl', type=str, help='distributed backend')
47 |     parser.add_argument('--seed', default=1, type=int, help='seed for initializing training. ')
48 |     parser.add_argument('--gpu', default=None, type=int, help='GPU id to use.')
49 |     parser.add_argument('--multiprocessing-distributed', action='store_true', help='Use multi-processing distributed training to launch N processes per node, '
50 |                         'which has N GPUs. This is the fastest way to use PyTorch for either single node or multi node data parallel training')
51 |     args = parser.parse_args()
52 |     return args
53 | 


--------------------------------------------------------------------------------
/src/eval_youcook.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | warnings.simplefilter("ignore", UserWarning)
  3 | import os
  4 | import random
  5 | import socket
  6 | import time
  7 | import sys
  8 | 
  9 | root_path = os.getcwd()
 10 | sys.path.append(root_path)
 11 | import torch
 12 | import torch.optim as optim
 13 | import torch.nn.functional as F
 14 | import torch.utils.data
 15 | import torch.multiprocessing as mp
 16 | import torch.distributed as dist
 17 | import torch.backends.cudnn as cudnn
 18 | 
 19 | from metrics import retrieval
 20 | from args import get_args
 21 | from loader.youcook_loader import Youcook_DataLoader
 22 | from s3dg import S3D
 23 | from tqdm import tqdm
 24 | import numpy as np
 25 | import time
 26 | from utils import AllGather
 27 | allgather = AllGather.apply
 28 | 
 29 | def main(args):
 30 |     model = deploy_model(args)
 31 |     test_dataset = Youcook_DataLoader(data='./data/validation_youcook.csv', num_clip=args.num_windows_test,
 32 |                                 video_root=args.eval_video_root, fps=args.fps, num_frames=args.num_frames, size=args.video_size, crop_only=False,
 33 |                                 center_crop=True,)
 34 |     test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
 35 |     test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 
 36 |                                             num_workers=args.num_thread_reader, sampler=test_sampler)
 37 | 
 38 |     all_video_embd, all_text_embd = test(test_loader, model, args)
 39 |     if args.gpu == 0:
 40 |         t2v = retrieval(np.dot(all_text_embd, all_video_embd.T))
 41 |         v2t = retrieval(np.dot(all_video_embd, all_text_embd.T))
 42 |         print('YouCook2')
 43 |         print(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}")
 44 |         print(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}")
 45 |         with open('result.txt', 'a') as f:
 46 |             f.write('YouCook2\n')
 47 |             f.write(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}\n")
 48 |             f.write(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}\n")
 49 | 
 50 | def test(test_loader, model, args):
 51 |     all_text_embd = []
 52 |     all_video_embd = []
 53 |     with torch.no_grad():
 54 |         for i_batch, data in enumerate(tqdm(test_loader)):
 55 |             text = data['text'].cuda()
 56 |             video = data['video'].float().cuda()
 57 |             video = video / 255.0
 58 |             video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5])
 59 |             video_embd, text_embd = model(video, text)
 60 |             video_embd = video_embd.view(text_embd.shape[0], args.num_windows_test, text_embd.shape[1])
 61 |             video_embd = video_embd.mean(dim=1)
 62 |             all_text_embd.append(text_embd)
 63 |             all_video_embd.append(video_embd)
 64 |     all_text_embd = torch.cat(all_text_embd, dim=0)
 65 |     all_video_embd = torch.cat(all_video_embd, dim=0)
 66 |     all_video_embd = allgather(all_video_embd, args)
 67 |     all_text_embd = allgather(all_text_embd, args)
 68 |     return all_video_embd.cpu().numpy(), all_text_embd.cpu().numpy()
 69 |     
 70 | 
 71 | def deploy_model(args):
 72 |     checkpoint_path = args.pretrain_cnn_path
 73 |     print("=> loading checkpoint '{}'".format(checkpoint_path))
 74 |     checkpoint = torch.load(checkpoint_path, map_location='cpu')
 75 |     torch.cuda.set_device(args.gpu)
 76 |     model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path)
 77 |     model.cuda(args.gpu)
 78 |     checkpoint_module = {k[7:]:v for k,v in checkpoint.items()}
 79 |     model.load_state_dict(checkpoint_module)
 80 |     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
 81 |     model.eval()
 82 |     
 83 |     print(f'Model Loaded on GPU {args.gpu}')
 84 |     return model
 85 | 
 86 | def main_worker(gpu, ngpus_per_node, main, args):
 87 |     cudnn.benchmark = True
 88 |     args.gpu = gpu
 89 |     args.rank = gpu
 90 |     s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
 91 |     s.connect(("8.8.8.8", 80))
 92 |     ip = s.getsockname()[0]
 93 |     args.dist_url = f'tcp://{ip}:12345'
 94 |     dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu)
 95 |     main(args)
 96 | 
 97 | def spawn_workers(main, args):
 98 |     ngpus_per_node = 8
 99 |     args.world_size = 8
100 |     mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, main, args))
101 | 
if __name__ == "__main__":
    args = get_args()

    # Explicit check instead of ``assert`` so it still runs under ``python -O``.
    if args.eval_video_root == '':
        raise ValueError('--eval_video_root must not be empty')
    spawn_workers(main, args)


--------------------------------------------------------------------------------
/src/eval_msrvtt.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | warnings.simplefilter("ignore", UserWarning)
  3 | import os
  4 | import random
  5 | import socket
  6 | import time
  7 | import sys
  8 | 
  9 | root_path = os.getcwd()
 10 | sys.path.append(root_path)
 11 | import torch
 12 | import torch.optim as optim
 13 | import torch.nn.functional as F
 14 | import torch.utils.data
 15 | import torch.multiprocessing as mp
 16 | import torch.distributed as dist
 17 | import torch.backends.cudnn as cudnn
 18 | 
 19 | from metrics import retrieval
 20 | from args import get_args
 21 | from loader.msrvtt_loader import MSRVTT_DataLoader
 22 | from s3dg import S3D
 23 | from tqdm import tqdm
 24 | import numpy as np
 25 | import time
 26 | from utils import AllGather
 27 | allgather = AllGather.apply
 28 | 
 29 | def main(args):
 30 |     model = deploy_model(args)
 31 |     test_dataset = MSRVTT_DataLoader(data='./data/msrvtt_test.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root, 
 32 |                                      fps=args.fps, num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True,)
 33 |     test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
 34 |     test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 
 35 |                                             num_workers=args.num_thread_reader, sampler=test_sampler)
 36 | 
 37 |     all_video_embd, all_text_embd = test(test_loader, model, args)
 38 |     if args.gpu == 0:
 39 |         t2v = retrieval(np.dot(all_text_embd, all_video_embd.T))
 40 |         v2t = retrieval(np.dot(all_video_embd, all_text_embd.T))
 41 |         print('MSRVTT')
 42 |         print(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}")
 43 |         print(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}")
 44 |         with open('result.txt', 'a') as f:
 45 |             f.write('MSRVTT\n')
 46 |             f.write(f"R@1: {t2v['R1']:.2f} - R@5: {t2v['R5']:.2f} - R@10: {t2v['R10']:.2f} - Median R: {t2v['MR']}\n")
 47 |             f.write(f"R@1: {v2t['R1']:.2f} - R@5: {v2t['R5']:.2f} - R@10: {v2t['R10']:.2f} - Median R: {v2t['MR']}\n")
 48 | 
 49 | def test(test_loader, model, args):
 50 |     all_text_embd = []
 51 |     all_video_embd = []
 52 |     with torch.no_grad():
 53 |         for i_batch, data in enumerate(tqdm(test_loader)):
 54 |             text = data['text'].cuda()
 55 |             video = data['video'].float().cuda()
 56 |             video = video / 255.0
 57 |             video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5])
 58 |             video_embd, text_embd = model(video, text)
 59 |             video_embd = video_embd.view(text_embd.shape[0], args.num_windows_test, text_embd.shape[1])
 60 |             video_embd = video_embd.mean(dim=1)
 61 |             all_text_embd.append(text_embd)
 62 |             all_video_embd.append(video_embd)
 63 |     all_text_embd = torch.cat(all_text_embd, dim=0)
 64 |     all_video_embd = torch.cat(all_video_embd, dim=0)
 65 |     all_video_embd = allgather(all_video_embd, args)
 66 |     all_text_embd = allgather(all_text_embd, args)
 67 |     return all_video_embd.cpu().numpy(), all_text_embd.cpu().numpy()
 68 |     
 69 | 
 70 | def deploy_model(args):
 71 |     checkpoint_path = args.pretrain_cnn_path
 72 |     print("=> loading checkpoint '{}'".format(checkpoint_path))
 73 |     checkpoint = torch.load(checkpoint_path, map_location='cpu')
 74 |     torch.cuda.set_device(args.gpu)
 75 |     model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path)
 76 |     model.cuda(args.gpu)
 77 |     checkpoint_module = {k[7:]:v for k,v in checkpoint.items()}
 78 |     model.load_state_dict(checkpoint_module)
 79 |     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
 80 |     model.eval()
 81 |     print(f'Model Loaded on GPU {args.gpu}')
 82 |     return model
 83 | 
 84 | def main_worker(gpu, ngpus_per_node, main, args):
 85 |     cudnn.benchmark = True
 86 |     args.gpu = gpu
 87 |     args.rank = gpu
 88 |     args.world_size = 8
 89 |     s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
 90 |     s.connect(("8.8.8.8", 80))
 91 |     ip = s.getsockname()[0]
 92 |     args.dist_url = f'tcp://{ip}:12345'
 93 |     dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu)
 94 |     main(args)
 95 | 
 96 | def spawn_workers(main, args):
 97 |     ngpus_per_node = 8
 98 |     args.world_size = 8
 99 |     mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, main, args))
100 | 
if __name__ == "__main__":
    args = get_args()
    # Evaluation settings fixed by this script, overriding the parsed values.
    args.fps = 20
    args.num_windows_test = 8

    # Validate with an explicit raise: an `assert` is removed when Python runs
    # with -O, which would let an empty video root through to the workers.
    if args.eval_video_root == '':
        raise ValueError('args.eval_video_root must not be empty')
    spawn_workers(main, args)


--------------------------------------------------------------------------------
/loader/msrvtt_loader.py:
--------------------------------------------------------------------------------
  1 | import torch as th
  2 | from torch.utils.data import Dataset
  3 | import pandas as pd
  4 | import os
  5 | import numpy as np
  6 | import random
  7 | import ffmpeg
  8 | import time
  9 | import re
 10 | import pickle
 11 | 
 12 | 
 13 | class MSRVTT_DataLoader(Dataset):
 14 |     """MSRVTT Video-Text loader."""
 15 | 
 16 |     def __init__(
 17 |             self,
 18 |             data,
 19 |             video_root='',
 20 |             num_clip=4,
 21 |             fps=16,
 22 |             num_frames=32,
 23 |             size=224,
 24 |             crop_only=False,
 25 |             center_crop=True,
 26 |             token_to_word_path='../data/dict.npy',
 27 |             max_words=30,
 28 |     ):
 29 |         """
 30 |         Args:
 31 |         """
 32 |         assert isinstance(size, int)
 33 |         self.data = pd.read_csv(data)
 34 |         self.video_root = video_root
 35 |         self.size = size
 36 |         self.num_frames = num_frames
 37 |         self.fps = fps
 38 |         self.num_clip = num_clip
 39 |         self.num_sec = self.num_frames / float(self.fps)
 40 |         self.crop_only = crop_only
 41 |         self.center_crop = center_crop
 42 |         self.max_words = max_words
 43 |         self.word_to_token = {}
 44 |         token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path))
 45 |         for i, t in enumerate(token_to_word):
 46 |             self.word_to_token[t] = i + 1
 47 | 
 48 |     def __len__(self):
 49 |         return len(self.data)
 50 | 
 51 |     def _get_video(self, video_path, start, end, num_clip):
 52 |         video = th.zeros(num_clip, 3, self.num_frames, self.size, self.size)
 53 |         start_ind = np.linspace(start, max(start, end-self.num_sec - 0.4), num_clip) 
 54 |         for i, s in enumerate(start_ind):
 55 |             video[i] = self._get_video_start(video_path, s) 
 56 |         return video
 57 | 
 58 |     def _get_video_start(self, video_path, start):
 59 |         start_seek = start
 60 |         cmd = (
 61 |             ffmpeg
 62 |             .input(video_path, ss=start_seek, t=self.num_sec + 0.1)
 63 |             .filter('fps', fps=self.fps)
 64 |         )
 65 |         if self.center_crop:
 66 |             aw, ah = 0.5, 0.5
 67 |         else:
 68 |             aw, ah = random.uniform(0, 1), random.uniform(0, 1)
 69 |         if self.crop_only:
 70 |             cmd = (
 71 |                 cmd.crop('(iw - {})*{}'.format(self.size, aw),
 72 |                          '(ih - {})*{}'.format(self.size, ah),
 73 |                          str(self.size), str(self.size))
 74 |             )
 75 |         else:
 76 |             cmd = (
 77 |                 cmd.crop('(iw - min(iw,ih))*{}'.format(aw),
 78 |                          '(ih - min(iw,ih))*{}'.format(ah),
 79 |                          'min(iw,ih)',
 80 |                          'min(iw,ih)')
 81 |                 .filter('scale', self.size, self.size)
 82 |             )
 83 |         out, _ = (
 84 |             cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
 85 |             .run(capture_stdout=True, quiet=True)
 86 |         )
 87 |         video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3])
 88 |         video = th.from_numpy(video)
 89 |         video = video.permute(3, 0, 1, 2)
 90 |         if video.shape[1] < self.num_frames:
 91 |             zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8)
 92 |             video = th.cat((video, zeros), axis=1)
 93 |         return video[:, :self.num_frames]
 94 | 
 95 |     def _split_text(self, sentence):
 96 |         w = re.findall(r"[\w']+", str(sentence))
 97 |         return w
 98 | 
 99 |     def _words_to_token(self, words):
100 |         words = [self.word_to_token[word] for word in words if word in self.word_to_token]
101 |         if words:
102 |             we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words)
103 |             return we
104 |         else:
105 |             return th.zeros(self.max_words).long()
106 | 
107 |     def _zero_pad_tensor_token(self, tensor, size):
108 |         if len(tensor) >= size:
109 |             return tensor[:size]
110 |         else:
111 |             zero = th.zeros(size - len(tensor)).long()
112 |             return th.cat((tensor, zero), dim=0)
113 | 
114 |     def words_to_ids(self, x):
115 |         return self._words_to_token(self._split_text(x))
116 | 
117 |     def _get_duration(self, video_path):
118 |         probe = ffmpeg.probe(video_path)
119 |         return probe['format']['duration']
120 | 
121 |     def __getitem__(self, idx):
122 |         video_id = self.data['video_id'].values[idx]
123 |         cap = self.data['sentence'].values[idx]
124 |         video_path = os.path.join(self.video_root, 'msrvtt', 'TestVideo', video_id + '.mp4')
125 |         duration = self._get_duration(video_path)
126 |         text = self.words_to_ids(cap)
127 |         video = self._get_video(video_path, 0, float(duration), self.num_clip)
128 |         return {'video': video, 'text': text}
129 | 
130 | 


--------------------------------------------------------------------------------
/loader/crosstask_loader.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch as th
  3 | from torch.utils.data import Dataset
  4 | import pandas as pd
  5 | import os
  6 | import numpy as np
  7 | import random
  8 | import ffmpeg
  9 | import time
 10 | import re
 11 | import pickle
 12 | 
 13 | 
 14 | class CrossTask_DataLoader(Dataset):
 15 |     """CrossTask Video-Text loader."""
 16 | 
 17 |     def __init__(
 18 |             self,
 19 |             data,
 20 |             video_root='./data/downstream/',
 21 |             num_clip=4,
 22 |             fps=16,
 23 |             num_frames=32,
 24 |             size=224,
 25 |             crop_only=False,
 26 |             center_crop=True,
 27 |             token_to_word_path='../data/dict.npy',
 28 |             max_words=30,
 29 |     ):
 30 |         """
 31 |         Args:
 32 |         """
 33 |         assert isinstance(size, int)
 34 |         self.data = pd.read_csv(data)
 35 |         self.video_root = video_root
 36 |         self.size = size
 37 |         self.num_frames = num_frames
 38 |         self.fps = fps
 39 |         self.num_clip = num_clip
 40 |         self.num_sec = self.num_frames / float(self.fps)
 41 |         self.crop_only = crop_only
 42 |         self.center_crop = center_crop
 43 |         self.max_words = max_words
 44 |         token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path))
 45 |         self.word_to_token = {}
 46 |         for i, t in enumerate(token_to_word):
 47 |             self.word_to_token[t] = i + 1
 48 | 
 49 |     def __len__(self):
 50 |         return len(self.data)
 51 | 
 52 |     def _get_video(self, video_path, start, end, num_clip):
 53 |         video = th.zeros(num_clip, 3, self.num_frames, self.size, self.size)
 54 |         start_ind = np.linspace(start, max(start, end-self.num_sec - 0.4), num_clip) 
 55 |         for i, s in enumerate(start_ind):
 56 |             video[i] = self._get_video_start(video_path, s) 
 57 |         return video
 58 | 
 59 |     def _get_video_start(self, video_path, start):
 60 |         start_seek = start
 61 |         cmd = (
 62 |             ffmpeg
 63 |             .input(video_path, ss=start_seek, t=self.num_sec + 0.1)
 64 |             .filter('fps', fps=self.fps)
 65 |         )
 66 |         if self.center_crop:
 67 |             aw, ah = 0.5, 0.5
 68 |         else:
 69 |             aw, ah = random.uniform(0, 1), random.uniform(0, 1)
 70 |         if self.crop_only:
 71 |             cmd = (
 72 |                 cmd.crop('(iw - {})*{}'.format(self.size, aw),
 73 |                          '(ih - {})*{}'.format(self.size, ah),
 74 |                          str(self.size), str(self.size))
 75 |             )
 76 |         else:
 77 |             cmd = (
 78 |                 cmd.crop('(iw - min(iw,ih))*{}'.format(aw),
 79 |                          '(ih - min(iw,ih))*{}'.format(ah),
 80 |                          'min(iw,ih)',
 81 |                          'min(iw,ih)')
 82 |                 .filter('scale', self.size, self.size)
 83 |             )
 84 |         out, _ = (
 85 |             cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
 86 |             .run(capture_stdout=True, quiet=True)
 87 |         )
 88 |         video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3])
 89 |         video = th.from_numpy(video)
 90 |         video = video.permute(3, 0, 1, 2)
 91 |         if video.shape[1] < self.num_frames:
 92 |             zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8)
 93 |             video = th.cat((video, zeros), axis=1)
 94 |         return video[:, :self.num_frames]
 95 | 
 96 |     def _split_text(self, sentence):
 97 |         w = re.findall(r"[\w']+", str(sentence))
 98 |         return w
 99 | 
100 |     def _words_to_token(self, words):
101 |         words = [self.word_to_token[word] for word in words if word in self.word_to_token]
102 |         if words:
103 |             we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words)
104 |             return we
105 |         else:
106 |             return th.zeros(self.max_words).long()
107 | 
108 |     def _zero_pad_tensor_token(self, tensor, size):
109 |         if len(tensor) >= size:
110 |             return tensor[:size]
111 |         else:
112 |             zero = th.zeros(size - len(tensor)).long()
113 |             return th.cat((tensor, zero), dim=0)
114 | 
115 |     def words_to_ids(self, x):
116 |         return self._words_to_token(self._split_text(x))
117 | 
118 |     def __getitem__(self, idx):
119 |         video_id = self.data['video_id'].values[idx]
120 |         task = self.data['task_id'].values[idx]
121 |         start = self.data['start'].values[idx]
122 |         end = self.data['end'].values[idx]
123 |         cap = self.data['text'].values[idx]
124 |         if os.path.isfile(os.path.join(self.video_root+ '/crosstask/videos/'+ str(task) + '/' + video_id + '.mp4')):
125 |             video_path = os.path.join(self.video_root+ '/crosstask/videos/'+ str(task) + '/' + video_id + '.mp4')
126 |         else:
127 |             raise ValueError
128 |         text = self.words_to_ids(cap)
129 |         video = self._get_video(video_path, start, end, self.num_clip)
130 |         return {'video_id': video_id, 'task_id': task, 'video': video, 'text': text}


--------------------------------------------------------------------------------
/loader/youcook_loader.py:
--------------------------------------------------------------------------------
  1 | 
  2 | import torch as th
  3 | from torch.utils.data import Dataset
  4 | import pandas as pd
  5 | import os
  6 | import numpy as np
  7 | import random
  8 | import ffmpeg
  9 | import time
 10 | import re
 11 | import pickle
 12 | 
 13 | 
 14 | class Youcook_DataLoader(Dataset):
 15 |     """Youcook Video-Text loader."""
 16 | 
 17 |     def __init__(
 18 |             self,
 19 |             data,
 20 |             video_root='',
 21 |             num_clip=4,
 22 |             fps=16,
 23 |             num_frames=32,
 24 |             size=224,
 25 |             crop_only=False,
 26 |             center_crop=True,
 27 |             token_to_word_path='../data/dict.npy',
 28 |             max_words=30,
 29 |     ):
 30 |         """
 31 |         Args:
 32 |         """
 33 |         assert isinstance(size, int)
 34 |         self.data = pd.read_csv(data)
 35 |         self.video_root = video_root
 36 |         self.size = size
 37 |         self.num_frames = num_frames
 38 |         self.fps = fps
 39 |         self.num_clip = num_clip
 40 |         self.num_sec = self.num_frames / float(self.fps)
 41 |         self.crop_only = crop_only
 42 |         self.center_crop = center_crop
 43 |         self.max_words = max_words
 44 |         token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path))
 45 |         self.word_to_token = {}
 46 |         for i, t in enumerate(token_to_word):
 47 |             self.word_to_token[t] = i + 1
 48 | 
 49 |     def __len__(self):
 50 |         return len(self.data)
 51 | 
 52 |     def _get_video(self, video_path, start, end, num_clip):
 53 |         video = th.zeros(num_clip, 3, self.num_frames, self.size, self.size)
 54 |         start_ind = np.linspace(start, max(start, end-self.num_sec - 0.4), num_clip) 
 55 |         for i, s in enumerate(start_ind):
 56 |             video[i] = self._get_video_start(video_path, s) 
 57 |         return video
 58 | 
 59 |     def _get_video_start(self, video_path, start):
 60 |         start_seek = start
 61 |         cmd = (
 62 |             ffmpeg
 63 |             .input(video_path, ss=start_seek, t=self.num_sec + 0.1)
 64 |             .filter('fps', fps=self.fps)
 65 |         )
 66 |         if self.center_crop:
 67 |             aw, ah = 0.5, 0.5
 68 |         else:
 69 |             aw, ah = random.uniform(0, 1), random.uniform(0, 1)
 70 |         if self.crop_only:
 71 |             cmd = (
 72 |                 cmd.crop('(iw - {})*{}'.format(self.size, aw),
 73 |                          '(ih - {})*{}'.format(self.size, ah),
 74 |                          str(self.size), str(self.size))
 75 |             )
 76 |         else:
 77 |             cmd = (
 78 |                 cmd.crop('(iw - min(iw,ih))*{}'.format(aw),
 79 |                          '(ih - min(iw,ih))*{}'.format(ah),
 80 |                          'min(iw,ih)',
 81 |                          'min(iw,ih)')
 82 |                 .filter('scale', self.size, self.size)
 83 |             )
 84 |         out, _ = (
 85 |             cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
 86 |             .run(capture_stdout=True, quiet=True)
 87 |         )
 88 |         video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3])
 89 |         video = th.from_numpy(video)
 90 |         video = video.permute(3, 0, 1, 2)
 91 |         if video.shape[1] < self.num_frames:
 92 |             zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8)
 93 |             video = th.cat((video, zeros), axis=1)
 94 |         return video[:, :self.num_frames]
 95 | 
 96 |     def _split_text(self, sentence):
 97 |         w = re.findall(r"[\w']+", str(sentence))
 98 |         return w
 99 | 
100 |     def _words_to_token(self, words):
101 |         words = [self.word_to_token[word] for word in words if word in self.word_to_token]
102 |         if words:
103 |             we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words)
104 |             return we
105 |         else:
106 |             return th.zeros(self.max_words).long()
107 | 
108 |     def _zero_pad_tensor_token(self, tensor, size):
109 |         if len(tensor) >= size:
110 |             return tensor[:size]
111 |         else:
112 |             zero = th.zeros(size - len(tensor)).long()
113 |             return th.cat((tensor, zero), dim=0)
114 | 
115 |     def words_to_ids(self, x):
116 |         return self._words_to_token(self._split_text(x))
117 | 
118 |     def __getitem__(self, idx):
119 |         video_id = self.data['video_id'].values[idx]
120 |         task = self.data['task'].values[idx]
121 |         start = self.data['start'].values[idx]
122 |         end = self.data['end'].values[idx]
123 |         cap = self.data['text'].values[idx]
124 |         if os.path.isfile(os.path.join(self.video_root, 'youcook', str(task), video_id + '.mp4')):
125 |             video_path = os.path.join(self.video_root, 'youcook', str(task), video_id + '.mp4')
126 |         elif os.path.isfile(os.path.join(self.video_root, 'youcook', str(task), video_id + '.mkv')):
127 |             video_path = os.path.join(self.video_root, 'youcook', str(task), video_id + '.mkv')
128 |         elif os.path.isfile(os.path.join(self.video_root, 'youcook', str(task), video_id + '.webm')):
129 |             video_path = os.path.join(self.video_root, 'youcook', str(task), video_id + '.webm')
130 |         else:
131 |             raise ValueError
132 |         text = self.words_to_ids(cap)
133 |         video = self._get_video(video_path, start, end, self.num_clip)
134 |         return {'video': video, 'text': text}
135 | 
136 | 


--------------------------------------------------------------------------------
/src/eval_crosstask.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | warnings.simplefilter("ignore", UserWarning)
  3 | import os
  4 | import random
  5 | import socket
  6 | import time
  7 | import sys
  8 | 
  9 | root_path = os.getcwd()
 10 | sys.path.append(root_path)
 11 | import torch
 12 | import torch.optim as optim
 13 | import torch.nn.functional as F
 14 | import torch.utils.data
 15 | import torch.multiprocessing as mp
 16 | import torch.distributed as dist
 17 | import torch.backends.cudnn as cudnn
 18 | 
 19 | from metrics import ctr
 20 | from args import get_args
 21 | from loader.crosstask_loader import CrossTask_DataLoader
 22 | from s3dg import S3D
 23 | from tqdm import tqdm
 24 | import numpy as np
 25 | import time
 26 | from utils import AllGather
 27 | allgather = AllGather.apply
 28 | 
 29 | def main(args):
 30 |     model = deploy_model(args)
 31 |     test_dataset = CrossTask_DataLoader(data='./data/crosstask.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root, fps=args.fps,
 32 |                                         num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True, )
 33 |     test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
 34 |     test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 
 35 |                                             num_workers=args.num_thread_reader, sampler=test_sampler)
 36 | 
 37 |     all_video_embd, all_text_embd, task_id = test(test_loader, model, args)
 38 |     if args.gpu == 0:
 39 |         video_dict = {}
 40 |         for i in range(len(task_id)):
 41 |             if task_id[i].item() not in video_dict.keys():
 42 |                 video_dict[task_id[i].item()] = {}
 43 |                 video_dict[task_id[i].item()]['video_embd'] = []
 44 |                 video_dict[task_id[i].item()]['text_embd'] = []
 45 |             video_dict[task_id[i].item()]['video_embd'].append(all_video_embd[i])
 46 |             video_dict[task_id[i].item()]['text_embd'].append(all_text_embd[i])
 47 | 
 48 |         recall_list = []
 49 |         for task_id, videos in video_dict.items():
 50 |             all_video_embd = []
 51 |             all_text_embd = []
 52 |             for v, t in zip(videos['video_embd'], videos['text_embd']):
 53 |                 all_video_embd.append(np.expand_dims(v, 0))
 54 |                 all_text_embd.append(np.expand_dims(t, 0))
 55 |             all_video_embd = np.concatenate(all_video_embd, axis=0)
 56 |             all_text_embd = np.concatenate(all_text_embd, axis=0)
 57 |             similarity = np.dot(all_video_embd, all_text_embd.T)
 58 |             recall = ctr(similarity)
 59 |             recall_list.append(recall)
 60 |             
 61 |         print('CrossTask')
 62 |         print(f'CTR: {np.mean(recall_list):.2f}')
 63 |         with open('result.txt', 'a') as f:
 64 |             f.write('CrossTask\n')
 65 |             f.write(f'CTR: {np.mean(recall_list):.2f}\n')
 66 | 
 67 | def test(test_loader, model, args):
 68 |     all_text_embd = []
 69 |     all_video_embd = []
 70 |     video_id = []
 71 |     task_id = []
 72 |     with torch.no_grad():
 73 |         for i_batch, data in enumerate(tqdm(test_loader)):
 74 |             text = data['text'].cuda()
 75 |             video = data['video'].float().cuda()
 76 |             # video_id.append(data['video_id'].cuda())
 77 |             task_id.append(data['task_id'].cuda())
 78 |             
 79 |             video = video / 255.0
 80 |             video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5])
 81 |             video_embd, text_embd = model(video, text)
 82 |             video_embd = F.normalize(video_embd).view(text_embd.shape[0], args.num_windows_test, text_embd.shape[1])
 83 |             video_embd = video_embd.mean(dim=1)
 84 |             text_embd = F.normalize(text_embd)
 85 |             all_video_embd.append(video_embd)
 86 |             all_text_embd.append(text_embd)
 87 | 
 88 |     all_video_embd, all_text_embd = torch.cat(all_video_embd, dim=0), torch.cat(all_text_embd, dim=0)
 89 |     all_video_embd, all_text_embd = allgather(all_video_embd, args), allgather(all_text_embd, args)
 90 |     task_id = torch.cat(task_id, dim=0)
 91 |     task_id = allgather(task_id, args)
 92 |     return all_video_embd.cpu().numpy(), all_text_embd.cpu().numpy(), task_id.cpu().numpy()
 93 |     
 94 | 
 95 | def deploy_model(args):
 96 |     checkpoint_path = args.pretrain_cnn_path
 97 |     print("=> loading checkpoint '{}'".format(checkpoint_path))
 98 |     checkpoint = torch.load(checkpoint_path, map_location='cpu')
 99 |     torch.cuda.set_device(args.gpu)
100 |     model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path)
101 |     model.cuda(args.gpu)
102 |     checkpoint_module = {k[7:]:v for k,v in checkpoint.items()}
103 |     model.load_state_dict(checkpoint_module)
104 |     model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
105 |     model.eval()
106 |     
107 |     print(f'Model Loaded on GPU {args.gpu}')
108 |     return model
109 | 
def main_worker(gpu, ngpus_per_node, main, args):
    """Per-process entry point: join the NCCL process group, then run ``main``.

    Args:
        gpu: index of this process; stored as both ``args.gpu`` and
            ``args.rank`` and used as the rank in the process group.
        ngpus_per_node: number of processes; used as the process-group world size.
        main: callable invoked as ``main(args)`` once the group is initialised.
        args: argument namespace, updated in place with ``gpu``, ``rank`` and
            ``dist_url``.
    """
    cudnn.benchmark = True
    args.gpu = gpu
    args.rank = gpu
    # The UDP socket is only used to learn which local address the OS would
    # use for outbound traffic; close it as soon as the address is read.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(("8.8.8.8", 80))
        ip = s.getsockname()[0]
    args.dist_url = f'tcp://{ip}:12345'
    dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu)
    main(args)
120 | 
def spawn_workers(main, args):
    """Start eight ``main_worker`` processes through ``mp.spawn``.

    Args:
        main: callable handed on to every worker.
        args: argument namespace; ``world_size`` is set to 8 on it before it
            is forwarded to the workers.
    """
    num_procs = 8
    args.world_size = 8
    worker_args = (num_procs, main, args)
    mp.spawn(main_worker, nprocs=num_procs, args=worker_args)
125 | 
if __name__ == "__main__":
    args = get_args()
    # Evaluation setting fixed by this script, overriding the parsed value.
    args.num_windows_test = 1

    # Validate with an explicit raise: an `assert` is removed when Python runs
    # with -O, which would let an empty video root through to the workers.
    if args.eval_video_root == '':
        raise ValueError('args.eval_video_root must not be empty')
    spawn_workers(main, args)


--------------------------------------------------------------------------------
/loader/howto100m_loader.py:
--------------------------------------------------------------------------------
  1 | import torch as th
  2 | from torch.utils.data import Dataset
  3 | import pandas as pd
  4 | import os
  5 | import numpy as np
  6 | import random
  7 | import ffmpeg
  8 | import time
  9 | import re
 10 | import json
 11 | 
 12 | class HT100M_DataLoader(Dataset):
 13 |     """HowTo100M Video-Text loader."""
 14 | 
 15 |     def __init__(self, csv, video_root='', caption_root='', min_time=4.0, fps=16, num_frames=16, size=224, crop_only=False, center_crop=True,
 16 |                 benchmark=False, token_to_word_path='./data/dict.npy', max_words=20, num_candidates=1, num_clip=8, random_left_right_flip=False,):
 17 |         """
 18 |         Args:
 19 |         """
 20 |         assert isinstance(size, int)
 21 |         self.csv = pd.read_csv(os.path.join(os.path.dirname(__file__), csv))
 22 |         self.video_root = video_root
 23 |         self.caption_root = caption_root
 24 |         self.min_time = min_time
 25 |         self.size = size
 26 |         self.num_frames = num_frames
 27 |         self.fps = fps
 28 |         self.num_sec = self.num_frames / float(self.fps)
 29 |         self.crop_only = crop_only
 30 |         self.center_crop = center_crop
 31 |         self.benchmark = benchmark
 32 |         self.max_words = max_words
 33 |         token_to_word = np.load(os.path.join(os.path.dirname(__file__), token_to_word_path))
 34 |         self.word_to_token = {}
 35 |         for i, t in enumerate(token_to_word):
 36 |             self.word_to_token[t] = i + 1
 37 |         self.num_candidates = num_candidates
 38 |         self.random_flip = random_left_right_flip
 39 |         self.num_clip = num_clip
 40 | 
 41 |     def __len__(self):
 42 |         return len(self.csv)
 43 | 
 44 |     def _get_video(self, video_path, start, end):
 45 |         videos = th.zeros(self.num_clip, 3, self.num_frames, self.size, self.size)
 46 |         for i, (s, e) in enumerate(zip(start, end)):
 47 |             start_seek = random.randint(s, int(max(s, e - self.num_sec)))
 48 |             cmd = (
 49 |                 ffmpeg
 50 |                 .input(video_path, ss=start_seek, t=self.num_sec + 0.1)
 51 |                 .filter('fps', fps=self.fps)
 52 |             )
 53 |             if self.center_crop:
 54 |                 aw, ah = 0.5, 0.5
 55 |             else:
 56 |                 aw, ah = random.uniform(0, 1), random.uniform(0, 1)
 57 |             if self.crop_only:
 58 |                 cmd = (
 59 |                     cmd.crop('(iw - {})*{}'.format(self.size, aw),
 60 |                             '(ih - {})*{}'.format(self.size, ah),
 61 |                             str(self.size), str(self.size))
 62 |                 )
 63 |             else:
 64 |                 cmd = (
 65 |                     cmd.crop('(iw - min(iw,ih))*{}'.format(aw),
 66 |                             '(ih - min(iw,ih))*{}'.format(ah),
 67 |                             'min(iw,ih)',
 68 |                             'min(iw,ih)')
 69 |                     .filter('scale', self.size, self.size)
 70 |                 )
 71 |             if self.random_flip and random.uniform(0, 1) > 0.5:
 72 |                 cmd = cmd.hflip()
 73 |             out, _ = (
 74 |                 cmd.output('pipe:', format='rawvideo', pix_fmt='rgb24')
 75 |                 .run(capture_stdout=True, quiet=True)
 76 |             )
 77 |             video = np.frombuffer(out, np.uint8).reshape([-1, self.size, self.size, 3])
 78 |             video = th.from_numpy(video)
 79 |             video = video.permute(3, 0, 1, 2)
 80 |             if video.shape[1] < self.num_frames:
 81 |                 zeros = th.zeros((3, self.num_frames - video.shape[1], self.size, self.size), dtype=th.uint8)
 82 |                 video = th.cat((video, zeros), axis=1)
 83 |             videos[i] = video[:, :self.num_frames]
 84 |         return videos
 85 | 
 86 |     def _split_text(self, sentence):
 87 |         w = re.findall(r"[\w']+", str(sentence))
 88 |         return w
 89 | 
 90 |     def _words_to_token(self, words):
 91 |         words = [self.word_to_token[word] for word in words if word in self.word_to_token]
 92 |         if words:
 93 |             we = self._zero_pad_tensor_token(th.LongTensor(words), self.max_words)
 94 |             return we
 95 |         else:
 96 |             return th.zeros(self.max_words, dtype=th.long)
 97 | 
 98 |     def _zero_pad_tensor_token(self, tensor, size):
 99 |         if len(tensor) >= size:
100 |             return tensor[:size]
101 |         else:
102 |             zero = th.zeros(size - len(tensor)).long()
103 |             return th.cat((tensor, zero), dim=0)
104 | 
105 |     def words_to_ids(self, x):
106 |         return self._words_to_token(self._split_text(x))
107 | 
108 |     def _get_text(self, caption):
109 |         caption_json = open(caption, 'r')
110 |         cap = pd.DataFrame(json.load(caption_json))
111 |         start, end = [], []
112 |         words = th.zeros(self.num_clip, self.max_words, dtype=th.long)
113 |         if len(cap) < self.num_clip:
114 |             for i in range(self.num_clip):
115 |                 start.append(int(cap['start'].values[min(i, len(cap)-1)]))
116 |                 end.append(int(cap['end'].values[min(i, len(cap)-1)]))
117 |                 words[i] = self.words_to_ids(cap['text'].values[min(i, len(cap)-1)])
118 |         else:
119 |             ind = random.randint(0, len(cap) - self.num_clip)
120 |             for i in range(self.num_clip):
121 |                 start.append(int(cap['start'].values[ind + i]))
122 |                 end.append(int(cap['end'].values[ind + i]))
123 |                 words[i] = self.words_to_ids(cap['text'].values[ind + i])
124 |         return words, start, end
125 | 
126 |     def __getitem__(self, idx):
127 |         video_file = self.csv['video_path'][idx]
128 |         video_id = video_file.split('.')[0]
129 |         video_path = os.path.join(self.video_root, video_file)
130 |         text, start, end = self._get_text(os.path.join(self.caption_root, video_id + '.json'))
131 |         videos = self._get_video(video_path, start, end)
132 |         return {'video': videos, 'text': text, 'start': th.tensor(start), 'end': th.tensor(end)}
133 | 


--------------------------------------------------------------------------------
/src/eval_ucf.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | warnings.simplefilter("ignore", UserWarning)
  3 | import os
  4 | import random
  5 | import socket
  6 | import time
  7 | import sys
  8 | 
  9 | root_path = os.getcwd()
 10 | sys.path.append(root_path)
 11 | import torch
 12 | import torch.optim as optim
 13 | import torch.nn.functional as F
 14 | import torch.utils.data
 15 | import torch.multiprocessing as mp
 16 | import torch.distributed as dist
 17 | import torch.backends.cudnn as cudnn
 18 | 
 19 | from metrics import retrieval
 20 | from args import get_args
 21 | from loader.ucf_loader import UCF_DataLoader
 22 | from s3dg import S3D
 23 | from tqdm import tqdm
 24 | import numpy as np
 25 | import time
 26 | from utils import AllGather
 27 | from sklearn import preprocessing
 28 | from sklearn.svm import LinearSVC
 29 | 
 30 | allgather = AllGather.apply
 31 | 
 32 | def main(args):
 33 |     model = deploy_model(args)
 34 |     test_dataset = UCF_DataLoader(data='./data/ucf.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root,
 35 |                             num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True, with_flip=True, )
 36 |     test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
 37 |     test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 
 38 |                                             num_workers=args.num_thread_reader, sampler=test_sampler)
 39 | 
 40 |     all_video_embd, labels, split1, split2, split3 = test(test_loader, model, args)
 41 |     if args.gpu == 0:
 42 |         le = preprocessing.LabelEncoder()
 43 |         labels = le.fit_transform(labels)
 44 |         acc_list = []
 45 |         for reg in [100.0]:
 46 |             c = LinearSVC(C=reg)
 47 |             for split in range(3):
 48 |                 if split == 0:
 49 |                     s = split1
 50 |                 elif split == 1:
 51 |                     s = split2
 52 |                 else:
 53 |                     s = split3
 54 |                 X_train, X_test = all_video_embd[np.where(s == 1)[0]].reshape((-1, 1024)), all_video_embd[np.where(s == 2)[0]].reshape((-1, 1024))
 55 |                 label_train, label_test = labels[np.where(s == 1)[0]].repeat(args.num_windows_test), labels[np.where(s == 2)[0]]
 56 |                 print('Fitting SVM for split {} and C: {}'.format(split + 1, reg))
 57 |                 c.fit(X_train, label_train)
 58 |                 X_pred = c.decision_function(X_test)
 59 |                 X_pred = np.reshape(X_pred, (len(label_test), args.num_windows_test, -1))
 60 |                 X_pred = X_pred.sum(axis=1)
 61 |                 X_pred = np.argmax(X_pred, axis=1)
 62 |                 acc = np.sum(X_pred == label_test) / float(len(X_pred))  
 63 |                 print("Top 1 accuracy split {} and C {} : {}".format(split + 1, reg, acc))
 64 |                 acc_list.append(acc * 100)
 65 |         
 66 |         print('HMDB')
 67 |         print(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}')
 68 |         with open('result.txt', 'a') as f:
 69 |             f.write('UCF\n')
 70 |             f.write(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}\n')
 71 | 
 72 | def test(test_loader, model, args):
 73 |     all_video_embd = []
 74 |     labels = []
 75 |     split1 = []
 76 |     split2 = []
 77 |     split3 = []
 78 |     with torch.no_grad():
 79 |         for i_batch, data in enumerate(tqdm(test_loader)):
 80 |             split1.append(data['split1'].cuda())
 81 |             split2.append(data['split2'].cuda())
 82 |             split3.append(data['split3'].cuda())
 83 |             labels.append(data['label'].cuda())
 84 |             video = data['video'].float().cuda()
 85 |             video = video / 255.0
 86 |             video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5])
 87 |             video_embd = model(video, None, mode='video', mixed5c=True)
 88 |             video_embd = video_embd.view(len(data['label']), -1, video_embd.shape[1])
 89 |             all_video_embd.append(video_embd)
 90 | 
 91 |     all_video_embd = torch.cat(all_video_embd, dim=0)
 92 |     all_video_embd = allgather(all_video_embd, args)
 93 |     labels = torch.cat(labels, dim=0)
 94 |     labels = allgather(labels, args)
 95 |     split1, split2, split3 = torch.cat(split1, dim=0), torch.cat(split2, dim=0), torch.cat(split3, dim=0)
 96 |     split1, split2, split3 = allgather(split1, args), allgather(split2, args), allgather(split3, args)
 97 |     return all_video_embd.cpu().numpy(), labels.cpu().numpy(), split1.cpu().numpy(), split2.cpu().numpy(), split3.cpu().numpy()
 98 |     
 99 | 
def deploy_model(args):
    """Load the S3D checkpoint at ``args.pretrain_cnn_path`` onto ``args.gpu``.

    The checkpoint is read as a flat mapping of parameter names to tensors.
    A leading ``module.`` (the prefix ``DistributedDataParallel`` adds) is
    removed from each key before loading. Returns the model wrapped in
    ``DistributedDataParallel`` and switched to eval mode.
    """
    checkpoint_path = args.pretrain_cnn_path
    print("=> loading checkpoint '{}'".format(checkpoint_path))
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    torch.cuda.set_device(args.gpu)
    model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path)
    model.cuda(args.gpu)
    # Strip the prefix only when present: slicing the first seven characters
    # unconditionally mangled keys of checkpoints saved without the wrapper.
    prefix = 'module.'
    checkpoint_module = {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in checkpoint.items()
    }
    model.load_state_dict(checkpoint_module)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
    model.eval()

    print(f'Model Loaded on GPU {args.gpu}')
    return model
114 | 
def main_worker(gpu, ngpus_per_node, main, args):
    """Per-process entry point: join the NCCL process group, then run ``main(args)``.

    Args:
        gpu: process index supplied by ``mp.spawn``; used as both the device
            id and the rank.
        ngpus_per_node: number of spawned processes, used as the world size.
        main: callable invoked as ``main(args)`` after initialization.
        args: argument namespace; ``gpu``, ``rank``, ``world_size`` and
            ``dist_url`` are overwritten here.
    """
    cudnn.benchmark = True
    args.gpu = gpu
    args.rank = gpu
    # Keep args.world_size equal to the size passed to init_process_group
    # instead of a hard-coded 8.
    args.world_size = ngpus_per_node
    # Connecting a UDP socket only selects a route, which reveals the local
    # outbound address. The context manager closes the socket afterwards.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(("8.8.8.8", 80))
        ip = s.getsockname()[0]
    args.dist_url = f'tcp://{ip}:12345'
    dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu)
    main(args)
126 | 
def spawn_workers(main, args):
    """Start eight ``main_worker`` processes with ``mp.spawn``, one per process index."""
    num_procs = 8
    worker_args = (num_procs, main, args)
    mp.spawn(main_worker, nprocs=num_procs, args=worker_args)
130 | 
if __name__ == "__main__":
    args = get_args()
    # Explicit check instead of ``assert`` so the validation is not stripped
    # when Python runs with -O.
    if args.eval_video_root == '':
        raise ValueError('eval_video_root must not be empty')
    spawn_workers(main, args)


--------------------------------------------------------------------------------
/src/eval_hmdb.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | warnings.simplefilter("ignore", UserWarning)
  3 | import os
  4 | import random
  5 | import socket
  6 | import time
  7 | import sys
  8 | 
  9 | root_path = os.getcwd()
 10 | sys.path.append(root_path)
 11 | import torch
 12 | import torch.optim as optim
 13 | import torch.nn.functional as F
 14 | import torch.utils.data
 15 | import torch.multiprocessing as mp
 16 | import torch.distributed as dist
 17 | import torch.backends.cudnn as cudnn
 18 | 
 19 | from metrics import retrieval
 20 | from args import get_args
 21 | from loader.hmdb_loader import HMDB_DataLoader
 22 | from s3dg import S3D
 23 | from tqdm import tqdm
 24 | import numpy as np
 25 | import time
 26 | from utils import AllGather
 27 | from sklearn import preprocessing
 28 | from sklearn.svm import LinearSVC
 29 | 
 30 | allgather = AllGather.apply
 31 | 
 32 | def main(args):
 33 |     model = deploy_model(args)
 34 |     test_dataset = HMDB_DataLoader(data='./data/hmdb51.csv', num_clip=args.num_windows_test, video_root=args.eval_video_root,
 35 |                             num_frames=args.num_frames, size=args.video_size, crop_only=False, center_crop=True, with_flip=True, )
 36 |     test_sampler = torch.utils.data.distributed.DistributedSampler(test_dataset)
 37 |     test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=args.batch_size_val, shuffle=False, drop_last=False, 
 38 |                                             num_workers=args.num_thread_reader, sampler=test_sampler)
 39 | 
 40 |     all_video_embd, labels, split1, split2, split3 = test(test_loader, model, args)
 41 |     if args.gpu == 0:
 42 |         le = preprocessing.LabelEncoder()
 43 |         labels = le.fit_transform(labels)
 44 |         acc_list = []
 45 |         for reg in [100.0]:
 46 |             c = LinearSVC(C=reg)
 47 |             for split in range(3):
 48 |                 if split == 0:
 49 |                     s = split1
 50 |                 elif split == 1:
 51 |                     s = split2
 52 |                 else:
 53 |                     s = split3
 54 |                 X_train, X_test = all_video_embd[np.where(s == 1)[0]].reshape((-1, 1024)), all_video_embd[np.where(s == 2)[0]].reshape((-1, 1024))
 55 |                 label_train, label_test = labels[np.where(s == 1)[0]].repeat(args.num_windows_test), labels[np.where(s == 2)[0]]
 56 |                 print('Fitting SVM for split {} and C: {}'.format(split + 1, reg))
 57 |                 c.fit(X_train, label_train)
 58 |                 X_pred = c.decision_function(X_test)
 59 |                 X_pred = np.reshape(X_pred, (len(label_test), args.num_windows_test, -1))
 60 |                 X_pred = X_pred.sum(axis=1)
 61 |                 X_pred = np.argmax(X_pred, axis=1)
 62 |                 acc = np.sum(X_pred == label_test) / float(len(X_pred))  
 63 |                 print("Top 1 accuracy split {} and C {} : {}".format(split + 1, reg, acc))
 64 |                 acc_list.append(acc * 100)
 65 |         
 66 |         print('HMDB')
 67 |         print(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}')
 68 |         with open('result.txt', 'a') as f:
 69 |             f.write('\nHMDB\n')
 70 |             f.write(f'Split1: {acc_list[0]:.2f} - Split2: {acc_list[1]:.2f} - Split3: {acc_list[2]:.2f} - Mean: {np.mean(acc_list):.2f}\n')
 71 | 
 72 | def test(test_loader, model, args):
 73 |     all_video_embd = []
 74 |     labels = []
 75 |     split1 = []
 76 |     split2 = []
 77 |     split3 = []
 78 |     with torch.no_grad():
 79 |         for i_batch, data in enumerate(tqdm(test_loader)):
 80 |             split1.append(data['split1'].cuda())
 81 |             split2.append(data['split2'].cuda())
 82 |             split3.append(data['split3'].cuda())
 83 |             labels.append(data['label'].cuda())
 84 |             video = data['video'].float().cuda()
 85 |             video = video / 255.0
 86 |             video = video.view(-1, video.shape[2], video.shape[3], video.shape[4], video.shape[5])
 87 |             video_embd = model(video, None, mode='video', mixed5c=True)
 88 |             video_embd = video_embd.view(len(data['label']), -1, video_embd.shape[1])
 89 |             all_video_embd.append(video_embd)
 90 | 
 91 |     all_video_embd = torch.cat(all_video_embd, dim=0)
 92 |     all_video_embd = allgather(all_video_embd, args)
 93 |     labels = torch.cat(labels, dim=0)
 94 |     labels = allgather(labels, args)
 95 |     split1, split2, split3 = torch.cat(split1, dim=0), torch.cat(split2, dim=0), torch.cat(split3, dim=0)
 96 |     split1, split2, split3 = allgather(split1, args), allgather(split2, args), allgather(split3, args)
 97 |     return all_video_embd.cpu().numpy(), labels.cpu().numpy(), split1.cpu().numpy(), split2.cpu().numpy(), split3.cpu().numpy()
 98 |     
 99 | 
def deploy_model(args):
    """Load the S3D checkpoint at ``args.pretrain_cnn_path`` onto ``args.gpu``.

    The checkpoint is read as a flat mapping of parameter names to tensors.
    A leading ``module.`` (the prefix ``DistributedDataParallel`` adds) is
    removed from each key before loading. Returns the model wrapped in
    ``DistributedDataParallel`` and switched to eval mode.
    """
    checkpoint_path = args.pretrain_cnn_path
    print("=> loading checkpoint '{}'".format(checkpoint_path))
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    torch.cuda.set_device(args.gpu)
    model = S3D(args.num_class, space_to_depth=False, word2vec_path=args.word2vec_path)
    model.cuda(args.gpu)
    # Strip the prefix only when present: slicing the first seven characters
    # unconditionally mangled keys of checkpoints saved without the wrapper.
    prefix = 'module.'
    checkpoint_module = {
        (k[len(prefix):] if k.startswith(prefix) else k): v
        for k, v in checkpoint.items()
    }
    model.load_state_dict(checkpoint_module)
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu], find_unused_parameters=True)
    model.eval()

    print(f'Model Loaded on GPU {args.gpu}')
    return model
114 | 
def main_worker(gpu, ngpus_per_node, main, args):
    """Per-process entry point: join the NCCL process group, then run ``main(args)``.

    Args:
        gpu: process index supplied by ``mp.spawn``; used as both the device
            id and the rank.
        ngpus_per_node: number of spawned processes, used as the world size.
        main: callable invoked as ``main(args)`` after initialization.
        args: argument namespace; ``gpu``, ``rank``, ``world_size`` and
            ``dist_url`` are overwritten here.
    """
    cudnn.benchmark = True
    args.gpu = gpu
    args.rank = gpu
    # Keep args.world_size equal to the size passed to init_process_group
    # instead of a hard-coded 8.
    args.world_size = ngpus_per_node
    # Connecting a UDP socket only selects a route, which reveals the local
    # outbound address. The context manager closes the socket afterwards.
    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
        s.connect(("8.8.8.8", 80))
        ip = s.getsockname()[0]
    args.dist_url = f'tcp://{ip}:12345'
    dist.init_process_group(backend='nccl', init_method=args.dist_url, world_size=ngpus_per_node, rank=gpu)
    main(args)
126 | 
def spawn_workers(main, args):
    """Start eight ``main_worker`` processes with ``mp.spawn``, one per process index."""
    num_procs = 8
    worker_args = (num_procs, main, args)
    mp.spawn(main_worker, nprocs=num_procs, args=worker_args)
130 | 
if __name__ == "__main__":
    args = get_args()
    # Explicit check instead of ``assert`` so the validation is not stripped
    # when Python runs with -O.
    if args.eval_video_root == '':
        raise ValueError('eval_video_root must not be empty')
    spawn_workers(main, args)


--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
  1 | import warnings
  2 | warnings.simplefilter("ignore", UserWarning)
  3 | import os
  4 | import random
  5 | import time
  6 | import glob
  7 | import sys
  8 | from tqdm import tqdm
  9 | 
 10 | root_path = os.getcwd()
 11 | sys.path.append(root_path)
 12 | import numpy as np
 13 | import torch
 14 | import torch.nn as nn
 15 | import torch.nn.functional as F
 16 | import torch.nn.parallel
 17 | import torch.backends.cudnn as cudnn
 18 | import torch.distributed as dist
 19 | import torch.optim
 20 | import torch.multiprocessing as mp
 21 | import torch.utils.data
 22 | import torch.utils.data.distributed
 23 | 
 24 | from s3dg import S3D
 25 | from args import get_args
 26 | from loader.howto100m_loader import HT100M_DataLoader
 27 | from loss import S2DTW
 28 | from utils import AllGather, get_cosine_schedule_with_warmup
 29 | 
 30 | allgather = AllGather.apply
 31 | 
 32 | 
 33 | def main():
 34 |     args = get_args()
 35 |     if args.verbose:
 36 |         print(args)
 37 |     assert args.eval_video_root != '' or not(args.evaluate)
 38 |     assert args.video_path != ''
 39 |     assert args.caption_root != ''
 40 |     if args.seed is not None:
 41 |         random.seed(args.seed)
 42 |         torch.manual_seed(args.seed)
 43 | 
 44 |     args.multiprocessing_distributed = True
 45 |     args.evaluate = False
 46 |  
 47 |     args.distributed = args.world_size > 1 or args.multiprocessing_distributed
 48 |     args.world_size = torch.cuda.device_count()
 49 |     if args.multiprocessing_distributed:
 50 |         mp.spawn(main_worker, nprocs=args.world_size, args=(args.world_size, args))
 51 |     else:
 52 |         main_worker(args.gpu, args.world_size, args)
 53 | 
 54 | 
 55 | 
 56 | def main_worker(gpu, ngpus_per_node, args):
 57 |     args.gpu = gpu
 58 |     if args.distributed:
 59 |         dist.init_process_group(
 60 |             backend=args.dist_backend,
 61 |             init_method=args.dist_url,
 62 |             world_size=args.world_size,
 63 |             rank=args.gpu,
 64 |         )
 65 |     # create model
 66 |     model = S3D(args.num_class, space_to_depth=True, word2vec_path=args.word2vec_path, init=args.weight_init,)
 67 | 
 68 |     if args.distributed:
 69 |         if args.gpu is not None:
 70 |             torch.cuda.set_device(args.gpu)
 71 |             model.cuda(args.gpu)
 72 |             args.batch_size = int(args.batch_size / args.world_size)
 73 |             args.batch_size_val = int(args.batch_size_val / args.world_size)
 74 |             args.num_thread_reader = int(args.num_thread_reader / args.world_size)
 75 |             model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
 76 |         else:
 77 |             model.cuda()
 78 |             model = torch.nn.parallel.DistributedDataParallel(model)
 79 |     elif args.gpu is not None:
 80 |         torch.cuda.set_device(args.gpu)
 81 |         model = model.cuda(args.gpu)
 82 |     else:
 83 |         model = torch.nn.DataParallel(model).cuda()
 84 | 
 85 |     # Data loading code
 86 |     train_dataset = HT100M_DataLoader(
 87 |         csv=args.train_csv,
 88 |         video_root=args.video_path,
 89 |         caption_root=args.caption_root,
 90 |         min_time=args.min_time,
 91 |         fps=args.fps,
 92 |         num_frames=args.num_frames,
 93 |         size=args.video_size,
 94 |         crop_only=args.crop_only,
 95 |         center_crop=args.centercrop,
 96 |         random_left_right_flip=args.random_flip,
 97 |         num_candidates=args.num_candidates,
 98 |         num_clip = args.num_clip,
 99 |     )
100 | 
101 |     if args.distributed:
102 |         train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
103 |     else:
104 |         train_sampler = None
105 | 
106 |     train_loader = torch.utils.data.DataLoader(
107 |         train_dataset,
108 |         batch_size=args.batch_size,
109 |         shuffle=(train_sampler is None),
110 |         drop_last=True,
111 |         num_workers=args.num_thread_reader,
112 |         pin_memory=args.pin_memory,
113 |         sampler=train_sampler,
114 |     )
115 | 
116 |     criterion = S2DTW(args)
117 | 
118 |     if args.optimizer == 'adam':
119 |         optimizer = torch.optim.Adam(model.parameters(), args.lr)
120 |     elif args.optimizer == 'sgd':
121 |         optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momemtum)
122 | 
123 |     scheduler = get_cosine_schedule_with_warmup(optimizer, args.warmup_steps, len(train_loader) * args.epochs)
124 |     checkpoint_dir = os.path.join(os.path.dirname(__file__), 'checkpoint', args.checkpoint_dir)
125 |     if args.checkpoint_dir != '' and not(os.path.isdir(checkpoint_dir)) and args.rank == 0:
126 |         os.mkdir(checkpoint_dir)
127 | 
128 |     if args.cudnn_benchmark:
129 |         cudnn.benchmark = True
130 |     total_batch_size = args.world_size * args.batch_size 
131 |     log("Starting training loop for rank: {}, total batch size: {}".format(args.gpu, total_batch_size), args)
132 |     for epoch in tqdm(range(args.start_epoch, args.epochs)):
133 |         if args.distributed:
134 |             train_sampler.set_epoch(epoch)
135 |         train(train_loader, model, criterion, optimizer, scheduler, epoch, train_dataset, args)
136 |         if args.rank == 0:
137 |             save_checkpoint(
138 |                 {
139 |                     "epoch": epoch + 1,
140 |                     "state_dict": model.state_dict(),
141 |                     "optimizer": optimizer.state_dict(),
142 |                     "scheduler": scheduler.state_dict(),
143 |                 }, checkpoint_dir, epoch + 1
144 |             )
145 | 
146 | 
def train(train_loader, model, criterion, optimizer, scheduler, epoch, dataset, args):
    """Run one training epoch and periodically log the running loss.

    Every ``args.n_display`` batches, when ``args.verbose`` is set and
    ``args.rank == 0``, a progress line with the mean loss since the last
    report is written through ``log``.
    """
    running_loss = 0.0
    s = time.time()
    for i_batch, sample_batch in enumerate(train_loader):
        batch_loss = TrainOneBatch(model, optimizer, scheduler, sample_batch, criterion, epoch, args)
        # Accumulate a plain float. Adding the loss tensor itself keeps
        # autograd history alive, and on processes that never log the
        # accumulator is never reset, so that history would grow all epoch.
        running_loss += float(batch_loss)
        if (i_batch + 1) % args.n_display == 0 and args.verbose and args.rank == 0:
            d = time.time() - s
            log(f"Epoch {epoch+1:d}, Elapsed Time: {d:.3f}, Epoch status: {args.batch_size * args.world_size * float(i_batch) / len(dataset):.4f}, \
                Training loss: {running_loss / args.n_display:.4f}, Learning rates: {optimizer.param_groups[0]['lr']:.6f}", args)
            running_loss = 0.0
            s = time.time()
161 | 
def TrainOneBatch(model, opt, scheduler, data, loss_fun, epoch, args):
    """Run one optimization step on ``data`` and return the loss tensor.

    Video and text are moved to ``args.gpu``, the video is scaled by 1/255,
    and the first two dimensions of both inputs are folded together before the
    forward pass. Embeddings are L2-normalized, regrouped into
    ``args.num_clip`` clips per sample and, in distributed mode, gathered from
    all processes before the loss is computed.
    """
    non_blocking = args.pin_memory
    video = data["video"].float().cuda(args.gpu, non_blocking=non_blocking)
    text = data["text"].cuda(args.gpu, non_blocking=non_blocking)
    text = text.view(-1, text.shape[-1])
    video = video / 255.0
    shape = video.shape
    video = video.view(-1, shape[2], shape[3], shape[4], shape[5])

    opt.zero_grad()
    with torch.set_grad_enabled(True):
        video_embd, text_embd = model(video, text)
        video_dim = video_embd.shape[1]
        text_dim = text_embd.shape[1]
        video_embd = F.normalize(video_embd).view(-1, args.num_clip, video_dim)
        text_embd = F.normalize(text_embd).view(-1, args.num_clip, text_dim)
        if args.distributed:
            video_embd = allgather(video_embd, args)
            text_embd = allgather(text_embd, args)
        loss = loss_fun(video_embd, text_embd)
    loss.backward()
    opt.step()
    scheduler.step()
    return loss
181 | 
def save_checkpoint(state, checkpoint_dir, epoch, n_ckpt=10):
    """Save ``state`` to ``checkpoint_dir`` as ``epochNNNN.pth.tar`` (``n_ckpt`` is unused)."""
    filename = "epoch{:0>4d}.pth.tar".format(epoch)
    target = os.path.join(checkpoint_dir, filename)
    torch.save(state, target)
184 | 
def log(output, args):
    """Append ``output`` as one line to ``log/log.txt`` next to this file.

    The ``log`` directory is created on demand; previously the first call
    failed when it did not exist. ``args`` is accepted for call
    compatibility and is not used.
    """
    log_dir = os.path.join(os.path.dirname(__file__), 'log')
    # exist_ok makes concurrent creation from several processes harmless.
    os.makedirs(log_dir, exist_ok=True)
    with open(os.path.join(log_dir, 'log.txt'), "a") as f:
        f.write(output + '\n')
188 | 
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
191 | 


--------------------------------------------------------------------------------
/s3dg.py:
--------------------------------------------------------------------------------
  1 | """Contains the definition for Gated Separable 3D network (S3D-G).
  2 | """
  3 | 
  4 | import torch as th
  5 | import torch.nn.functional as F
  6 | import torch.nn as nn
  7 | import os
  8 | import numpy as np
  9 | import re
 10 | 
 11 | class InceptionBlock(nn.Module):
 12 | 
 13 |     def __init__(self, input_dim, num_outputs_0_0a, num_outputs_1_0a, num_outputs_1_0b, num_outputs_2_0a, num_outputs_2_0b, num_outputs_3_0b, gating=True):
 14 |         super(InceptionBlock, self).__init__()
 15 |         self.conv_b0 = STConv3D(input_dim, num_outputs_0_0a, [1, 1, 1])
 16 |         self.conv_b1_a = STConv3D(input_dim, num_outputs_1_0a, [1, 1, 1])
 17 |         self.conv_b1_b = STConv3D(num_outputs_1_0a, num_outputs_1_0b, [3, 3, 3], padding=1, separable=True)
 18 |         self.conv_b2_a = STConv3D(input_dim, num_outputs_2_0a, [1, 1, 1])
 19 |         self.conv_b2_b = STConv3D(num_outputs_2_0a, num_outputs_2_0b, [3, 3, 3], padding=1, separable=True)
 20 |         self.maxpool_b3 = th.nn.MaxPool3d((3, 3, 3), stride=1, padding=1)
 21 |         self.conv_b3_b = STConv3D(input_dim, num_outputs_3_0b, [1, 1, 1])
 22 |         self.gating = gating
 23 |         self.output_dim = num_outputs_0_0a + num_outputs_1_0b + num_outputs_2_0b + num_outputs_3_0b
 24 |         if gating:
 25 |             self.gating_b0 = SelfGating(num_outputs_0_0a)
 26 |             self.gating_b1 = SelfGating(num_outputs_1_0b)
 27 |             self.gating_b2 = SelfGating(num_outputs_2_0b)
 28 |             self.gating_b3 = SelfGating(num_outputs_3_0b)
 29 | 
 30 |     def forward(self, input):
 31 |         """Inception block
 32 |         """
 33 |         b0 = self.conv_b0(input)
 34 |         b1 = self.conv_b1_a(input)
 35 |         b1 = self.conv_b1_b(b1)
 36 |         b2 = self.conv_b2_a(input)
 37 |         b2 = self.conv_b2_b(b2)
 38 |         b3 = self.maxpool_b3(input)
 39 |         b3 = self.conv_b3_b(b3)
 40 |         if self.gating:
 41 |             b0 = self.gating_b0(b0)
 42 |             b1 = self.gating_b1(b1)
 43 |             b2 = self.gating_b2(b2)
 44 |             b3 = self.gating_b3(b3)
 45 |         return th.cat((b0, b1, b2, b3), dim=1)
 46 | 
 47 | class SelfGating(nn.Module):
 48 | 
 49 |     def __init__(self, input_dim):
 50 |         super(SelfGating, self).__init__()
 51 |         self.fc = nn.Linear(input_dim, input_dim)
 52 | 
 53 |     def forward(self, input_tensor):
 54 |         """Feature gating as used in S3D-G.
 55 |         """
 56 |         spatiotemporal_average = th.mean(input_tensor, dim=[2, 3, 4])
 57 |         weights = self.fc(spatiotemporal_average)
 58 |         weights = th.sigmoid(weights)
 59 |         return weights[:, :, None, None, None] * input_tensor
 60 | 
 61 | class STConv3D(nn.Module):
 62 | 
 63 |     def __init__(self,
 64 |                 input_dim,
 65 |                 output_dim,
 66 |                 kernel_size,
 67 |                 stride=1,
 68 |                 padding=0,
 69 |                 separable=False):
 70 |         super(STConv3D, self).__init__()
 71 |         self.separable = separable
 72 |         self.relu = nn.ReLU(inplace=True)
 73 |         assert len(kernel_size) == 3
 74 |         if separable and kernel_size[0] != 1:
 75 |             spatial_kernel_size = [1, kernel_size[1], kernel_size[2]]
 76 |             temporal_kernel_size = [kernel_size[0], 1, 1]
 77 |             if isinstance(stride, list) and len(stride) == 3:
 78 |                 spatial_stride = [1, stride[1], stride[2]]
 79 |                 temporal_stride = [stride[0], 1, 1]
 80 |             else:
 81 |                 spatial_stride = [1, stride, stride]
 82 |                 temporal_stride = [stride, 1, 1]
 83 |             if isinstance(padding, list) and len(padding) == 3:
 84 |                 spatial_padding = [0, padding[1], padding[2]]
 85 |                 temporal_padding = [padding[0], 0, 0]
 86 |             else:
 87 |                 spatial_padding = [0, padding, padding]
 88 |                 temporal_padding = [padding, 0, 0]
 89 |         if separable:
 90 |             self.conv1 = nn.Conv3d(input_dim, output_dim,
 91 |                                    kernel_size=spatial_kernel_size,
 92 |                                    stride=spatial_stride,
 93 |                                    padding=spatial_padding, bias=False)
 94 |             self.bn1 = nn.BatchNorm3d(output_dim)
 95 |             self.conv2 = nn.Conv3d(output_dim, output_dim,
 96 |                                    kernel_size=temporal_kernel_size,
 97 |                                    stride=temporal_stride,
 98 |                                    padding=temporal_padding, bias=False)
 99 |             self.bn2 = nn.BatchNorm3d(output_dim)
100 |         else:
101 |             self.conv1 = nn.Conv3d(input_dim, output_dim,
102 |                                    kernel_size=kernel_size, stride=stride,
103 |                                    padding=padding, bias=False)
104 |             self.bn1 = nn.BatchNorm3d(output_dim)
105 | 
106 | 
107 |     def forward(self, input):
108 |         out = self.relu(self.bn1(self.conv1(input)))
109 |         if self.separable:
110 |             out = self.relu(self.bn2(self.conv2(out)))
111 |         return out
112 | 
113 | 
def get_padding_shape(filter_shape, stride):
    """Compute per-dimension (before, after) padding for ``filter_shape`` and ``stride``.

    For each dimension the total padding is ``max(filter - stride, 0)``, split
    so that any odd remainder goes to the "after" side. The pair belonging to
    the first dimension is moved to the end of the returned flat tuple.
    """
    pairs = []
    for kernel, step in zip(filter_shape, stride):
        total = max(kernel - step, 0)
        before = total // 2
        pairs.append((before, total - before))
    # Rotate the first dimension's pair to the back.
    first_pair = pairs.pop(0)
    pairs.append(first_pair)
    return tuple(value for pair in pairs for value in pair)
132 | 
133 | 
class MaxPool3dTFPadding(th.nn.Module):
    """3D max-pooling with optional explicit zero padding computed by ``get_padding_shape``.

    Args:
        kernel_size: pooling window, one entry per pooled dimension.
        stride: pooling stride; ``None`` means "same as ``kernel_size``",
            matching ``MaxPool3d``'s own default.
        padding: ``'SAME'`` pads the input before pooling; any other value
            pools the input unpadded.
    """

    def __init__(self, kernel_size, stride=None, padding='SAME'):
        super(MaxPool3dTFPadding, self).__init__()
        if padding == 'SAME':
            # Use the stride MaxPool3d itself will use when none is given.
            effective_stride = kernel_size if stride is None else stride
            padding_shape = get_padding_shape(kernel_size, effective_stride)
            self.padding_shape = padding_shape
            self.pad = th.nn.ConstantPad3d(padding_shape, 0)
        else:
            # No padding requested: previously self.pad was left undefined
            # and forward() failed with AttributeError.
            self.pad = None
        self.pool = th.nn.MaxPool3d(kernel_size, stride, ceil_mode=True)

    def forward(self, inp):
        """Pad ``inp`` when padding is configured, then max-pool it."""
        if self.pad is not None:
            inp = self.pad(inp)
        return self.pool(inp)
147 | 
class Sentence_Embedding(nn.Module):
    """Sentence encoder: word embeddings -> linear + ReLU -> max over words -> linear.

    Args:
        embd_dim: size of the returned sentence embedding.
        token_to_word_path: path to a ``.npy`` array of vocabulary words; the
            word at position ``i`` receives id ``i + 1`` (id 0 is padding).
        num_embeddings: vocabulary size when no pretrained table is given.
        word_embedding_dim: word vector size (input size of ``fc1``).
        word2vec_path: optional path to a tensor loaded with ``th.load`` and
            used as the pretrained embedding table.
        max_words: number of token ids kept per sentence.
        output_dim: hidden size between ``fc1`` and ``fc2``.
    """

    def __init__(self,
                 embd_dim,
                 token_to_word_path,
                 num_embeddings=66250,
                 word_embedding_dim=300,
                 word2vec_path='',
                 max_words=16,
                 output_dim=2048):
        super(Sentence_Embedding, self).__init__()
        if word2vec_path:
            self.word_embd = nn.Embedding.from_pretrained(th.load(word2vec_path))
        else:
            self.word_embd = nn.Embedding(num_embeddings, word_embedding_dim)
        self.fc1 = nn.Linear(word_embedding_dim, output_dim)
        self.fc2 = nn.Linear(output_dim, embd_dim)
        self.max_words = max_words
        token_to_word = np.load(token_to_word_path)
        # Ids start at 1 so that 0 stays reserved for padding.
        self.word_to_token = {t: i + 1 for i, t in enumerate(token_to_word)}

    def _zero_pad_tensor_token(self, tensor, size):
        """Return ``tensor`` cut to ``size`` entries or right-padded with zeros."""
        if len(tensor) >= size:
            return tensor[:size]
        zero = th.zeros(size - len(tensor)).long()
        return th.cat((tensor, zero), dim=0)

    def is_cuda(self):
        """Report whether the ``fc1`` bias lives on a CUDA device."""
        return self.fc1.bias.is_cuda

    def _split_text(self, sentence):
        """Split a sentence into tokens of word characters and apostrophes."""
        return re.findall(r"[\w']+", str(sentence))

    def _words_to_token(self, words):
        """Map known words to ids and return a long tensor of length ``max_words``."""
        ids = [self.word_to_token[word] for word in words if word in self.word_to_token]
        if ids:
            return self._zero_pad_tensor_token(th.LongTensor(ids), self.max_words)
        return th.zeros(self.max_words).long()

    def words_to_ids(self, x):
        """Convert an iterable of sentences to a ``(len(x), max_words)`` id tensor on CPU."""
        split_x = [self._words_to_token(self._split_text(sent)) for sent in x]
        return th.stack(split_x, dim=0)

    def forward(self, x, raw_text=False):
        """Embed sentences.

        Args:
            x: tensor of token ids, or an iterable of strings when
                ``raw_text`` is true.
            raw_text: tokenize ``x`` before embedding.

        Returns:
            Tensor with one ``embd_dim`` vector per sentence.
        """
        if raw_text:
            # words_to_ids builds CPU tensors; move them next to the weights so
            # raw-text input also works when the module is on a GPU.
            x = self.words_to_ids(x).to(self.fc1.weight.device)
        # The word embedding lookup is excluded from gradient computation.
        with th.no_grad():
            x = self.word_embd(x)
        x = F.relu(self.fc1(x), inplace=True)
        # Max-pool over the word dimension.
        x = th.max(x, dim=1)[0]
        x = self.fc2(x)
        return x
205 | 
206 | 
class S3D(nn.Module):
    """Video network (3-D convolutions + Inception blocks) paired with a sentence encoder.

    ``forward`` dispatches to the video tower, the text tower, or both, depending on
    its ``mode`` argument.
    """

    def __init__(self, num_classes=512, gating=True, space_to_depth=False, word2vec_path='', init='uniform', token_to_word_path='./data/dict.npy'):
        """
        :param num_classes: Output size of the final video ``fc`` layer; also forwarded as the
                            first argument of ``Sentence_Embedding``.
        :param gating: NOTE(review): currently has no effect -- ``self.gating`` is always bound
                       to a ``SelfGating`` module below, which is truthy, so the gate is always
                       applied in ``forward_video``. Kept as-is to preserve existing behaviour.
        :param space_to_depth: If true, inputs are rearranged by ``_space_to_depth`` and a
                               24-channel stem convolution is used.
        :param word2vec_path: Path (relative to this file's directory) of pre-trained word
                              embeddings. An empty string requests a freshly initialised
                              embedding table.
        :param init: ``'kaiming_normal'`` re-initialises Conv3d/BatchNorm3d layers explicitly;
                     any other value leaves the layers' own initialisation untouched.
        :param token_to_word_path: Path (relative to this file's directory) of the vocabulary
                                   array loaded by ``Sentence_Embedding``.
        """
        super(S3D, self).__init__()
        self.num_classes = num_classes
        self.space_to_depth = space_to_depth
        if space_to_depth:
            self.conv1 = STConv3D(24, 64, [2, 4, 4], stride=1, padding=(1, 2, 2), separable=False)
        else:
            self.conv1 = STConv3D(3, 64, [3, 7, 7], stride=2, padding=(1, 3, 3), separable=False)
        self.conv_2b = STConv3D(64, 64, [1, 1, 1], separable=False)
        self.conv_2c = STConv3D(64, 192, [3, 3, 3], padding=1, separable=True)
        # The attribute name `gating` is kept so parameter names in saved state dicts do not change.
        self.gating = SelfGating(192)
        self.maxpool_2a = MaxPool3dTFPadding(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding='SAME')
        self.maxpool_3a = MaxPool3dTFPadding(kernel_size=(1, 3, 3), stride=(1, 2, 2), padding='SAME')
        self.mixed_3b = InceptionBlock(192, 64, 96, 128, 16, 32, 32)
        self.mixed_3c = InceptionBlock(self.mixed_3b.output_dim, 128, 128, 192, 32, 96, 64)
        self.maxpool_4a = MaxPool3dTFPadding(kernel_size=(3, 3, 3), stride=(2, 2, 2), padding='SAME')
        self.mixed_4b = InceptionBlock(self.mixed_3c.output_dim, 192, 96, 208, 16, 48, 64)
        self.mixed_4c = InceptionBlock(self.mixed_4b.output_dim, 160, 112, 224, 24, 64, 64)
        self.mixed_4d = InceptionBlock(self.mixed_4c.output_dim, 128, 128, 256, 24, 64, 64)
        self.mixed_4e = InceptionBlock(self.mixed_4d.output_dim, 112, 144, 288, 32, 64, 64)
        self.mixed_4f = InceptionBlock(self.mixed_4e.output_dim, 256, 160, 320, 32, 128, 128)
        # The same pooling module is registered under two attribute names.
        self.maxpool_5a = self.maxPool3d_5a_2x2 = MaxPool3dTFPadding(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding='SAME')
        self.mixed_5b = InceptionBlock(self.mixed_4f.output_dim, 256, 160, 320, 32, 128, 128)
        self.mixed_5c = InceptionBlock(self.mixed_5b.output_dim, 384, 192, 384, 48, 128, 128)
        self.fc = nn.Linear(self.mixed_5c.output_dim, num_classes)

        module_dir = os.path.dirname(__file__)
        # os.path.join(dir, '') yields 'dir/' (truthy), which would make Sentence_Embedding
        # try to load a directory. Keep an empty path empty so it falls back to a freshly
        # initialised embedding table.
        resolved_word2vec = os.path.join(module_dir, word2vec_path) if word2vec_path else ''
        self.text_module = Sentence_Embedding(
                               num_classes,
                               os.path.join(module_dir, token_to_word_path),
                               word2vec_path=resolved_word2vec)

        if init == 'kaiming_normal':
            for m in self.modules():
                if isinstance(m, nn.Conv3d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
                elif isinstance(m, nn.BatchNorm3d):
                    nn.init.constant_(m.weight, 1)
                    nn.init.constant_(m.bias, 0)

    def _space_to_depth(self, input):
        """Fold every 2x2x2 (T, H, W) neighbourhood into the channel axis.

        :param input: Tensor of shape (B, C, T, H, W).
        :return: Tensor of shape (B, 8 * C, T // 2, H // 2, W // 2).
        """
        B, C, T, H, W = input.shape
        input = input.view(B, C, T // 2, 2, H // 2, 2, W // 2, 2)
        # Move the three size-2 offset axes in front of the channel axis before merging them.
        input = input.permute(0, 3, 5, 7, 1,  2, 4, 6)
        input = input.contiguous().view(B, 8 * C, T // 2, H // 2, W // 2)
        return input

    def forward(self, video, text, mode='all', mixed5c=False):
        """Run the video tower, the text tower, or both.

        :param video: Input for ``forward_video`` (unused when ``mode == 'text'``).
        :param text: Input for the text module (unused when ``mode == 'video'``).
        :param mode: ``'all'`` returns ``(video_output, text_output)``; ``'video'`` and
                     ``'text'`` return only the respective output.
        :param mixed5c: Forwarded to ``forward_video`` in ``'video'`` mode only.
        :raises NotImplementedError: For any other ``mode``.
        """
        if mode == 'all':
            return self.forward_video(video), self.text_module(text)
        elif mode == 'video':
            return self.forward_video(video, mixed5c=mixed5c)
        elif mode == 'text':
            return self.text_module(text)
        else:
            raise NotImplementedError

    def forward_video(self, inputs, mixed5c=False):
        """Encode a video batch.

        :param inputs: Video tensor fed to the stem convolution (after the optional
                       space-to-depth rearrangement).
        :param mixed5c: If true, return the spatio-temporally averaged ``mixed_5c`` features
                        instead of the ``fc`` projection.
        """
        if self.space_to_depth:
            inputs = self._space_to_depth(inputs)
        # 'Conv2d_1a_7x7'
        net = self.conv1(inputs)
        if self.space_to_depth:
            # Drop the first index along T, H and W of the stem output.
            net = net[:, :, 1:, 1:, 1:]
        # 'MaxPool_2a_3x3'
        net = self.maxpool_2a(net)
        # 'Conv2d_2b_1x1'
        net = self.conv_2b(net)
        # 'Conv2d_2c_3x3'
        net = self.conv_2c(net)
        if self.gating:
            net = self.gating(net)
        # 'MaxPool_3a_3x3'
        net = self.maxpool_3a(net)
        # 'Mixed_3b'
        net = self.mixed_3b(net)
        # 'Mixed_3c'
        net = self.mixed_3c(net)
        # 'MaxPool_4a_3x3'
        net = self.maxpool_4a(net)
        # 'Mixed_4b'
        net = self.mixed_4b(net)
        # 'Mixed_4c'
        net = self.mixed_4c(net)
        # 'Mixed_4d'
        net = self.mixed_4d(net)
        # 'Mixed_4e'
        net = self.mixed_4e(net)
        # 'Mixed_4f'
        net = self.mixed_4f(net)
        # 'MaxPool_5a_2x2'
        net = self.maxpool_5a(net)
        # 'Mixed_5b'
        net = self.mixed_5b(net)
        # 'Mixed_5c'
        net = self.mixed_5c(net)
        # Global average over the T, H and W axes.
        net = th.mean(net, dim=[2, 3, 4])
        if mixed5c:
            return net
        net = self.fc(net)
        return net
329 | 


--------------------------------------------------------------------------------
/soft_dtw.py:
--------------------------------------------------------------------------------
  1 | # MIT License
  2 | #
  3 | # Copyright (c) 2020 Mehran Maghoumi
  4 | #
  5 | # Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | # of this software and associated documentation files (the "Software"), to deal
  7 | # in the Software without restriction, including without limitation the rights
  8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | # copies of the Software, and to permit persons to whom the Software is
 10 | # furnished to do so, subject to the following conditions:
 11 | #
 12 | # The above copyright notice and this permission notice shall be included in all
 13 | # copies or substantial portions of the Software.
 14 | #
 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 21 | # SOFTWARE.
 22 | # ----------------------------------------------------------------------------------------------------------------------
 23 | 
 24 | import numpy as np
 25 | import torch
 26 | import torch.cuda
 27 | import torch.nn as nn
 28 | from numba import jit
 29 | from torch.autograd import Function
 30 | from numba import cuda
 31 | import math
 32 | 
 33 | # ----------------------------------------------------------------------------------------------------------------------
@cuda.jit
def compute_softdtw_cuda(D, gamma, bandwidth, max_i, max_j, n_passes, R):
    """
    CUDA kernel that fills the soft-DTW accumulation table R one anti-diagonal at a time.

    :param D: Pairwise cost array, read as D[b, i - 1, j - 1]
    :param gamma: Soft-min smoothing parameter
    :param bandwidth: Pruning width; cells with |i - j| > bandwidth are skipped when bandwidth > 0
    :param max_i: Number of valid rows (threads with tid >= max_i do no work)
    :param max_j: Number of valid columns
    :param n_passes: Number of anti-diagonals to sweep (the caller in this file passes
                     2 * max(max_i, max_j) - 1)
    :param R: Output table, written in place at R[b, i, j] with 1-based i, j
    """
    # Each block processes one pair of examples
    b = cuda.blockIdx.x
    # We have as many threads as seq_len, because the most number of threads we need
    # is equal to the number of elements on the largest anti-diagonal
    tid = cuda.threadIdx.x

    # Compute I, J, the indices from [0, seq_len)

    # The row index is always the same as tid
    I = tid

    inv_gamma = 1.0 / gamma

    # Go over each anti-diagonal. Only process threads that fall on the current anti-diagonal
    for p in range(n_passes):

        # The index is actually 'p - tid' but need to force it in-bounds
        J = max(0, min(p - tid, max_j - 1))

        # For simplicity, we define i, j which start from 1 (offset from I, J)
        i = I + 1
        j = J + 1

        # Only compute if element[i, j] is on the current anti-diagonal, and also is within bounds
        if I + J == p and (I < max_i and J < max_j):
            # Don't compute if outside bandwidth
            if not (abs(i - j) > bandwidth > 0):
                r0 = -R[b, i - 1, j - 1] * inv_gamma
                r1 = -R[b, i - 1, j] * inv_gamma
                r2 = -R[b, i, j - 1] * inv_gamma
                # Soft-min over the three predecessors; rmax is subtracted before
                # exponentiating and added back after the log.
                rmax = max(max(r0, r1), r2)
                rsum = math.exp(r0 - rmax) + math.exp(r1 - rmax) + math.exp(r2 - rmax)
                softmin = -gamma * (math.log(rsum) + rmax)
                R[b, i, j] = D[b, i - 1, j - 1] + softmin

        # Wait for other threads in this block
        cuda.syncthreads()
 77 | 
 78 | # ----------------------------------------------------------------------------------------------------------------------
@cuda.jit
def compute_softdtw_backward_cuda(D, R, inv_gamma, bandwidth, max_i, max_j, n_passes, E):
    """
    CUDA kernel that fills the table E by sweeping the anti-diagonals in reverse order.

    :param D: Cost array, read at [k, i + 1, j], [k, i, j + 1] and [k, i + 1, j + 1]
              (the caller in this file passes a zero-padded copy)
    :param R: Accumulation table from the forward pass; infinite entries that are visited
              are overwritten with -inf in place
    :param inv_gamma: Reciprocal of the soft-min smoothing parameter
    :param bandwidth: Pruning width; cells with |i - j| > bandwidth are skipped when bandwidth > 0
    :param max_i: Number of valid rows
    :param max_j: Number of valid columns
    :param n_passes: Number of anti-diagonals to sweep
    :param E: Output table, written in place at E[k, i, j] with 1-based i, j
    """
    k = cuda.blockIdx.x
    tid = cuda.threadIdx.x

    # Indexing logic is the same as above, however, the anti-diagonal needs to
    # progress backwards
    I = tid

    for p in range(n_passes):
        # Reverse the order to make the loop go backward
        rev_p = n_passes - p - 1

        # convert tid to I, J, then i, j
        J = max(0, min(rev_p - tid, max_j - 1))

        i = I + 1
        j = J + 1

        # Only compute if element[i, j] is on the current anti-diagonal, and also is within bounds
        if I + J == rev_p and (I < max_i and J < max_j):

            # Any infinite entry (of either sign) is normalised to -inf before it is used below
            if math.isinf(R[k, i, j]):
                R[k, i, j] = -math.inf

            # Don't compute if outside bandwidth
            if not (abs(i - j) > bandwidth > 0):
                # Weights of the three successor cells (down, right, diagonal)
                a = math.exp((R[k, i + 1, j] - R[k, i, j] - D[k, i + 1, j]) * inv_gamma)
                b = math.exp((R[k, i, j + 1] - R[k, i, j] - D[k, i, j + 1]) * inv_gamma)
                c = math.exp((R[k, i + 1, j + 1] - R[k, i, j] - D[k, i + 1, j + 1]) * inv_gamma)
                E[k, i, j] = E[k, i + 1, j] * a + E[k, i, j + 1] * b + E[k, i + 1, j + 1] * c

        # Wait for other threads in this block
        cuda.syncthreads()
113 | 
114 | # ----------------------------------------------------------------------------------------------------------------------
class _SoftDTWCUDA(Function):
    """
    CUDA implementation is inspired by the diagonal one proposed in https://ieeexplore.ieee.org/document/8400444:
    "Developing a pattern discovery method in time series data and its GPU acceleration"
    """

    @staticmethod
    def forward(ctx, D, gamma, bandwidth):
        """
        Run the forward kernel on a batch of cost matrices.

        :param D: Cost tensor of shape (B, N, M)
        :param gamma: Soft-min smoothing parameter (Python scalar)
        :param bandwidth: Pruning width (Python scalar; 0 disables pruning in the kernel)
        :return: R[:, -2, -2], i.e. one accumulated value per batch element
        """
        dev = D.device
        dtype = D.dtype
        # Wrapped in tensors so they can be stored with ctx.save_for_backward below
        gamma = torch.cuda.FloatTensor([gamma])
        bandwidth = torch.cuda.FloatTensor([bandwidth])

        B = D.shape[0]
        N = D.shape[1]
        M = D.shape[2]
        threads_per_block = max(N, M)
        n_passes = 2 * threads_per_block - 1

        # Prepare the output array
        # (padded by one cell on every side; everything is +inf except the origin)
        R = torch.ones((B, N + 2, M + 2), device=dev, dtype=dtype) * math.inf
        R[:, 0, 0] = 0

        # Run the CUDA kernel.
        # Set CUDA's grid size to be equal to the batch size (every CUDA block processes one sample pair)
        # Set the CUDA block size to be equal to the length of the longer sequence (equal to the size of the largest diagonal)
        compute_softdtw_cuda[B, threads_per_block](cuda.as_cuda_array(D.detach()),
                                                   gamma.item(), bandwidth.item(), N, M, n_passes,
                                                   cuda.as_cuda_array(R))
        ctx.save_for_backward(D, R, gamma, bandwidth)
        return R[:, -2, -2]

    @staticmethod
    def backward(ctx, grad_output):
        """
        Run the backward kernel and scale its result by the incoming gradient.

        :param grad_output: Gradient with one value per batch element
        :return: Gradient with respect to D, followed by None for gamma and bandwidth
        """
        dev = grad_output.device
        dtype = grad_output.dtype
        D, R, gamma, bandwidth = ctx.saved_tensors

        B = D.shape[0]
        N = D.shape[1]
        M = D.shape[2]
        threads_per_block = max(N, M)
        n_passes = 2 * threads_per_block - 1

        # Zero-padded copy of D so the kernel can read index i + 1 / j + 1 at the border
        D_ = torch.zeros((B, N + 2, M + 2), dtype=dtype, device=dev)
        D_[:, 1:N + 1, 1:M + 1] = D

        # NOTE: the saved tensor R is modified in place here (and again inside the kernel)
        R[:, :, -1] = -math.inf
        R[:, -1, :] = -math.inf
        R[:, -1, -1] = R[:, -2, -2]

        E = torch.zeros((B, N + 2, M + 2), dtype=dtype, device=dev)
        E[:, -1, -1] = 1

        # Grid and block sizes are set same as done above for the forward() call
        compute_softdtw_backward_cuda[B, threads_per_block](cuda.as_cuda_array(D_),
                                                            cuda.as_cuda_array(R),
                                                            1.0 / gamma.item(), bandwidth.item(), N, M, n_passes,
                                                            cuda.as_cuda_array(E))
        # Strip the padding again
        E = E[:, 1:N + 1, 1:M + 1]
        return grad_output.view(-1, 1, 1).expand_as(E) * E, None, None
176 | 
177 | 
178 | # ----------------------------------------------------------------------------------------------------------------------
179 | #
180 | # The following is the CPU implementation based on https://github.com/Sleepwalking/pytorch-softdtw
181 | # Credit goes to Kanru Hua.
182 | # I've added support for batching and pruning.
183 | #
184 | # ----------------------------------------------------------------------------------------------------------------------
@jit(nopython=True)
def compute_softdtw(D, gamma, bandwidth):
    """
    CPU forward pass: build the soft-DTW accumulation table for a batch of cost matrices.

    :param D: Cost array of shape (B, N, M)
    :param gamma: Soft-min smoothing parameter
    :param bandwidth: Pruning width; cells with |i - j| > bandwidth are skipped when bandwidth > 0
    :return: Array of shape (B, N + 2, M + 2); cells that were never visited stay at +inf
    """
    n_batch = D.shape[0]
    n_rows = D.shape[1]
    n_cols = D.shape[2]

    # Padded table: +inf everywhere except the origin of every batch element.
    R = np.ones((n_batch, n_rows + 2, n_cols + 2)) * np.inf
    R[:, 0, 0] = 0

    for b in range(n_batch):
        for col in range(1, n_cols + 1):
            for row in range(1, n_rows + 1):
                pruned = 0 < bandwidth < np.abs(row - col)
                if not pruned:
                    diag = -R[b, row - 1, col - 1] / gamma
                    up = -R[b, row - 1, col] / gamma
                    left = -R[b, row, col - 1] / gamma
                    # Soft-min of the three predecessors, shifted by their maximum.
                    peak = max(max(diag, up), left)
                    total = np.exp(diag - peak) + np.exp(up - peak) + np.exp(left - peak)
                    softmin = - gamma * (np.log(total) + peak)
                    R[b, row, col] = D[b, row - 1, col - 1] + softmin
    return R
208 | 
209 | # ----------------------------------------------------------------------------------------------------------------------
@jit(nopython=True)
def compute_softdtw_backward(D_, R, gamma, bandwidth):
    """
    CPU backward pass: compute the table E from the forward table R.

    :param D_: Cost array of shape (B, N, M)
    :param R: Forward table of shape (B, N + 2, M + 2); modified in place
    :param gamma: Soft-min smoothing parameter
    :param bandwidth: Pruning width; cells with |i - j| > bandwidth are skipped when bandwidth > 0
    :return: E without its padding, shape (B, N, M)
    """
    n_batch = D_.shape[0]
    n_rows = D_.shape[1]
    n_cols = D_.shape[2]

    # Zero-padded copy of the costs so index + 1 is always valid.
    padded = np.zeros((n_batch, n_rows + 2, n_cols + 2))
    padded[:, 1:n_rows + 1, 1:n_cols + 1] = D_

    E = np.zeros((n_batch, n_rows + 2, n_cols + 2))
    E[:, -1, -1] = 1

    # Border set-up on the caller's R array.
    R[:, :, -1] = -np.inf
    R[:, -1, :] = -np.inf
    R[:, -1, -1] = R[:, -2, -2]

    for k in range(n_batch):
        for col in range(n_cols, 0, -1):
            for row in range(n_rows, 0, -1):

                # Normalise any infinite entry to -inf; this happens before the pruning test.
                if np.isinf(R[k, row, col]):
                    R[k, row, col] = -np.inf

                if 0 < bandwidth < np.abs(row - col):
                    continue

                here = R[k, row, col]
                w_down = np.exp((R[k, row + 1, col] - here - padded[k, row + 1, col]) / gamma)
                w_right = np.exp((R[k, row, col + 1] - here - padded[k, row, col + 1]) / gamma)
                w_diag = np.exp((R[k, row + 1, col + 1] - here - padded[k, row + 1, col + 1]) / gamma)
                E[k, row, col] = E[k, row + 1, col] * w_down + E[k, row, col + 1] * w_right + E[k, row + 1, col + 1] * w_diag
    return E[:, 1:n_rows + 1, 1:n_cols + 1]
241 | 
242 | # ----------------------------------------------------------------------------------------------------------------------
class _SoftDTW(Function):
    """
    Autograd wrapper around the CPU routines (based on https://github.com/Sleepwalking/pytorch-softdtw)
    """

    @staticmethod
    def forward(ctx, D, gamma, bandwidth):
        """
        :param D: Cost tensor of shape (B, N, M)
        :param gamma: Soft-min smoothing parameter (Python scalar)
        :param bandwidth: Pruning width (Python scalar)
        :return: R[:, -2, -2], one value per batch element
        """
        device, dtype = D.device, D.dtype
        # Scalars are stored as tensors of D's dtype so they can go through save_for_backward.
        gamma = torch.Tensor([gamma]).to(device).type(dtype)
        bandwidth = torch.Tensor([bandwidth]).to(device).type(dtype)
        table = compute_softdtw(D.detach().cpu().numpy(), gamma.item(), bandwidth.item())
        R = torch.Tensor(table).to(device).type(dtype)
        ctx.save_for_backward(D, R, gamma, bandwidth)
        return R[:, -2, -2]

    @staticmethod
    def backward(ctx, grad_output):
        """
        :param grad_output: Gradient with one value per batch element
        :return: Gradient with respect to D, followed by None for gamma and bandwidth
        """
        device, dtype = grad_output.device, grad_output.dtype
        D, R, gamma, bandwidth = ctx.saved_tensors
        costs = D.detach().cpu().numpy()
        table = R.detach().cpu().numpy()
        grad_table = compute_softdtw_backward(costs, table, gamma.item(), bandwidth.item())
        E = torch.Tensor(grad_table).to(device).type(dtype)
        return grad_output.view(-1, 1, 1).expand_as(E) * E, None, None
272 | 
273 | # ----------------------------------------------------------------------------------------------------------------------
class SoftDTW(torch.nn.Module):
    """
    The soft DTW implementation that optionally supports CUDA
    """

    def __init__(self, use_cuda, gamma=1.0, normalize=False, bandwidth=None, dist_func=None):
        """
        Initializes a new instance using the supplied parameters
        :param use_cuda: Flag indicating whether the CUDA implementation should be used
        :param gamma: sDTW's gamma parameter
        :param normalize: Flag indicating whether to perform normalization
                          (as discussed in https://github.com/mblondel/soft-dtw/issues/10#issuecomment-383564790)
        :param bandwidth: Sakoe-Chiba bandwidth for pruning. Passing 'None' will disable pruning.
        :param dist_func: Point-wise cost to use: one of 'euclidean', 'cosine', 'negative_dot',
                          or a callable taking (x, y) and returning a (batch, len_x, len_y) cost
                          tensor. If 'None', the 'euclidean' cost is used.
        :raises NotImplementedError: for 'negative_cosine', which has no implementation in this class.
        :raises ValueError: for any other unrecognised value.
        """
        super(SoftDTW, self).__init__()
        self.normalize = normalize
        self.gamma = gamma
        self.bandwidth = 0 if bandwidth is None else float(bandwidth)
        self.use_cuda = use_cuda

        # Set the distance function. Every path either binds self.dist_func or raises, so
        # forward() can never hit a missing attribute.
        if dist_func is None or dist_func == 'euclidean':
            self.dist_func = SoftDTW._euclidean_dist_func
        elif dist_func == 'cosine':
            self.dist_func = SoftDTW._cosine_dist_func
        elif dist_func == 'negative_dot':
            self.dist_func = SoftDTW._negative_dot_product
        elif dist_func == 'negative_cosine':
            # This option was listed but never backed by an implementation.
            raise NotImplementedError("SoftDTW: dist_func 'negative_cosine' is not implemented")
        elif callable(dist_func):
            self.dist_func = dist_func
        else:
            raise ValueError("SoftDTW: unknown dist_func {!r}".format(dist_func))

    def _get_func_dtw(self, x, y):
        """
        Checks the inputs and selects the proper implementation to use.
        """
        bx, lx, dx = x.shape
        by, ly, dy = y.shape
        # Make sure the dimensions match
        assert bx == by  # Equal batch sizes
        assert dx == dy  # Equal feature dimensions

        use_cuda = self.use_cuda

        # NOTE(review): this check looks at the raw sequence lengths, while the CUDA path sizes
        # its thread block from the cost matrix, which `_negative_dot_product` makes
        # (2 * len + 1) long per side -- confirm whether the limit should account for that.
        if use_cuda and (lx > 1024 or ly > 1024):  # We should be able to spawn enough threads in CUDA
                print("SoftDTW: Cannot use CUDA because the sequence length > 1024 (the maximum block size supported by CUDA)")
                use_cuda = False

        # Finally, return the correct function
        return _SoftDTWCUDA.apply if use_cuda else _SoftDTW.apply

    @staticmethod
    def _euclidean_dist_func(x, y):
        """
        Calculates exp(Euclidean distance) between each element in x and y per timestep
        :param x: Tensor of shape (batch, n, d)
        :param y: Tensor of shape (batch, m, d)
        :return: Tensor of shape (batch, n, m)
        """
        n = x.size(1)
        m = y.size(1)
        d = x.size(2)
        x = x.unsqueeze(2).expand(-1, n, m, d)
        y = y.unsqueeze(1).expand(-1, n, m, d)
        return torch.exp(torch.sqrt(torch.pow(x - y, 2).sum(3)))

    @staticmethod
    def _cosine_dist_func(x, y):
        """
        Calculates exp(1 - cosine similarity) between each element in x and y per timestep
        :param x: Tensor of shape (batch, n, d)
        :param y: Tensor of shape (batch, m, d)
        :return: Tensor of shape (batch, n, m)
        """
        n = x.size(1)
        m = y.size(1)
        d = x.size(2)
        x = x.unsqueeze(2).expand(-1, n, m, d)
        y = y.unsqueeze(1).expand(-1, n, m, d)
        distance = 1 - torch.nn.functional.cosine_similarity(x, y, dim=3)
        return torch.exp(distance)

    @staticmethod
    def _negative_dot_product(x, y):
        """
        Builds a cost matrix from negative dot products, locally smoothed and interleaved
        with constant-cost rows and columns.

        Each cost -z[i, j] is reduced by a soft-min (gamma = 0.1) over the negative dot
        products at (i, j - 1), (i - 1, j) and (i - 1, j - 1); at (0, 0) the three
        neighbours are taken as 0. The (m, n) result is then expanded to
        (2 * m + 1, 2 * n + 1): smoothed costs sit at odd row/column indices, every
        even-indexed row or column holds the constant 0.5.

        :param x: Tensor of shape (batch, m, d)
        :param y: Tensor of shape (batch, n, d)
        :return: Tensor of shape (batch, 2 * m + 1, 2 * n + 1) on the same device as the inputs
        """
        z = torch.matmul(x, y.transpose(1, 2))
        b, m, n = z.shape[0], z.shape[1], z.shape[2]

        # Shifted copies of -z, padded with +inf; allocated on z's device so CPU inputs work too.
        pad_shape = (b, m + 1, n + 1)
        a1 = torch.ones(pad_shape, device=z.device) * math.inf
        a2 = torch.ones(pad_shape, device=z.device) * math.inf
        a3 = torch.ones(pad_shape, device=z.device) * math.inf
        a1[:, :m, 1:n + 1] = -z        # neighbour (i, j - 1)
        a2[:, 1:m + 1, :n] = -z        # neighbour (i - 1, j)
        a3[:, 1:m + 1, 1:n + 1] = -z   # neighbour (i - 1, j - 1)
        a1[:, 0, 0] = 0
        a2[:, 0, 0] = 0
        a3[:, 0, 0] = 0
        gamma = 1e-1
        D = -z - gamma * torch.log(torch.exp(-a1 / gamma) + torch.exp(-a2 / gamma) + torch.exp(-a3 / gamma))[:, :m, :n]

        threshold = 0.5
        # Interleave a constant row after every row of D, then prepend one more constant row.
        D = torch.cat((D, torch.ones_like(z) * threshold), dim=2)
        D = D.reshape(b, 2 * m, n)
        D = torch.cat((torch.ones(b, 1, n, dtype=z.dtype).to(z.device) * threshold, D), dim=1)
        # Same trick for the columns: append a constant block, then fold it in via transpose + reshape.
        D = torch.cat((D, torch.ones_like(D) * threshold), dim=1)
        D = D.transpose(1, 2).reshape(b, 2 * n, 2 * m + 1).transpose(1, 2)
        D = torch.cat((torch.ones(b, 2 * m + 1, 1, dtype=z.dtype).to(z.device) * threshold, D), dim=2)
        return D

    def forward(self, X, Y):
        """
        Compute the soft-DTW value between X and Y
        :param X: One batch of examples, batch_size x seq_len x dims
        :param Y: The other batch of examples, batch_size x seq_len x dims
        :return: The computed results
        """

        # Check the inputs and get the correct implementation
        func_dtw = self._get_func_dtw(X, Y)

        if self.normalize:
            # Stack everything up and run
            x = torch.cat([X, X, Y])
            y = torch.cat([Y, X, Y])
            D = self.dist_func(x, y)
            out = func_dtw(D, self.gamma, self.bandwidth)
            out_xy, out_xx, out_yy = torch.split(out, X.shape[0])
            return out_xy - 1 / 2 * (out_xx + out_yy)
        else:
            D_xy = self.dist_func(X, Y)
            return func_dtw(D_xy, self.gamma, self.bandwidth)
399 | 
400 | # ----------------------------------------------------------------------------------------------------------------------
def timed_run(a, b, sdtw):
    """
    Push a and b through sdtw and measure the wall-clock time of the forward call plus the
    gradient computation with respect to a (a must require gradients).
    :return: (elapsed seconds, forward output, gradient of the output w.r.t. a)
    """
    from timeit import default_timer as timer

    # Forward pass
    tic = timer()
    forward = sdtw(a, b)
    toc = timer()
    elapsed = toc - tic

    # Seed the backward pass with ones, one per output element
    seed = torch.ones_like(forward)

    # Backward pass
    tic = timer()
    (grads,) = torch.autograd.grad(forward, a, grad_outputs=seed)
    toc = timer()

    elapsed += toc - tic
    return elapsed, forward, grads
426 | 
427 | # ----------------------------------------------------------------------------------------------------------------------
def profile(batch_size, seq_len_a, seq_len_b, dims, tol_backward):
    """
    Time the CPU and CUDA soft-DTW paths on random inputs and check that they agree.

    :param batch_size: Number of sequence pairs per run
    :param seq_len_a: Length of the first sequences
    :param seq_len_b: Length of the second sequences
    :param dims: Feature dimension of every time step
    :param tol_backward: Absolute tolerance used when comparing CPU and GPU gradients
    :raises AssertionError: if the CPU and GPU results differ beyond tolerance
    """
    # The distance function is named explicitly: constructing SoftDTW without one left the
    # module without a usable `dist_func`, so the very first forward call failed.
    sdtw = SoftDTW(False, gamma=1.0, normalize=False, dist_func='euclidean')
    sdtw_cuda = SoftDTW(True, gamma=1.0, normalize=False, dist_func='euclidean')
    n_iters = 6

    print("Profiling forward() + backward() times for batch_size={}, seq_len_a={}, seq_len_b={}, dims={}...".format(batch_size, seq_len_a, seq_len_b, dims))

    times_cpu = []
    times_gpu = []

    for i in range(n_iters):
        a_cpu = torch.rand((batch_size, seq_len_a, dims), requires_grad=True)
        b_cpu = torch.rand((batch_size, seq_len_b, dims))
        a_gpu = a_cpu.cuda()
        b_gpu = b_cpu.cuda()

        # GPU
        t_gpu, forward_gpu, backward_gpu = timed_run(a_gpu, b_gpu, sdtw_cuda)

        # CPU
        t_cpu, forward_cpu, backward_cpu = timed_run(a_cpu, b_cpu, sdtw)

        # Verify the results.
        assert torch.allclose(forward_cpu, forward_gpu.cpu())
        assert torch.allclose(backward_cpu, backward_gpu.cpu(), atol=tol_backward)
        print(backward_gpu.shape)
        if i > 0:  # Ignore the first time we run, in case this is a cold start (because timings are off at a cold start of the script)
            times_cpu += [t_cpu]
            times_gpu += [t_gpu]

    # Average and log
    avg_cpu = np.mean(times_cpu)
    avg_gpu = np.mean(times_gpu)
    print("\tCPU:     ", avg_cpu)
    print("\tGPU:     ", avg_gpu)
    print("\tSpeedup: ", avg_cpu / avg_gpu)
    print()
465 | 
466 | # ----------------------------------------------------------------------------------------------------------------------
# Script entry point: runs a single CPU-vs-GPU profiling configuration.
if __name__ == "__main__":
    # NOTE(review): `timer` is imported here but not used anywhere in this block.
    from timeit import default_timer as timer

    # Fixed seed so the random inputs drawn inside profile() are reproducible.
    torch.manual_seed(1234)

    # Other configurations, currently disabled:
    # profile(128, 17, 15, 2, tol_backward=1e-6)
    # profile(512, 64, 64, 2, tol_backward=1e-4)
    # profile(512, 256, 256, 2, tol_backward=1e-3)
    profile(32, 256, 256, 512, tol_backward=1e-1)
476 | 


--------------------------------------------------------------------------------
/data/msrvtt_test.csv:
--------------------------------------------------------------------------------
   1 | key,vid_key,video_id,sentence
   2 | ret0,msr9770,video9770,a person is connecting something to system
   3 | ret1,msr9771,video9771,a little girl does gymnastics
   4 | ret2,msr7020,video7020,a woman creating a fondant baby and flower
   5 | ret3,msr9773,video9773,a boy plays grand theft auto 5
   6 | ret4,msr7026,video7026,a man is giving a review on a vehicle
   7 | ret5,msr9775,video9775,a man speaks to children in a classroom
   8 | ret6,msr9776,video9776,one micky mouse is talking to other
   9 | ret7,msr7025,video7025,a naked child runs through a field
  10 | ret8,msr9778,video9778,a little boy singing in front of judges and crowd
  11 | ret9,msr9779,video9779,fireworks are being lit and exploding in a night sky
  12 | ret10,msr7028,video7028,a man is singing and standing in the road
  13 | ret11,msr7029,video7029,cartoon show for kids
  14 | ret12,msr9772,video9772,some cartoon characters are moving around an area
  15 | ret13,msr7021,video7021,baseball player hits ball
  16 | ret14,msr9774,video9774,a rocket is lauching into a blue sky smoke is emerging from the base of the rocket
  17 | ret15,msr7027,video7027,the man in the video is showing a brief viewing of how the movie is starting
  18 | ret16,msr9731,video9731,a woman is mixing food in a mixing bowl
  19 | ret17,msr7024,video7024,little pet shop cat getting a bath and washed with little brush
  20 | ret18,msr9777,video9777,a student explains to his teacher about the sheep of another student
  21 | ret19,msr8913,video8913,a video about different sports
  22 | ret20,msr8912,video8912,a family is having coversation
  23 | ret21,msr8911,video8911,a computer generated cartoon figure operates a control panel while another character sleeps in the background
  24 | ret22,msr8910,video8910,adding ingredients to a pizza
  25 | ret23,msr8917,video8917,two men discuss social issues
  26 | ret24,msr8916,video8916,cartoons of a sponge a squid and a starfish
  27 | ret25,msr8915,video8915,person cooking up somefood
  28 | ret26,msr8914,video8914,models are walking down a short runway
  29 | ret27,msr8919,video8919,a man is talking on stage
  30 | ret28,msr8918,video8918,a hairdresser and client speak to each other with kid voices
  31 | ret29,msr9545,video9545,some one talking about top ten movies of the year
  32 | ret30,msr7704,video7704,a man with a very red nose
  33 | ret31,msr7118,video7118,a young girl in a horror movie is haunted
  34 | ret32,msr7119,video7119,news reporters talk about a strange sight in part of san diego
  35 | ret33,msr7116,video7116,a group of people are walking a woman is talking about their culture
  36 | ret34,msr7117,video7117,people are singing on the beach
  37 | ret35,msr7114,video7114,a girl and a man are talking to each other
  38 | ret36,msr7115,video7115,a man is showing off a new vehicle
  39 | ret37,msr7112,video7112,while other friends too try and hitting the basket another is eager to achieve his fourth successful basket in basketball
  40 | ret38,msr7113,video7113,a child in pink watches a white bird in an open box
  41 | ret39,msr7110,video7110,there is a man shooting other people in a corridor
  42 | ret40,msr9542,video9542,sports vine clips of football
  43 | ret41,msr9679,video9679,the demonstration of mobile rack
  44 | ret42,msr8978,video8978,band performing a hard rock song about diamonds in the sky
  45 | ret43,msr8464,video8464,the ground rules of the republican presidential debate are introduced
  46 | ret44,msr7701,video7701,cartoon cars smiling talking and driving down a city road
  47 | ret45,msr7438,video7438,the articles of a shop is being shown
  48 | ret46,msr8899,video8899,some people cross a street in a busy road
  49 | ret47,msr8895,video8895,a man and a woman are singing on the beach
  50 | ret48,msr7431,video7431,a man talks about the publication of a nasa technical report
  51 | ret49,msr9549,video9549,video of a computer program while someone navigates through its menus
  52 | ret50,msr8829,video8829,two men talk about stealing land and the bible
  53 | ret51,msr8828,video8828,tv show presenters speak about will smith and other actors and their respective characters
  54 | ret52,msr8827,video8827,cartoon girl is talking
  55 | ret53,msr8826,video8826,lady speaking on her show
  56 | ret54,msr8825,video8825,a woman holds up a leather jacket
  57 | ret55,msr9548,video9548,a reporter asks questions at a gun range
  58 | ret56,msr8823,video8823,men are doing wrestling
  59 | ret57,msr8822,video8822,newscasters speak about a school shooting on the news program info wars
  60 | ret58,msr8821,video8821,two men and a women are sitting on the chair
  61 | ret59,msr8820,video8820,a person comes up in the hill on a orange motor bike and falls down
  62 | ret60,msr9518,video9518,people perform on the voice
  63 | ret61,msr9519,video9519,boy band performs for a crowd
  64 | ret62,msr7432,video7432,a person on the computer looking at data
  65 | ret63,msr9503,video9503,a woman is talking about how jeans with patches or rips is trendy
  66 | ret64,msr9510,video9510,pokemon video game play
  67 | ret65,msr7751,video7751,two women rare sitting speaking on sites in los angeles
  68 | ret66,msr7752,video7752,a man speaks on driving and a car model
  69 | ret67,msr9513,video9513,a woman is smiling
  70 | ret68,msr7754,video7754,this is a video of a live tv show
  71 | ret69,msr9515,video9515,a boy is singing
  72 | ret70,msr9516,video9516,people are cheering at a stadium
  73 | ret71,msr9517,video9517,a news program about overweight people
  74 | ret72,msr9677,video9677,a guy wearing a red shirt drives a car while talking
  75 | ret73,msr7436,video7436,a man gets shot in the face
  76 | ret74,msr7822,video7822,many ladies and men are walking upstairs inside of a caged fence
  77 | ret75,msr9204,video9204,a news reporter talks about a shooting
  78 | ret76,msr9205,video9205,a woman interviewing about her part in a protest happening in brazil
  79 | ret77,msr9206,video9206,a man punches a faucet to show how much better bottled water is
  80 | ret78,msr9207,video9207,a lady is put makeup especially around her eyes
  81 | ret79,msr9200,video9200,animals are communicating with one another through thought bubbles in three different scenes from a video game
  82 | ret80,msr9201,video9201,a trailer for an upcoming movie with people on a beach
  83 | ret81,msr9202,video9202,a girl singing song on stage
  84 | ret82,msr9203,video9203,the man is giving an informational speech to a group of people about telling someone something
  85 | ret83,msr8690,video8690,a blonde woman is speaking and then a radio button to subscribe to her webcasts appears
  86 | ret84,msr9208,video9208,a man is showing the foot well under the door jam area of the car
  87 | ret85,msr9209,video9209,the girl shoots her nerf bow
  88 | ret86,msr7753,video7753,a man is giving his commentary on a current event television show
  89 | ret87,msr9647,video9647,sports are being played
  90 | ret88,msr7669,video7669,three woman doing a fashion show to music
  91 | ret89,msr9349,video9349,guy showing how to make items
  92 | ret90,msr9348,video9348,women walking on the stage with different styles of dressing in a fashion show
  93 | ret91,msr9341,video9341,the press would rather interview the most ignorant person they can find than beautiful women
  94 | ret92,msr9340,video9340,one sad one trying to comfort
  95 | ret93,msr9343,video9343,woman is slicing the vegetables and decorating the meal
  96 | ret94,msr9342,video9342,the water safety teams arrives with the safety devices and water bike to save a person who had been drifted away
  97 | ret95,msr9345,video9345,cartoon birds are flying
  98 | ret96,msr9344,video9344,gameplay footage of someone playing a game
  99 | ret97,msr9347,video9347,a woman in red dress explaining about cushion seat
 100 | ret98,msr9346,video9346,a woman giving a photoshop tutorial
 101 | ret99,msr7826,video7826,a girl is preparing potato ball and explains the recipe
 102 | ret100,msr9885,video9885,a movie director talking to the media men in press conference regarding his movie and hero also
 103 | ret101,msr9882,video9882,a news reader is reading the news and asking question to some people
 104 | ret102,msr8578,video8578,a man is commentating while playing minecraft
 105 | ret103,msr7827,video7827,people are walking down a street holding signs
 106 | ret104,msr7035,video7035,there is someone playing a game in a computer
 107 | ret105,msr9581,video9581,serene music plays over scenery of mountains while a woman speaks
 108 | ret106,msr7034,video7034,man in black shirt is holding a baby upside down and talking about universal studios
 109 | ret107,msr9580,video9580,the animals are having nice time together and eating food
 110 | ret108,msr8573,video8573,a video of a rock group performing one of their songs
 111 | ret109,msr8572,video8572,a man grabs at snakes and throws them around the room
 112 | ret110,msr8346,video8346,a man talks about a war between two generals one of which became king
 113 | ret111,msr8347,video8347,a video of someone talking about some girls
 114 | ret112,msr8344,video8344,a man is driving a car through the countryside
 115 | ret113,msr8345,video8345,a woman feeds another
 116 | ret114,msr8342,video8342,girl in pink dress fashion model walking in ramp
 117 | ret115,msr8343,video8343,a man is holding a coffee mug
 118 | ret116,msr8340,video8340,a man and a woman stand in a bedroom
 119 | ret117,msr8341,video8341,cartoon characters are talking to a pokemon
 120 | ret118,msr7825,video7825,sitting and converstion 2 lady and 2 gents
 121 | ret119,msr8348,video8348,a woman looks after abandoned children for free in her home
 122 | ret120,msr8349,video8349,friends enjoy eating
 123 | ret121,msr9589,video9589,a woman plays instruments in a field
 124 | ret122,msr9837,video9837,video game footage of a killing spree during call of duty black ops
 125 | ret123,msr9836,video9836,a woman is stirring food
 126 | ret124,msr9835,video9835,the women sit at the lap top and talk to one another
 127 | ret125,msr8851,video8851,a group of indian guys meeting on a bridge while it s raining
 128 | ret126,msr9833,video9833,airport security figure explains requirements for transporting medicines
 129 | ret127,msr9832,video9832,a video game character rides around on a motorcycle
 130 | ret128,msr9831,video9831,a group of people are swimming in a boat a monkey is walking on the tree
 131 | ret129,msr9830,video9830,some people was swimming under the waterit is very good job
 132 | ret130,msr9839,video9839,an animated horse is in a barn and the maker asks for comments
 133 | ret131,msr9838,video9838,a cartoon clip of pokemon dancing
 134 | ret132,msr7111,video7111,it is the animation cartoon
 135 | ret133,msr8089,video8089,a police officer drives his white car onto a grassy field and then back on to the street
 136 | ret134,msr9728,video9728,a man talking about kuru disease
 137 | ret135,msr8122,video8122,a fearful animation scene
 138 | ret136,msr8123,video8123,a woman is giving demo for baby trolley
 139 | ret137,msr8120,video8120,a guest is speaking with a television show host
 140 | ret138,msr8121,video8121,there was a resistor in the back
 141 | ret139,msr7839,video7839,this is a vine sports compilation
 142 | ret140,msr7838,video7838,a guy is talking over loud music at the end of a youtube video
 143 | ret141,msr8124,video8124,basketball players making a shot in the last seven seconds
 144 | ret142,msr8125,video8125,a man in a flying contraption crashes in a field
 145 | ret143,msr7835,video7835,two cartoon characters walking underwater
 146 | ret144,msr7834,video7834,a man in striped collared shirt discusses jobs in news room of bloomberg
 147 | ret145,msr8128,video8128,a cartoon character prepares to ride a bicycle
 148 | ret146,msr7836,video7836,a girl with a hat on and dancing
 149 | ret147,msr7831,video7831,a young girl is abusing a young man
 150 | ret148,msr7830,video7830,this is about a young boy s photo shoot
 151 | ret149,msr7833,video7833,a man is talking about business
 152 | ret150,msr7832,video7832,a man and woman looking at each other on the subway
 153 | ret151,msr7506,video7506,a video game car is driving recklessly
 154 | ret152,msr7501,video7501,will smith has starred in a lot of movies including the movie ali
 155 | ret153,msr8423,video8423,a female inside a white themed bathroom while someone else makes her makeup
 156 | ret154,msr7500,video7500,a soccer team walking out on the field
 157 | ret155,msr7233,video7233,views of two persons working on the super computer with the head phones on
 158 | ret156,msr7232,video7232,the sky roads game on the computer
 159 | ret157,msr7231,video7231,a woman introducing someone
 160 | ret158,msr7230,video7230,a woman is talking about a baby stroller
 161 | ret159,msr7237,video7237,there is a man repairing a product on the table
 162 | ret160,msr7236,video7236,person walking around building
 163 | ret161,msr7235,video7235,cartoon of a squid on a bike looking up at a treehouse
 164 | ret162,msr7234,video7234,a woman flanked by two men are with a discussion
 165 | ret163,msr8650,video8650,a news convention is held
 166 | ret164,msr8651,video8651,a man talks about cars
 167 | ret165,msr8652,video8652,a man runs into the crowd when trying to catch a basketball
 168 | ret166,msr7502,video7502,a narrator explains where to find a rare vehicle in grand theft auto
 169 | ret167,msr8654,video8654,a woman cooking an orange substance
 170 | ret168,msr8655,video8655,an animal is throwing a piece of junk
 171 | ret169,msr8656,video8656,she used an electric blender
 172 | ret170,msr8657,video8657,a man rides his motorcycle to a building
 173 | ret171,msr7898,video7898,a salad in a bowl is being filmed on a table
 174 | ret172,msr9901,video9901,a man with glasses and a goatee talking about his former job
 175 | ret173,msr8459,video8459,mario and friends play in a video game together
 176 | ret174,msr8514,video8514,a person is putting the vegetable in to the water and boil it
 177 | ret175,msr7909,video7909,a woman holding a ribbon
 178 | ret176,msr8456,video8456,a man talks to someone and also the camera
 179 | ret177,msr8018,video8018,a young girl shopping
 180 | ret178,msr8457,video8457,a man rowing a kayak is shown in slow motion
 181 | ret179,msr8016,video8016,a dog and a cat are in a standoff
 182 | ret180,msr7900,video7900,a man talks about dna force
 183 | ret181,msr8014,video8014,a group of people are singing while holding coke in their hands
 184 | ret182,msr7902,video7902,a baby playing with a cats tail
 185 | ret183,msr8012,video8012,a woman jumps over a bar and attacks a man
 186 | ret184,msr8013,video8013,a video showing footage from sporting events
 187 | ret185,msr8010,video8010,lego stormtroppers are in a facility
 188 | ret186,msr8011,video8011,a girl talks about photos and her life
 189 | ret187,msr7890,video7890,person is talking about the big growth in the cities
 190 | ret188,msr7589,video7589,a man getting interviewed by a beach
 191 | ret189,msr7588,video7588,a man showing his finished product of a wood floor in his home
 192 | ret190,msr9745,video9745,a man is wearing a cap
 193 | ret191,msr9744,video9744,this is a live tv show
 194 | ret192,msr7587,video7587,two men in a wrestling competition
 195 | ret193,msr7586,video7586,woman playing instruments in a field for a music video
 196 | ret194,msr7581,video7581,lyrics are written for the song
 197 | ret195,msr7580,video7580,a woman speaking about scifi and fantasy disasters
 198 | ret196,msr7583,video7583,a princess tries not to cry in front of malificent
 199 | ret197,msr9742,video9742,the actor playing thor talking about the new movie
 200 | ret198,msr9909,video9909,a crowd of people sitting next to each other as one man plays a video game
 201 | ret199,msr7349,video7349,a grey haired man interviews someone else
 202 | ret200,msr7348,video7348,a girl is talking about relationships
 203 | ret201,msr7347,video7347,a woman is explaining how to do something on a computer
 204 | ret202,msr7346,video7346,some people video conferencing as they watch a movie
 205 | ret203,msr7345,video7345,a man is talking about making it easier for kids to learn while scenes of a school are shown
 206 | ret204,msr9908,video9908,a woman talking about education
 207 | ret205,msr7343,video7343,a guy fixing up another car
 208 | ret206,msr7342,video7342,opening of a nest a rate is coming out and searching something it eats something on a human hand
 209 | ret207,msr7341,video7341,in an interview a person in advocating education among the populace
 210 | ret208,msr7340,video7340,a man is talking about his car s features while inside his car
 211 | ret209,msr9403,video9403,i see a prince trying to get a girl and i see peter pan play around
 212 | ret210,msr8426,video8426,cabins on a sandy beach have walkways going up to their porches
 213 | ret211,msr9622,video9622,dogs are walking across the road in a video
 214 | ret212,msr8900,video8900,a woman bakes and decorates a cake
 215 | ret213,msr8901,video8901,men pushing a car down assembly line
 216 | ret214,msr8902,video8902,a man is looking out a window to look at another man who jumped from the window to his death
 217 | ret215,msr8903,video8903,there are crocodiles about to eat the group of people walking across the water
 218 | ret216,msr8904,video8904,a women is in an make up room telling about here itenary time wise
 219 | ret217,msr8905,video8905,two man s are talking to each other
 220 | ret218,msr8906,video8906,bbc news story about military crackdown in an unknown asian country
 221 | ret219,msr8907,video8907,scene of thor from the avengers
 222 | ret220,msr8908,video8908,a white man in a suit talking in front of a tv about logic and law
 223 | ret221,msr8909,video8909,list of people s involved in making the documentary
 224 | ret222,msr8835,video8835,a woman serves a bowl of soup woth stuff in it
 225 | ret223,msr7699,video7699,a man with head band is demonstrating how to play ping pong
 226 | ret224,msr7698,video7698,two women are walking in a parking lot
 227 | ret225,msr7169,video7169,a female nurse washes her hands and then cleans off a surface
 228 | ret226,msr7168,video7168,he is playing with ball
 229 | ret227,msr7163,video7163,a young man is touching a young girls back
 230 | ret228,msr7162,video7162,woman talking to a man in an interview
 231 | ret229,msr7693,video7693,this is a jigsaw puzzle video
 232 | ret230,msr7160,video7160,a cartoon character falls asleep on a couch
 233 | ret231,msr7695,video7695,a man is stirring something in a pot
 234 | ret232,msr7166,video7166,a person is explaining something
 235 | ret233,msr7165,video7165,a cook prepares food items in a metal bowl
 236 | ret234,msr7164,video7164,a computer animation using the source engine
 237 | ret235,msr9699,video9699,a man explains how to do a experiment
 238 | ret236,msr9698,video9698,a women is talking about the books she likes and the second favourite one is the amc the walking dead
 239 | ret237,msr9693,video9693,the chef adds fish sauce and fish paste to a large stainless steel cooking pot
 240 | ret238,msr9692,video9692,a man in a suit is talking on a television economy program
 241 | ret239,msr9691,video9691,a commercial for the mazda 3 the card sliding around a corner
 242 | ret240,msr9690,video9690,a man explains the condition of someone in the hospital to the press outside of a building
 243 | ret241,msr9697,video9697,the woman has a baby monitor
 244 | ret242,msr9696,video9696,a man folds up a stroller
 245 | ret243,msr9695,video9695,text explains about a pokemon expisode that caused seizures
 246 | ret244,msr9694,video9694,a guy barbequeing potatoes
 247 | ret245,msr7765,video7765,a person is discussing a car
 248 | ret246,msr9524,video9524,a bunch of cartoon faces are chomping their teeth and making eating gestures
 249 | ret247,msr7767,video7767,a slideshow with captions
 250 | ret248,msr7766,video7766,a video gamer is seen as he plays a video game
 251 | ret249,msr9521,video9521,a person is passing by a jolly group of men
 252 | ret250,msr8839,video8839,spongebob is showing memories of him with mr
 253 | ret251,msr7763,video7763,people are playing basketball
 254 | ret252,msr9522,video9522,a man is giving a presentation on stage
 255 | ret253,msr8834,video8834,a man is cooking in the kitchen he states he will return later to add to the dish
 256 | ret254,msr8056,video8056,fried potatoes are being eaten
 257 | ret255,msr8836,video8836,a woman is cooking food and a man is setting a table
 258 | ret256,msr8837,video8837,a young girl petting a dog that is laying on a couch
 259 | ret257,msr8830,video8830,this is a video of a confrence
 260 | ret258,msr7768,video7768,a 3d animation of a cabinet with plates
 261 | ret259,msr8832,video8832,a girl messaging her friend
 262 | ret260,msr8057,video8057,a women preparing a duck to roast
 263 | ret261,msr7567,video7567,a man checks out detail on a car
 264 | ret262,msr7566,video7566,a man explain why he uses edible blooms for gifts
 265 | ret263,msr7565,video7565,in a kitchen a woman is chopping tomatoes in a food processor
 266 | ret264,msr7564,video7564,a person juggling sticks on the summit of a snowy mountain
 267 | ret265,msr7563,video7563,a man is talking about appliances
 268 | ret266,msr7562,video7562,article about nasa s johnson space center being broadcasted on news
 269 | ret267,msr7561,video7561,playing with toy kitchen accessories
 270 | ret268,msr7560,video7560,person straightening out string of magnets and wrapping them around his finger
 271 | ret269,msr7569,video7569,a woman is singing
 272 | ret270,msr7568,video7568,the queen of england is seen walking with an entourage including a few islamic women a woman narrates how the queen created an outfit similar to the women in islam
 273 | ret271,msr9404,video9404,a lady talks into a megaphone
 274 | ret272,msr9428,video9428,presentation of the machine and its functions while working
 275 | ret273,msr7947,video7947,a news program with a woman interviewing a man about merchant market currencies
 276 | ret274,msr8831,video8831,many persons walking inside shoping mall hall and computer shop selling displaying on screen
 277 | ret275,msr7946,video7946,the man sitting in the black chair and wearing brown suit is talking
 278 | ret276,msr9131,video9131,two people wear and touch their masks made to resemble a camera cube with centered black lens one of which is covered in colorful flowers
 279 | ret277,msr9405,video9405,red balloons float in the sky and have packages tied to them
 280 | ret278,msr9231,video9231,few barbie dolls are playing one doll puts shoe to other barbie these are used to play by a kid
 281 | ret279,msr9230,video9230,a man talks to a little boy about not letting people convince him that anything he wants to do in life isn t possible
 282 | ret280,msr9233,video9233,a man is giving a speech
 283 | ret281,msr9232,video9232,a video game woman character runs down a hill followed by a dog running down the hill they both then run up the hill together
 284 | ret282,msr9235,video9235,men are loading their guns with ammunition
 285 | ret283,msr9234,video9234,two men walking and talking about the road
 286 | ret284,msr9237,video9237,preview for the movie insidious
 287 | ret285,msr9236,video9236,a man examining the length of a person s arm when opening a glove compartment
 288 | ret286,msr9239,video9239,a man kicks at a bull
 289 | ret287,msr9238,video9238,a man is singing and dancing in an elevator while people watch
 290 | ret288,msr8948,video8948,there is a man is talking with a commando
 291 | ret289,msr8119,video8119,guy explaining what stiff person syndrome is
 292 | ret290,msr9657,video9657,a monkey and a man feeding monkey with hand displaying on screen
 293 | ret291,msr8118,video8118,women of a foreign nation comb their hair and perform in traditional costumes
 294 | ret292,msr8949,video8949,woman with swimming suit is plunging into the water and gets rid of the suit
 295 | ret293,msr9975,video9975,a yellow sports car with a guy speaking about the car
 296 | ret294,msr9356,video9356,someone is arranging utensilslarge saucepan and spoons as a preparation to cook something
 297 | ret295,msr9357,video9357,a woman laughs until she chokes
 298 | ret296,msr9354,video9354,a red truck is burning while three men talk about a car
 299 | ret297,msr9355,video9355,a person describing a recording of a video game
 300 | ret298,msr9352,video9352,a man is talking to a crowd
 301 | ret299,msr9353,video9353,two women are outside and are discussing something in a foreign language
 302 | ret300,msr9350,video9350,screen showing some people talking
 303 | ret301,msr9351,video9351,a man is singing on stage to a huge audience he is holding a microphone
 304 | ret302,msr8111,video8111,people are using a computer software tool
 305 | ret303,msr9358,video9358,someone is talking about a car
 306 | ret304,msr9359,video9359,pictures of a beach while a voice talks about a shark attack
 307 | ret305,msr8110,video8110,a cartoon woman cries at a bench while a woman in blue appears
 308 | ret306,msr7741,video7741,a cartoon man in sunglasses waves at the crowd and smiles
 309 | ret307,msr8116,video8116,a dad plays video games with his son
 310 | ret308,msr7837,video7837,a person walks down a staircase surrounded by greenery and other foliage
 311 | ret309,msr8620,video8620,a woman is making a hair accessory
 312 | ret310,msr7468,video7468,a man jumps onto a ledge of a building
 313 | ret311,msr9409,video9409,a bowl of shrimp green onions and other assorted ingredients in a soup broth
 314 | ret312,msr9032,video9032,a hospital mortuary room and a doctor treat the special case
 315 | ret313,msr8622,video8622,a man has tried to solve a rubik s cube
 316 | ret314,msr7747,video7747,this is a scene from a disney cartoon set in ancient times
 317 | ret315,msr8824,video8824,a talk show with dr
 318 | ret316,msr8626,video8626,numbers are displayed on the screen
 319 | ret317,msr7462,video7462,he drew a beautiful picture
 320 | ret318,msr9623,video9623,a man is on a cell phone while people are fighting
 321 | ret319,msr9620,video9620,outer space pictures that have parts of equipments in them with water droplets on it
 322 | ret320,msr8420,video8420,a man sits in a large black truck adjusting mirror
 323 | ret321,msr8945,video8945,two men stand on a platform suspended high above the city
 324 | ret322,msr9451,video9451,kids feeding and playing with the horse
 325 | ret323,msr7461,video7461,a man playing video games
 326 | ret324,msr9450,video9450,a foreign military themed show
 327 | ret325,msr7466,video7466,a group of men carry a body covered in a sheet
 328 | ret326,msr7613,video7613,a female journalist wearing a purple shirt and white blazer is talking on a news show next to the image of a male in a dark suit
 329 | ret327,msr7467,video7467,disney movie video clips
 330 | ret328,msr9452,video9452,an animated grey shark in the middle of a blue water simulation background rotating in a circle on the screen of a monitor
 331 | ret329,msr7464,video7464,the video shows gameplay of a car racing video game
 332 | ret330,msr7615,video7615,a man is riding on horseback
 333 | ret331,msr9625,video9625,its a cooking recipe show with chicken vegetables
 334 | ret332,msr7614,video7614,a man picking out a vehicle from the trailer
 335 | ret333,msr8333,video8333,a woman is talking in the tv channel
 336 | ret334,msr8332,video8332,someone is assembling a small hello kitty oven toy
 337 | ret335,msr8331,video8331,a man teaching students in class
 338 | ret336,msr8330,video8330,a man and a woman are talking at a bus stop
 339 | ret337,msr8337,video8337,people act in a comedy program
 340 | ret338,msr8336,video8336,video of a church chior
 341 | ret339,msr8335,video8335,a beautiful waterfall is flowing into the pool of water below it as the camera pans around the area
 342 | ret340,msr8334,video8334,a cartoon of a dog running and howling
 343 | ret341,msr8339,video8339,kids in a circle play with beach ball and surf boards
 344 | ret342,msr7616,video7616,an orange sports car accelerates quickly
 345 | ret343,msr7619,video7619,cartoons are talking to each otehr
 346 | ret344,msr7491,video7491,a person explaining a concept in a show
 347 | ret345,msr9458,video9458,a person playing a video game
 348 | ret346,msr7744,video7744,a chef stirs up some ingredients inside of a pan
 349 | ret347,msr9512,video9512,a woman with blonde hair and a black shirt is talking
 350 | ret348,msr9824,video9824,two ladies sitting down and talking in an office room
 351 | ret349,msr9825,video9825,an intelligent man with glasses talk about certain phrenologists
 352 | ret350,msr9826,video9826,an animated micky is driving a car
 353 | ret351,msr9827,video9827,lady gaga sings in a music video
 354 | ret352,msr9820,video9820,a girl playing the game the sims
 355 | ret353,msr9821,video9821,a man giving a presentation and showing the planet earth
 356 | ret354,msr9822,video9822,a woman is talking
 357 | ret355,msr9823,video9823,various rugby scenes from different games
 358 | ret356,msr9514,video9514,inside of a toyota car with large space and safety
 359 | ret357,msr9828,video9828,assorted people are shown holding cute pets
 360 | ret358,msr9829,video9829,rusty houser talks about how hitler is loved from his results
 361 | ret359,msr7755,video7755,someone speaking about a violent act regarding the police
 362 | ret360,msr9520,video9520,a fat guy with a tie is looking at a man
 363 | ret361,msr7756,video7756,a group of women are singing
 364 | ret362,msr7757,video7757,a figure in camouflage clothing walks with bent knees through large cement rooms toward a door as people are being shot
 365 | ret363,msr7828,video7828,a man gives a lecture with a microphone in front of a laptop with a red glow behind him
 366 | ret364,msr7829,video7829,someone is putting a skeleton slide on a projector
 367 | ret365,msr8489,video8489,a tv channel named how to cook great foodcom is telling how to prepare a dish
 368 | ret366,msr8488,video8488,a woman is talking about facial care products
 369 | ret367,msr8485,video8485,foreign language music video
 370 | ret368,msr7823,video7823,an asian woman is talking about food
 371 | ret369,msr7820,video7820,a guy talks about how a car s alternator wasn t working
 372 | ret370,msr8486,video8486,a cartoon on a young guy cursing
 373 | ret371,msr8481,video8481,a woman is reporting on keds commercials
 374 | ret372,msr8480,video8480,impoverished children are eating and a man is talking
 375 | ret373,msr8483,video8483,someone is mixing ingredients in a bowl
 376 | ret374,msr8482,video8482,late night sneak peek preview
 377 | ret375,msr8249,video8249,batman is beating up bane in a scene from a batman movie
 378 | ret376,msr8248,video8248,the view of a boat and the water is blocked by clouds and fog
 379 | ret377,msr8247,video8247,a man is talking about cooking and presenting a thermometer and way of preparation
 380 | ret378,msr8246,video8246,there is a man working on a car
 381 | ret379,msr8245,video8245,a warrior is fighting a battle
 382 | ret380,msr8244,video8244,two men walk behind a couple of rhinoceroses one of which attacks a man
 383 | ret381,msr8243,video8243,man in black suit is having meeting with group of people
 384 | ret382,msr8242,video8242,miniature donkeys walking around and making noises
 385 | ret383,msr8241,video8241,several women in pink outfits and various other styles are standing and smiling
 386 | ret384,msr8240,video8240,a middle aged woman giving another woman a message
 387 | ret385,msr7220,video7220,threee kids sing together on the voice
 388 | ret386,msr8466,video8466,men are playing instruments in a band
 389 | ret387,msr7222,video7222,a cartoon man dances for his girl
 390 | ret388,msr7223,video7223,the small and large needles moving fast in a clock
 391 | ret389,msr8463,video8463,the beautiful scene on the screen
 392 | ret390,msr8462,video8462,guys play minecraft
 393 | ret391,msr8461,video8461,two kids talking to the camera and then a woman talking to the camera
 394 | ret392,msr7227,video7227,a man is singing
 395 | ret393,msr7228,video7228,a man is sitting and playing guitar
 396 | ret394,msr7229,video7229,a woman is talking about movies
 397 | ret395,msr8469,video8469,two parrots in a bird cage one white chick and on green adult
 398 | ret396,msr8468,video8468,someone is driving around the city in grand theft auto v
 399 | ret397,msr7916,video7916,girl is checking twitter
 400 | ret398,msr7917,video7917,shania twain does a closeup for her video
 401 | ret399,msr7914,video7914,a flower and other natural scenes are displaying
 402 | ret400,msr7915,video7915,a man in sunglasses and a blue shirt beat boxes
 403 | ret401,msr7912,video7912,peter is driving in the car
 404 | ret402,msr7913,video7913,a man describing how to do something in windows
 405 | ret403,msr7910,video7910,a man looks at the battery of a computer
 406 | ret404,msr7911,video7911,a woman is making a recipe in a sauce pan
 407 | ret405,msr8507,video8507,a girl doing gymnastics in the front yard
 408 | ret406,msr8628,video8628,a man is talking with his friends on a video game
 409 | ret407,msr7919,video7919,a character is jumping and floating in the air in a video game
 410 | ret408,msr8799,video8799,different letters are coming out and sounding out the way they sound
 411 | ret409,msr8798,video8798,the lighting work is going on the building
 412 | ret410,msr8069,video8069,an astronaut is looking at a flag
 413 | ret411,msr8068,video8068,a woman on a couch talks to a a man
 414 | ret412,msr9919,video9919,a man walks towards a woman and the woman breaks a bottle to defend her belongings
 415 | ret413,msr8791,video8791,a woman is demonstrating a nail painting technique
 416 | ret414,msr8790,video8790,two women are embracing
 417 | ret415,msr8061,video8061,comedy skit with dc comics character vane
 418 | ret416,msr8792,video8792,a girl is on the voice
 419 | ret417,msr8067,video8067,guys holding cups and talking
 420 | ret418,msr8794,video8794,a cartoon girl and animal jumping on body of male guy girl image still shown displaying on screen
 421 | ret419,msr8797,video8797,guys playing minecraft
 422 | ret420,msr8796,video8796,man talking about hiking
 423 | ret421,msr7598,video7598,a person is singing in a studio
 424 | ret422,msr7599,video7599,people compete in a backyard competition
 425 | ret423,msr9752,video9752,a man is talking and more peoples are in the round
 426 | ret424,msr7593,video7593,a group of friends are in a room talking with each other
 427 | ret425,msr9750,video9750,a man points his gun towards a woman in a room
 428 | ret426,msr7591,video7591,a woman is giving a demonstration about pancake presentation
 429 | ret427,msr7596,video7596,a man and a woman are singing a song in a stage
 430 | ret428,msr7597,video7597,a man drives while discussing his car
 431 | ret429,msr7594,video7594,water is being filtered in a tank with rocks
 432 | ret430,msr9755,video9755,advertisement of seat basket
 433 | ret431,msr7358,video7358,it is a vine compilation
 434 | ret432,msr7359,video7359,two people are preparing for sports
 435 | ret433,msr7354,video7354,a man is showing the interior of a car
 436 | ret434,msr7355,video7355,a hand print is outlined within the silhouette of a green dinosaur shape for a movie promotion
 437 | ret435,msr7356,video7356,a vehicle with details on what comes with it by carfax
 438 | ret436,msr7357,video7357,tennis players are involved in matches in large stadiums in front of large crowds
 439 | ret437,msr7350,video7350,two snakes are shown in containers
 440 | ret438,msr7351,video7351,a man is acting out a scene
 441 | ret439,msr7352,video7352,a movie scene starring morgan freeman and men in armor running
 442 | ret440,msr7353,video7353,a minecraft video game is being played
 443 | ret441,msr8027,video8027,a video game is played
 444 | ret442,msr8979,video8979,sports vine clips of basketball
 445 | ret443,msr8025,video8025,the opeing credit to gullah grub authentic cooking
 446 | ret444,msr8665,video8665,the tennis players wearing blue and red t shirts and play the tennis in the tennis court at the night time
 447 | ret445,msr8664,video8664,customers wait in line at an ice cream shop while employees tend to them
 448 | ret446,msr8667,video8667,music is playing and advertisements was showing
 449 | ret447,msr8666,video8666,a heart is shown
 450 | ret448,msr8661,video8661,a soccer player shoots a goal during a soccer game
 451 | ret449,msr8660,video8660,two teams play at the olympics
 452 | ret450,msr8663,video8663,gameplay footage of someone playing minecraft
 453 | ret451,msr8662,video8662,a man a woman cooking on a cooking show
 454 | ret452,msr8751,video8751,a person is doing cooking show and telling the ingredients
 455 | ret453,msr8669,video8669,anchor talking about a shows
 456 | ret454,msr8668,video8668,a man is highlighted playing basketball
 457 | ret455,msr8022,video8022,the intro to world news focus on africa plays and we are greeted by the bbc reporter based in johannesburg south africa
 458 | ret456,msr7686,video7686,a man looks up towards a cathedrals organ pipes and talks to a priest in a confessional
 459 | ret457,msr7687,video7687,a girl using her smartphone
 460 | ret458,msr7684,video7684,a woman talks about a skin care treatment she takes with her everwhere
 461 | ret459,msr7685,video7685,couples describing the logic behind movie scenes
 462 | ret460,msr7682,video7682,man and woman are showing affection
 463 | ret461,msr8938,video8938,a man is dodging bombs
 464 | ret462,msr7680,video7680,an ethiopian woman asks a child what she is good at
 465 | ret463,msr7681,video7681,a group of people are riding on a raft in a body of water
 466 | ret464,msr8935,video8935,man is crossing the street with big lion and friend
 467 | ret465,msr8934,video8934,cameras filming near accidents between cars in traffic
 468 | ret466,msr8937,video8937,this little light of mine song with different photos
 469 | ret467,msr8936,video8936,some girls are practicing gymnastics
 470 | ret468,msr8931,video8931,a man playing a video game character that is carrying a sword and killing animals with it
 471 | ret469,msr8930,video8930,a man speaks on a a news panel
 472 | ret470,msr7688,video7688,one of two guys walking on a carton of eggs with bare feet
 473 | ret471,msr8932,video8932,a man is giving an interview in a tv show
 474 | ret472,msr7178,video7178,an animated video game song
 475 | ret473,msr7179,video7179,two animated woman s are talking to eachother
 476 | ret474,msr7170,video7170,a girl practising her boy and arrow tricks
 477 | ret475,msr7171,video7171,the woman wearing the white top talks to the people in the audience
 478 | ret476,msr7172,video7172,the girl shows the boys her medal in this cartoon
 479 | ret477,msr7173,video7173,a woman is mixing nailpolish and putting an egg into it
 480 | ret478,msr7174,video7174,a man rides a lft to the top of a mountain
 481 | ret479,msr7175,video7175,the band member takes a seat
 482 | ret480,msr7176,video7176,a man is talking about space project adam
 483 | ret481,msr7177,video7177,girl is dancing in the garden
 484 | ret482,msr9688,video9688,a person looks at a celebrity on the computer
 485 | ret483,msr9689,video9689,spices being combined in a stainless steel bowl
 486 | ret484,msr8453,video8453,a man points a gun at another persons face
 487 | ret485,msr9680,video9680,on google earth is a man who is talking about the flights
 488 | ret486,msr9681,video9681,sports people are fighting on field
 489 | ret487,msr9682,video9682,a girl being surprised with a stuffed animal by male friend
 490 | ret488,msr9683,video9683,a man and woman performing in front of judes
 491 | ret489,msr9684,video9684,somebody slices white onion with sharp knife on the table
 492 | ret490,msr9685,video9685,people practising marshal arts
 493 | ret491,msr9686,video9686,a woman in lots of pain
 494 | ret492,msr9687,video9687,a man chopping lobster and taking off the shell
 495 | ret493,msr8712,video8712,a woman is showing nail polish
 496 | ret494,msr8450,video8450,a elecopter moving in air and red and yellow dress man hand touching speaking in snow land wearing helmet displaying on screen
 497 | ret495,msr9416,video9416,person is recording the brown horse which is having fun
 498 | ret496,msr7772,video7772,a gospel band playing
 499 | ret497,msr8808,video8808,a man talks back to a cop and handcuffs him to the table
 500 | ret498,msr9530,video9530,jeremy is describing a car
 501 | ret499,msr7771,video7771,a golf player is trying to hit the ball into the pit
 502 | ret500,msr7776,video7776,three soccer balls are laying in a field and then three men in black athletic cloths attempt to shoot a goal
 503 | ret501,msr9537,video9537,animated comic scene of guy cutting up food for dinner
 504 | ret502,msr9534,video9534,some men play a game of kickball
 505 | ret503,msr9535,video9535,a man is talking about a sports car he is driving
 506 | ret504,msr8801,video8801,three men talking about their youtube channel and thanking their viewers
 507 | ret505,msr8800,video8800,people are talking to each other
 508 | ret506,msr8803,video8803,a man is sweeping dust off the floor
 509 | ret507,msr9539,video9539,the men eat the mexican food
 510 | ret508,msr8805,video8805,a soldier is speaking to a superior person in a movie
 511 | ret509,msr8804,video8804,two men are in the classroom and having conversation
 512 | ret510,msr8807,video8807,a couple dancing doing salsa
 513 | ret511,msr8806,video8806,someone looking at a japanese book
 514 | ret512,msr7574,video7574,minecraft zombie kills player and takes heart
 515 | ret513,msr7575,video7575,a man speaking in a microphone
 516 | ret514,msr7576,video7576,men are being filmed in the darkness
 517 | ret515,msr7577,video7577,a girl explains about some studies showing some hands actions
 518 | ret516,msr7570,video7570,a very young baby is wearing a disguise and laughing
 519 | ret517,msr7571,video7571,a woman in black puts on blush while looking in a mirror
 520 | ret518,msr7572,video7572,foreign language cooking show
 521 | ret519,msr7573,video7573,a girl in white night wear dancing very sexy
 522 | ret520,msr7578,video7578,people are on stage talking
 523 | ret521,msr7579,video7579,a girl wearing red top and black trouser is putting a sweater on a dog
 524 | ret522,msr9969,video9969,two girls are sitting in the bed with a cat and talking
 525 | ret523,msr9228,video9228,guy in purple tshirt playing guitar as they drive through
 526 | ret524,msr9229,video9229,guys trying out ice cream
 527 | ret525,msr9226,video9226,a girl dresses up in a bright wig
 528 | ret526,msr9227,video9227,man shows how to prepare potatoes
 529 | ret527,msr9224,video9224,someone is playing a game
 530 | ret528,msr9225,video9225,a man discusses a bollywood celebrity
 531 | ret529,msr9222,video9222,a boy is trying out for a part on the voice kids
 532 | ret530,msr9223,video9223,a man and a woman are sitting in front of a television and addressing and audience
 533 | ret531,msr9220,video9220,a girl is painting easter designs on nails
 534 | ret532,msr9221,video9221,a cartoon clip is being played
 535 | ret533,msr7419,video7419,a man playing guitar and singing on the road side
 536 | ret534,msr7418,video7418,an indian man talking about iphones and a new type of clothing
 537 | ret535,msr9323,video9323,it is the video of military men
 538 | ret536,msr9322,video9322,a woman gets trapped in a burning trailer
 539 | ret537,msr9321,video9321,penguins wander around on ice
 540 | ret538,msr9320,video9320,a blonde man lies on a bed with a little baby
 541 | ret539,msr9327,video9327,men talking about and eating hot dogs
 542 | ret540,msr9326,video9326,two boys sneak up to a girls performing choir
 543 | ret541,msr9325,video9325,a man is driving a black car
 544 | ret542,msr9324,video9324,animated video showing a bottle rolling across an empty hallway
 545 | ret543,msr9329,video9329,a couple is shown
 546 | ret544,msr9328,video9328,a car is racing on road
 547 | ret545,msr7415,video7415,in the kitchen the chef is interviewed by a lady and the ingredients are kept on the table
 548 | ret546,msr7414,video7414,a female in a space age outfit crawling and dancing on the floor
 549 | ret547,msr7413,video7413,there is a guy talking to his father
 550 | ret548,msr7412,video7412,a man talks about visiting visiting a specific place to buy some things
 551 | ret549,msr7411,video7411,jolly good music troop delivering a program and the lady is in good spirit
 552 | ret550,msr7610,video7610,the olympics wight lifting photo is illustrated to explain gravitational force
 553 | ret551,msr7410,video7410,vladmir putin talks on the news about the fight against terrorism
 554 | ret552,msr8451,video8451,a group of young athletes race around a track
 555 | ret553,msr9508,video9508,men in a garage talk about a car they are going to restore
 556 | ret554,msr9957,video9957,a group of people are dancing in a room
 557 | ret555,msr8653,video8653,a man and woman are talking in a car
 558 | ret556,msr9039,video9039,two girls in design dress wearing cloth standing holding mic in hand on street and person walking beside discusing on topic
 559 | ret557,msr9038,video9038,a person is preparing some food
 560 | ret558,msr9037,video9037,a women is sitting with her baby and two people talking about that
 561 | ret559,msr9036,video9036,young children in red fire chief hats are guided to a fire engine and up its metal textured stairs
 562 | ret560,msr9035,video9035,a fatality from mortal kombat is shown
 563 | ret561,msr9034,video9034,several dogs playing dead
 564 | ret562,msr9033,video9033,man standing on the ledge of a vary tall building jumps off
 565 | ret563,msr8741,video8741,a tour through chinese architecture
 566 | ret564,msr9031,video9031,a lady is play the sims while talking about it
 567 | ret565,msr9030,video9030,animated pirates sing on a ship
 568 | ret566,msr7962,video7962,a pirate man tries to lift a lantern with his sword while on a boat
 569 | ret567,msr9419,video9419,a girl talking with long hair
 570 | ret568,msr8320,video8320,a young girl is showing everybody how to apply make up
 571 | ret569,msr8321,video8321,a man is talking about something
 572 | ret570,msr8322,video8322,a lady tries to ride on bicycle but fails
 573 | ret571,msr8323,video8323,a cat is layingon a couch and another islaying in a basket
 574 | ret572,msr8324,video8324,a rock band performs on stage
 575 | ret573,msr8325,video8325,a man cooks some food in a kitchen
 576 | ret574,msr8326,video8326,video showing helping attitude of human beings
 577 | ret575,msr8327,video8327,multi colored horses in a barn and outside in the snow
 578 | ret576,msr8328,video8328,a man discusses and shows traits of some computer functions
 579 | ret577,msr8329,video8329,a woman talks about horse racing
 580 | ret578,msr9418,video9418,a man in black suit is talking about deforestation and about climate change
 581 | ret579,msr9811,video9811,a group of actors sit in a control room and think about their next move together
 582 | ret580,msr9810,video9810,some women models pictures are shown as a slide show presentation and a women is talking
 583 | ret581,msr9813,video9813,a man drives a motorcycle in a video game
 584 | ret582,msr9812,video9812,a woman giving a photoshop tutorial
 585 | ret583,msr9815,video9815,a mashup of music videos is being played
 586 | ret584,msr9814,video9814,a scene from spongebob squarepants where the townspeople are carrying torches and chasing a giant squidward
 587 | ret585,msr9817,video9817,vines of sports are being played
 588 | ret586,msr9816,video9816,a little girl talking to her and is scared
 589 | ret587,msr9819,video9819,a reporter talks about a police incident
 590 | ret588,msr9818,video9818,a woman applies makeip to her eyes in double speed
 591 | ret589,msr9605,video9605,a special songs for the game
 592 | ret590,msr8556,video8556,a group of people are stamp dancing on stage in front of a crowd
 593 | ret591,msr8498,video8498,delicious and colorful food is in the bowl
 594 | ret592,msr8499,video8499,men working in surveillance room
 595 | ret593,msr8492,video8492,a man is very excited
 596 | ret594,msr8493,video8493,polar bear jumps into water then plays around while people watch
 597 | ret595,msr8490,video8490,the song hero by skillet with lyrics
 598 | ret596,msr8491,video8491,a police officer pulls a gun at a swimming pool
 599 | ret597,msr8496,video8496,a cute girl with nice headgear standing in a room is talking through a microphone
 600 | ret598,msr8497,video8497,a bus crashes into a car
 601 | ret599,msr8494,video8494,a girl is talking about a celebrity
 602 | ret600,msr8495,video8495,food in a refrigerator is displayed on shelves in containers and on a lazy suzan
 603 | ret601,msr9063,video9063,people on stage performing
 604 | ret602,msr8258,video8258,woman is using a baby stroller
 605 | ret603,msr8259,video8259,an interview is conducted
 606 | ret604,msr8254,video8254,a woman in a purple dress is talking on a video
 607 | ret605,msr8255,video8255,a man is playing piano
 608 | ret606,msr8256,video8256,someone playing the game dark souls
 609 | ret607,msr8257,video8257,a man is discussing some functions for a science expirement
 610 | ret608,msr8250,video8250,instructional video on home improvement subjects
 611 | ret609,msr8251,video8251,guy in tshirt playing guitar and singing song
 612 | ret610,msr8252,video8252,bride standing with a old man in formal wear
 613 | ret611,msr8253,video8253,vest of sports vines
 614 | ret612,msr8474,video8474,men and women sing in a choir on stage with a piano
 615 | ret613,msr8475,video8475,scrolling the the menu of movieclips with different movie trailers
 616 | ret614,msr8476,video8476,women are celebrating a soccer victory
 617 | ret615,msr8477,video8477,city limits photograph taken from high point in day time
 618 | ret616,msr8470,video8470,high school wrestling match
 619 | ret617,msr7218,video7218,episode from spongebob cartoon
 620 | ret618,msr8472,video8472,persons are attending a class with laptops
 621 | ret619,msr8473,video8473,a animated car going on the roads with blue mark on either sides
 622 | ret620,msr7215,video7215,person playing a game
 623 | ret621,msr7214,video7214,race cars of different colors lined up on a dark track
 624 | ret622,msr7217,video7217,a woman is advertising a stroller
 625 | ret623,msr7216,video7216,a woman giving skin care tips
 626 | ret624,msr8478,video8478,a cat is licking a baby
 627 | ret625,msr8479,video8479,a woman demonstrates how to cook chitlins
 628 | ret626,msr7213,video7213,a woman is making lasagna
 629 | ret627,msr7212,video7212,a man and a woman are walking a dog on a beach
 630 | ret628,msr8786,video8786,a group of people talking about stuff
 631 | ret629,msr8787,video8787,minecraft gamer puts on iron armor
 632 | ret630,msr7961,video7961,a band of singers and guitarists are performing on stage
 633 | ret631,msr7960,video7960,a man is running around and playing a guitar
 634 | ret632,msr7967,video7967,a news anchor is interviewing a person on screen
 635 | ret633,msr7966,video7966,cheese is being sliced
 636 | ret634,msr7965,video7965,man driving in a car an talking about the car
 637 | ret635,msr7964,video7964,the couples are kissing in the game of throne
 638 | ret636,msr7590,video7590,an emotional scene of two persons where they are crying on meeting
 639 | ret637,msr7969,video7969,person driving in car
 640 | ret638,msr7968,video7968,a foul mouthed chef demonstrates and describes a vegetable recipe
 641 | ret639,msr8788,video8788,a man plays a video game where the player has a first person perspective and shoots other characters
 642 | ret640,msr8789,video8789,a girl in blue color dress wearing siting speaking and television screen with black shirt man beside still image displaying on screen
 643 | ret641,msr8078,video8078,a man in his backyard talks tot he camera and is going to skin a snake
 644 | ret642,msr8079,video8079,several groups of people are kayaking on a waterway
 645 | ret643,msr8879,video8879,a tamil movie scene is being shown they travel through a auto riksha and speak about the importance of work
 646 | ret644,msr8070,video8070,man driving a car in a video game
 647 | ret645,msr8071,video8071,a man is talking about the making of the movie avatar
 648 | ret646,msr8072,video8072,a man talking about human relationships
 649 | ret647,msr8073,video8073,a pop singer singing while she standing on a step the video is shown double
 650 | ret648,msr8074,video8074,a girl sitting in a restaurant
 651 | ret649,msr8075,video8075,scenes of romantic film
 652 | ret650,msr8076,video8076,a web animation of a businessman
 653 | ret651,msr8077,video8077,a man playing a video game
 654 | ret652,msr7854,video7854,a guy talks outside in the snow at a ski resort
 655 | ret653,msr8392,video8392,a girl in black color dress wearing cloth sleeping and smoking and peeping into hole displaying on screen
 656 | ret654,msr7369,video7369,a man is talking about opening a laptop case
 657 | ret655,msr7368,video7368,a martial arts cartoon
 658 | ret656,msr7361,video7361,a woman with a camel
 659 | ret657,msr7360,video7360,a woman is showing shrimp and bunch of different other ingredients in order to cook a dish
 660 | ret658,msr7363,video7363,a man loads a clip into a pistol
 661 | ret659,msr7362,video7362,a person is cooking on stage
 662 | ret660,msr7365,video7365,cartoon people are eating at a restaurant
 663 | ret661,msr7364,video7364,a busy city street in far east is seen with people holding baskets on heads
 664 | ret662,msr7367,video7367,this is a rock band music video
 665 | ret663,msr7366,video7366,someone giving demo for some game and talking about that
 666 | ret664,msr7134,video7134,a class is being introduced to a digital reading device
 667 | ret665,msr7135,video7135,a man in a music video screams shut up a bunch of times
 668 | ret666,msr9400,video9400,someone is playing a game
 669 | ret667,msr8672,video8672,dog is drinking milk with baby nibble bottle
 670 | ret668,msr8673,video8673,a man discusses spongebob
 671 | ret669,msr8670,video8670,a black t shirted man with a hat talking about an event
 672 | ret670,msr7137,video7137,bill murray is covered in frosting
 673 | ret671,msr8676,video8676,a girl walking down a path
 674 | ret672,msr8677,video8677,two mermaid with red hair is sitting on a rock
 675 | ret673,msr8674,video8674,space explorers in red and silver suits float about in a black abyss filled with blue crystals
 676 | ret674,msr8675,video8675,a man kicks a ball
 677 | ret675,msr9406,video9406,a person giving his opinion on how crowded the world is
 678 | ret676,msr8678,video8678,a lady is walking in the beach
 679 | ret677,msr8679,video8679,a man discussed a few products
 680 | ret678,msr7131,video7131,athletes are getting ready and start running for an event
 681 | ret679,msr7145,video7145,a critic about wine speaks about the french wines
 682 | ret680,msr7144,video7144,a group of women are rubbing oil and milk all over a woman
 683 | ret681,msr8928,video8928,a woman walking along side a river in a bikini
 684 | ret682,msr7146,video7146,a puppy is crawling down some stairs
 685 | ret683,msr7141,video7141,a cartoon that is blurryb on a tv screen of a fashion run way
 686 | ret684,msr7140,video7140,someone is adding ingredients for a batter
 687 | ret685,msr7143,video7143,someone giving demo for some game
 688 | ret686,msr7142,video7142,an oriental femal is dressed in a pink wig with girlish clothing and is carrying a stuffed animal
 689 | ret687,msr8922,video8922,an old man shakes hands with another man and then they hug each other
 690 | ret688,msr8923,video8923,women are modeling clothes
 691 | ret689,msr8920,video8920,a kid unwrapping his presents
 692 | ret690,msr8921,video8921,someone ends their tutorial on their computer
 693 | ret691,msr8926,video8926,a man is playing a guitar with a band in a live concert
 694 | ret692,msr7148,video7148,a cheif is preparing a treat
 695 | ret693,msr8924,video8924,a man works on a computer s motherboard
 696 | ret694,msr8925,video8925,a band plays on a stage
 697 | ret695,msr7219,video7219,a man drives a red indianapolis 500 type race car around an asphalt track
 698 | ret696,msr7592,video7592,a dark skinned couple make love in bed the man is on top and the woman s blouse is partly transparent
 699 | ret697,msr8973,video8973,commercial for a service called eva which will help people find a vehicle
 700 | ret698,msr8471,video8471,a woman in a yellow top is holding a red drink
 701 | ret699,msr7649,video7649,there is a vehicle riding dangerously through forest
 702 | ret700,msr7138,video7138,a man talking about the two cars he test drove
 703 | ret701,msr8974,video8974,a pretty young girl talks to a man with a mustache
 704 | ret702,msr8618,video8618,an exploration about the title of professional video advertisement
 705 | ret703,msr8772,video8772,the man is making a sauce in the kitchen
 706 | ret704,msr8816,video8816,animated cartoon character is catching the ball
 707 | ret705,msr8817,video8817,a football video game is shown
 708 | ret706,msr8814,video8814,the judges make a decision
 709 | ret707,msr8815,video8815,a man walking into a room with two kids with red helmets
 710 | ret708,msr8812,video8812,shows a globe and a bunch of people
 711 | ret709,msr8813,video8813,a woman is interviewed on a tv talk show
 712 | ret710,msr8810,video8810,a woman plays guitar and sings for a televised competition
 713 | ret711,msr8811,video8811,the boy in karate dresswho is remembering the memorable hours with his father
 714 | ret712,msr7211,video7211,people share their thoughts on a boxing match
 715 | ret713,msr8818,video8818,several young girls are singing on stage
 716 | ret714,msr8819,video8819,a video is shown showing different cars
 717 | ret715,msr7723,video7723,a man plays an online multiplayer game and talks about how the game works
 718 | ret716,msr7528,video7528,a girl singing a song and her group were playing music
 719 | ret717,msr7789,video7789,a compilation of vine videos is shown
 720 | ret718,msr7788,video7788,two guys are wrestling in a competition
 721 | ret719,msr7787,video7787,various sports clips with music are shown
 722 | ret720,msr7786,video7786,a song with skeletons dancing
 723 | ret721,msr7785,video7785,person lighting a kettle
 724 | ret722,msr8774,video8774,there is a guy filling a toy with cotton to play with it
 725 | ret723,msr7783,video7783,long legged woman with black clothes is walking on the scene
 726 | ret724,msr7782,video7782,a girl talking to her dead mom
 727 | ret725,msr8727,video8727,a picture of what could be the moon or mars is on the screen
 728 | ret726,msr7780,video7780,a man giving information about the mormon game
 729 | ret727,msr9781,video9781,a women is explaining about the information of responders and the non responders for precision medicine
 730 | ret728,msr9780,video9780,an older woman who is blind is talking to a girl named lilly
 731 | ret729,msr7543,video7543,a video on how to make knots for fishing
 732 | ret730,msr9782,video9782,a man talking about a womans genital problems
 733 | ret731,msr7545,video7545,a person is playing a violin
 734 | ret732,msr9784,video9784,sygornie weaver pointing a gun and then running
 735 | ret733,msr7547,video7547,there is a woman in her roomshe is a net idol show her new vedio
 736 | ret734,msr7546,video7546,a man and a woman are talking about something
 737 | ret735,msr7549,video7549,a transvestite shows what she bought for her dog including shampoo and conditioner made by martha stuart
 738 | ret736,msr9788,video9788,a woman with long white hair and dressed all in white is sailing through the sky over snow covered ground
 739 | ret737,msr7725,video7725,young people sit around the edges of a room clapping and raising their arms while others dance in the center during a party
 740 | ret738,msr8721,video8721,man in grey shirt is having an interview in the building
 741 | ret739,msr9259,video9259,a car of 1970 is on the screen
 742 | ret740,msr9258,video9258,a person is using a phone
 743 | ret741,msr9253,video9253,people talking about their trip and how they are taken care of
 744 | ret742,msr9252,video9252,garage opening for a old bug to pull out to drive away
 745 | ret743,msr9251,video9251,sleeping beauty play promotion
 746 | ret744,msr9250,video9250,a woman talking about a white tank top
 747 | ret745,msr9257,video9257,a man puts his phone down to be charged
 748 | ret746,msr9256,video9256,a woman is making playdoh
 749 | ret747,msr9255,video9255,a soldier is laying down
 750 | ret748,msr9254,video9254,band playing music and people dancing
 751 | ret749,msr7469,video7469,a video of a young man in a white shirt inviting his colleagues to join him
 752 | ret750,msr7724,video7724,a lady describes about workout and exercises for women
 753 | ret751,msr7963,video7963,a machine drills holes in a section of metal piping
 754 | ret752,msr7558,video7558,two boys introducing a young lady who plays the cups and sings
 755 | ret753,msr8784,video8784,a woman makes crafts
 756 | ret754,msr9330,video9330,a man with a guitar sings on a farm
 757 | ret755,msr9331,video9331,a man drives his car down the road
 758 | ret756,msr9332,video9332,a girl plays a videogame
 759 | ret757,msr9333,video9333,barbecued cheese bacon burgers showcased
 760 | ret758,msr9334,video9334,in game footage of a mine craft character walking up stairs
 761 | ret759,msr9335,video9335,the person is frying the prawns and fish
 762 | ret760,msr9336,video9336,video game of a truck driving through desert obstacles
 763 | ret761,msr9337,video9337,some people are inside of a room
 764 | ret762,msr9338,video9338,a girl digging in the sand
 765 | ret763,msr9339,video9339,com long sheeps are in the big mountains
 766 | ret764,msr8783,video8783,a lady named lizzy is speaking about movies she is wearing a very nice outfit
 767 | ret765,msr8780,video8780,some people are shooting outside
 768 | ret766,msr7726,video7726,a woman is singing and pouring drinks
 769 | ret767,msr8781,video8781,a song from the movie beauty and the beast
 770 | ret768,msr7728,video7728,a woman swings her hair in front of a large sing on a brick wall
 771 | ret769,msr9028,video9028,news of marijuana business having trouble growing
 772 | ret770,msr9029,video9029,a player is putting a basket ball into the basket from distance
 773 | ret771,msr9024,video9024,some yellow text is on a purple and white screen
 774 | ret772,msr9025,video9025,an instructional video on painting your nails
 775 | ret773,msr9026,video9026,still photos from the 2002 movie the pianist are shown
 776 | ret774,msr9027,video9027,a group of young people are playing
 777 | ret775,msr9020,video9020,a person is looking at a camera during a wrestling event
 778 | ret776,msr9021,video9021,a pitcher throws a fastball
 779 | ret777,msr9022,video9022,a woman stirs up some soup sprinkles a spice in and drops a shot of liquid into it
 780 | ret778,msr9023,video9023,several shots of tv shows combined were shown here
 781 | ret779,msr8175,video8175,a man in a suit and a woman wearing brown giving the news
 782 | ret780,msr7849,video7849,a red haired woman holds a green parrot near shelves filled with bird food
 783 | ret781,msr9277,video9277,models are walking the runway as part of a fashion show
 784 | ret782,msr7844,video7844,bbc talking to guys with very expensive red sports car
 785 | ret783,msr8416,video8416,a video of different racially motivated protests is playing while the song everyone s a little bit racist plays
 786 | ret784,msr7845,video7845,a person chops up lettuce and a strainer of kidney beans is shown
 787 | ret785,msr8606,video8606,a man eats rice and a woman goes to the hospital
 788 | ret786,msr8756,video8756,a woman is dressed up in face paint
 789 | ret787,msr9119,video9119,a man talks about molecules and certain types of antibodies
 790 | ret788,msr8319,video8319,there is a man in black is walking in to the bridge
 791 | ret789,msr8318,video8318,a group discusses a man s outfit
 792 | ret790,msr8315,video8315,three men were discussing the national football league
 793 | ret791,msr8314,video8314,a woman dances around for a music video
 794 | ret792,msr8317,video8317,an indian woman is applying makeup between her hair
 795 | ret793,msr8316,video8316,a man is talking to an athlete
 796 | ret794,msr8311,video8311,a man is filming as he and a woman watch the news where it shows an area filled with smoke
 797 | ret795,msr8310,video8310,two wrestlers are fighting on a mat
 798 | ret796,msr8313,video8313,a woman is discussing a new video game
 799 | ret797,msr8312,video8312,the mountain views are from a boat on the center of a lake
 800 | ret798,msr8605,video8605,all womans singing and dancing
 801 | ret799,msr7843,video7843,a woman discusses how we can help children in a classroom
 802 | ret800,msr9639,video9639,women poring sauce to cooking vegetables
 803 | ret801,msr9600,video9600,handsome man plays guitar and sings
 804 | ret802,msr9117,video9117,a person in white color dress wearing cloth speaking on topic white board explayning and many persons sitting displaying on screen
 805 | ret803,msr9808,video9808,a man playing video games
 806 | ret804,msr9809,video9809,man talks in front of a green bicycle
 807 | ret805,msr9806,video9806,person is driving his black car fast on the street
 808 | ret806,msr9807,video9807,a planet is being filmed from space outside
 809 | ret807,msr9804,video9804,a display of clips of the movie there will be blood
 810 | ret808,msr9805,video9805,characters from video games are dancing to old mc donald had a farm
 811 | ret809,msr9802,video9802,a woman dances in the background while a guy doesn t move
 812 | ret810,msr9803,video9803,two guys wrestling at an event
 813 | ret811,msr9800,video9800,a car is in a wreck
 814 | ret812,msr9801,video9801,selena gomez clips of her videos and her dancing with a man in a tux while she wears pink
 815 | ret813,msr9603,video9603,a woman on her way out the door gets a call from a man standing in a store
 816 | ret814,msr8604,video8604,a cnn report is talking about their dogs
 817 | ret815,msr7444,video7444,a group of people watching a screen
 818 | ret816,msr7445,video7445,a man is talking and playing a video game
 819 | ret817,msr8601,video8601,a man is folding pieces of paper
 820 | ret818,msr9607,video9607,a commercial for the website called eharmony
 821 | ret819,msr8129,video8129,behind the scenes in a professional kitchen as the chefs work and the waiters run food can be a very noisy experience
 822 | ret820,msr8269,video8269,the man thought students should be given the freedom to learn
 823 | ret821,msr8268,video8268,cartoon play for kids
 824 | ret822,msr9609,video9609,two astronauts experiencing a tense situation before relaxing afterwards
 825 | ret823,msr8261,video8261,blonde woman with black nails is recording herself in a room
 826 | ret824,msr8260,video8260,man in white hoodie turns a page to tell you about an online store the url is available in the description
 827 | ret825,msr8263,video8263,a man drives aroud curves through a wooden mountainside
 828 | ret826,msr8262,video8262,wrestlers are involved in a large wrestling meet
 829 | ret827,msr8265,video8265,someone plays a guitar and sings on stage of a tv show
 830 | ret828,msr8264,video8264,women athletes taking their positions for a running race
 831 | ret829,msr8267,video8267,video of gymasts practicing to roll
 832 | ret830,msr8266,video8266,a robot is seen in a movie preview
 833 | ret831,msr8441,video8441,scene from a popular party
 834 | ret832,msr8440,video8440,a man hits another man while wrestling
 835 | ret833,msr8443,video8443,valencia vesus hokit in a wrestling match
 836 | ret834,msr8442,video8442,a person covers a popular song
 837 | ret835,msr8445,video8445,people enjoy the performance of singer
 838 | ret836,msr8444,video8444,two ladies in a cookery show explain how to marinate chicken already cleaned and ready with salt and cilantro sprigs
 839 | ret837,msr8447,video8447,a woman singing on the voice
 840 | ret838,msr8446,video8446,spongebob is talking to patrick while holding a butterfly net
 841 | ret839,msr7202,video7202,a group of people are dancing in a room
 842 | ret840,msr7203,video7203,there is no sound while the screen shows a person playing a computer game
 843 | ret841,msr7200,video7200,a female soccer player accepts a reward while being cheered on by the crowd
 844 | ret842,msr7201,video7201,lady and her dogs
 845 | ret843,msr7206,video7206,a helicopter is shown flying in what seems to be a war zone in syria
 846 | ret844,msr7207,video7207,young men discuss and demonstrate a video game
 847 | ret845,msr7204,video7204,a small boy is crying and a car is showed
 848 | ret846,msr7205,video7205,hands rubbing together in the dark and band members singing and playing the guitar
 849 | ret847,msr9474,video9474,a man talking about finances
 850 | ret848,msr8999,video8999,showing anushka sharma bollywood actress
 851 | ret849,msr9511,video9511,a person is riding red car on road
 852 | ret850,msr9472,video9472,a girl wearing a dress stands to the side of the screen while lyrics to a song playing in the background appear on the other side
 853 | ret851,msr7970,video7970,two child playing in the house
 854 | ret852,msr8044,video8044,a goat attacks a man and the man fights back
 855 | ret853,msr7972,video7972,a man shows how a video game works
 856 | ret854,msr7973,video7973,someone is frying food
 857 | ret855,msr7974,video7974,a woman is ripping off a man clothes
 858 | ret856,msr9578,video9578,a diver goes underwater
 859 | ret857,msr7976,video7976,a young boy rocks out on a guitar
 860 | ret858,msr7977,video7977,someone is drawing pictures
 861 | ret859,msr7978,video7978,a team with blue uniforms are playing badmitten with a team in white
 862 | ret860,msr7979,video7979,people talking about a fight
 863 | ret861,msr9579,video9579,two men talk with children s voices
 864 | ret862,msr8049,video8049,a guy is spinning around with a bat
 865 | ret863,msr8048,video8048,george lopez s family sits down for dinner
 866 | ret864,msr9576,video9576,a movie scene little boys inside water flowing inside forest displaying on screen
 867 | ret865,msr9577,video9577,cartoon one women in horse and speak to that calmly
 868 | ret866,msr9574,video9574,in the ocean a man on a surfboard rides a wave
 869 | ret867,msr7376,video7376,broth is being added to a soup pot and stirred with a rubber spatula
 870 | ret868,msr7377,video7377,a guy reports on complex news
 871 | ret869,msr7374,video7374,video game clip showing here different charcters
 872 | ret870,msr7375,video7375,a man talks about the school system
 873 | ret871,msr7372,video7372,someone is showing some drink
 874 | ret872,msr7373,video7373,a solider gives a speech
 875 | ret873,msr7370,video7370,boys and girls dancing and singing on beach
 876 | ret874,msr7371,video7371,a tv shows review program hosts discuss about the performance and staying on air of star trek
 877 | ret875,msr7732,video7732,colored lights and pictures of people fade in and out of view
 878 | ret876,msr7378,video7378,a clip of soccer plays
 879 | ret877,msr7733,video7733,a child is singing on stage
 880 | ret878,msr7730,video7730,different women in colorful clothing are walking down a runway for a fashion show
 881 | ret879,msr7939,video7939,a man and woman stand together and cook in the kitchen
 882 | ret880,msr7731,video7731,fox newscasters discuss chris christie and his poll numbers
 883 | ret881,msr8689,video8689,a multiplayer game of mario party is played
 884 | ret882,msr8688,video8688,a person folds a paper airplane
 885 | ret883,msr8687,video8687,fast moving time is shown here
 886 | ret884,msr8686,video8686,a women is doing craft and talking about that
 887 | ret885,msr8685,video8685,a man prepares some food in the kitchen
 888 | ret886,msr8684,video8684,a person points out certain figures on a paper
 889 | ret887,msr8683,video8683,two people welcome people to an episode of their show
 890 | ret888,msr8682,video8682,a man talking something about the new smart phones
 891 | ret889,msr8681,video8681,a young man and his dad are rubbing each other s hair it s a commercial for vodafone
 892 | ret890,msr8680,video8680,a woman talking to a man in a hood
 893 | ret891,msr7152,video7152,a person is swimming in some white water rapids
 894 | ret892,msr7153,video7153,there is a wrestling match going on between two people
 895 | ret893,msr7150,video7150,a picture of the batsman is shown and he is ready for the batting and the audience are watching the show
 896 | ret894,msr7151,video7151,a man is sitting on a chair
 897 | ret895,msr7156,video7156,tom jones performing live on a television show
 898 | ret896,msr7157,video7157,an anime cartoon character speaks to another character
 899 | ret897,msr7154,video7154,demonstration on how to prepare something using a microwave
 900 | ret898,msr7155,video7155,there are two men swimming in a pond
 901 | ret899,msr7158,video7158,fox news presidential debate recapping the gop debate with donald trump and ted cruz
 902 | ret900,msr7159,video7159,bill murray is being interviewed by david letterman while talking about bill s past roles
 903 | ret901,msr9368,video9368,someone demonstrates about the small motor uses to the video
 904 | ret902,msr8863,video8863,an animated girl talks to a baby and plays with it
 905 | ret903,msr8862,video8862,a woman s background voice describes a virtual scene where a dog enters a bathroom
 906 | ret904,msr8861,video8861,a white male raps while another plays guitar
 907 | ret905,msr8860,video8860,explainin about the scene in the net
 908 | ret906,msr8867,video8867,the house has at least three small pets
 909 | ret907,msr8866,video8866,a news scene of the dog s exercise and diet
 910 | ret908,msr8865,video8865,a man in a kitchen is preparing pancakes he s wearing a white shirt and has black hair
 911 | ret909,msr8864,video8864,a clip from fox news on the shelby north carolina shooting
 912 | ret910,msr8869,video8869,a girl shows a pack of toy building blocks
 913 | ret911,msr8868,video8868,a young girl sits in a room and looks into a bag
 914 | ret912,msr7798,video7798,dogs and cats playing in a park
 915 | ret913,msr7799,video7799,a guy chops up garlic and pours it over chicken frying in a pan
 916 | ret914,msr7794,video7794,a famous tv talk show
 917 | ret915,msr7795,video7795,a documentary on how to windsurf and in particular recover from a wipeout
 918 | ret916,msr7796,video7796,boy playing with a dump truck
 919 | ret917,msr7797,video7797,a man is swimming in the swimming pool
 920 | ret918,msr7790,video7790,a guy with glasses is standing next to some signs
 921 | ret919,msr7791,video7791,three kids are performing a song
 922 | ret920,msr7792,video7792,screen cast of mine craft oneline
 923 | ret921,msr7793,video7793,flight is shaken and the pilots trying to land the flight while they opened the air
 924 | ret922,msr7333,video7333,there are pictures and quotes from george bernard shaw and vincent van gogh while a voice over artist talks about making mistakes
 925 | ret923,msr9798,video9798,a woman peeling vegetables in her kitchen
 926 | ret924,msr7559,video7559,a man is trying some sushi
 927 | ret925,msr7556,video7556,people are playing baseball
 928 | ret926,msr9797,video9797,a football player with a football
 929 | ret927,msr7554,video7554,a person pointing to food on a plate
 930 | ret928,msr9795,video9795,a person playing a video game and commentating
 931 | ret929,msr7552,video7552,it is about a cartoon film
 932 | ret930,msr9793,video9793,interview with artist shanai twain
 933 | ret931,msr9790,video9790,squidward scenes playing with a lil wayne song
 934 | ret932,msr9791,video9791,a woman talking about different pictures next to her
 935 | ret933,msr8671,video8671,a man is yelling on the phone
 936 | ret934,msr7542,video7542,a boy gets out of a play police car and talks to a girl
 937 | ret935,msr9248,video9248,people are riding horses in grassland
 938 | ret936,msr9249,video9249,a man is discussing oxiders in bulk form
 939 | ret937,msr9240,video9240,young men in a middle of the bush almos naked and scratching themselves
 940 | ret938,msr9241,video9241,japanese people laughing and dancing
 941 | ret939,msr9242,video9242,a person drawling people on a canvas
 942 | ret940,msr9243,video9243,they are singing a song and playing a guitar in the stage
 943 | ret941,msr9244,video9244,documentary about museum in peninsula
 944 | ret942,msr9245,video9245,man interviews bill murray
 945 | ret943,msr9246,video9246,a beautiful sceneary is shown and the sight seeing through the train is amazing and place is so lovely to watch
 946 | ret944,msr9247,video9247,in an animated scene two characters are outside under a menacing sky glaring at each other
 947 | ret945,msr7710,video7710,a man playing video games
 948 | ret946,msr9489,video9489,a man with brown hair is singing a song
 949 | ret947,msr7147,video7147,a man cooks burgers and bacon on a grill
 950 | ret948,msr8929,video8929,a person is playing a video game
 951 | ret949,msr9309,video9309,there is someone serving a crab dish
 952 | ret950,msr9308,video9308,the models walk the catwalk
 953 | ret951,msr7544,video7544,a car goes racing down the road
 954 | ret952,msr9305,video9305,a football match between usa and japan
 955 | ret953,msr9304,video9304,three young people sing on stage
 956 | ret954,msr9307,video9307,the lady came their room go to the kitchen try to make their food
 957 | ret955,msr9306,video9306,a cartoon with violence
 958 | ret956,msr9301,video9301,how to make a galaxy fighter
 959 | ret957,msr9300,video9300,a squid is talking
 960 | ret958,msr9303,video9303,children singing a song as a group on a stage
 961 | ret959,msr9302,video9302,a guy trying to climb on a rope while another guy timing him
 962 | ret960,msr7209,video7209,someone is showing a car features
 963 | ret961,msr7149,video7149,a trailer for a film with words over the top
 964 | ret962,msr8927,video8927,a news story about hillary clinton
 965 | ret963,msr7632,video7632,a man walks between two brick buildings a dusk
 966 | ret964,msr9019,video9019,two people playing basketball and the one with a hat makes every shot
 967 | ret965,msr9018,video9018,people scoring in sports videos
 968 | ret966,msr7478,video7478,a football video game is being played
 969 | ret967,msr9011,video9011,a woman in black dress and a man in a black suit sits together
 970 | ret968,msr9010,video9010,a group of zombis walking towards a very tall building with a globe in front of it
 971 | ret969,msr9013,video9013,basketball highlights of players scoring
 972 | ret970,msr9012,video9012,a trailer for a movie with a girl knocking on the wall
 973 | ret971,msr9015,video9015,several enormous juicy burgers are all stacked together
 974 | ret972,msr9014,video9014,a cartoon plane picking up fruit and putting it in trucks
 975 | ret973,msr9017,video9017,the man is driving his motorbike fast and having problems on the race
 976 | ret974,msr9016,video9016,all persons are wearing bikini dresses and playing in sea
 977 | ret975,msr9509,video9509,a women in blue shows two pink lipsticks
 978 | ret976,msr7548,video7548,people are stoppped by military in the street
 979 | ret977,msr9734,video9734,a cartoon shows two dogs talking to a bird
 980 | ret978,msr9834,video9834,old black and white films are shown and the history of motion pictures is being dealt with
 981 | ret979,msr9735,video9735,many potatoes get washed and move through machinery
 982 | ret980,msr8752,video8752,a news reader describing about a news
 983 | ret981,msr7064,video7064,a guy wearing a black shirt talks and shows a chart on the tv screen
 984 | ret982,msr8308,video8308,a man is playing baseball
 985 | ret983,msr8309,video8309,there is a wrestling match
 986 | ret984,msr8302,video8302,a man talks about kim chi to a camera
 987 | ret985,msr8303,video8303,a cartoon announcing the next event in a fake show
 988 | ret986,msr8300,video8300,a woman pours water into her pot of meat then tomato sauce and stirs it all around while talking
 989 | ret987,msr8301,video8301,man pretends to be two different people
 990 | ret988,msr8306,video8306,tourists walking around the mount fuji visitor center
 991 | ret989,msr8307,video8307,calm pond with lush green hills lining the background is shown
 992 | ret990,msr8304,video8304,a man dressed as a woman in a spanish language tv program
 993 | ret991,msr8305,video8305,in a music video a man is laying with women while singing
 994 | ret992,msr7060,video7060,a man extinguishes a fire outside
 995 | ret993,msr7061,video7061,goldfish chase each other around a blue tank to music
 996 | ret994,msr9575,video9575,the woman in the purple blouse talk as the shelves are behind her
 997 | ret995,msr9879,video9879,the young performer impressed his audience
 998 | ret996,msr9878,video9878,some peole are sitting in hall
 999 | ret997,msr9873,video9873,man shows how to prepare pizza
1000 | ret998,msr9872,video9872,someone is playing a first person shooter game and is making jokes
1001 | ret999,msr9871,video9871,bearded guy in grey tshirt talking to the camera
1002 | 


--------------------------------------------------------------------------------