├── .gitattributes
├── .gitignore
├── README.md
├── combine_rgb_flow.py
├── pytorch
│   ├── .gitignore
│   ├── LICENSE
│   ├── README.md
│   ├── checkpoints.py
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── charadesflow.py
│   │   ├── charadesrgb.py
│   │   ├── fake.py
│   │   └── transforms.py
│   ├── exp
│   │   ├── flownet.py
│   │   ├── flownet_test.py
│   │   ├── rgbnet.py
│   │   ├── rgbnet_inception.py
│   │   ├── rgbnet_resnet.py
│   │   └── rgbnet_test.py
│   ├── get_alreadytrained.sh
│   ├── main.py
│   ├── models
│   │   ├── __init__.py
│   │   └── vgg16flow.py
│   ├── opts.py
│   ├── train.py
│   └── utils
│       ├── __init__.py
│       ├── map.py
│       └── tee.py
└── torch
    ├── INSTALL.md
    ├── LICENSE
    ├── README.md
    ├── checkpoints.lua
    ├── dataloader.lua
    ├── datasets
    │   ├── README.md
    │   ├── charades-gen.lua
    │   ├── charades.lua
    │   ├── charadesflow-gen.lua
    │   ├── charadesflow.lua
    │   ├── charadessync-gen.lua
    │   ├── charadessync.lua
    │   ├── charadessyncflow-gen.lua
    │   ├── charadessyncflow.lua
    │   ├── cifar10-gen.lua
    │   ├── cifar10.lua
    │   ├── imagenet-gen.lua
    │   ├── imagenet.lua
    │   ├── init.lua
    │   └── transforms.lua
    ├── exp
    │   ├── flownet.lua
    │   ├── flownet_localize.lua
    │   ├── flownet_resume.lua
    │   ├── flownet_test.lua
    │   ├── lstmflownet.lua
    │   ├── lstmrgbnet.lua
    │   ├── rgbnet.lua
    │   ├── rgbnet_localize.lua
    │   ├── rgbnet_resume.lua
    │   └── rgbnet_test.lua
    ├── get_alreadytrained.sh
    ├── get_alreadytrained_lstm.sh
    ├── get_models.sh
    ├── layers
    │   └── CrossEntropyCriterion.lua
    ├── main.lua
    ├── models
    │   ├── init.lua
    │   ├── preresnet.lua
    │   ├── resnet.lua
    │   ├── vgg16.lua
    │   ├── vgg16flow.lua
    │   ├── vgg16lstm.lua
    │   └── vgg16lstmflow.lua
    ├── opts.lua
    └── train.lua

/.gitattributes:
--------------------------------------------------------------------------------
*.py text
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
gen/
libnccl.so
model_best.t7
checkpoints
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Charades Starter Code for Activity Recognition in Torch and PyTorch

Contributor: Gunnar Atli Sigurdsson

**New:** extension of this framework to the deep CRF model on Charades for *Asynchronous Temporal Fields for Action Recognition*: https://github.com/gsig/temporal-fields

* **New:** This code implements a Two-Stream network in PyTorch
* This code implements a Two-Stream network in Torch
* This code implements a Two-Stream+LSTM network in Torch

See [pytorch/](pytorch/) and [torch/](torch/) for the code repositories.

The code replicates the 'Two-Stream Extended' and 'Two-Stream+LSTM' baselines found in:
```
@inproceedings{sigurdsson2017asynchronous,
author = {Gunnar A. Sigurdsson and Santosh Divvala and Ali Farhadi and Abhinav Gupta},
title = {Asynchronous Temporal Fields for Action Recognition},
booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2017},
pdf = {http://arxiv.org/pdf/1612.06371.pdf},
code = {https://github.com/gsig/temporal-fields},
}
```
which is in turn based on "Two-stream convolutional networks for action recognition in videos" by Simonyan and Zisserman, and "Beyond Short Snippets: Deep Networks for Video Classification" by Joe Yue-Hei Ng et al.

Combining the predictions (submission files) of those models using combine_rgb_flow.py
yields a final classification accuracy of 18.9% mAP (Two-Stream) and 19.8% (LSTM) on Charades (evaluated with charades_v1_classify.m)
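combine_rgb_flow.py reads an RGB and a Flow submission file and writes the fused file to stdout, e.g. `python combine_rgb_flow.py rgb.txt flow.txt > twostream.txt` (file names here are illustrative). At its core the fusion is just a weighted average of per-class scores; a minimal sketch:

```python
# Schematic of the late fusion performed by combine_rgb_flow.py: a weighted
# average of the 157 per-class scores for each video. The scores below are
# random placeholders; the script also contains commented-out score-normalized
# and geometric-mean variants.
import numpy as np

w = [0.5, 0.5]                       # RGB and Flow weights, normalized to sum to one
rgb_scores = np.random.rand(157)     # placeholder per-class scores for one video
flow_scores = np.random.rand(157)    # placeholder per-class scores for one video
fused = rgb_scores*w[0] + flow_scores*w[1]
```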
## Technical Overview:

The code is organized to train a two-stream network: two independent networks are trained, one RGB network and one Flow network.
This code parses the training data into pairs of an image (or flow), and a label for a single activity class. This forms a softmax training setup like a standard CNN. The network is a VGG-16 network. For RGB it is pretrained on ImageNet, and for Flow it is pretrained on UCF101. The pretrained networks can be downloaded with the scripts in this directory.
For testing, the network uses a batch size of 25, scores all images, and pools the output to make a classification prediction, or uses all 25 outputs for localization. The pooling step is sketched below.
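A minimal sketch of the video-level pooling, mirroring Trainer.validate_video in pytorch/train.py (`model` and `frames` are placeholders here):

```python
# Sketch of video-level test-time pooling: score 25 frames from one video
# and mean-pool their softmax outputs (keep the per-frame outputs instead
# when localization is wanted).
import torch

def video_level_prediction(model, frames):
    """frames: a (25, channels, height, width) batch sampled evenly from one video."""
    output = torch.nn.Softmax(dim=1)(model(frames))  # (25, 157) per-frame probabilities
    return output.mean(dim=0)                        # one 157-dim video-level prediction
```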
--------------------------------------------------------------------------------
/combine_rgb_flow.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
#
# Script for combining the submission files for the RGB and Flow networks
#
# Contributor: Gunnar Atli Sigurdsson

import numpy as np
import sys
import pdb
from itertools import groupby

rgbfile = sys.argv[1]
flowfile = sys.argv[2]
w = [0.5,0.5]
nclasses = 157

def loadfile(path):
    with open(path) as f:
        lines = [x.strip().split(' ') for x in f.readlines()]
    localization = len(lines[0]) == nclasses+2
    if localization:
        data = [(x[0]+' '+x[1],np.array([float(y) for y in x[2:]])) for x in lines]
    else:
        data = [(x[0],np.array([float(y) for y in x[1:]])) for x in lines]
    return data

rgb = loadfile(rgbfile)
flow = loadfile(flowfile)

rgbdict = dict(rgb)
flowdict = dict(flow)

keys = list(set(rgbdict.keys()+flowdict.keys()))
w = [x/sum(w) for x in w]

def normme(x):
    x = x-np.mean(x)
    x = x/(0.00001+np.std(x))
    return x

N = 157
def lookup(d,key):
    if key in d:
        return d[key]
    else:
        sys.stderr.write('error ' + key + '\n')
        return np.zeros((nclasses,))

for id0 in keys:
    r = lookup(rgbdict,id0)
    f = lookup(flowdict,id0)
    out = r*w[0]+f*w[1] #unnormalized combination
    #out = normme(r)*w[0]+normme(f)*w[1] #normalize first
    #out = np.exp(np.log(r)*w[0]+np.log(f)*w[1]) #weighted geometric mean
    out = [str(x) for x in out]
    print('{} {}'.format(id0,' '.join(out)))
--------------------------------------------------------------------------------
/pytorch/.gitignore:
--------------------------------------------------------------------------------
gen/
checkpoints/
*.pyc
*.swp
--------------------------------------------------------------------------------
/pytorch/README.md:
--------------------------------------------------------------------------------
## PyTorch Starter Code for Activity Classification and Localization on Charades

Contributor: Gunnar Atli Sigurdsson

Extension of this framework to the deep CRF model on Charades for *Asynchronous Temporal Fields for Action Recognition*: https://github.com/gsig/temporal-fields

* This code implements a Two-Stream network in PyTorch

The code replicates the 'Two-Stream Extended' and 'Two-Stream+LSTM' baselines found in:
```
@inproceedings{sigurdsson2017asynchronous,
author = {Gunnar A. Sigurdsson and Santosh Divvala and Ali Farhadi and Abhinav Gupta},
title = {Asynchronous Temporal Fields for Action Recognition},
booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
year={2017},
pdf = {http://arxiv.org/pdf/1612.06371.pdf},
code = {https://github.com/gsig/temporal-fields},
}
```
which is in turn based on "Two-stream convolutional networks for action recognition in videos" by Simonyan and Zisserman, and "Beyond Short Snippets: Deep Networks for Video Classification" by Joe Yue-Hei Ng et al.

Combining the predictions (submission files) of those models using combine_rgb_flow.py
yields a final classification accuracy of 20.6% mAP (Two-Stream) on Charades (evaluated with charades_v1_classify.m)


## Technical Overview:

The code is organized to train a two-stream network: two independent networks are trained, one RGB network and one Flow network.
This code parses the training data into pairs of an image (or flow), and a label for a single activity class. This forms a softmax training setup like a standard CNN. The network is a VGG-16 network. For RGB it is pretrained on ImageNet, and for Flow it is pretrained on UCF101. The pretrained networks can be downloaded with the scripts in this directory.
For testing, the network uses a batch size of 25, scores all images, and pools the output to make a classification prediction, or uses all 25 outputs for localization.

All outputs are stored in the cache-dir. This includes epoch*.txt which is the classification output, and localize*.txt which is the localization output (note that you need to specify that you want this in the options).
Those output files can be combined after training with the python scripts in this directory.
All output files can be scored with the official MATLAB evaluation script provided with the Charades dataset.

Requirements:
* Python 2.7
* PyTorch


## Steps to train your own two-stream network on Charades:

1. Download the Charades Annotations (allenai.org/plato/charades/)
2. Download the Charades RGB and/or Flow frames (allenai.org/plato/charades/)
3. Duplicate and edit one of the experiment files under exp/ with appropriate parameters. For additional parameters, see opts.py
4. Run an experiment by calling python exp/rgbnet.py where rgbnet.py is your experiment file
5. The checkpoints/logfiles/outputs are stored in your specified cache directory.
6. Combine one RGB output file and one Flow output file with combine_rgb_flow.py to generate a submission file
7. Evaluate the submission file with the Charades_v1_classify.m or Charades_v1_localize.m evaluation scripts (a quick Python-side approximation is sketched after this list)
8. Build on the code, cite our papers, and say hi to us at CVPR.

Good luck!
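The official numbers in step 7 come from the MATLAB scripts; for a rough sanity check one can reuse this repository's own helpers along the following lines. This is a sketch: the file paths are examples, and it uses the approximate evaluation in utils/map.py rather than the official script.

```python
# Approximate classification mAP for a submission file, using this repo's
# own helpers; run from the pytorch/ directory. Paths are illustrative.
import numpy as np
from utils.map import charades_map
from datasets.charadesrgb import parse_charades_csv, cls2int

labels = parse_charades_csv('./Charades_v1_test.csv')
ids, scores = [], []
with open('./cache/rgbnet/epoch_007.txt') as f:   # a classification output file
    for line in f:
        parts = line.strip().split(' ')
        ids.append(parts[0])
        scores.append([float(x) for x in parts[1:]])

gt = np.zeros((len(ids), 157))                    # binary video-level ground truth
for i, vid in enumerate(ids):
    for action in labels[vid]:
        gt[i, cls2int(action['class'])] = 1

mAP, _, _ = charades_map(np.array(scores), gt)
print('approximate mAP: {:.3f}'.format(mAP))
```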
## Pretrained networks:

While the RGB net can be trained in a day on a modern GPU, the flow net requires nontrivial IO and time to converge. For your convenience we provide RGB and Flow models already trained on Charades using exp/rgbnet.py and exp/flownet.py

https://www.dropbox.com/s/p457h2ifi6v1qdz/twostream_rgb.pth.tar?dl=1
https://www.dropbox.com/s/m1hkeiwjtndt26z/twostream_flow.pth?dl=1

* The rgb model was obtained after 7 epochs (epochSize=0.1)
* The rgb model has a classification accuracy of 18.6% mAP (evaluated with charades_v1_classify.m)
* The flow model was converted directly from the Charades Torch codebase (../torch/)
* The flow model has a classification accuracy of 15.4% mAP (via charades_v1_classify.m)

Combining the predictions (submission files) of those models using combine_rgb_flow.py
yields a final classification accuracy of 20.6% mAP (evaluated with charades_v1_classify.m)

To fine-tune those models, or run experiments, please see exp/rgbnet_test.py and exp/flownet_test.py; a sketch of loading a checkpoint by hand follows below.


Additionally we include rgb-streams fine-tuned from resnet and inception pretrained on ImageNet:
* ResNet-152 (exp/rgbnet_resnet.py): 22.8% mAP (via charades_v1_classify.m)
* https://www.dropbox.com/s/iy9fmk0r1a3edoz/resnet_rgb.pth.tar?dl=1
* Inception_v3 (exp/rgbnet_inception.py): 22.7% mAP (via charades_v1_classify.m)
* https://www.dropbox.com/s/whxikophm7xqchb/inception_rgb.pth.tar?dl=1


Charades submission files are available for multiple baselines at https://github.com/gsig/temporal-fields
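In the experiment files this loading is handled by checkpoints.load via the --resume flag; for reference, a hand-rolled sketch of restoring the released RGB checkpoint might look as follows. The local path and CPU loading are assumptions, and the 'module.' stripping plays the same role as ordered_load_state in pytorch/checkpoints.py.

```python
# Sketch: manually loading twostream_rgb.pth.tar into a VGG-16 whose last
# layer has been replaced with a 157-way Charades classifier, as done by
# create_model in pytorch/models/__init__.py.
import torch
import torch.nn as nn
import torchvision.models as tmodels

model = tmodels.vgg16()
newcls = list(model.classifier.children())
newcls = newcls[:-1] + [nn.Linear(newcls[-1].in_features, 157)]
model.classifier = nn.Sequential(*newcls)

chkpoint = torch.load('twostream_rgb.pth.tar', map_location='cpu')  # example path
state = chkpoint['state_dict'] if 'state_dict' in chkpoint else chkpoint
state = {k.replace('module.', ''): v for k, v in state.items()}  # undo DataParallel naming
model.load_state_dict(state)
model.eval()
```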
14 | """ 15 | try: 16 | model.load_state_dict(chkpoint) 17 | except Exception: # assume order is the same, and use new labels 18 | print('keys do not match model, trying to align') 19 | modelkeys = model.state_dict().keys() 20 | fixed = OrderedDict([(z,y) 21 | for (x,y),z in zip(chkpoint.items(), modelkeys)]) 22 | model.load_state_dict(fixed) 23 | 24 | 25 | def load(args, model, optimizer): 26 | if args.resume: 27 | if os.path.isfile(args.resume): 28 | print("=> loading checkpoint '{}'".format(args.resume)) 29 | chkpoint = torch.load(args.resume) 30 | if isinstance(chkpoint, dict) and 'state_dict' in chkpoint: 31 | args.start_epoch = chkpoint['epoch'] 32 | mAP = chkpoint['mAP'] 33 | ordered_load_state(model, chkpoint['state_dict']) 34 | optimizer.load_state_dict(chkpoint['optimizer']) 35 | print("=> loaded checkpoint '{}' (epoch {})" 36 | .format(args.resume, chkpoint['epoch'])) 37 | return mAP 38 | else: 39 | ordered_load_state(model, chkpoint) 40 | print("=> loaded checkpoint '{}' (just weights)" 41 | .format(args.resume)) 42 | return 0 43 | else: 44 | raise ValueError("no checkpoint found at '{}'".format(args.resume)) 45 | return 0 46 | 47 | 48 | def score_file(scores, filename): 49 | with open(filename, 'w') as f: 50 | for key, val in sorted(scores.items()): 51 | f.write('{} {}\n'.format(key, val)) 52 | 53 | 54 | def save(epoch, args, model, optimizer, is_best, scores): 55 | state = { 56 | 'epoch': epoch + 1, 57 | 'arch': args.arch, 58 | 'state_dict': model.state_dict(), 59 | 'mAP': scores['mAP'], 60 | 'optimizer': optimizer.state_dict(), 61 | } 62 | filename = "{}/model.pth.tar".format(args.cache) 63 | score_file(scores, "{}/model_{:03d}.txt".format(args.cache, epoch+1)) 64 | torch.save(state, filename) 65 | if is_best: 66 | bestname = "{}/model_best.pth.tar".format(args.cache) 67 | score_file(scores, "{}/model_best.txt".format(args.cache, epoch+1)) 68 | shutil.copyfile(filename, bestname) 69 | -------------------------------------------------------------------------------- /pytorch/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | """ Initilize the datasets module 2 | New datasets can be added with python scripts under datasets/ 3 | """ 4 | import torch 5 | import torch.utils.data 6 | import torch.utils.data.distributed 7 | import importlib 8 | 9 | 10 | def get_dataset(args): 11 | dataset = importlib.import_module('.'+args.dataset, package='datasets') 12 | train_dataset, val_dataset, valvideo_dataset = dataset.get(args) 13 | 14 | if args.distributed: 15 | train_sampler = torch.utils.data.distributed.DistributedSampler( 16 | train_dataset) 17 | else: 18 | train_sampler = None 19 | 20 | train_loader = torch.utils.data.DataLoader( 21 | train_dataset, batch_size=args.batch_size, shuffle=( 22 | train_sampler is None), 23 | num_workers=args.workers, pin_memory=True, sampler=train_sampler) 24 | 25 | val_loader = torch.utils.data.DataLoader( 26 | val_dataset, batch_size=args.batch_size, shuffle=True, 27 | num_workers=args.workers, pin_memory=True) 28 | 29 | valvideo_loader = torch.utils.data.DataLoader( 30 | valvideo_dataset, batch_size=25, shuffle=False, 31 | num_workers=args.workers, pin_memory=True) 32 | 33 | return train_loader, val_loader, valvideo_loader 34 | -------------------------------------------------------------------------------- /pytorch/datasets/charadesflow.py: -------------------------------------------------------------------------------- 1 | """ Dataset loader for the Charades dataset """ 2 | import torch 3 | 
import torchvision.transforms as transforms 4 | import transforms as arraytransforms 5 | from charadesrgb import Charades, cls2int 6 | from PIL import Image 7 | import numpy as np 8 | from glob import glob 9 | 10 | 11 | def pil_loader(path): 12 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 13 | with open(path, 'rb') as f: 14 | img = Image.open(f) 15 | return img.convert('L') 16 | 17 | 18 | def accimage_loader(path): 19 | import accimage 20 | try: 21 | return accimage.Image(path) 22 | except IOError: 23 | # Potentially a decoding problem, fall back to PIL.Image 24 | return pil_loader(path) 25 | 26 | 27 | def default_loader(path): 28 | from torchvision import get_image_backend 29 | if get_image_backend() == 'accimage': 30 | return accimage_loader(path) 31 | else: 32 | return pil_loader(path) 33 | 34 | 35 | class Charadesflow(Charades): 36 | def __init__(self, *args, **kwargs): 37 | super(Charadesflow,self).__init__(*args, **kwargs) 38 | 39 | def prepare(self, path, labels, split): 40 | FPS, GAP, testGAP = 24, 4, 25 41 | STACK=10 42 | datadir = path 43 | image_paths, targets, ids = [], [], [] 44 | 45 | for i, (vid, label) in enumerate(labels.iteritems()): 46 | iddir = datadir + '/' + vid 47 | lines = glob(iddir+'/*.jpg') 48 | n = len(lines)/2 49 | if i % 100 == 0: 50 | print("{} {}".format(i, iddir)) 51 | if n == 0: 52 | continue 53 | if split == 'val_video': 54 | target = torch.IntTensor(157).zero_() 55 | for x in label: 56 | target[cls2int(x['class'])] = 1 57 | spacing = np.linspace(0, n-1-STACK-1, testGAP) # fit 10 optical flow pairs 58 | for loc in spacing: 59 | impath = '{}/{}-{:06d}x.jpg'.format( 60 | iddir, vid, int(np.floor(loc))+1) 61 | image_paths.append(impath) 62 | targets.append(target) 63 | ids.append(vid) 64 | else: 65 | for x in label: 66 | for ii in range(0, n-1, GAP): 67 | if x['start'] < ii/float(FPS) < x['end']: 68 | if ii>n-1-STACK-1: continue # fit 10 optical flow pairs 69 | impath = '{}/{}-{:06d}x.jpg'.format( 70 | iddir, vid, ii+1) 71 | image_paths.append(impath) 72 | targets.append(cls2int(x['class'])) 73 | ids.append(vid) 74 | return {'image_paths': image_paths, 'targets': targets, 'ids': ids} 75 | 76 | def __getitem__(self, index): 77 | """ 78 | Args: 79 | index (int): Index 80 | Returns: 81 | tuple: (image, target) where target is class_index of the target class. 82 | """ 83 | path = self.data['image_paths'][index] 84 | base = path[:-5-6] 85 | framenr = int(path[-5-6:-5]) 86 | assert '{}{:06d}x.jpg'.format(base,framenr) == path 87 | STACK=10 88 | img = [] 89 | for i in range(STACK): 90 | x = '{}{:06d}x.jpg'.format(base,framenr+i) 91 | y = '{}{:06d}y.jpg'.format(base,framenr+i) 92 | imgx = default_loader(x) 93 | imgy = default_loader(y) 94 | img.append(imgx) 95 | img.append(imgy) 96 | target = self.data['targets'][index] 97 | meta = {} 98 | meta['id'] = self.data['ids'][index] 99 | if self.transform is not None: 100 | img = self.transform(img) 101 | if self.target_transform is not None: 102 | target = self.target_transform(target) 103 | return img, target, meta 104 | 105 | 106 | def get(args): 107 | """ Entry point. 
Call this function to get all Charades dataloaders """ 108 | normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0]) 109 | train_file = args.train_file 110 | val_file = args.val_file 111 | train_dataset = Charadesflow( 112 | args.data, 'train', train_file, args.cache, 113 | transform=transforms.Compose([ 114 | arraytransforms.RandomResizedCrop(224), 115 | arraytransforms.ToTensor(), 116 | normalize, 117 | transforms.Lambda(lambda x: torch.cat(x)), 118 | ])) 119 | val_transforms = transforms.Compose([ 120 | arraytransforms.Resize(256), 121 | arraytransforms.CenterCrop(224), 122 | arraytransforms.ToTensor(), 123 | normalize, 124 | transforms.Lambda(lambda x: torch.cat(x)), 125 | ]) 126 | val_dataset = Charadesflow( 127 | args.data, 'val', val_file, args.cache, transform=val_transforms) 128 | valvideo_dataset = Charadesflow( 129 | args.data, 'val_video', val_file, args.cache, transform=val_transforms) 130 | return train_dataset, val_dataset, valvideo_dataset 131 | -------------------------------------------------------------------------------- /pytorch/datasets/charadesrgb.py: -------------------------------------------------------------------------------- 1 | """ Dataset loader for the Charades dataset """ 2 | import torch 3 | import torchvision.transforms as transforms 4 | import torch.utils.data as data 5 | from PIL import Image 6 | import numpy as np 7 | from glob import glob 8 | import csv 9 | import cPickle as pickle 10 | import os 11 | 12 | 13 | def parse_charades_csv(filename): 14 | labels = {} 15 | with open(filename) as f: 16 | reader = csv.DictReader(f) 17 | for row in reader: 18 | vid = row['id'] 19 | actions = row['actions'] 20 | if actions == '': 21 | actions = [] 22 | else: 23 | actions = [a.split(' ') for a in actions.split(';')] 24 | actions = [{'class': x, 'start': float( 25 | y), 'end': float(z)} for x, y, z in actions] 26 | labels[vid] = actions 27 | return labels 28 | 29 | 30 | def cls2int(x): 31 | return int(x[1:]) 32 | 33 | 34 | def pil_loader(path): 35 | # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) 36 | with open(path, 'rb') as f: 37 | img = Image.open(f) 38 | return img.convert('RGB') 39 | 40 | 41 | def accimage_loader(path): 42 | import accimage 43 | try: 44 | return accimage.Image(path) 45 | except IOError: 46 | # Potentially a decoding problem, fall back to PIL.Image 47 | return pil_loader(path) 48 | 49 | 50 | def default_loader(path): 51 | from torchvision import get_image_backend 52 | if get_image_backend() == 'accimage': 53 | return accimage_loader(path) 54 | else: 55 | return pil_loader(path) 56 | 57 | 58 | def cache(cachefile): 59 | """ Creates a decorator that caches the result to cachefile """ 60 | def cachedecorator(fn): 61 | def newf(*args, **kwargs): 62 | print('cachefile {}'.format(cachefile)) 63 | if os.path.exists(cachefile): 64 | with open(cachefile, 'rb') as f: 65 | print("Loading cached result from '%s'" % cachefile) 66 | return pickle.load(f) 67 | res = fn(*args, **kwargs) 68 | with open(cachefile, 'wb') as f: 69 | print("Saving result to cache '%s'" % cachefile) 70 | pickle.dump(res, f) 71 | return res 72 | return newf 73 | return cachedecorator 74 | 75 | 76 | class Charades(data.Dataset): 77 | def __init__(self, root, split, labelpath, cachedir, transform=None, target_transform=None): 78 | self.num_classes = 157 79 | self.transform = transform 80 | self.target_transform = target_transform 81 | self.labels = parse_charades_csv(labelpath) 82 | self.root = root 83 | cachename = 
'{}/{}_{}.pkl'.format(cachedir, 84 | self.__class__.__name__, split) 85 | self.data = cache(cachename)(self.prepare)(root, self.labels, split) 86 | 87 | def prepare(self, path, labels, split): 88 | FPS, GAP, testGAP = 24, 4, 25 89 | datadir = path 90 | image_paths, targets, ids = [], [], [] 91 | 92 | for i, (vid, label) in enumerate(labels.iteritems()): 93 | iddir = datadir + '/' + vid 94 | lines = glob(iddir+'/*.jpg') 95 | n = len(lines) 96 | if i % 100 == 0: 97 | print("{} {}".format(i, iddir)) 98 | if n == 0: 99 | continue 100 | if split == 'val_video': 101 | target = torch.IntTensor(157).zero_() 102 | for x in label: 103 | target[cls2int(x['class'])] = 1 104 | spacing = np.linspace(0, n-1, testGAP) 105 | for loc in spacing: 106 | impath = '{}/{}-{:06d}.jpg'.format( 107 | iddir, vid, int(np.floor(loc))+1) 108 | image_paths.append(impath) 109 | targets.append(target) 110 | ids.append(vid) 111 | else: 112 | for x in label: 113 | for ii in range(0, n-1, GAP): 114 | if x['start'] < ii/float(FPS) < x['end']: 115 | impath = '{}/{}-{:06d}.jpg'.format( 116 | iddir, vid, ii+1) 117 | image_paths.append(impath) 118 | targets.append(cls2int(x['class'])) 119 | ids.append(vid) 120 | return {'image_paths': image_paths, 'targets': targets, 'ids': ids} 121 | 122 | def __getitem__(self, index): 123 | """ 124 | Args: 125 | index (int): Index 126 | Returns: 127 | tuple: (image, target) where target is class_index of the target class. 128 | """ 129 | path = self.data['image_paths'][index] 130 | target = self.data['targets'][index] 131 | meta = {} 132 | meta['id'] = self.data['ids'][index] 133 | img = default_loader(path) 134 | if self.transform is not None: 135 | img = self.transform(img) 136 | if self.target_transform is not None: 137 | target = self.target_transform(target) 138 | return img, target, meta 139 | 140 | def __len__(self): 141 | return len(self.data['image_paths']) 142 | 143 | def __repr__(self): 144 | fmt_str = 'Dataset ' + self.__class__.__name__ + '\n' 145 | fmt_str += ' Number of datapoints: {}\n'.format(self.__len__()) 146 | fmt_str += ' Root Location: {}\n'.format(self.root) 147 | tmp = ' Transforms (if any): ' 148 | fmt_str += '{0}{1}\n'.format( 149 | tmp, self.transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 150 | tmp = ' Target Transforms (if any): ' 151 | fmt_str += '{0}{1}'.format( 152 | tmp, self.target_transform.__repr__().replace('\n', '\n' + ' ' * len(tmp))) 153 | return fmt_str 154 | 155 | 156 | def get(args): 157 | """ Entry point. 
Call this function to get all Charades dataloaders """ 158 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 159 | std=[0.229, 0.224, 0.225]) 160 | train_file = args.train_file 161 | val_file = args.val_file 162 | train_dataset = Charades( 163 | args.data, 'train', train_file, args.cache, 164 | transform=transforms.Compose([ 165 | transforms.RandomResizedCrop(args.inputsize), 166 | transforms.ColorJitter( 167 | brightness=0.4, contrast=0.4, saturation=0.4), 168 | transforms.RandomHorizontalFlip(), 169 | transforms.ToTensor(), # missing PCA lighting jitter 170 | normalize, 171 | ])) 172 | val_dataset = Charades( 173 | args.data, 'val', val_file, args.cache, 174 | transform=transforms.Compose([ 175 | transforms.Resize(int(256./224*args.inputsize)), 176 | transforms.CenterCrop(args.inputsize), 177 | transforms.ToTensor(), 178 | normalize, 179 | ])) 180 | valvideo_dataset = Charades( 181 | args.data, 'val_video', val_file, args.cache, 182 | transform=transforms.Compose([ 183 | transforms.Resize(int(256./224*args.inputsize)), 184 | transforms.CenterCrop(args.inputsize), 185 | transforms.ToTensor(), 186 | normalize, 187 | ])) 188 | return train_dataset, val_dataset, valvideo_dataset 189 | -------------------------------------------------------------------------------- /pytorch/datasets/fake.py: -------------------------------------------------------------------------------- 1 | """ Define random data for quick debugging """ 2 | import torchvision 3 | import torchvision.transforms as transforms 4 | 5 | 6 | def get(args): 7 | normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], 8 | std=[0.229, 0.224, 0.225]) 9 | 10 | train_dataset = torchvision.datasets.FakeData( 11 | transform=transforms.Compose([ 12 | transforms.RandomResizedCrop(224), 13 | transforms.RandomHorizontalFlip(), 14 | transforms.ToTensor(), 15 | normalize, 16 | ])) 17 | 18 | val_dataset = torchvision.datasets.FakeData( 19 | transform=transforms.Compose([ 20 | transforms.Resize(256), 21 | transforms.CenterCrop(224), 22 | transforms.ToTensor(), 23 | normalize, 24 | ])) 25 | 26 | return train_dataset, val_dataset, val_dataset 27 | -------------------------------------------------------------------------------- /pytorch/datasets/transforms.py: -------------------------------------------------------------------------------- 1 | """ Overloading Torchvision transforms to operate on a list """ 2 | 3 | import torchvision.transforms as parents 4 | 5 | class CenterCrop(parents.CenterCrop): 6 | def __init__(self, *args, **kwargs): 7 | super(CenterCrop, self).__init__(*args, **kwargs) 8 | def __call__(self, img): 9 | return [super(CenterCrop, self).__call__(im) for im in img] 10 | 11 | 12 | class RandomCrop(parents.RandomCrop): 13 | def __init__(self, *args, **kwargs): 14 | super(RandomCrop, self).__init__(*args, **kwargs) 15 | def __call__(self, img): 16 | return [super(RandomCrop, self).__call__(im) for im in img] 17 | 18 | 19 | class RandomResizedCrop(parents.RandomResizedCrop): 20 | def __init__(self, *args): 21 | super(RandomResizedCrop, self).__init__(*args) 22 | def __call__(self, img): 23 | return [super(RandomResizedCrop, self).__call__(im) for im in img] 24 | 25 | 26 | class Resize(parents.Resize): 27 | def __init__(self, *args, **kwargs): 28 | super(Resize, self).__init__(*args, **kwargs) 29 | def __call__(self, img): 30 | return [super(Resize, self).__call__(im) for im in img] 31 | 32 | 33 | class ToTensor(parents.ToTensor): 34 | def __init__(self, *args, **kwargs): 35 | super(ToTensor, self).__init__(*args, 
**kwargs) 36 | def __call__(self, img): 37 | return [super(ToTensor, self).__call__(im) for im in img] 38 | 39 | 40 | class Normalize(parents.Normalize): 41 | def __init__(self, *args, **kwargs): 42 | super(Normalize, self).__init__(*args, **kwargs) 43 | def __call__(self, img): 44 | return [super(Normalize, self).__call__(im) for im in img] 45 | 46 | -------------------------------------------------------------------------------- /pytorch/exp/flownet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | #sys.path.insert(0, '..') 4 | sys.path.insert(0, '.') 5 | from main import main 6 | 7 | args = [ 8 | '--name', __file__.split('/')[-1].split('.')[0], # name is filename 9 | '--print-freq', '1', 10 | '--dataset', 'charadesflow', 11 | '--data','/scratch/gsigurds/Charades_v1_flow/', 12 | '--arch', 'vgg16flow', 13 | '--pretrained-weights', './vgg16flow_ucf101.pth', 14 | '--lr', '5e-3', 15 | '--lr-decay-rate','15', 16 | '--epochs','40', 17 | '--batch-size', '64', 18 | '--train-size', '0.2', 19 | '--val-size', '0.1', 20 | '--cache-dir', '/nfs.yoda/gsigurds/ai2/caches/', 21 | '--pretrained', 22 | #'--evaluate', 23 | ] 24 | sys.argv.extend(args) 25 | main() 26 | -------------------------------------------------------------------------------- /pytorch/exp/flownet_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | #sys.path.insert(0, '..') 4 | sys.path.insert(0, '.') 5 | from main import main 6 | 7 | args = [ 8 | '--name', __file__.split('/')[-1].split('.')[0], # name is filename 9 | '--print-freq', '1', 10 | '--dataset', 'charadesflow', 11 | '--data','/scratch/gsigurds/Charades_v1_flow/', 12 | '--arch', 'vgg16flow', 13 | '--lr', '5e-3', 14 | '--lr-decay-rate','15', 15 | '--epochs','40', 16 | '--batch-size', '64', 17 | '--train-size', '0.2', 18 | '--val-size', '0.001', 19 | '--cache-dir', '/nfs.yoda/gsigurds/ai2/caches/', 20 | '--pretrained', 21 | '--resume', './twostream_flow.pth', 22 | '--evaluate', 23 | ] 24 | sys.argv.extend(args) 25 | main() 26 | -------------------------------------------------------------------------------- /pytorch/exp/rgbnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | #sys.path.insert(0, '..') 4 | sys.path.insert(0, '.') 5 | from main import main 6 | 7 | args = [ 8 | '--name', __file__.split('/')[-1].split('.')[0], # name is filename 9 | '--print-freq', '1', 10 | '--dataset', 'charadesrgb', 11 | '--arch', 'vgg16', 12 | '--lr', '1e-3', 13 | '--batch-size', '64', 14 | '--train-size', '0.1', 15 | '--val-size', '0.1', 16 | '--cache-dir', '/nfs.yoda/gsigurds/ai2/caches/', 17 | '--pretrained', 18 | #'--evaluate', 19 | ] 20 | sys.argv.extend(args) 21 | main() 22 | -------------------------------------------------------------------------------- /pytorch/exp/rgbnet_inception.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | #sys.path.insert(0, '..') 4 | sys.path.insert(0, '.') 5 | from main import main 6 | 7 | args = [ 8 | '--name', __file__.split('/')[-1].split('.')[0], # name is filename 9 | '--print-freq', '1', 10 | '--dataset', 'charadesrgb', 11 | '--arch', 'inception_v3', 12 | '--inputsize','299', 13 | '--lr', '1e-3', 14 | '--batch-size', '64', 15 | '--train-size', '0.1', 16 | '--val-size', '0.1', 17 | '--cache-dir', '/nfs.yoda/gsigurds/ai2/caches/', 18 | 
'--pretrained', 19 | #'--evaluate', 20 | ] 21 | sys.argv.extend(args) 22 | main() 23 | -------------------------------------------------------------------------------- /pytorch/exp/rgbnet_resnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | #sys.path.insert(0, '..') 4 | sys.path.insert(0, '.') 5 | from main import main 6 | 7 | args = [ 8 | '--name', __file__.split('/')[-1].split('.')[0], # name is filename 9 | '--print-freq', '1', 10 | '--dataset', 'charadesrgb', 11 | '--arch', 'resnet152', 12 | '--lr', '1e-3', 13 | '--batch-size', '50', 14 | '--train-size', '0.1', 15 | '--val-size', '0.1', 16 | '--cache-dir', '/nfs.yoda/gsigurds/ai2/caches/', 17 | '--pretrained', 18 | #'--evaluate', 19 | ] 20 | sys.argv.extend(args) 21 | main() 22 | -------------------------------------------------------------------------------- /pytorch/exp/rgbnet_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import sys 3 | #sys.path.insert(0, '..') 4 | sys.path.insert(0, '.') 5 | from main import main 6 | 7 | args = [ 8 | '--name', __file__.split('/')[-1].split('.')[0], # name is filename 9 | '--print-freq', '1', 10 | '--dataset', 'charadesrgb', 11 | '--arch', 'vgg16', 12 | '--lr', '1e-3', 13 | '--batch-size', '64', 14 | '--train-size', '0.1', 15 | '--val-size', '0.1', 16 | '--cache-dir', '/nfs.yoda/gsigurds/ai2/caches/', 17 | '--pretrained', 18 | '--resume', './twostream_rgb.pth.tar', 19 | '--evaluate', 20 | ] 21 | sys.argv.extend(args) 22 | main() 23 | -------------------------------------------------------------------------------- /pytorch/get_alreadytrained.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to download pretrained pytorch models on Charades 3 | # Approximately equivalent to models obtained by running exp/rgbnet.py 4 | # 5 | # The rgb model was obtained after 7 epochs (epoch-size 0.1) 6 | # The rgb model has a classification accuracy of 18.6% mAP (via charades_v1_classify.m) 7 | # Notice that this is an improvement over the Torch RGB model 8 | # The flow model was converted directly from the Charades Torch codebase (../torch/) 9 | # The flow model has a classification accuracy of 15.4% mAP (via charades_v1_classify.m) 10 | # 11 | # vgg16flow_ucf101.pth is a converted model from Torch that was pretrained on UCF101 12 | # and is used as an initialization for the flow model 13 | # 14 | # Combining the predictions (submission files) of those models using combine_rgb_flow.py 15 | # yields a final classification accuracy of 20.6% mAP (via charades_v1_classify.m) 16 | # 17 | # Additionally we include rgb-streams fine-tuned from resnet and inception pretrained on ImageNet 18 | # ResNet-152 (exp/rgbnet_resnet.py): 22.8% mAP (via charades_v1_classify.m) 19 | # Inception_v3 (exp/rgbnet_inception.py): 22.7% mAP (via charades_v1_classify.m) 20 | 21 | wget -O twostream_rgb.pth.tar https://www.dropbox.com/s/p457h2ifi6v1qdz/twostream_rgb.pth.tar?dl=1 22 | wget -O twostream_flow.pth https://www.dropbox.com/s/m1hkeiwjtndt26z/twostream_flow.pth?dl=1 23 | wget -O vgg16flow_ucf101.pth https://www.dropbox.com/s/qlr5aty2jz4dq5o/vgg16flow_ucf101.pth?dl=1 24 | wget -O resnet_rgb.pth.tar https://www.dropbox.com/s/iy9fmk0r1a3edoz/resnet_rgb.pth.tar?dl=1 25 | wget -O inception_rgb.pth.tar https://www.dropbox.com/s/whxikophm7xqchb/inception_rgb.pth.tar?dl=1 26 | 
--------------------------------------------------------------------------------
/pytorch/main.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python

"""Charades activity recognition baseline code
Can be run directly or through config scripts under exp/

Gunnar Sigurdsson, 2018
"""
import torch
import numpy as np
import random
import train
from models import create_model
from datasets import get_dataset
import checkpoints
from opts import parse
from utils import tee


def seed(manualseed):
    random.seed(manualseed)
    np.random.seed(manualseed)
    torch.manual_seed(manualseed)
    torch.cuda.manual_seed(manualseed)


best_mAP = 0


def main():
    global opt, best_mAP
    opt = parse()
    tee.Tee(opt.cache+'/log.txt')
    print(vars(opt))
    seed(opt.manual_seed)

    model, criterion, optimizer = create_model(opt)
    if opt.resume:
        best_mAP = checkpoints.load(opt, model, optimizer)
    print(model)
    trainer = train.Trainer()
    train_loader, val_loader, valvideo_loader = get_dataset(opt)

    if opt.evaluate:
        trainer.validate(val_loader, model, criterion, -1, opt)
        trainer.validate_video(valvideo_loader, model, -1, opt)
        return

    for epoch in range(opt.start_epoch, opt.epochs):
        if opt.distributed:
            trainer.train_sampler.set_epoch(epoch)
        top1, top5 = trainer.train(train_loader, model, criterion, optimizer, epoch, opt)
        top1val, top5val = trainer.validate(val_loader, model, criterion, epoch, opt)
        mAP = trainer.validate_video(valvideo_loader, model, epoch, opt)
        is_best = mAP > best_mAP
        best_mAP = max(mAP, best_mAP)
        scores = {'top1train': top1, 'top5train': top5, 'top1val': top1val, 'top5val': top5val, 'mAP': mAP}
        checkpoints.save(epoch, opt, model, optimizer, is_best, scores)


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/pytorch/models/__init__.py:
--------------------------------------------------------------------------------
"""
Initialize the model module
New models can be defined by adding scripts under models/
"""
import torch
import torch.nn as nn
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.distributed as dist
import torchvision.models as tmodels
import importlib


def create_model(args):
    if args.arch in tmodels.__dict__:  # torchvision models
        if args.pretrained:
            print("=> using pre-trained model '{}'".format(args.arch))
            model = tmodels.__dict__[args.arch](pretrained=True)
            model = model.cuda()
        else:
            print("=> creating model '{}'".format(args.arch))
            model = tmodels.__dict__[args.arch]()
    else:  # defined as script in this directory
        model = importlib.import_module('.'+args.arch, package='models').model
        if not args.pretrained_weights == '':
            print('loading pretrained-weights from {}'.format(args.pretrained_weights))
            model.load_state_dict(torch.load(args.pretrained_weights))

    # replace the last layer with a fresh args.nclass-way classifier
    if hasattr(model, 'classifier'):
        newcls = list(model.classifier.children())
        newcls = newcls[:-1] + [nn.Linear(newcls[-1].in_features, args.nclass).cuda()]
        model.classifier = nn.Sequential(*newcls)
    elif hasattr(model, 'fc'):
        model.fc = nn.Linear(model.fc.in_features, args.nclass)
        if hasattr(model, 'AuxLogits'):
            # inception_v3 has an auxiliary classifier head that must be resized too
            model.AuxLogits.fc = nn.Linear(model.AuxLogits.fc.in_features, args.nclass)
    else:
        newcls = list(model.children())
        if hasattr(model, 'in_features'):
            in_features = model.in_features
        else:
            in_features = newcls[-1].in_features
        newcls = newcls[:-1] + [nn.Linear(in_features, args.nclass).cuda()]
        model = nn.Sequential(*newcls)

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size)
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if hasattr(model, 'features'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # define loss function and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    cudnn.benchmark = True
    return model, criterion, optimizer
--------------------------------------------------------------------------------
/pytorch/models/vgg16flow.py:
--------------------------------------------------------------------------------

import torch
import torch.nn as nn
from torch.autograd import Variable

class LambdaBase(nn.Sequential):
    def __init__(self, fn, *args):
        super(LambdaBase, self).__init__(*args)
        self.lambda_func = fn

    def forward_prepare(self, input):
        output = []
        for module in self._modules.values():
            output.append(module(input))
        return output if output else input

class Lambda(LambdaBase):
    def forward(self, input):
        return self.lambda_func(self.forward_prepare(input))


model = nn.Sequential(  # Sequential,
    nn.Conv2d(20,64,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(64,64,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
    nn.Conv2d(64,128,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(128,128,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
    nn.Conv2d(128,256,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(256,256,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
    nn.Conv2d(256,512,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
    nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.Conv2d(512,512,(3, 3),(1, 1),(1, 1)),
    nn.ReLU(),
    nn.MaxPool2d((2, 2),(2, 2),(0, 0),ceil_mode=True),
    Lambda(lambda x: x.view(x.size(0),-1)),  # View,
    nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(25088,4096)),  # Linear,
    nn.ReLU(),
    nn.Dropout(0.9),
    nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,4096)),  # Linear,
    nn.ReLU(),
    nn.Dropout(0.8),
    nn.Sequential(Lambda(lambda x: x.view(1,-1) if 1==len(x.size()) else x ),nn.Linear(4096,101)),  # Linear,
)
model.in_features = 4096
--------------------------------------------------------------------------------
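A quick shape check clarifies the flow stream's input convention: charadesflow.py stacks STACK=10 consecutive flow frames, each contributing an x- and a y-channel, hence the 20 input channels above. A sketch (assumes a recent PyTorch; code of this era would wrap the tensor in a Variable):

```python
# Sketch: verify the expected input/output shapes of the flow stream.
import torch
from models.vgg16flow import model  # run from the pytorch/ directory

batch = torch.randn(2, 20, 224, 224)  # (batch, 2*STACK flow channels, height, width)
out = model(batch)
print(out.size())  # (2, 101): the UCF101 head, swapped for 157 classes by create_model
```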
/pytorch/opts.py: -------------------------------------------------------------------------------- 1 | """ Define and parse commandline arguments """ 2 | import argparse 3 | import os 4 | 5 | 6 | def parse(): 7 | print('parsing arguments') 8 | parser = argparse.ArgumentParser(description='PyTorch Charades Training') 9 | parser.add_argument('--data', metavar='DIR', default='/scratch/gsigurds/Charades_v1_rgb/', 10 | help='path to dataset') 11 | parser.add_argument('--dataset', metavar='DIR', default='fake', 12 | help='name of dataset under datasets/') 13 | parser.add_argument('--train-file', default='./Charades_v1_train.csv', type=str) 14 | parser.add_argument('--val-file', default='./Charades_v1_test.csv', type=str) 15 | parser.add_argument('--arch', '-a', metavar='ARCH', default='alexnet', 16 | help='model architecture: ') 17 | parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', 18 | help='number of data loading workers (default: 4)') 19 | parser.add_argument('--epochs', default=20, type=int, metavar='N', 20 | help='number of total epochs to run') 21 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 22 | help='manual epoch number (useful on restarts)') 23 | parser.add_argument('-b', '--batch-size', default=256, type=int, 24 | metavar='N', help='mini-batch size (default: 256)') 25 | parser.add_argument('--lr', '--learning-rate', default=1e-3, type=float, 26 | metavar='LR', help='initial learning rate') 27 | parser.add_argument('--lr-decay-rate',default=6, type=int) 28 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 29 | help='momentum') 30 | parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float, 31 | metavar='W', help='weight decay (default: 1e-4)') 32 | parser.add_argument('--print-freq', '-p', default=10, type=int, 33 | metavar='N', help='print frequency (default: 10)') 34 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 35 | help='path to latest checkpoint (default: none)') 36 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 37 | help='evaluate model on validation set') 38 | parser.add_argument('--pretrained', dest='pretrained', action='store_true', 39 | help='use pre-trained model') 40 | parser.add_argument('--pretrained-weights', default='', type=str) 41 | parser.add_argument('--inputsize', default=224, type=int) 42 | parser.add_argument('--world-size', default=1, type=int, 43 | help='number of distributed processes') 44 | parser.add_argument('--manual-seed', default=0, type=int) 45 | parser.add_argument('--dist-url', default='tcp://224.66.41.62:23456', type=str, 46 | help='url used to set up distributed training') 47 | parser.add_argument('--dist-backend', default='gloo', type=str, 48 | help='distributed backend') 49 | parser.add_argument('--train-size', default=1.0, type=float) 50 | parser.add_argument('--val-size', default=1.0, type=float) 51 | parser.add_argument('--cache-dir', default='./cache/', type=str) 52 | parser.add_argument('--name', default='test', type=str) 53 | parser.add_argument('--nclass', default=157, type=int) 54 | parser.add_argument('--accum-grad', default=4, type=int) 55 | args = parser.parse_args() 56 | args.distributed = args.world_size > 1 57 | args.cache = args.cache_dir+args.name+'/' 58 | if not os.path.exists(args.cache): 59 | os.makedirs(args.cache) 60 | 61 | return args 62 | -------------------------------------------------------------------------------- /pytorch/train.py: 
-------------------------------------------------------------------------------- 1 | """ Defines the Trainer class which handles train/validation/validation_video 2 | """ 3 | import time 4 | import torch 5 | import itertools 6 | import numpy as np 7 | from utils import map 8 | 9 | 10 | class AverageMeter(object): 11 | """Computes and stores the average and current value""" 12 | 13 | def __init__(self): 14 | self.reset() 15 | 16 | def reset(self): 17 | self.val = 0 18 | self.avg = 0 19 | self.sum = 0 20 | self.count = 0 21 | 22 | def update(self, val, n=1): 23 | self.val = val 24 | self.sum += val * n 25 | self.count += n 26 | self.avg = self.sum / self.count 27 | 28 | 29 | def adjust_learning_rate(startlr, decay_rate, optimizer, epoch): 30 | """Sets the learning rate to the initial LR decayed by 10 every 30 epochs""" 31 | lr = startlr * (0.1 ** (epoch // decay_rate)) 32 | for param_group in optimizer.param_groups: 33 | param_group['lr'] = lr 34 | 35 | 36 | def accuracy(output, target, topk=(1,)): 37 | """Computes the precision@k for the specified values of k""" 38 | maxk = max(topk) 39 | batch_size = target.size(0) 40 | 41 | _, pred = output.topk(maxk, 1, True, True) 42 | pred = pred.t() 43 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 44 | 45 | res = [] 46 | for k in topk: 47 | correct_k = correct[:k].view(-1).float().sum(0, keepdim=True) 48 | res.append(correct_k.mul_(100.0 / batch_size)) 49 | return res 50 | 51 | 52 | def submission_file(ids, outputs, filename): 53 | """ write list of ids and outputs to filename""" 54 | with open(filename, 'w') as f: 55 | for vid, output in zip(ids, outputs): 56 | scores = ['{:g}'.format(x) 57 | for x in output] 58 | f.write('{} {}\n'.format(vid, ' '.join(scores))) 59 | 60 | 61 | class Trainer(): 62 | def train(self, loader, model, criterion, optimizer, epoch, args): 63 | adjust_learning_rate(args.lr, args.lr_decay_rate, optimizer, epoch) 64 | batch_time = AverageMeter() 65 | data_time = AverageMeter() 66 | losses = AverageMeter() 67 | top1 = AverageMeter() 68 | top5 = AverageMeter() 69 | 70 | # switch to train mode 71 | model.train() 72 | optimizer.zero_grad() 73 | 74 | def part(x): return itertools.islice(x, int(len(x)*args.train_size)) 75 | end = time.time() 76 | for i, (input, target, meta) in enumerate(part(loader)): 77 | data_time.update(time.time() - end) 78 | 79 | target = target.long().cuda(async=True) 80 | input_var = torch.autograd.Variable(input.cuda()) 81 | target_var = torch.autograd.Variable(target) 82 | output = model(input_var) 83 | loss = None 84 | # for nets that have multiple outputs such as inception 85 | if isinstance(output, tuple): 86 | loss = sum((criterion(o,target_var) for o in output)) 87 | output = output[0] 88 | else: 89 | loss = criterion(output, target_var) 90 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 91 | losses.update(loss.data[0], input.size(0)) 92 | top1.update(prec1[0], input.size(0)) 93 | top5.update(prec5[0], input.size(0)) 94 | 95 | loss.backward() 96 | if i % args.accum_grad == args.accum_grad-1: 97 | print('updating parameters') 98 | optimizer.step() 99 | optimizer.zero_grad() 100 | 101 | batch_time.update(time.time() - end) 102 | end = time.time() 103 | 104 | if i % args.print_freq == 0: 105 | print('Epoch: [{0}][{1}/{2}({3})]\t' 106 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 107 | 'Data {data_time.val:.3f} ({data_time.avg:.3f})\t' 108 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 109 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 110 | 'Prec@5 {top5.val:.3f} 
({top5.avg:.3f})'.format( 111 | epoch, i, int( 112 | len(loader)*args.train_size), len(loader), 113 | batch_time=batch_time, data_time=data_time, loss=losses, top1=top1, top5=top5)) 114 | return top1.avg,top5.avg 115 | 116 | def validate(self, loader, model, criterion, epoch, args): 117 | batch_time = AverageMeter() 118 | losses = AverageMeter() 119 | top1 = AverageMeter() 120 | top5 = AverageMeter() 121 | 122 | # switch to evaluate mode 123 | model.eval() 124 | 125 | def part(x): return itertools.islice(x, int(len(x)*args.val_size)) 126 | end = time.time() 127 | for i, (input, target, meta) in enumerate(part(loader)): 128 | target = target.long().cuda(async=True) 129 | input_var = torch.autograd.Variable(input.cuda(), volatile=True) 130 | target_var = torch.autograd.Variable(target, volatile=True) 131 | output = model(input_var) 132 | loss = criterion(output, target_var) 133 | 134 | prec1, prec5 = accuracy(output.data, target, topk=(1, 5)) 135 | losses.update(loss.data[0], input.size(0)) 136 | top1.update(prec1[0], input.size(0)) 137 | top5.update(prec5[0], input.size(0)) 138 | batch_time.update(time.time() - end) 139 | end = time.time() 140 | 141 | if i % args.print_freq == 0: 142 | print('Test: [{0}/{1} ({2})]\t' 143 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t' 144 | 'Loss {loss.val:.4f} ({loss.avg:.4f})\t' 145 | 'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t' 146 | 'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format( 147 | i, int(len(loader)*args.val_size), len(loader), 148 | batch_time=batch_time, loss=losses, 149 | top1=top1, top5=top5)) 150 | 151 | print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}' 152 | .format(top1=top1, top5=top5)) 153 | 154 | return top1.avg,top5.avg 155 | 156 | def validate_video(self, loader, model, epoch, args): 157 | """ Run video-level validation on the Charades test set""" 158 | batch_time = AverageMeter() 159 | outputs = [] 160 | gts = [] 161 | ids = [] 162 | 163 | # switch to evaluate mode 164 | model.eval() 165 | 166 | end = time.time() 167 | for i, (input, target, meta) in enumerate(loader): 168 | target = target.long().cuda(async=True) 169 | assert target[0,:].eq(target[1,:]).all(), "val_video not synced" 170 | input_var = torch.autograd.Variable(input.cuda(), volatile=True) 171 | output = model(input_var) 172 | output = torch.nn.Softmax(dim=1)(output) 173 | 174 | # store predictions 175 | output_video = output.mean(dim=0) 176 | outputs.append(output_video.data.cpu().numpy()) 177 | gts.append(target[0,:]) 178 | ids.append(meta['id'][0]) 179 | batch_time.update(time.time() - end) 180 | end = time.time() 181 | 182 | if i % args.print_freq == 0: 183 | print('Test2: [{0}/{1}]\t' 184 | 'Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format( 185 | i, len(loader), batch_time=batch_time)) 186 | #mAP, _, ap = map.map(np.vstack(outputs), np.vstack(gts)) 187 | mAP, _, ap = map.charades_map(np.vstack(outputs), np.vstack(gts)) 188 | print(ap) 189 | print(' * mAP {:.3f}'.format(mAP)) 190 | submission_file( 191 | ids, outputs, '{}/epoch_{:03d}.txt'.format(args.cache, epoch+1)) 192 | return mAP 193 | -------------------------------------------------------------------------------- /pytorch/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gsig/charades-algorithms/927794cd04c588f1e749e96f5c0e69d81a1576e0/pytorch/utils/__init__.py -------------------------------------------------------------------------------- /pytorch/utils/map.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def map(submission_array, gt_array): 5 | """ Returns mAP, weighted mAP, and AP array """ 6 | m_aps = [] 7 | n_classes = submission_array.shape[1] 8 | for oc_i in range(n_classes): 9 | sorted_idxs = np.argsort(-submission_array[:, oc_i]) 10 | tp = gt_array[:, oc_i][sorted_idxs] == 1 11 | fp = np.invert(tp) 12 | n_pos = tp.sum() 13 | if n_pos < 0.1: 14 | m_aps.append(float('nan')) 15 | continue 16 | fp.sum() 17 | f_pcs = np.cumsum(fp) 18 | t_pcs = np.cumsum(tp) 19 | prec = t_pcs / (f_pcs+t_pcs).astype(float) 20 | avg_prec = 0 21 | for i in range(submission_array.shape[0]): 22 | if tp[i]: 23 | avg_prec += prec[i] 24 | m_aps.append(avg_prec / n_pos.astype(float)) 25 | m_aps = np.array(m_aps) 26 | m_ap = np.mean(m_aps) 27 | w_ap = (m_aps * gt_array.sum(axis=0) / gt_array.sum().sum().astype(float)) 28 | return m_ap, w_ap, m_aps 29 | 30 | 31 | def charades_map(submission_array, gt_array): 32 | """ 33 | Approximate version of the charades evaluation function 34 | For precise numbers, use the submission file with the official matlab script 35 | """ 36 | fix = submission_array.copy() 37 | empty = np.sum(gt_array, axis=1)==0 38 | fix[empty, :] = np.NINF 39 | return map(fix, gt_array) 40 | -------------------------------------------------------------------------------- /pytorch/utils/tee.py: -------------------------------------------------------------------------------- 1 | """ 2 | Implements a crude stdout-to-file redirect for keep history of experiments 3 | The following code initializes the redirect: 4 | import tee 5 | tee.Tee(filename) 6 | """ 7 | import logging 8 | import sys 9 | 10 | 11 | class StreamToLogger(object): 12 | def __init__(self, stream, logger, log_level=logging.INFO): 13 | self.logger = logger 14 | self.log_level = log_level 15 | self.linebuf = '' 16 | self.stream = stream 17 | 18 | def write(self, buf): 19 | self.stream.write(buf) 20 | for line in buf.rstrip().splitlines(): 21 | self.logger.log(self.log_level, line.rstrip()) 22 | 23 | def flush(self): 24 | self.stream.flush() 25 | 26 | 27 | class Tee(object): 28 | def __init__(self, filename): 29 | self.filename = filename 30 | logging.basicConfig( 31 | level=logging.DEBUG, 32 | format='%(asctime)s:%(message)s', 33 | filename=filename, 34 | filemode='a' 35 | ) 36 | stdout_logger = logging.getLogger('STDOUT') 37 | sl = StreamToLogger(sys.stdout, stdout_logger, logging.INFO) 38 | sys.stdout = sl 39 | 40 | stderr_logger = logging.getLogger('STDERR') 41 | sl = StreamToLogger(sys.stderr, stderr_logger, logging.ERROR) 42 | sys.stderr = sl 43 | print "Logging to file {}".format(filename) 44 | -------------------------------------------------------------------------------- /torch/INSTALL.md: -------------------------------------------------------------------------------- 1 | Torch ResNet Installation 2 | ========================= 3 | 4 | This is the suggested way to install the Torch ResNet dependencies on [Ubuntu 14.04+](http://www.ubuntu.com/): 5 | * NVIDIA CUDA 7.0+ 6 | * NVIDIA cuDNN v4 7 | * Torch 8 | * ImageNet dataset 9 | 10 | ## Requirements 11 | * NVIDIA GPU with compute capability 3.5 or above 12 | 13 | ## Install CUDA 14 | 1. Install the `build-essential` package: 15 | ```bash 16 | sudo apt-get install build-essential 17 | ``` 18 | 19 | 2. If you are using a Virtual Machine (like Amazon EC2 instances), install: 20 | ```bash 21 | sudo apt-get update 22 | sudo apt-get install linux-generic 23 | ``` 24 | 25 | 3. 
Download the CUDA .deb file for Linux Ubuntu 14.04 64-bit from: https://developer.nvidia.com/cuda-downloads. 26 | The file will be named something like `cuda-repo-ubuntu1404-7-5-local_7.5-18_amd64.deb` 27 | 28 | 4. Install CUDA from the .deb file: 29 | ```bash 30 | sudo dpkg -i cuda-repo-ubuntu1404-7-5-local_7.5-18_amd64.deb 31 | sudo apt-get update 32 | sudo apt-get install cuda 33 | echo "export PATH=/usr/local/cuda/bin/:\$PATH; export LD_LIBRARY_PATH=/usr/local/cuda/lib64/:\$LD_LIBRARY_PATH; " >>~/.bashrc && source ~/.bashrc 34 | ``` 35 | 36 | 4. Restart your computer 37 | 38 | ## Install cuDNN v4 39 | 1. Download cuDNN v4 from https://developer.nvidia.com/cuDNN (requires registration). 40 | The file will be named something like `cudnn-7.0-linux-x64-v4.0-rc.tgz`. 41 | 42 | 2. Extract the file to `/usr/local/cuda`: 43 | ```bash 44 | tar -xvf cudnn-7.0-linux-x64-v4.0-rc.tgz 45 | sudo cp cuda/include/*.h /usr/local/cuda/include 46 | sudo cp cuda/lib64/*.so* /usr/local/cuda/lib64 47 | ``` 48 | 49 | ## Install Torch 50 | 1. Install the Torch dependencies: 51 | ```bash 52 | curl -sk https://raw.githubusercontent.com/torch/ezinstall/master/install-deps | bash -e 53 | ``` 54 | 55 | 2. Install Torch in a local folder: 56 | ```bash 57 | git clone https://github.com/torch/distro.git ~/torch --recursive 58 | cd ~/torch; ./install.sh 59 | ``` 60 | 61 | If you want to uninstall torch, you can use the command: `rm -rf ~/torch` 62 | 63 | ## Install the Torch cuDNN v4 bindings 64 | ```bash 65 | git clone -b R4 https://github.com/soumith/cudnn.torch.git 66 | cd cudnn.torch; luarocks make 67 | ``` 68 | 69 | -------------------------------------------------------------------------------- /torch/LICENSE: -------------------------------------------------------------------------------- 1 | BSD License 2 | 3 | For fb.resnet.torch software 4 | 5 | Copyright (c) 2016, Facebook, Inc. All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | * Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 13 | * Redistributions in binary form must reproduce the above copyright notice, 14 | this list of conditions and the following disclaimer in the documentation 15 | and/or other materials provided with the distribution. 16 | 17 | * Neither the name Facebook nor the names of its contributors may be used to 18 | endorse or promote products derived from this software without specific 19 | prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | 
-------------------------------------------------------------------------------- /torch/README.md: --------------------------------------------------------------------------------
 1 | ## Charades Starter Code for Activity Classification and Localization
 2 | 
 3 | Contributor: Gunnar Atli Sigurdsson
 4 | 
 5 | **New:** extension of this framework to the deep CRF model on Charades for *Asynchronous Temporal Fields for Action Recognition*: https://github.com/gsig/temporal-fields
 6 | 
 7 | * This code implements a Two-Stream network in Torch
 8 | * This code implements a Two-Stream+LSTM network in Torch
 9 | * This code is built on the Res-Net Torch source code: github.com/facebook/fb.resnet.torch
10 | * This code awkwardly hacks said code to work as Two-Stream/LSTM
11 | * Some functionality from the original code may work (optnet)
12 | * Some functionality from the original code may not work (resnet)
13 | 
14 | The code replicates the 'Two-Stream Extended' and 'Two-Stream+LSTM' baselines found in:
15 | ```
16 | @inproceedings{sigurdsson2017asynchronous,
17 | author = {Gunnar A. Sigurdsson and Santosh Divvala and Ali Farhadi and Abhinav Gupta},
18 | title = {Asynchronous Temporal Fields for Action Recognition},
19 | booktitle={The IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
20 | year={2017},
21 | pdf = {http://arxiv.org/pdf/1612.06371.pdf},
22 | code = {https://github.com/gsig/temporal-fields},
23 | }
24 | ```
25 | which is in turn based on "Two-stream convolutional networks for action recognition in videos" by Simonyan and Zisserman, and "Beyond Short Snippets: Deep Networks for Video Classification" by Joe Yue-Hei Ng et al.
26 | 
27 | Combining the predictions (submission files) of those models using combine_rgb_flow.py
28 | yields a final classification accuracy of 18.9% mAP (Two-Stream) and 19.8% (LSTM) on Charades (evaluated with charades_v1_classify.m)
29 | 
30 | 
31 | ## Technical Overview:
32 | 
33 | The code is organized to train a two-stream network. Two independent networks are trained: one RGB network and one Flow network.
34 | This code parses the training data into pairs of an image (or a stack of flow images) and a label for a single activity class. This forms a softmax training setup like a standard CNN. The network is a VGG-16 network. For RGB it is pretrained on ImageNet, and for Flow it is pretrained on UCF101. The pretrained networks can be downloaded with the scripts in this directory.
35 | For testing, the network uses a batch size of 25, scores all images, and pools the output to make a classification prediction, or uses all 25 outputs for localization.
36 | 
37 | All outputs are stored in the cacheDir under checkpoints/. This includes epoch*.txt, which is the classification output, and localize*.txt, which is the localization output (note that you need to enable this in the options).
38 | Those output files can be combined after training with the Python scripts in this directory.
39 | All output files can be scored with the official MATLAB evaluation script provided with the Charades dataset.
40 | 
41 | Requirements:
42 | * csvigo: luarocks install csvigo
43 | * loadcaffe: luarocks install loadcaffe
44 | * optnet: luarocks install optnet
45 | (The flow net requires optnet to converge with the current default parameter settings)
46 | 
47 | Optional requirements:
48 | * Facebook Lua Libraries, for speedups and fb.debugger, a great debugger
49 | Please refer to the original res-net codebase for more information.
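For orientation before the steps below, here is a sketch of what an experiment file under exp/ might look like. The option names correspond to fields the code actually reads (opt.dataset, opt.data, opt.trainfile, opt.testfile, opt.setup, opt.batchSize, opt.epochSize; see opts.lua for the authoritative list), but the file itself and all values are illustrative rather than a copy of exp/rgbnet.lua:

```lua
-- Hypothetical experiment file (sketch only; see exp/rgbnet.lua for a real one).
-- It fills in command-line style options and hands control to main.lua.
arg = {
   '-dataset', 'charades',                          -- selects datasets/charades.lua
   '-data', '/path/to/Charades_v1_rgb/',            -- directory of extracted frames
   '-trainfile', '/path/to/Charades_v1_train.csv',  -- annotation csv files
   '-testfile', '/path/to/Charades_v1_test.csv',
   '-setup', 'softmax',                             -- train on (frame,label) pairs
   '-batchSize', '64',
   '-epochSize', '0.1',                             -- fraction of training data per epoch
   '-cacheDir', '/path/to/checkpoints/',
}
dofile 'main.lua'
```

If this sketch and the checked-in experiment files disagree, trust the files under exp/.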
50 | 
51 | 
52 | ## Steps to train your own two-stream network on Charades:
53 | 
54 | 1. Download the Charades Annotations (allenai.org/plato/charades/)
55 | 2. Download the Charades RGB and/or Flow frames (allenai.org/plato/charades/)
56 | 3. Download the ImageNet/UCF101 pre-trained Image and Flow models using ./get_models.sh
57 | 4. Duplicate and edit one of the experiment files under exp/ with appropriate parameters. For additional parameters, see opts.lua
58 | 5. Run an experiment by calling dofile 'exp/rgbnet.lua' where rgbnet.lua is your experiment file
59 | 6. The checkpoints/logfiles/outputs are stored in your specified cache directory.
60 | 7. Combine one RGB output file and one Flow output file with combine_rgb_flow.py to generate a submission file
61 | 8. Evaluate the submission file with the Charades_v1_classify.m or Charades_v1_localize.m evaluation scripts
62 | 9. Build off the code, cite our papers, and say hi to us at CVPR.
63 | 
64 | Good luck!
65 | 
66 | 
67 | ## Pretrained networks:
68 | 
69 | While the RGB net can be trained in a day on a modern GPU, the flow net requires nontrivial IO and time to converge. For your convenience, we provide RGB and Flow models already trained on Charades using exp/rgbnet.lua and exp/flownet.lua:
70 | 
71 | https://www.dropbox.com/s/o7afkhw52rqr48g/twostream_flow.t7?dl=1
72 | https://www.dropbox.com/s/bo9rv32zaxojsmz/twostream_rgb.t7?dl=1
73 | 
74 | * The flow model was obtained after 31 epochs (epochSize=0.2)
75 | * The flow model has a classification accuracy of 15.4% mAP (evaluated with charades_v1_classify.m)
76 | * The rgb model was obtained after 6 epochs (epochSize=0.1)
77 | * The rgb model has a classification accuracy of 15.6% mAP (evaluated with charades_v1_classify.m)
78 | 
79 | Combining the predictions (submission files) of those models using combine_rgb_flow.py
80 | yields a final classification accuracy of 18.9% mAP (evaluated with charades_v1_classify.m)
81 | 
82 | To fine-tune those models, or run experiments, please see exp/rgbnet_resume.lua, exp/rgbnet_test.lua, exp/flownet_resume.lua, and exp/flownet_test.lua
83 | 
84 | Charades submission files are available for multiple baselines at https://github.com/gsig/temporal-fields
85 | 
86 | ## Two-Stream+LSTM details
87 | 
88 | We also provide pre-trained LSTM models using exp/lstmrgbnet.lua and exp/lstmflownet.lua; please see get_alreadytrained_lstm.sh for details.
89 | 
90 | This baseline fine-tunes the previous Two-Stream models with an LSTM on top of fc7. It uses a special loader for Charades (charadessync) that feeds a full video into each batch, to train an LSTM. To accommodate the softmax loss, (frame,label) pairs are randomly sampled for the training set. exp/lstmrgbnet.lua, models/vgg16lstm.lua, and datasets/charadessync-gen.lua contain more details.
91 | 
-------------------------------------------------------------------------------- /torch/checkpoints.lua: --------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2016, Facebook, Inc.
 3 | -- All rights reserved.
 4 | --
 5 | -- This source code is licensed under the BSD-style license found in the
 6 | -- LICENSE file in the root directory of this source tree. An additional grant
 7 | -- of patent rights can be found in the PATENTS file in the same directory.
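-- Usage sketch (an illustrative note, not part of the original header):
-- main.lua is expected to drive this module roughly as follows:
--
--   local checkpoints = require 'checkpoints'
--   local latest, optimState = checkpoints.latest(opt)  -- nil unless resuming
--   -- ... train and test for an epoch ...
--   checkpoints.save(epoch, model, optimState, isBestModel, opt, score)
--
-- where score is the five-element table {train top1, train top5, test top1,
-- test top5, mAP} that modelscore() below writes out as a text file.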
8 | -- 9 | local checkpoint = {} 10 | 11 | local function deepCopy(tbl) 12 | -- creates a copy of a network with new modules and the same tensors 13 | local copy = {} 14 | for k, v in pairs(tbl) do 15 | if type(v) == 'table' then 16 | copy[k] = deepCopy(v) 17 | else 18 | copy[k] = v 19 | end 20 | end 21 | if torch.typename(tbl) then 22 | torch.setmetatable(copy, torch.typename(tbl)) 23 | end 24 | return copy 25 | end 26 | 27 | function checkpoint.latest(opt) 28 | if opt.resume == 'none' then 29 | return nil 30 | end 31 | 32 | local latestPath = paths.concat(opt.resume, 'latest.t7') 33 | if not paths.filep(latestPath) then 34 | return nil 35 | end 36 | 37 | print('=> Loading checkpoint ' .. latestPath) 38 | local latest = torch.load(latestPath) 39 | local optimState = torch.load(paths.concat(opt.resume, latest.optimFile)) 40 | 41 | return latest, optimState 42 | end 43 | 44 | local function modelscore(name,score) 45 | print('dumping score to file') 46 | local out = assert(io.open(name, "w")) 47 | out:write("train top1: " .. score[1] .. "\n") 48 | out:write("train top5: " .. score[2] .. "\n") 49 | out:write("test top1: " .. score[3] .. "\n") 50 | out:write("test top5: " .. score[4] .. "\n") 51 | out:write("mAP: " .. score[5] .. "\n") 52 | out:close() 53 | end 54 | 55 | function checkpoint.save(epoch, model, optimState, isBestModel, opt, score) 56 | -- don't save the DataParallelTable for easier loading on other machines 57 | if torch.type(model) == 'nn.DataParallelTable' then 58 | model = model:get(1) 59 | end 60 | 61 | -- create a clean copy on the CPU without modifying the original network 62 | model = deepCopy(model):float():clearState() 63 | 64 | local modelFile = 'model_' .. epoch .. '.t7' 65 | local optimFile = 'optimState_' .. epoch .. '.t7' 66 | 67 | modelscore(paths.concat(opt.save, string.format("model_%03d.txt",epoch)), score) 68 | torch.save(paths.concat(opt.save, modelFile), model) 69 | torch.save(paths.concat(opt.save, optimFile), optimState) 70 | torch.save(paths.concat(opt.save, 'latest.t7'), { 71 | epoch = epoch, 72 | modelFile = modelFile, 73 | optimFile = optimFile, 74 | }) 75 | modelscore(paths.concat(opt.save, 'latest.txt'), score) 76 | 77 | if isBestModel then 78 | torch.save(paths.concat(opt.save, 'model_best.t7'), model) 79 | modelscore(paths.concat(opt.save, 'model_best.txt'), score) 80 | end 81 | end 82 | 83 | return checkpoint 84 | -------------------------------------------------------------------------------- /torch/dataloader.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
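-- Usage sketch (an illustrative note, not part of the original header):
-- DataLoader.create(opt) returns one loader per split ('train', 'val',
-- 'val2'), and each loader is consumed as an iterator:
--
--   local trainLoader, valLoader, val2Loader = DataLoader.create(opt)
--   for n, sample in trainLoader:run() do
--      -- sample.input:  batch of preprocessed images (nCrops folded into dim 1)
--      -- sample.target: class indices, or an sz x 157 label matrix for 'val2'
--      -- sample.ids:    video ids, used to pool per-video predictions
--   end
--
-- The 'val2' split uses a fixed batch size of 25 frames per video, matching
-- the video-level evaluation described in the README.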
8 | -- 9 | -- Multi-threaded data loader 10 | -- 11 | 12 | local datasets = require 'datasets/init' 13 | local Threads = require 'threads' 14 | Threads.serialization('threads.sharedserialize') 15 | 16 | local M = {} 17 | local DataLoader = torch.class('resnet.DataLoader', M) 18 | 19 | function DataLoader.create(opt) 20 | -- The train and val loader 21 | local loaders = {} 22 | 23 | for i, split in ipairs{'train', 'val', 'val2'} do 24 | local dataset = datasets.create(opt, split) 25 | loaders[i] = M.DataLoader(dataset, opt, split) 26 | end 27 | 28 | return table.unpack(loaders) 29 | end 30 | 31 | function DataLoader:__init(dataset, opt, split) 32 | local manualSeed = opt.manualSeed 33 | local function init() 34 | require('datasets/' .. opt.dataset) 35 | end 36 | local function main(idx) 37 | if manualSeed ~= 0 then 38 | torch.manualSeed(manualSeed + idx) 39 | end 40 | torch.setnumthreads(1) 41 | _G.dataset = dataset 42 | _G.preprocess = dataset:preprocess() 43 | return dataset:size() 44 | end 45 | 46 | local threads, sizes = Threads(opt.nThreads, init, main) 47 | self.nCrops = (split == 'val' and opt.tenCrop) and 10 or 1 48 | self.threads = threads 49 | self.__size = sizes[1][1] 50 | self.split = split 51 | self.synchronous = (opt.dataset=='charadessync') or (opt.dataset=='charadessyncflow') 52 | self.epochSize = tonumber(opt.epochSize) 53 | if self.epochSize and (self.epochSize < 1) then 54 | self.epochSize = torch.floor(self.epochSize * self.__size / opt.batchSize) * opt.batchSize 55 | end 56 | self.testSize = tonumber(opt.testSize) 57 | if self.testSize and (self.testSize < 1) then 58 | self.testSize = torch.floor(self.testSize * self.__size / opt.batchSize) * opt.batchSize 59 | end 60 | if split=='val2' then 61 | self.batchSize = 25 62 | else 63 | self.batchSize = math.floor(opt.batchSize / self.nCrops) 64 | end 65 | end 66 | 67 | function DataLoader:size() 68 | if self.split=='train' and self.epochSize and not (self.epochSize==1) then 69 | return math.ceil(self.epochSize / self.batchSize) 70 | elseif self.split=='val' and self.testSize and not (self.testSize==1) then 71 | return math.ceil(self.testSize / self.batchSize) 72 | else 73 | return math.ceil(self.__size / self.batchSize) 74 | end 75 | end 76 | 77 | function DataLoader:run() 78 | local threads = self.threads 79 | local split = self.split 80 | local size, batchSize = self.__size, self.batchSize 81 | local perm = torch.randperm(size) 82 | if self.split=='train' then 83 | if self.epochSize and not (self.epochSize==1) then 84 | -- Ensure each sample is seen equally often 85 | -- but reduce the epochSize 86 | if not self.perm then 87 | self.perm = torch.randperm(size) 88 | if self.synchronous then self.perm = torch.range(1,size) end 89 | end 90 | if self.perm:size(1) <= self.epochSize then 91 | if self.synchronous then 92 | self.perm = self.perm:cat(torch.range(1,size),1) 93 | else 94 | self.perm = self.perm:cat(torch.randperm(size),1) 95 | end 96 | end 97 | perm = self.perm[{{1,self.epochSize}}] 98 | self.perm = self.perm[{{self.epochSize+1,-1}}] 99 | size = self.epochSize 100 | else 101 | perm = torch.randperm(size) 102 | if self.synchronous then perm = torch.range(1,size) end 103 | end 104 | elseif self.split=='val' then 105 | perm = torch.range(1,size) 106 | if self.testSize and not (self.testSize==1) then 107 | perm = perm[{{1,self.testSize}}] 108 | size = self.testSize 109 | end 110 | elseif self.split=='val2' then 111 | perm = torch.range(1,size) 112 | else 113 | assert(false,'split undefined') 114 | end 115 | 116 | local 
idx, sample = 1, nil 117 | local function enqueue() 118 | while idx <= size and threads:acceptsjob() do 119 | local indices = perm:narrow(1, idx, math.min(batchSize, size - idx + 1)) 120 | threads:addjob( 121 | function(indices, nCrops) 122 | local sz = indices:size(1) 123 | local batch, imageSize 124 | local target 125 | 126 | if split=="val2" then 127 | target = torch.IntTensor(sz,157) 128 | else 129 | target = torch.IntTensor(sz) 130 | end 131 | local names = {} 132 | local ids = {} 133 | local obj = torch.IntTensor(sz) 134 | local verb = torch.IntTensor(sz) 135 | local scene = torch.IntTensor(sz) 136 | for i, idx in ipairs(indices:totable()) do 137 | local sample = _G.dataset:get(idx) 138 | local input = _G.preprocess(sample.input) 139 | if not batch then 140 | imageSize = input:size():totable() 141 | if nCrops > 1 then table.remove(imageSize, 1) end 142 | batch = torch.FloatTensor(sz, nCrops, table.unpack(imageSize)) 143 | end 144 | batch[i]:copy(input) 145 | 146 | if split=="val2" then 147 | target[i]:copy(sample.target) 148 | else 149 | target[i] = sample.target 150 | end 151 | names[i] = sample.name 152 | ids[i] = sample.id 153 | obj[i] = sample.obj and sample.obj or 0 154 | verb[i] = sample.verb and sample.verb or 0 155 | scene[i] = sample.scene and sample.scene or 0 156 | end 157 | collectgarbage() 158 | return { 159 | input = batch:view(sz * nCrops, table.unpack(imageSize)), 160 | target = target, 161 | names = names, 162 | ids = ids, 163 | obj = obj, 164 | verb = verb, 165 | scene = scene, 166 | } 167 | end, 168 | function(_sample_) 169 | sample = _sample_ 170 | end, 171 | indices, 172 | self.nCrops 173 | ) 174 | idx = idx + batchSize 175 | end 176 | end 177 | 178 | local n = 0 179 | local function loop() 180 | enqueue() 181 | if not threads:hasjob() then 182 | return nil 183 | end 184 | threads:dojob() 185 | if threads:haserror() then 186 | threads:synchronize() 187 | end 188 | enqueue() 189 | n = n + 1 190 | return n, sample 191 | end 192 | 193 | return loop 194 | end 195 | 196 | return M.DataLoader 197 | -------------------------------------------------------------------------------- /torch/datasets/README.md: -------------------------------------------------------------------------------- 1 | ## Datasets 2 | 3 | Each dataset consist of two files: `dataset-gen.lua` and `dataset.lua`. The `dataset-gen.lua` is responsible for one-time setup, while 4 | the `dataset.lua` handles the actual data loading. 5 | 6 | ### `dataset-gen.lua` 7 | 8 | The `dataset-gen.lua` performs any necessary one-time setup. For example, the [`cifar10-gen.lua`](cifar10-gen.lua) file downloads the CIFAR-10 dataset, and the [`imagenet-gen.lua`](imagenet-gen.lua) file indexes all the training and validation data. 9 | 10 | The module should have a single function `exec(opt, cacheFile)`. 
11 | - `opt`: the command line options
12 | - `cacheFile`: path to the output cache file
13 | 
14 | ```lua
15 | local M = {}
16 | function M.exec(opt, cacheFile)
17 |    local imageInfo = {}
18 |    -- preprocess dataset, store results in imageInfo, save to cacheFile
19 |    torch.save(cacheFile, imageInfo)
20 | end
21 | return M
22 | ```
23 | 
24 | ### `dataset.lua`
25 | 
26 | The `dataset.lua` should return a class that implements three functions:
27 | - `get(i)`: returns a table containing two entries, `input` and `target`
28 |    - `input`: the training or validation image as a Torch tensor
29 |    - `target`: the image category as a number 1-N
30 | - `size()`: returns the number of entries in the dataset
31 | - `preprocess()`: returns a function that transforms the `input` for data augmentation or input normalization
32 | 
33 | ```lua
34 | local t = require 'datasets/transforms' -- transform helpers used by preprocess() below
35 | local M = {}
36 | local FakeDataset = torch.class('resnet.FakeDataset', M)
37 | 
38 | function FakeDataset:__init(imageInfo, opt, split)
39 |    -- imageInfo: result from dataset-gen.lua
40 |    -- opt: command-line arguments
41 |    -- split: "train" or "val"
42 | end
43 | 
44 | function FakeDataset:get(i)
45 |    return {
46 |       input = torch.Tensor(3, 800, 600):uniform(),
47 |       target = 42,
48 |    }
49 | end
50 | 
51 | function FakeDataset:size()
52 |    -- size of dataset
53 |    return 2000
54 | end
55 | 
56 | function FakeDataset:preprocess()
57 |    -- Scale smaller side to 256 and take 224x224 center-crop
58 |    return t.Compose{
59 |       t.Scale(256),
60 |       t.CenterCrop(224),
61 |    }
62 | end
63 | 
64 | return M.FakeDataset
65 | ```
66 | 
-------------------------------------------------------------------------------- /torch/datasets/charades-gen.lua: --------------------------------------------------------------------------------
 1 | --
 2 | -- Copyright (c) 2016, Facebook, Inc.
 3 | -- All rights reserved.
 4 | --
 5 | -- This source code is licensed under the BSD-style license found in the
 6 | -- LICENSE file in the root directory of this source tree. An additional grant
 7 | -- of patent rights can be found in the PATENTS file in the same directory.
 8 | --
 9 | -- Script to compute the list of Charades filenames and classes for RGB images
10 | --
11 | -- This generates a file gen/charades.t7 which contains the list of all
12 | -- Charades training and validation images and their classes. This script also
13 | -- works for other datasets arranged with the same layout.
14 | --
15 | -- Contributor: Gunnar Atli Sigurdsson
16 | 
17 | local sys = require 'sys'
18 | local ffi = require 'ffi'
19 | 
20 | local M = {}
21 | 
22 | local function parseCSV(filename)
23 |    require 'csvigo'
24 |    print(('Loading csv: %s'):format(filename))
25 |    local all = csvigo.load{path=filename, mode='tidy'}
26 |    local ids = all['id']
27 |    local actionss = all['actions']
28 |    local N = #ids
29 |    local labels = {}
30 |    for i = 1,#ids do
31 |       local id = ids[i]
32 |       local actions = actionss[i]
33 |       local label = {}
34 |       for a in string.gmatch(actions, '([^;]+)') do -- split on ';'
35 |          local a = string.gmatch(a, '([^ ]+)') -- split on ' '
36 |          table.insert(label,{c=a(), s=tonumber(a()), e=tonumber(a())})
37 |       end
38 |       labels[id] = label
39 |    end
40 |    return labels
41 | end
42 | 
43 | 
44 | local function prepare(opt,labels,split)
45 |    require 'sys'
46 |    require 'string'
47 |    local imagePath = torch.CharTensor()
48 |    local imageClass = torch.LongTensor()
49 |    local dir = opt.data
50 |    assert(paths.dirp(dir), 'directory not found: ' .. 
dir) 51 | local imagePaths, imageClasses, ids = {}, {}, {} 52 | local FPS, GAP, testGAP = 24, 4, 25 53 | local e,count = 0, 0 54 | 55 | -- For each video annotation, prepare test files 56 | local imageClasses2 57 | if split=='val_video' then 58 | imageClasses2 = torch.IntTensor(4000000, opt.nClasses):zero() --allocating memory 59 | end 60 | for id,label in pairs(labels) do 61 | e = e+1 62 | if e % 100 == 1 then print(e) end 63 | iddir = dir .. '/' .. id 64 | local f = io.popen(('find -L %s -iname "*.jpg" '):format(iddir)) 65 | if not f then 66 | print('class not found: ' .. id) 67 | print(('find -L %s -iname "*.jpg" '):format(iddir)) 68 | else 69 | local lines = {} 70 | while true do 71 | local line = f:read('*line') 72 | if not line then break end 73 | table.insert(lines,line) 74 | end 75 | local N = #lines 76 | if split=='val_video' then 77 | local target = torch.IntTensor(157,1):zero() 78 | for _,anno in pairs(label) do 79 | target[1+tonumber(string.sub(anno.c,2,-1))] = 1 -- 1-index 80 | end 81 | local tmp = torch.linspace(1,N,testGAP) 82 | for ii = 1,testGAP do 83 | local i = torch.floor(tmp[ii]) 84 | local impath = iddir .. '/' .. id .. '-' .. string.format('%06d',i) .. '.jpg' 85 | count = count + 1 86 | table.insert(imagePaths,impath) 87 | imageClasses2[count]:copy(target) 88 | table.insert(ids,id) 89 | end 90 | elseif opt.setup == 'softmax' then 91 | if #label>0 then 92 | for _,anno in pairs(label) do 93 | for i = 1,N,GAP do 94 | if (anno.s<(i-1)/FPS) and ((i-1)/FPS Generating list of images") 161 | local classList, classToIdx = findClasses(trainDir) 162 | 163 | print(" | finding all validation2 images") 164 | local val2ImagePath, val2ImageClass, val2ids = prepare(opt,labelstest,'val_video') 165 | 166 | print(" | finding all validation images") 167 | local valImagePath, valImageClass, valids = prepare(opt,labelstest,'val') 168 | 169 | print(" | finding all training images") 170 | local trainImagePath, trainImageClass, ids = prepare(opt,labels,'train') 171 | 172 | local info = { 173 | basedir = opt.data, 174 | classList = classList, 175 | train = { 176 | imagePath = trainImagePath, 177 | imageClass = trainImageClass, 178 | ids = ids, 179 | }, 180 | val = { 181 | imagePath = valImagePath, 182 | imageClass = valImageClass, 183 | ids = valids, 184 | }, 185 | val2 = { 186 | imagePath = val2ImagePath, 187 | imageClass = val2ImageClass, 188 | ids = val2ids, 189 | }, 190 | } 191 | 192 | print(" | saving list of images to " .. cacheFile) 193 | torch.save(cacheFile, info) 194 | return info 195 | end 196 | 197 | return M 198 | -------------------------------------------------------------------------------- /torch/datasets/charades.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
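-- Orientation note (added commentary, not part of the original file): get(i)
-- below returns { input = 3xHxW float RGB image, target = class index in
-- 1..157, id = video id }. The (image, label) list is read from the cache
-- that datasets/charades-gen.lua builds out of annotation rows whose
-- 'actions' field looks like "c092 11.90 21.20;c147 0.00 12.60"
-- (class, start seconds, end seconds), and preprocess() supplies the
-- transforms the loader threads apply to every sample.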
8 | -- 9 | -- Charades dataset loader 10 | -- Contributor: Gunnar Atli Sigurdsson 11 | 12 | local image = require 'image' 13 | local paths = require 'paths' 14 | local t = require 'datasets/transforms' 15 | local ffi = require 'ffi' 16 | 17 | local M = {} 18 | local CharadesDataset = torch.class('resnet.CharadesDataset', M) 19 | 20 | function CharadesDataset:__init(imageInfo, opt, split) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = opt.data 25 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir) 26 | end 27 | 28 | function CharadesDataset:get(i) 29 | local path = ffi.string(self.imageInfo.imagePath[i]:data()) 30 | local image = self:_loadImage(paths.concat(self.dir, path)) 31 | local class = self.imageInfo.imageClass[i] 32 | local id = ffi.string(self.imageInfo.ids[i]:data()) 33 | 34 | return { 35 | input = image, 36 | target = class, 37 | id = id 38 | } 39 | end 40 | 41 | function CharadesDataset:_loadImage(path) 42 | local ok, input = pcall(function() 43 | return image.load(path, 3, 'float') 44 | end) 45 | 46 | -- Sometimes image.load fails because the file extension does not match the 47 | -- image format. In that case, use image.decompress on a ByteTensor. 48 | if not ok then 49 | local f = io.open(path, 'r') 50 | assert(f, 'Error reading: ' .. tostring(path)) 51 | local data = f:read('*a') 52 | f:close() 53 | 54 | local b = torch.ByteTensor(string.len(data)) 55 | ffi.copy(b:data(), data, b:size(1)) 56 | 57 | input = image.decompress(b, 3, 'float') 58 | end 59 | 60 | return input 61 | end 62 | 63 | function CharadesDataset:size() 64 | return self.imageInfo.imageClass:size(1) 65 | end 66 | 67 | -- Computed from random subset of ImageNet training images 68 | local meanstd = { 69 | mean = { 103.939/255, 116.779/255, 123.68/255 }, --vgg16 70 | std = { 1.0, 1.0, 1.0 }, -- I don't think caffe normalizes 71 | } 72 | local pca = { 73 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 74 | eigvec = torch.Tensor{ 75 | { -0.5675, 0.7192, 0.4009 }, 76 | { -0.5808, -0.0045, -0.8140 }, 77 | { -0.5836, -0.6948, 0.4203 }, 78 | }, 79 | } 80 | 81 | function CharadesDataset:preprocess() 82 | if self.split == 'train' then 83 | return t.Compose{ 84 | t.RandomSizedCrop(224), 85 | t.ColorJitter({ 86 | brightness = 0.4, 87 | contrast = 0.4, 88 | saturation = 0.4, 89 | }), 90 | t.Lighting(0.1, pca.eigval, pca.eigvec), 91 | t.ColorNormalize(meanstd), 92 | t.HorizontalFlip(0.5), 93 | } 94 | elseif self.split == 'val' then 95 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 96 | return t.Compose{ 97 | t.Scale(256), 98 | t.ColorNormalize(meanstd), 99 | Crop(224), 100 | } 101 | elseif self.split == 'val2' then 102 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 103 | return t.Compose{ 104 | t.Scale(256), 105 | t.ColorNormalize(meanstd), 106 | Crop(224), 107 | } 108 | else 109 | error('invalid split: ' .. self.split) 110 | end 111 | end 112 | 113 | return M.CharadesDataset 114 | -------------------------------------------------------------------------------- /torch/datasets/charadesflow-gen.lua: -------------------------------------------------------------------------------- 1 | -- This source code is licensed under the BSD-style license found in the 2 | -- LICENSE file in the root directory of this source tree. An additional grant 3 | -- of patent rights can be found in the PATENTS file in the same directory. 
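-- Layout note (added commentary): each video directory is expected to hold
-- paired horizontal/vertical flow frames named <id>-NNNNNNx.jpg and
-- <id>-NNNNNNy.jpg. prepare() below therefore halves the jpg count ("to
-- account for x and y") and keeps test frames flowframes+1 short of the last
-- frame, so that charadesflow.lua can always load a full stack of 10
-- consecutive (x,y) pairs, i.e. 20 input channels, for every selected frame.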
4 | -- 5 | -- Script to compute list of Charades filenames and classes 6 | -- 7 | -- This generates a file gen/charadesflow.t7 which contains the list of all 8 | -- Charades training and validation images and their classes. This script also 9 | -- works for other datasets arragned with the same layout. 10 | -- 11 | -- Contributor: Gunnar Atli Sigurdsson 12 | 13 | local sys = require 'sys' 14 | local ffi = require 'ffi' 15 | 16 | local M = {} 17 | 18 | local function parseCSV(filename) 19 | require 'csvigo' 20 | print(('Loading csv: %s'):format(filename)) 21 | local all = csvigo.load{path=filename, mode='tidy'} 22 | local ids = all['id'] 23 | local actionss = all['actions'] 24 | local N = #ids 25 | local labels = {} 26 | for i = 1,#ids do 27 | local id = ids[i] 28 | local actions = actionss[i] 29 | local label = {} 30 | for a in string.gmatch(actions, '([^;]+)') do -- split on ';' 31 | local a = string.gmatch(a, '([^ ]+)') -- split on ' ' 32 | table.insert(label,{c=a(), s=tonumber(a()), e=tonumber(a())}) 33 | end 34 | labels[id] = label 35 | end 36 | return labels 37 | end 38 | 39 | 40 | local function prepare(opt,labels,split) 41 | require 'sys' 42 | require 'string' 43 | local imagePath = torch.CharTensor() 44 | local imageClass = torch.LongTensor() 45 | local dir = opt.data 46 | assert(paths.dirp(dir), 'directory not found: ' .. dir) 47 | local imagePaths = {} 48 | local imageClasses = {} 49 | local ids = {} 50 | local FPS = 24 51 | local GAP = 4 52 | local testGAP = 25 53 | local flowframes = 10 54 | 55 | local e = 0 56 | local count = 0 57 | -- For each video annotation, prepare test files 58 | local imageClasses2 59 | if split=='val_video' then 60 | imageClasses2 = torch.IntTensor(4000000, opt.nClasses):zero() 61 | end 62 | for id,label in pairs(labels) do 63 | e = e+1 64 | if e % 100 == 1 then print(e) end 65 | iddir = dir .. '/' .. id 66 | local f = io.popen(('find -L %s -iname "*.jpg" '):format(iddir)) 67 | if not f then 68 | print('class not found: ' .. id) 69 | print(('find -L %s -iname "*.jpg" '):format(iddir)) 70 | else 71 | local lines = {} 72 | while true do 73 | local line = f:read('*line') 74 | if not line then break end 75 | table.insert(lines,line) 76 | end 77 | local N = torch.floor(#lines/2) -- to account for x and y 78 | if split=='val_video' then 79 | local target = torch.IntTensor(157,1):zero() 80 | for _,anno in pairs(label) do 81 | target[1+tonumber(string.sub(anno.c,2,-1))] = 1 -- 1-index 82 | end 83 | local tmp = torch.linspace(1,N-flowframes-1,testGAP) -- -1 so we don't get bad flow 84 | for ii = 1,testGAP do 85 | local i = torch.floor(tmp[ii]) 86 | local impath = iddir .. '/' .. id .. '-' .. string.format('%06d',i) .. 'x' .. 
'.jpg' 87 | count = count + 1 88 | table.insert(imagePaths,impath) 89 | imageClasses2[count]:copy(target) 90 | table.insert(ids,id) 91 | end 92 | elseif opt.setup == 'softmax' then 93 | if #label>0 then 94 | for _,anno in pairs(label) do 95 | for i = 1,N,GAP do 96 | if (anno.s<(i-1)/FPS) and ((i-1)/FPS Generating list of images") 163 | local classList, classToIdx = findClasses(trainDir) 164 | 165 | print(" | finding all validation2 images") 166 | local val2ImagePath, val2ImageClass, val2ids = prepare(opt,labelstest,'val_video') 167 | 168 | print(" | finding all validation images") 169 | local valImagePath, valImageClass, valids = prepare(opt,labelstest,'val') 170 | 171 | print(" | finding all training images") 172 | local trainImagePath, trainImageClass, ids = prepare(opt,labels,'train') 173 | 174 | local info = { 175 | basedir = opt.data, 176 | classList = classList, 177 | train = { 178 | imagePath = trainImagePath, 179 | imageClass = trainImageClass, 180 | ids = ids, 181 | }, 182 | val = { 183 | imagePath = valImagePath, 184 | imageClass = valImageClass, 185 | ids = valids, 186 | }, 187 | val2 = { 188 | imagePath = val2ImagePath, 189 | imageClass = val2ImageClass, 190 | ids = val2ids, 191 | }, 192 | } 193 | 194 | print(" | saving list of images to " .. cacheFile) 195 | torch.save(cacheFile, info) 196 | return info 197 | end 198 | 199 | return M 200 | -------------------------------------------------------------------------------- /torch/datasets/charadesflow.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet dataset loader 10 | -- 11 | 12 | local image = require 'image' 13 | local paths = require 'paths' 14 | local t = require 'datasets/transforms' 15 | local ffi = require 'ffi' 16 | 17 | local M = {} 18 | local CharadesDataset = torch.class('resnet.CharadesDataset', M) 19 | 20 | function CharadesDataset:__init(imageInfo, opt, split) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = opt.data 25 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir) 26 | end 27 | 28 | function CharadesDataset:get(i) 29 | -- This function loads in 20 consecutive optical flow images (10 x and 10 y images) 30 | -- Follwing the two-stream architecture 31 | local path = ffi.string(self.imageInfo.imagePath[i]:data()) 32 | local image1 = self:_loadImage(paths.concat(self.dir, path)) 33 | local finalimage = torch.Tensor(20,image1:size(2),image1:size(3)) 34 | -- the path is of the format */?????-000000x.jpg 35 | local prefix = string.sub(path,1,#path-6-5) 36 | local number = string.sub(path,#path-6-5+1,#path-5) 37 | for j = 1,10 do 38 | local thispath1 = prefix .. string.format('%06d',number-1+j) .. 'x' .. '.jpg' 39 | local thispath2 = prefix .. string.format('%06d',number-1+j) .. 'y' .. 
'.jpg' 40 | local image1 = self:_loadImage(paths.concat(self.dir, thispath1)) 41 | local image2 = self:_loadImage(paths.concat(self.dir, thispath2)) 42 | finalimage[{(j-1)*2+1,{},{}}] = image1 43 | finalimage[{(j-1)*2+1+1,{},{}}] = image2 44 | end 45 | 46 | local class = self.imageInfo.imageClass[i] 47 | local id = ffi.string(self.imageInfo.ids[i]:data()) 48 | 49 | return { 50 | input = finalimage, 51 | target = class, 52 | id = id 53 | } 54 | end 55 | 56 | function CharadesDataset:_loadImage(path) 57 | local ok, input = pcall(function() 58 | --return image.load(path, 3, 'float') 59 | return image.load(path, 1, 'float') 60 | end) 61 | 62 | -- Sometimes image.load fails because the file extension does not match the 63 | -- image format. In that case, use image.decompress on a ByteTensor. 64 | if not ok then 65 | local f = io.open(path, 'r') 66 | assert(f, 'Error reading: ' .. tostring(path)) 67 | local data = f:read('*a') 68 | f:close() 69 | 70 | local b = torch.ByteTensor(string.len(data)) 71 | ffi.copy(b:data(), data, b:size(1)) 72 | 73 | --input = image.decompress(b, 3, 'float') 74 | input = image.decompress(b, 1, 'float') 75 | end 76 | 77 | return input 78 | end 79 | 80 | function CharadesDataset:size() 81 | return self.imageInfo.imageClass:size(1) 82 | end 83 | 84 | -- Computed from random subset of ImageNet training images 85 | local meanstd = { 86 | mean = { 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255 }, --flow vgg16 87 | std = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }, -- I don't think caffe normalizes 88 | } 89 | local pca = { 90 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 91 | eigvec = torch.Tensor{ 92 | { -0.5675, 0.7192, 0.4009 }, 93 | { -0.5808, -0.0045, -0.8140 }, 94 | { -0.5836, -0.6948, 0.4203 }, 95 | }, 96 | } 97 | 98 | function CharadesDataset:preprocess() 99 | if self.split == 'train' then 100 | return t.Compose{ 101 | t.RandomSizedCrop(224), 102 | --t.ColorJitter({ 103 | -- brightness = 0.4, 104 | -- contrast = 0.4, 105 | -- saturation = 0.4, 106 | --}), 107 | --t.Lighting(0.1, pca.eigval, pca.eigvec), 108 | t.ColorNormalize(meanstd), 109 | t.HorizontalFlip(0.5), 110 | } 111 | elseif self.split == 'val' then 112 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 113 | return t.Compose{ 114 | t.Scale(256), 115 | t.ColorNormalize(meanstd), 116 | Crop(224), 117 | } 118 | elseif self.split == 'val2' then 119 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 120 | return t.Compose{ 121 | t.Scale(256), 122 | t.ColorNormalize(meanstd), 123 | Crop(224), 124 | } 125 | else 126 | error('invalid split: ' .. self.split) 127 | end 128 | end 129 | 130 | return M.CharadesDataset 131 | -------------------------------------------------------------------------------- /torch/datasets/charadessync-gen.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
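-- Sampling note (added commentary): unlike charades-gen.lua, this generator
-- pools all (frame,label) pairs of a video and then draws exactly
-- opt.batchSize of them, sorted by frame index (videos with fewer pairs are
-- skipped). Combined with the sequential (non-shuffled) permutation that
-- dataloader.lua applies when opt.dataset=='charadessync', every batch
-- therefore holds temporally ordered frames from a single video, which is
-- what the LSTM models require.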
8 | -- 9 | -- Script to compute list of Charades filenames and classes rgb images 10 | -- 11 | -- This version is different from charades-gen.lua as this loads videos one by one 12 | -- To train models that require sequential data, such as LSTM 13 | -- 14 | -- This generates a file gen/charades.t7 which contains the list of all 15 | -- Charades training and validation images and their classes. This script also 16 | -- works for other datasets arragned with the same layout. 17 | -- 18 | -- Contributor: Gunnar Atli Sigurdsson 19 | 20 | local sys = require 'sys' 21 | local ffi = require 'ffi' 22 | 23 | local M = {} 24 | 25 | local function parseCSV(filename) 26 | require 'csvigo' 27 | print(('Loading csv: %s'):format(filename)) 28 | local all = csvigo.load{path=filename, mode='tidy'} 29 | local ids = all['id'] 30 | local actionss = all['actions'] 31 | local N = #ids 32 | local labels = {} 33 | for i = 1,#ids do 34 | local id = ids[i] 35 | local actions = actionss[i] 36 | local label = {} 37 | for a in string.gmatch(actions, '([^;]+)') do -- split on ';' 38 | local a = string.gmatch(a, '([^ ]+)') -- split on ' ' 39 | table.insert(label,{c=a(), s=tonumber(a()), e=tonumber(a())}) 40 | end 41 | labels[id] = label 42 | end 43 | return labels 44 | end 45 | 46 | 47 | local function prepare(opt,labels,split) 48 | require 'sys' 49 | require 'string' 50 | local imagePath = torch.CharTensor() 51 | local imageClass = torch.LongTensor() 52 | local dir = opt.data 53 | assert(paths.dirp(dir), 'directory not found: ' .. dir) 54 | local imagePaths, imageClasses, ids = {}, {}, {} 55 | local FPS, GAP, testGAP = 24, 4, 25 56 | local e,count = 0, 0 57 | 58 | -- For each video annotation, prepare test files 59 | local imageClasses2 60 | if split=='val_video' then 61 | imageClasses2 = torch.IntTensor(4000000, opt.nClasses):zero() --allocating memory 62 | end 63 | for id,label in pairs(labels) do 64 | e = e+1 65 | if e % 100 == 1 then print(e) end 66 | iddir = dir .. '/' .. id 67 | local f = io.popen(('find -L %s -iname "*.jpg" '):format(iddir)) 68 | if not f then 69 | print('class not found: ' .. id) 70 | print(('find -L %s -iname "*.jpg" '):format(iddir)) 71 | else 72 | local lines = {} 73 | while true do 74 | local line = f:read('*line') 75 | if not line then break end 76 | table.insert(lines,line) 77 | end 78 | local N = #lines 79 | if split=='val_video' then 80 | local target = torch.IntTensor(157,1):zero() 81 | for _,anno in pairs(label) do 82 | target[1+tonumber(string.sub(anno.c,2,-1))] = 1 -- 1-index 83 | end 84 | local tmp = torch.linspace(1,N,testGAP) 85 | for ii = 1,testGAP do 86 | local i = torch.floor(tmp[ii]) 87 | local impath = iddir .. '/' .. id .. '-' .. string.format('%06d',i) .. 
'.jpg' 88 | count = count + 1 89 | table.insert(imagePaths,impath) 90 | imageClasses2[count]:copy(target) 91 | table.insert(ids,id) 92 | end 93 | elseif opt.setup == 'softmax' then 94 | local localimagePaths = {} 95 | local localimageClasses = {} 96 | local localids = {} 97 | if #label>0 then 98 | -- To generate training data with softmax loss (only one label) 99 | -- We create a sorted pool with all pairs of (frames,label) 100 | -- and then randomly select a subset of those according to our batch size 101 | -- Someone should really figure out how to properly use sigmoid loss for this 102 | for i = 1,N,GAP do 103 | for _,anno in pairs(label) do 104 | if (anno.s<(i-1)/FPS) and ((i-1)/FPS=opt.batchSize then 116 | local inds = torch.multinomial(torch.Tensor(1,Nex):fill(1),opt.batchSize)[1] 117 | inds = inds:sort() 118 | assert(inds:size(1)==opt.batchSize) 119 | for aa = 1,opt.batchSize do 120 | a = inds[aa] 121 | table.insert(imagePaths,localimagePaths[a]) 122 | table.insert(imageClasses, localimageClasses[a]) -- 1-index 123 | table.insert(ids,localids[a]) 124 | end 125 | end 126 | elseif opt.setup == 'sigmoid' then 127 | -- TODO 128 | assert(false,'Invalid opt.setup') 129 | else 130 | assert(false,'Invalid opt.setup') 131 | end 132 | f:close() 133 | end 134 | end 135 | 136 | -- Convert the generated list to a tensor for faster loading 137 | local nImages = #imagePaths 138 | local maxLength = -1 139 | for _,p in pairs(imagePaths) do 140 | maxLength = math.max(maxLength, #p + 1) 141 | end 142 | local imagePath = torch.CharTensor(nImages, maxLength):zero() 143 | for i, path in ipairs(imagePaths) do 144 | ffi.copy(imagePath[i]:data(), path) 145 | end 146 | 147 | local maxLength2 = -1 148 | for _,p in pairs(ids) do 149 | maxLength2 = math.max(maxLength2, #p + 1) 150 | end 151 | local ids_tensor = torch.CharTensor(nImages, maxLength2):zero() 152 | for i, path in ipairs(ids) do 153 | ffi.copy(ids_tensor[i]:data(), path) 154 | end 155 | 156 | local imageClass = torch.LongTensor(imageClasses) 157 | if split=='val_video' then 158 | imageClass = imageClasses2[{{1,count},{}}] 159 | end 160 | assert(imagePath:size(1)==imageClass:size(1),"Sizes do not match") 161 | 162 | return imagePath, imageClass, ids_tensor 163 | end 164 | 165 | 166 | local function findClasses(dir) 167 | return Nil, Nil 168 | end 169 | 170 | 171 | function M.exec(opt, cacheFile) 172 | -- find the image path names 173 | local imagePath = torch.CharTensor() -- path to each image in dataset 174 | local imageClass = torch.LongTensor() -- class index of each image (class index in self.classes) 175 | 176 | local filename = opt.trainfile 177 | local filenametest = opt.testfile 178 | local labels = parseCSV(filename) 179 | print('done parsing train csv') 180 | local labelstest = parseCSV(filenametest) 181 | print('done parsing test csv') 182 | 183 | print("=> Generating list of images") 184 | local classList, classToIdx = findClasses(trainDir) 185 | 186 | print(" | finding all validation2 images") 187 | local val2ImagePath, val2ImageClass, val2ids = prepare(opt,labelstest,'val_video') 188 | 189 | print(" | finding all validation images") 190 | local valImagePath, valImageClass, valids = prepare(opt,labelstest,'val') 191 | 192 | print(" | finding all training images") 193 | local trainImagePath, trainImageClass, ids = prepare(opt,labels,'train') 194 | 195 | local info = { 196 | basedir = opt.data, 197 | classList = classList, 198 | train = { 199 | imagePath = trainImagePath, 200 | imageClass = trainImageClass, 201 | ids = ids, 202 | }, 203 | 
val = { 204 | imagePath = valImagePath, 205 | imageClass = valImageClass, 206 | ids = valids, 207 | }, 208 | val2 = { 209 | imagePath = val2ImagePath, 210 | imageClass = val2ImageClass, 211 | ids = val2ids, 212 | }, 213 | } 214 | 215 | print(" | saving list of images to " .. cacheFile) 216 | torch.save(cacheFile, info) 217 | return info 218 | end 219 | 220 | return M 221 | -------------------------------------------------------------------------------- /torch/datasets/charadessync.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Charades dataset loader 10 | -- Contributor: Gunnar Atli Sigurdsson 11 | 12 | local image = require 'image' 13 | local paths = require 'paths' 14 | local t = require 'datasets/transforms' 15 | local ffi = require 'ffi' 16 | 17 | local M = {} 18 | local CharadesDataset = torch.class('resnet.CharadesDataset', M) 19 | 20 | function CharadesDataset:__init(imageInfo, opt, split) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = opt.data 25 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir) 26 | end 27 | 28 | function CharadesDataset:get(i) 29 | local path = ffi.string(self.imageInfo.imagePath[i]:data()) 30 | local image = self:_loadImage(paths.concat(self.dir, path)) 31 | local class = self.imageInfo.imageClass[i] 32 | local id = ffi.string(self.imageInfo.ids[i]:data()) 33 | 34 | return { 35 | input = image, 36 | target = class, 37 | id = id 38 | } 39 | end 40 | 41 | function CharadesDataset:_loadImage(path) 42 | local ok, input = pcall(function() 43 | return image.load(path, 3, 'float') 44 | end) 45 | 46 | -- Sometimes image.load fails because the file extension does not match the 47 | -- image format. In that case, use image.decompress on a ByteTensor. 48 | if not ok then 49 | local f = io.open(path, 'r') 50 | assert(f, 'Error reading: ' .. 
tostring(path)) 51 | local data = f:read('*a') 52 | f:close() 53 | 54 | local b = torch.ByteTensor(string.len(data)) 55 | ffi.copy(b:data(), data, b:size(1)) 56 | 57 | input = image.decompress(b, 3, 'float') 58 | end 59 | 60 | return input 61 | end 62 | 63 | function CharadesDataset:size() 64 | return self.imageInfo.imageClass:size(1) 65 | end 66 | 67 | -- Computed from random subset of ImageNet training images 68 | local meanstd = { 69 | mean = { 103.939/255, 116.779/255, 123.68/255 }, --vgg16 70 | std = { 1.0, 1.0, 1.0 }, -- I don't think caffe normalizes 71 | } 72 | local pca = { 73 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 74 | eigvec = torch.Tensor{ 75 | { -0.5675, 0.7192, 0.4009 }, 76 | { -0.5808, -0.0045, -0.8140 }, 77 | { -0.5836, -0.6948, 0.4203 }, 78 | }, 79 | } 80 | 81 | function CharadesDataset:preprocess() 82 | if self.split == 'train' then 83 | return t.Compose{ 84 | t.RandomSizedCrop(224), 85 | t.ColorJitter({ 86 | brightness = 0.4, 87 | contrast = 0.4, 88 | saturation = 0.4, 89 | }), 90 | t.Lighting(0.1, pca.eigval, pca.eigvec), 91 | t.ColorNormalize(meanstd), 92 | t.HorizontalFlip(0.5), 93 | } 94 | elseif self.split == 'val' then 95 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 96 | return t.Compose{ 97 | t.Scale(256), 98 | t.ColorNormalize(meanstd), 99 | Crop(224), 100 | } 101 | elseif self.split == 'val2' then 102 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 103 | return t.Compose{ 104 | t.Scale(256), 105 | t.ColorNormalize(meanstd), 106 | Crop(224), 107 | } 108 | else 109 | error('invalid split: ' .. self.split) 110 | end 111 | end 112 | 113 | return M.CharadesDataset 114 | -------------------------------------------------------------------------------- /torch/datasets/charadessyncflow-gen.lua: -------------------------------------------------------------------------------- 1 | -- This source code is licensed under the BSD-style license found in the 2 | -- LICENSE file in the root directory of this source tree. An additional grant 3 | -- of patent rights can be found in the PATENTS file in the same directory. 4 | -- 5 | -- Script to compute list of Charades filenames and classes 6 | -- 7 | -- This generates a file gen/charadesflow.t7 which contains the list of all 8 | -- Charades training and validation images and their classes. This script also 9 | -- works for other datasets arragned with the same layout. 10 | -- 11 | -- Contributor: Gunnar Atli Sigurdsson 12 | 13 | local sys = require 'sys' 14 | local ffi = require 'ffi' 15 | 16 | local M = {} 17 | 18 | local function parseCSV(filename) 19 | require 'csvigo' 20 | print(('Loading csv: %s'):format(filename)) 21 | local all = csvigo.load{path=filename, mode='tidy'} 22 | local ids = all['id'] 23 | local actionss = all['actions'] 24 | local N = #ids 25 | local labels = {} 26 | for i = 1,#ids do 27 | local id = ids[i] 28 | local actions = actionss[i] 29 | local label = {} 30 | for a in string.gmatch(actions, '([^;]+)') do -- split on ';' 31 | local a = string.gmatch(a, '([^ ]+)') -- split on ' ' 32 | table.insert(label,{c=a(), s=tonumber(a()), e=tonumber(a())}) 33 | end 34 | labels[id] = label 35 | end 36 | return labels 37 | end 38 | 39 | 40 | local function prepare(opt,labels,split) 41 | require 'sys' 42 | require 'string' 43 | local imagePath = torch.CharTensor() 44 | local imageClass = torch.LongTensor() 45 | local dir = opt.data 46 | assert(paths.dirp(dir), 'directory not found: ' .. 
dir) 47 | local imagePaths = {} 48 | local imageClasses = {} 49 | local ids = {} 50 | local FPS = 24 51 | local GAP = 4 52 | local testGAP = 25 53 | local flowframes = 10 54 | 55 | local e = 0 56 | local count = 0 57 | -- For each video annotation, prepare test files 58 | local imageClasses2 59 | if split=='val_video' then 60 | imageClasses2 = torch.IntTensor(4000000, opt.nClasses):zero() 61 | end 62 | for id,label in pairs(labels) do 63 | e = e+1 64 | if e % 100 == 1 then print(e) end 65 | iddir = dir .. '/' .. id 66 | local f = io.popen(('find -L %s -iname "*.jpg" '):format(iddir)) 67 | if not f then 68 | print('class not found: ' .. id) 69 | print(('find -L %s -iname "*.jpg" '):format(iddir)) 70 | else 71 | local lines = {} 72 | while true do 73 | local line = f:read('*line') 74 | if not line then break end 75 | table.insert(lines,line) 76 | end 77 | local N = torch.floor(#lines/2) -- to account for x and y 78 | if split=='val_video' then 79 | local target = torch.IntTensor(157,1):zero() 80 | for _,anno in pairs(label) do 81 | target[1+tonumber(string.sub(anno.c,2,-1))] = 1 -- 1-index 82 | end 83 | local tmp = torch.linspace(1,N-flowframes-1,testGAP) -- -1 so we don't get bad flow 84 | for ii = 1,testGAP do 85 | local i = torch.floor(tmp[ii]) 86 | local impath = iddir .. '/' .. id .. '-' .. string.format('%06d',i) .. 'x' .. '.jpg' 87 | count = count + 1 88 | table.insert(imagePaths,impath) 89 | imageClasses2[count]:copy(target) 90 | table.insert(ids,id) 91 | end 92 | elseif opt.setup == 'softmax' then 93 | local localimagePaths = {} 94 | local localimageClasses = {} 95 | local localids = {} 96 | if #label>0 then 97 | for i = 1,N,GAP do 98 | for _,anno in pairs(label) do 99 | if (anno.s<(i-1)/FPS) and ((i-1)/FPS=opt.batchSize then 113 | local inds = torch.multinomial(torch.Tensor(1,Nex):fill(1),opt.batchSize)[1] 114 | inds = inds:sort() 115 | assert(inds:size(1)==opt.batchSize) 116 | for aa = 1,opt.batchSize do 117 | a = inds[aa] 118 | table.insert(imagePaths,localimagePaths[a]) 119 | table.insert(imageClasses, localimageClasses[a]) -- 1-index 120 | table.insert(ids,localids[a]) 121 | end 122 | end 123 | elseif opt.setup == 'sigmoid' then 124 | -- TODO 125 | assert(false,'Invalid opt.setup') 126 | else 127 | assert(false,'Invalid opt.setup') 128 | end 129 | f:close() 130 | end 131 | end 132 | 133 | -- Convert the generated list to a tensor for faster loading 134 | local nImages = #imagePaths 135 | local maxLength = -1 136 | for _,p in pairs(imagePaths) do 137 | maxLength = math.max(maxLength, #p + 1) 138 | end 139 | local imagePath = torch.CharTensor(nImages, maxLength):zero() 140 | for i, path in ipairs(imagePaths) do 141 | ffi.copy(imagePath[i]:data(), path) 142 | end 143 | local maxLength2 = -1 144 | for _,p in pairs(ids) do 145 | maxLength2 = math.max(maxLength2, #p + 1) 146 | end 147 | local ids_tensor = torch.CharTensor(nImages, maxLength2):zero() 148 | for i, path in ipairs(ids) do 149 | ffi.copy(ids_tensor[i]:data(), path) 150 | end 151 | local imageClass = torch.LongTensor(imageClasses) 152 | if split=='val_video' then 153 | imageClass = imageClasses2[{{1,count},{}}] 154 | end 155 | assert(imagePath:size(1)==imageClass:size(1),"Sizes do not match") 156 | 157 | return imagePath, imageClass, ids_tensor 158 | end 159 | 160 | 161 | local function findClasses(dir) 162 | return Nil, Nil 163 | end 164 | 165 | 166 | function M.exec(opt, cacheFile) 167 | -- find the image path names 168 | local imagePath = torch.CharTensor() -- path to each image in dataset 169 | local imageClass = 
torch.LongTensor() -- class index of each image (class index in self.classes) 170 | 171 | local filename = opt.trainfile 172 | local filenametest = opt.testfile 173 | local labels = parseCSV(filename) 174 | print('done parsing train csv') 175 | local labelstest = parseCSV(filenametest) 176 | print('done parsing test csv') 177 | 178 | print("=> Generating list of images") 179 | local classList, classToIdx = findClasses(trainDir) 180 | 181 | print(" | finding all validation2 images") 182 | local val2ImagePath, val2ImageClass, val2ids = prepare(opt,labelstest,'val_video') 183 | 184 | print(" | finding all validation images") 185 | local valImagePath, valImageClass, valids = prepare(opt,labelstest,'val') 186 | 187 | print(" | finding all training images") 188 | local trainImagePath, trainImageClass, ids = prepare(opt,labels,'train') 189 | 190 | local info = { 191 | basedir = opt.data, 192 | classList = classList, 193 | train = { 194 | imagePath = trainImagePath, 195 | imageClass = trainImageClass, 196 | ids = ids, 197 | }, 198 | val = { 199 | imagePath = valImagePath, 200 | imageClass = valImageClass, 201 | ids = valids, 202 | }, 203 | val2 = { 204 | imagePath = val2ImagePath, 205 | imageClass = val2ImageClass, 206 | ids = val2ids, 207 | }, 208 | } 209 | 210 | print(" | saving list of images to " .. cacheFile) 211 | torch.save(cacheFile, info) 212 | return info 213 | end 214 | 215 | return M 216 | -------------------------------------------------------------------------------- /torch/datasets/charadessyncflow.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet dataset loader 10 | -- 11 | 12 | local image = require 'image' 13 | local paths = require 'paths' 14 | local t = require 'datasets/transforms' 15 | local ffi = require 'ffi' 16 | 17 | local M = {} 18 | local CharadesDataset = torch.class('resnet.CharadesDataset', M) 19 | 20 | function CharadesDataset:__init(imageInfo, opt, split) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = opt.data 25 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir) 26 | end 27 | 28 | function CharadesDataset:get(i) 29 | -- This function loads in 20 consecutive optical flow images (10 x and 10 y images) 30 | -- Follwing the two-stream architecture 31 | local path = ffi.string(self.imageInfo.imagePath[i]:data()) 32 | local image1 = self:_loadImage(paths.concat(self.dir, path)) 33 | local finalimage = torch.Tensor(20,image1:size(2),image1:size(3)) 34 | -- the path is of the format */?????-000000x.jpg 35 | local prefix = string.sub(path,1,#path-6-5) 36 | local number = string.sub(path,#path-6-5+1,#path-5) 37 | for j = 1,10 do 38 | local thispath1 = prefix .. string.format('%06d',number-1+j) .. 'x' .. '.jpg' 39 | local thispath2 = prefix .. string.format('%06d',number-1+j) .. 'y' .. 
'.jpg' 40 | local image1 = self:_loadImage(paths.concat(self.dir, thispath1)) 41 | local image2 = self:_loadImage(paths.concat(self.dir, thispath2)) 42 | finalimage[{(j-1)*2+1,{},{}}] = image1 43 | finalimage[{(j-1)*2+1+1,{},{}}] = image2 44 | end 45 | 46 | local class = self.imageInfo.imageClass[i] 47 | local id = ffi.string(self.imageInfo.ids[i]:data()) 48 | 49 | return { 50 | input = finalimage, 51 | target = class, 52 | id = id 53 | } 54 | end 55 | 56 | function CharadesDataset:_loadImage(path) 57 | local ok, input = pcall(function() 58 | --return image.load(path, 3, 'float') 59 | return image.load(path, 1, 'float') 60 | end) 61 | 62 | -- Sometimes image.load fails because the file extension does not match the 63 | -- image format. In that case, use image.decompress on a ByteTensor. 64 | if not ok then 65 | local f = io.open(path, 'r') 66 | assert(f, 'Error reading: ' .. tostring(path)) 67 | local data = f:read('*a') 68 | f:close() 69 | 70 | local b = torch.ByteTensor(string.len(data)) 71 | ffi.copy(b:data(), data, b:size(1)) 72 | 73 | --input = image.decompress(b, 3, 'float') 74 | input = image.decompress(b, 1, 'float') 75 | end 76 | 77 | return input 78 | end 79 | 80 | function CharadesDataset:size() 81 | return self.imageInfo.imageClass:size(1) 82 | end 83 | 84 | -- Computed from random subset of ImageNet training images 85 | local meanstd = { 86 | mean = { 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255, 128.0/255 }, --flow vgg16 87 | std = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }, -- I don't think caffe normalizes 88 | } 89 | local pca = { 90 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 91 | eigvec = torch.Tensor{ 92 | { -0.5675, 0.7192, 0.4009 }, 93 | { -0.5808, -0.0045, -0.8140 }, 94 | { -0.5836, -0.6948, 0.4203 }, 95 | }, 96 | } 97 | 98 | function CharadesDataset:preprocess() 99 | if self.split == 'train' then 100 | return t.Compose{ 101 | t.RandomSizedCrop(224), 102 | --t.ColorJitter({ 103 | -- brightness = 0.4, 104 | -- contrast = 0.4, 105 | -- saturation = 0.4, 106 | --}), 107 | --t.Lighting(0.1, pca.eigval, pca.eigvec), 108 | t.ColorNormalize(meanstd), 109 | t.HorizontalFlip(0.5), 110 | } 111 | elseif self.split == 'val' then 112 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 113 | return t.Compose{ 114 | t.Scale(256), 115 | t.ColorNormalize(meanstd), 116 | Crop(224), 117 | } 118 | elseif self.split == 'val2' then 119 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 120 | return t.Compose{ 121 | t.Scale(256), 122 | t.ColorNormalize(meanstd), 123 | Crop(224), 124 | } 125 | else 126 | error('invalid split: ' .. self.split) 127 | end 128 | end 129 | 130 | return M.CharadesDataset 131 | -------------------------------------------------------------------------------- /torch/datasets/cifar10-gen.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
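-- Usage note (added commentary): exec(opt, cacheFile) below saves a table
-- with 'train' and 'val' entries to cacheFile; datasets/cifar10.lua then
-- indexes that table by split name in CifarDataset:__init, so these keys
-- must match the split names requested by the data loader.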
8 | -- 9 | -- Script to download the CIFAR-10 dataset and cache it as a single tensor file 10 | -- 11 | -- This automatically downloads the CIFAR-10 dataset from 12 | -- http://torch7.s3-website-us-east-1.amazonaws.com/data/cifar-10-torch.tar.gz 13 | -- 14 | 15 | local URL = 'http://torch7.s3-website-us-east-1.amazonaws.com/data/cifar-10-torch.tar.gz' 16 | 17 | local M = {} 18 | 19 | local function convertToTensor(files) 20 | local data, labels 21 | 22 | for _, file in ipairs(files) do 23 | local m = torch.load(file, 'ascii') 24 | if not data then 25 | data = m.data:t() 26 | labels = m.labels:squeeze() 27 | else 28 | data = torch.cat(data, m.data:t(), 1) 29 | labels = torch.cat(labels, m.labels:squeeze()) 30 | end 31 | end 32 | 33 | -- This is *very* important. The downloaded files have labels 0-9, which do 34 | -- not work with CrossEntropyCriterion 35 | labels:add(1) 36 | 37 | return { 38 | data = data:contiguous():view(-1, 3, 32, 32), 39 | labels = labels, 40 | } 41 | end 42 | 43 | function M.exec(opt, cacheFile) 44 | print("=> Downloading CIFAR-10 dataset from " .. URL) 45 | local ok = os.execute('curl ' .. URL .. ' | tar xz -C gen/') 46 | assert(ok == true or ok == 0, 'error downloading CIFAR-10') 47 | 48 | print(" | combining dataset into a single file") 49 | local trainData = convertToTensor({ 50 | 'gen/cifar-10-batches-t7/data_batch_1.t7', 51 | 'gen/cifar-10-batches-t7/data_batch_2.t7', 52 | 'gen/cifar-10-batches-t7/data_batch_3.t7', 53 | 'gen/cifar-10-batches-t7/data_batch_4.t7', 54 | 'gen/cifar-10-batches-t7/data_batch_5.t7', 55 | }) 56 | local testData = convertToTensor({ 57 | 'gen/cifar-10-batches-t7/test_batch.t7', 58 | }) 59 | 60 | print(" | saving CIFAR-10 dataset to " .. cacheFile) 61 | torch.save(cacheFile, { 62 | train = trainData, 63 | val = testData, 64 | }) 65 | end 66 | 67 | return M 68 | -------------------------------------------------------------------------------- /torch/datasets/cifar10.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- CIFAR-10 dataset loader 10 | -- 11 | 12 | local t = require 'datasets/transforms' 13 | 14 | local M = {} 15 | local CifarDataset = torch.class('resnet.CifarDataset', M) 16 | 17 | function CifarDataset:__init(imageInfo, opt, split) 18 | assert(imageInfo[split], split) 19 | self.imageInfo = imageInfo[split] 20 | self.split = split 21 | end 22 | 23 | function CifarDataset:get(i) 24 | local image = self.imageInfo.data[i]:float() 25 | local label = self.imageInfo.labels[i] 26 | 27 | return { 28 | input = image, 29 | target = label, 30 | } 31 | end 32 | 33 | function CifarDataset:size() 34 | return self.imageInfo.data:size(1) 35 | end 36 | 37 | -- Computed from entire CIFAR-10 training set 38 | local meanstd = { 39 | mean = {125.3, 123.0, 113.9}, 40 | std = {63.0, 62.1, 66.7}, 41 | } 42 | 43 | function CifarDataset:preprocess() 44 | if self.split == 'train' then 45 | return t.Compose{ 46 | t.ColorNormalize(meanstd), 47 | t.HorizontalFlip(0.5), 48 | t.RandomCrop(32, 4), 49 | } 50 | elseif self.split == 'val' then 51 | return t.ColorNormalize(meanstd) 52 | else 53 | error('invalid split: ' .. 
self.split) 54 | end 55 | end 56 | 57 | return M.CifarDataset 58 | -------------------------------------------------------------------------------- /torch/datasets/imagenet-gen.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Script to compute list of ImageNet filenames and classes 10 | -- 11 | -- This generates a file gen/imagenet.t7 which contains the list of all 12 | -- ImageNet training and validation images and their classes. This script also 13 | -- works for other datasets arranged with the same layout. 14 | -- 15 | 16 | local sys = require 'sys' 17 | local ffi = require 'ffi' 18 | 19 | local M = {} 20 | 21 | local function findClasses(dir) 22 | local dirs = paths.dir(dir) 23 | table.sort(dirs) 24 | 25 | local classList = {} 26 | local classToIdx = {} 27 | for _ ,class in ipairs(dirs) do 28 | if not classToIdx[class] and class ~= '.' and class ~= '..' then 29 | table.insert(classList, class) 30 | classToIdx[class] = #classList 31 | end 32 | end 33 | 34 | -- assert(#classList == 1000, 'expected 1000 ImageNet classes') 35 | return classList, classToIdx 36 | end 37 | 38 | local function findImages(dir, classToIdx) 39 | local imagePath = torch.CharTensor() 40 | local imageClass = torch.LongTensor() 41 | 42 | ---------------------------------------------------------------------- 43 | -- Options for the GNU and BSD find command 44 | local extensionList = {'jpg', 'png', 'jpeg', 'JPG', 'PNG', 'JPEG', 'ppm', 'PPM', 'bmp', 'BMP'} 45 | local findOptions = ' -iname "*.' .. extensionList[1] .. '"' 46 | for i=2,#extensionList do 47 | findOptions = findOptions .. ' -o -iname "*.' .. extensionList[i] .. '"' 48 | end 49 | 50 | -- Find all the images using the find command 51 | local f = io.popen('find -L ' .. dir .. findOptions) 52 | 53 | local maxLength = -1 54 | local imagePaths = {} 55 | local imageClasses = {} 56 | 57 | -- Generate a list of all the images and their class 58 | while true do 59 | local line = f:read('*line') 60 | if not line then break end 61 | 62 | local className = paths.basename(paths.dirname(line)) 63 | local filename = paths.basename(line) 64 | local path = className .. '/' .. filename 65 | 66 | local classId = classToIdx[className] 67 | assert(classId, 'class not found: ' .. 
className) 68 | 69 | table.insert(imagePaths, path) 70 | table.insert(imageClasses, classId) 71 | 72 | maxLength = math.max(maxLength, #path + 1) 73 | end 74 | 75 | f:close() 76 | 77 | -- Convert the generated list to a tensor for faster loading 78 | local nImages = #imagePaths 79 | local imagePath = torch.CharTensor(nImages, maxLength):zero() 80 | for i, path in ipairs(imagePaths) do 81 | ffi.copy(imagePath[i]:data(), path) 82 | end 83 | 84 | local imageClass = torch.LongTensor(imageClasses) 85 | return imagePath, imageClass 86 | end 87 | 88 | function M.exec(opt, cacheFile) 89 | -- find the image path names 90 | local imagePath = torch.CharTensor() -- path to each image in dataset 91 | local imageClass = torch.LongTensor() -- class index of each image (class index in self.classes) 92 | 93 | local trainDir = paths.concat(opt.data, 'train') 94 | local valDir = paths.concat(opt.data, 'val') 95 | assert(paths.dirp(trainDir), 'train directory not found: ' .. trainDir) 96 | assert(paths.dirp(valDir), 'val directory not found: ' .. valDir) 97 | 98 | print("=> Generating list of images") 99 | local classList, classToIdx = findClasses(trainDir) 100 | 101 | print(" | finding all validation images") 102 | local valImagePath, valImageClass = findImages(valDir, classToIdx) 103 | 104 | print(" | finding all training images") 105 | local trainImagePath, trainImageClass = findImages(trainDir, classToIdx) 106 | 107 | local info = { 108 | basedir = opt.data, 109 | classList = classList, 110 | train = { 111 | imagePath = trainImagePath, 112 | imageClass = trainImageClass, 113 | }, 114 | val = { 115 | imagePath = valImagePath, 116 | imageClass = valImageClass, 117 | }, 118 | } 119 | 120 | print(" | saving list of images to " .. cacheFile) 121 | torch.save(cacheFile, info) 122 | return info 123 | end 124 | 125 | return M 126 | -------------------------------------------------------------------------------- /torch/datasets/imagenet.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- ImageNet dataset loader 10 | -- 11 | 12 | local image = require 'image' 13 | local paths = require 'paths' 14 | local t = require 'datasets/transforms' 15 | local ffi = require 'ffi' 16 | 17 | local M = {} 18 | local ImagenetDataset = torch.class('resnet.ImagenetDataset', M) 19 | 20 | function ImagenetDataset:__init(imageInfo, opt, split) 21 | self.imageInfo = imageInfo[split] 22 | self.opt = opt 23 | self.split = split 24 | self.dir = paths.concat(opt.data, split) 25 | assert(paths.dirp(self.dir), 'directory does not exist: ' .. self.dir) 26 | end 27 | 28 | function ImagenetDataset:get(i) 29 | local path = ffi.string(self.imageInfo.imagePath[i]:data()) 30 | 31 | local image = self:_loadImage(paths.concat(self.dir, path)) 32 | local class = self.imageInfo.imageClass[i] 33 | 34 | return { 35 | input = image, 36 | target = class, 37 | } 38 | end 39 | 40 | function ImagenetDataset:_loadImage(path) 41 | local ok, input = pcall(function() 42 | return image.load(path, 3, 'float') 43 | end) 44 | 45 | -- Sometimes image.load fails because the file extension does not match the 46 | -- image format. In that case, use image.decompress on a ByteTensor. 
47 | if not ok then 48 | local f = io.open(path, 'r') 49 | assert(f, 'Error reading: ' .. tostring(path)) 50 | local data = f:read('*a') 51 | f:close() 52 | 53 | local b = torch.ByteTensor(string.len(data)) 54 | ffi.copy(b:data(), data, b:size(1)) 55 | 56 | input = image.decompress(b, 3, 'float') 57 | end 58 | 59 | return input 60 | end 61 | 62 | function ImagenetDataset:size() 63 | return self.imageInfo.imageClass:size(1) 64 | end 65 | 66 | -- Computed from random subset of ImageNet training images 67 | local meanstd = { 68 | mean = { 0.485, 0.456, 0.406 }, 69 | std = { 0.229, 0.224, 0.225 }, 70 | } 71 | local pca = { 72 | eigval = torch.Tensor{ 0.2175, 0.0188, 0.0045 }, 73 | eigvec = torch.Tensor{ 74 | { -0.5675, 0.7192, 0.4009 }, 75 | { -0.5808, -0.0045, -0.8140 }, 76 | { -0.5836, -0.6948, 0.4203 }, 77 | }, 78 | } 79 | 80 | function ImagenetDataset:preprocess() 81 | if self.split == 'train' then 82 | return t.Compose{ 83 | t.RandomSizedCrop(224), 84 | t.ColorJitter({ 85 | brightness = 0.4, 86 | contrast = 0.4, 87 | saturation = 0.4, 88 | }), 89 | t.Lighting(0.1, pca.eigval, pca.eigvec), 90 | t.ColorNormalize(meanstd), 91 | t.HorizontalFlip(0.5), 92 | } 93 | elseif self.split == 'val' then 94 | local Crop = self.opt.tenCrop and t.TenCrop or t.CenterCrop 95 | return t.Compose{ 96 | t.Scale(256), 97 | t.ColorNormalize(meanstd), 98 | Crop(224), 99 | } 100 | else 101 | error('invalid split: ' .. self.split) 102 | end 103 | end 104 | 105 | return M.ImagenetDataset 106 | -------------------------------------------------------------------------------- /torch/datasets/init.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Dataset loader factory (Charades, ImageNet, and CIFAR-10) 10 | -- 11 | 12 | local M = {} 13 | 14 | local function isvalid(opt, cachePath) 15 | local imageInfo = torch.load(cachePath) 16 | if imageInfo.basedir and imageInfo.basedir ~= opt.data then 17 | return false 18 | end 19 | return true 20 | end 21 | 22 | function M.create(opt, split) 23 | local cachePath = paths.concat(opt.gen, opt.dataset .. '.t7') 24 | if not paths.filep(cachePath) or not isvalid(opt, cachePath) then 25 | paths.mkdir('gen') 26 | 27 | local script = paths.dofile(opt.dataset .. '-gen.lua') 28 | script.exec(opt, cachePath) 29 | end 30 | local imageInfo = torch.load(cachePath) 31 | 32 | local Dataset = require('datasets/' .. opt.dataset) 33 | opt.dataopt = imageInfo.opt 34 | return Dataset(imageInfo, opt, split) 35 | end 36 | 37 | return M 38 | -------------------------------------------------------------------------------- /torch/datasets/transforms.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
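--
-- Usage sketch for this module: the dataset loaders compose these transforms
-- in their :preprocess() methods. A minimal val-style pipeline (the mean/std
-- values and the file name here are illustrative only):
--
--   local t = require 'datasets/transforms'
--   local preprocess = t.Compose{
--       t.Scale(256),
--       t.ColorNormalize({mean = {0.485, 0.456, 0.406}, std = {0.229, 0.224, 0.225}}),
--       t.CenterCrop(224),
--   }
--   local img = preprocess(image.load('frame.jpg', 3, 'float'))  -- 3x224x224 float tensor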
8 | -- 9 | -- Image transforms for data augmentation and input normalization 10 | -- 11 | 12 | require 'image' 13 | 14 | local M = {} 15 | 16 | function M.Compose(transforms) 17 | return function(input) 18 | for _, transform in ipairs(transforms) do 19 | input = transform(input) 20 | end 21 | return input 22 | end 23 | end 24 | 25 | function M.ColorNormalize(meanstd) 26 | return function(img) 27 | img = img:clone() 28 | for i=1,img:size(1) do 29 | img[i]:add(-meanstd.mean[i]) 30 | img[i]:div(meanstd.std[i]) 31 | end 32 | return img 33 | end 34 | end 35 | 36 | -- Scales the smaller edge to size 37 | function M.Scale(size, interpolation) 38 | interpolation = interpolation or 'bicubic' 39 | return function(input) 40 | local w, h = input:size(3), input:size(2) 41 | if (w <= h and w == size) or (h <= w and h == size) then 42 | return input 43 | end 44 | if w < h then 45 | return image.scale(input, size, h/w * size, interpolation) 46 | else 47 | return image.scale(input, w/h * size, size, interpolation) 48 | end 49 | end 50 | end 51 | 52 | -- Crop to centered rectangle 53 | function M.CenterCrop(size) 54 | return function(input) 55 | local w1 = math.ceil((input:size(3) - size)/2) 56 | local h1 = math.ceil((input:size(2) - size)/2) 57 | return image.crop(input, w1, h1, w1 + size, h1 + size) -- center patch 58 | end 59 | end 60 | 61 | -- Random crop from larger image with optional zero padding 62 | function M.RandomCrop(size, padding) 63 | padding = padding or 0 64 | 65 | return function(input) 66 | if padding > 0 then 67 | local temp = input.new(3, input:size(2) + 2*padding, input:size(3) + 2*padding) 68 | temp:zero() 69 | :narrow(2, padding+1, input:size(2)) 70 | :narrow(3, padding+1, input:size(3)) 71 | :copy(input) 72 | input = temp 73 | end 74 | 75 | local w, h = input:size(3), input:size(2) 76 | if w == size and h == size then 77 | return input 78 | end 79 | 80 | local x1, y1 = torch.random(0, w - size), torch.random(0, h - size) 81 | local out = image.crop(input, x1, y1, x1 + size, y1 + size) 82 | assert(out:size(2) == size and out:size(3) == size, 'wrong crop size') 83 | return out 84 | end 85 | end 86 | 87 | -- Four corner patches and center crop from image and its horizontal reflection 88 | function M.TenCrop(size) 89 | local centerCrop = M.CenterCrop(size) 90 | 91 | return function(input) 92 | local w, h = input:size(3), input:size(2) 93 | 94 | local output = {} 95 | for _, img in ipairs{input, image.hflip(input)} do 96 | table.insert(output, centerCrop(img)) 97 | table.insert(output, image.crop(img, 0, 0, size, size)) 98 | table.insert(output, image.crop(img, w-size, 0, w, size)) 99 | table.insert(output, image.crop(img, 0, h-size, size, h)) 100 | table.insert(output, image.crop(img, w-size, h-size, w, h)) 101 | end 102 | 103 | -- View as mini-batch 104 | for i, img in ipairs(output) do 105 | output[i] = img:view(1, img:size(1), img:size(2), img:size(3)) 106 | end 107 | 108 | return input.cat(output, 1) 109 | end 110 | end 111 | 112 | -- Resized with shorter side randomly sampled from [minSize, maxSize] (ResNet-style) 113 | function M.RandomScale(minSize, maxSize) 114 | return function(input) 115 | local w, h = input:size(3), input:size(2) 116 | 117 | local targetSz = torch.random(minSize, maxSize) 118 | local targetW, targetH = targetSz, targetSz 119 | if w < h then 120 | targetH = torch.round(h / w * targetW) 121 | else 122 | targetW = torch.round(w / h * targetH) 123 | end 124 | 125 | return image.scale(input, targetW, targetH, 'bicubic') 126 | end 127 | end 128 | 129 | -- Random 
crop with size 8%-100% and aspect ratio 3/4 - 4/3 (Inception-style) 130 | function M.RandomSizedCrop(size) 131 | local scale = M.Scale(size) 132 | local crop = M.CenterCrop(size) 133 | 134 | return function(input) 135 | local attempt = 0 136 | repeat 137 | local area = input:size(2) * input:size(3) 138 | local targetArea = torch.uniform(0.08, 1.0) * area 139 | 140 | local aspectRatio = torch.uniform(3/4, 4/3) 141 | local w = torch.round(math.sqrt(targetArea * aspectRatio)) 142 | local h = torch.round(math.sqrt(targetArea / aspectRatio)) 143 | 144 | if torch.uniform() < 0.5 then 145 | w, h = h, w 146 | end 147 | 148 | if h <= input:size(2) and w <= input:size(3) then 149 | local y1 = torch.random(0, input:size(2) - h) 150 | local x1 = torch.random(0, input:size(3) - w) 151 | 152 | local out = image.crop(input, x1, y1, x1 + w, y1 + h) 153 | assert(out:size(2) == h and out:size(3) == w, 'wrong crop size') 154 | 155 | return image.scale(out, size, size, 'bicubic') 156 | end 157 | attempt = attempt + 1 158 | until attempt >= 10 159 | 160 | -- fallback 161 | return crop(scale(input)) 162 | end 163 | end 164 | 165 | function M.HorizontalFlip(prob) 166 | return function(input) 167 | if torch.uniform() < prob then 168 | input = image.hflip(input) 169 | end 170 | return input 171 | end 172 | end 173 | 174 | function M.Rotation(deg) 175 | return function(input) 176 | if deg ~= 0 then 177 | input = image.rotate(input, (torch.uniform() - 0.5) * deg * math.pi / 180, 'bilinear') 178 | end 179 | return input 180 | end 181 | end 182 | 183 | -- Lighting noise (AlexNet-style PCA-based noise) 184 | function M.Lighting(alphastd, eigval, eigvec) 185 | return function(input) 186 | if alphastd == 0 then 187 | return input 188 | end 189 | 190 | local alpha = torch.Tensor(3):normal(0, alphastd) 191 | local rgb = eigvec:clone() 192 | :cmul(alpha:view(1, 3):expand(3, 3)) 193 | :cmul(eigval:view(1, 3):expand(3, 3)) 194 | :sum(2) 195 | :squeeze() 196 | 197 | input = input:clone() 198 | for i=1,3 do 199 | input[i]:add(rgb[i]) 200 | end 201 | return input 202 | end 203 | end 204 | 205 | local function blend(img1, img2, alpha) 206 | return img1:mul(alpha):add(1 - alpha, img2) 207 | end 208 | 209 | local function grayscale(dst, img) 210 | dst:resizeAs(img) 211 | dst[1]:zero() 212 | dst[1]:add(0.299, img[1]):add(0.587, img[2]):add(0.114, img[3]) 213 | dst[2]:copy(dst[1]) 214 | dst[3]:copy(dst[1]) 215 | return dst 216 | end 217 | 218 | function M.Saturation(var) 219 | local gs 220 | 221 | return function(input) 222 | gs = gs or input.new() 223 | grayscale(gs, input) 224 | 225 | local alpha = 1.0 + torch.uniform(-var, var) 226 | blend(input, gs, alpha) 227 | return input 228 | end 229 | end 230 | 231 | function M.Brightness(var) 232 | local gs 233 | 234 | return function(input) 235 | gs = gs or input.new() 236 | gs:resizeAs(input):zero() 237 | 238 | local alpha = 1.0 + torch.uniform(-var, var) 239 | blend(input, gs, alpha) 240 | return input 241 | end 242 | end 243 | 244 | function M.Contrast(var) 245 | local gs 246 | 247 | return function(input) 248 | gs = gs or input.new() 249 | grayscale(gs, input) 250 | gs:fill(gs[1]:mean()) 251 | 252 | local alpha = 1.0 + torch.uniform(-var, var) 253 | blend(input, gs, alpha) 254 | return input 255 | end 256 | end 257 | 258 | function M.RandomOrder(ts) 259 | return function(input) 260 | local img = input.img or input 261 | local order = torch.randperm(#ts) 262 | for i=1,#ts do 263 | img = ts[order[i]](img) 264 | end 265 | return input 266 | end 267 | end 268 | 269 | function 
M.ColorJitter(opt) 270 | local brightness = opt.brightness or 0 271 | local contrast = opt.contrast or 0 272 | local saturation = opt.saturation or 0 273 | 274 | local ts = {} 275 | if brightness ~= 0 then 276 | table.insert(ts, M.Brightness(brightness)) 277 | end 278 | if contrast ~= 0 then 279 | table.insert(ts, M.Contrast(contrast)) 280 | end 281 | if saturation ~= 0 then 282 | table.insert(ts, M.Saturation(saturation)) 283 | end 284 | 285 | if #ts == 0 then 286 | return function(input) return input end 287 | end 288 | 289 | return M.RandomOrder(ts) 290 | end 291 | 292 | return M 293 | -------------------------------------------------------------------------------- /torch/exp/flownet.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using flow 2 | -- 3 | -- Purpose: train the flow stream of the two-stream network on Charades 4 | -- 5 | -- start Torch 6 | -- Usage: dofile 'exp/flownet.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16flow', 16 | '-dataset','charadesflow', 17 | '-LR','0.005', 18 | '-LR_decay_freq','15', 19 | '-epochSize','0.2', 20 | '-nThreads','4', 21 | '-testSize','0.1', 22 | '-nEpochs','40', 23 | '-conv1LR','1', 24 | '-conv2LR','1', 25 | '-conv3LR','1', 26 | '-conv4LR','1', 27 | '-conv5LR','1', 28 | '-batchSize','64', 29 | '-accumGrad','4', 30 | '-trainfile','../Charades_v1_train.csv', 31 | '-testfile','../Charades_v1_test.csv', 32 | '-optnet','true', 33 | '-cacheDir','/mnt/raid00/gunnars/cache/', 34 | '-data','/mnt/raid00/gunnars/Charades_v1_flow/', 35 | } 36 | for _,v in pairs(morearg) do 37 | table.insert(arg,v) 38 | end 39 | dofile 'main.lua' 40 | -------------------------------------------------------------------------------- /torch/exp/flownet_localize.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using flow 2 | -- 3 | -- Purpose: dump frame-level localization predictions from the flow model 4 | -- 5 | -- start Torch 6 | -- Usage: dofile 'exp/flownet_localize.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16flow', 16 | '-dataset','charadesflow', 17 | '-LR','0.005', 18 | '-LR_decay_freq','15', 19 | '-epochSize','0.2', 20 | '-nThreads','4', 21 | '-testSize','0.1', 22 | '-nEpochs','40', 23 | '-conv1LR','1', 24 | '-conv2LR','1', 25 | '-conv3LR','1', 26 | '-conv4LR','1', 27 | '-conv5LR','1', 28 | '-batchSize','64', 29 | '-accumGrad','4', 30 | '-dumpLocalize','true', 31 | '-cacheDir','/mnt/raid00/gunnars/cache/', 32 | '-data','/mnt/raid00/gunnars/Charades_v1_flow/', 33 | '-trainfile','../Charades_v1_train.csv', 34 | '-testfile','../Charades_v1_test.csv', 35 | '-optnet','true', 36 | } 37 | for _,v in pairs(morearg) do 38 | table.insert(arg,v) 39 | end 40 | dofile 'main.lua' 41 | -------------------------------------------------------------------------------- /torch/exp/flownet_resume.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using flow 2 | -- 3 | -- Purpose: resume flow-stream training from a saved checkpoint
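--
-- The resume variants add only a few flags on top of the base experiment; a
-- sketch of the additions (the '-retrain'/'-optimState' values below are
-- example paths from the author's setup, note they reuse the rgbnet cache,
-- and should be pointed at your own checkpoints; <cacheDir> and N are
-- placeholders):
--
--   '-retrain',     '<cacheDir>/flownet/checkpoints/model_N.t7',      -- weights to load
--   '-epochNumber', 'N',                                              -- epoch to resume from
--   '-optimState',  '<cacheDir>/flownet/checkpoints/optimstate_N.t7', -- optimizer state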
4 | -- 5 | -- start Torch 6 | -- Usage: dofile 'exp/flownet_resume.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16flow', 16 | '-dataset','charadesflow', 17 | '-LR','0.005', 18 | '-LR_decay_freq','15', 19 | '-epochSize','0.2', 20 | '-nThreads','4', 21 | '-testSize','0.1', 22 | '-nEpochs','40', 23 | '-conv1LR','1', 24 | '-conv2LR','1', 25 | '-conv3LR','1', 26 | '-conv4LR','1', 27 | '-conv5LR','1', 28 | '-batchSize','64', 29 | '-accumGrad','4', 30 | '-retrain','/mnt/raid00/gunnars/cache/rgbnet/checkpoints/model_9.t7', -- path to the trained model to use 31 | '-epochNumber','9', -- what epoch to resume from 32 | '-optimState','/mnt/raid00/gunnars/cache/rgbnet/checkpoints/optimstate_9.t7', -- path to the optimizer state 33 | '-gen','/mnt/raid00/gunnars/cache/rgbnet/gen/', -- what cached data to use 34 | '-cacheDir','/mnt/raid00/gunnars/cache/', 35 | '-data','/mnt/raid00/gunnars/Charades_v1_flow/', 36 | '-trainfile','../Charades_v1_train.csv', 37 | '-testfile','../Charades_v1_test.csv', 38 | '-optnet','true', 39 | } 40 | for _,v in pairs(morearg) do 41 | table.insert(arg,v) 42 | end 43 | dofile 'main.lua' 44 | -------------------------------------------------------------------------------- /torch/exp/flownet_test.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using flow 2 | -- 3 | -- Purpose: evaluate a trained flow model on the test set 4 | -- 5 | -- start Torch 6 | -- Usage: dofile 'exp/flownet_test.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16flow', 16 | '-dataset','charadesflow', 17 | '-LR','0.005', 18 | '-LR_decay_freq','15', 19 | '-epochSize','0.2', 20 | '-nThreads','4', 21 | '-testSize','0.1', 22 | '-nEpochs','40', 23 | '-conv1LR','1', 24 | '-conv2LR','1', 25 | '-conv3LR','1', 26 | '-conv4LR','1', 27 | '-conv5LR','1', 28 | '-batchSize','64', 29 | '-accumGrad','4', 30 | '-testOnly','true', 31 | '-retrain','/mnt/raid00/gunnars/cache/rgbnet/checkpoints/model_9.t7', -- path to the trained model to use 32 | '-gen','/mnt/raid00/gunnars/cache/rgbnet/gen/', -- what cached data to use 33 | '-cacheDir','/mnt/raid00/gunnars/cache/', 34 | '-data','/mnt/raid00/gunnars/Charades_v1_flow/', 35 | '-trainfile','../Charades_v1_train.csv', 36 | '-testfile','../Charades_v1_test.csv', 37 | '-optnet','true', 38 | } 39 | for _,v in pairs(morearg) do 40 | table.insert(arg,v) 41 | end 42 | dofile 'main.lua' 43 | -------------------------------------------------------------------------------- /torch/exp/lstmflownet.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using flow 2 | -- 3 | -- Purpose: train the LSTM flow stream (Two-Stream+LSTM)
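--
-- Relative to exp/flownet.lua, this LSTM variant essentially swaps in the
-- recurrent model and the temporally synchronized dataset:
--
--   '-netType', 'vgg16lstmflow',    -- VGG-16+LSTM flow model (models/vgg16lstmflow.lua)
--   '-dataset', 'charadessyncflow', -- synchronized flow loader (datasets/charadessyncflow.lua)
--
-- along with a smaller learning rate and a larger epochSize, as set below.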
4 | -- 5 | -- start Torch 6 | -- Usage: dofile 'exp/lstmflownet.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16lstmflow', 16 | '-dataset','charadessyncflow', 17 | '-LR','5e-4', 18 | '-LR_decay_freq','15', 19 | '-epochSize','0.6', 20 | '-nThreads','4', 21 | '-testSize','0.1', 22 | '-nEpochs','35', 23 | '-conv1LR','1', 24 | '-conv2LR','1', 25 | '-conv3LR','1', 26 | '-conv4LR','1', 27 | '-conv5LR','1', 28 | '-batchSize','64', 29 | '-accumGrad','4', 30 | '-trainfile','../Charades_v1_train.csv', 31 | '-testfile','../Charades_v1_test.csv', 32 | '-optnet','true', 33 | '-cacheDir','/mnt/raid00/gunnars/cache/', 34 | '-data','/mnt/raid00/gunnars/Charades_v1_flow/', 35 | } 36 | for _,v in pairs(morearg) do 37 | table.insert(arg,v) 38 | end 39 | dofile 'main.lua' 40 | -------------------------------------------------------------------------------- /torch/exp/lstmrgbnet.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using rgb 2 | -- 3 | -- Purpose: train the LSTM RGB stream (Two-Stream+LSTM) 4 | -- 5 | -- start torch 6 | -- Usage: dofile 'exp/lstmrgbnet.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16lstm', 16 | '-dataset','charadessync', 17 | '-LR_decay_freq','10', 18 | '-LR','0.0015', 19 | '-epochSize','0.3', 20 | '-testSize','0.1', 21 | '-nEpochs','30', 22 | '-conv1LR','1', 23 | '-conv2LR','1', 24 | '-conv3LR','1', 25 | '-conv4LR','1', 26 | '-conv5LR','1', 27 | '-batchSize','64', 28 | '-accumGrad','4', 29 | '-cacheDir','/mnt/raid00/gunnars/cache/', 30 | '-data','/mnt/raid00/gunnars/Charades_v1_rgb/', 31 | '-trainfile','../Charades_v1_train.csv', 32 | '-testfile','../Charades_v1_test.csv', 33 | '-optnet','true', 34 | } 35 | for _,v in pairs(morearg) do 36 | table.insert(arg,v) 37 | end 38 | dofile 'main.lua' 39 | -------------------------------------------------------------------------------- /torch/exp/rgbnet.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using rgb 2 | -- 3 | -- Purpose: train the RGB stream of the two-stream network on Charades
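--
-- Like every script in exp/, this wrapper appends option strings to the
-- global `arg` table and then runs main.lua, so it is equivalent to passing
-- the same flags on the command line; the mechanism in miniature:
--
--   arg = arg or {}
--   for _, v in pairs({'-netType','vgg16', '-dataset','charades'}) do
--       table.insert(arg, v)
--   end
--   dofile 'main.lua'  -- opts.parse(arg) then sees the injected flags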
4 | -- 5 | -- start torch 6 | -- Usage: dofile 'exp/rgbnet.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16', 16 | '-dataset','charades', 17 | '-LR_decay_freq','30', 18 | '-LR','0.001', 19 | '-epochSize','0.1', 20 | '-testSize','0.1', 21 | '-nEpochs','10', 22 | '-conv1LR','1', 23 | '-conv2LR','1', 24 | '-conv3LR','1', 25 | '-conv4LR','1', 26 | '-conv5LR','1', 27 | '-batchSize','64', 28 | '-accumGrad','4', 29 | '-cacheDir','/mnt/raid00/gunnars/cache/', 30 | '-data','/mnt/raid00/gunnars/Charades_v1_rgb/', 31 | '-trainfile','../Charades_v1_train.csv', 32 | '-testfile','../Charades_v1_test.csv', 33 | '-optnet','true', 34 | } 35 | for _,v in pairs(morearg) do 36 | table.insert(arg,v) 37 | end 38 | dofile 'main.lua' 39 | -------------------------------------------------------------------------------- /torch/exp/rgbnet_localize.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using rgb 2 | -- 3 | -- Purpose: dump frame-level localization predictions from the RGB model 4 | -- 5 | -- start torch 6 | -- Usage: dofile 'exp/rgbnet_localize.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16', 16 | '-dataset','charades', 17 | '-LR_decay_freq','30', 18 | '-LR','0.001', 19 | '-epochSize','0.1', 20 | '-testSize','0.1', 21 | '-nEpochs','10', 22 | '-conv1LR','1', 23 | '-conv2LR','1', 24 | '-conv3LR','1', 25 | '-conv4LR','1', 26 | '-conv5LR','1', 27 | '-batchSize','64', 28 | '-accumGrad','4', 29 | '-dumpLocalize','true', 30 | '-cacheDir','/mnt/raid00/gunnars/cache/', 31 | '-data','/mnt/raid00/gunnars/Charades_v1_rgb/', 32 | '-trainfile','../Charades_v1_train.csv', 33 | '-testfile','../Charades_v1_test.csv', 34 | '-optnet','true', 35 | } 36 | for _,v in pairs(morearg) do 37 | table.insert(arg,v) 38 | end 39 | dofile 'main.lua' 40 | -------------------------------------------------------------------------------- /torch/exp/rgbnet_resume.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using rgb 2 | -- 3 | -- Purpose: resume RGB-stream training from a saved checkpoint
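--
-- As with exp/flownet_resume.lua, only the checkpoint flags differ from the
-- base experiment. The example '-retrain' and '-optimState' paths below point
-- at the author's flownet and flowrgbnet caches; to resume an actual rgbnet
-- run they would presumably look like (<cacheDir> is a placeholder):
--
--   '-retrain',    '<cacheDir>/rgbnet/checkpoints/model_9.t7',
--   '-optimState', '<cacheDir>/rgbnet/checkpoints/optimstate_9.t7',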
4 | -- 5 | -- start torch 6 | -- Usage: dofile 'exp/rgbnet_resume.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16', 16 | '-dataset','charades', 17 | '-LR_decay_freq','30', 18 | '-LR','0.001', 19 | '-epochSize','0.1', 20 | '-testSize','0.1', 21 | '-nEpochs','10', 22 | '-conv1LR','1', 23 | '-conv2LR','1', 24 | '-conv3LR','1', 25 | '-conv4LR','1', 26 | '-conv5LR','1', 27 | '-batchSize','64', 28 | '-accumGrad','4', 29 | '-retrain','/mnt/raid00/gunnars/cache/flownet/checkpoints/model_9.t7', -- path to the trained model to use 30 | '-epochNumber','9', -- what epoch to resume from 31 | '-optimState','/mnt/raid00/gunnars/cache/flowrgbnet/checkpoints/optimstate_9.t7', -- path to the optimizer state 32 | '-cacheDir','/mnt/raid00/gunnars/cache/', 33 | '-data','/mnt/raid00/gunnars/Charades_v1_rgb/', 34 | '-trainfile','../Charades_v1_train.csv', 35 | '-testfile','../Charades_v1_test.csv', 36 | '-optnet','true', 37 | } 38 | for _,v in pairs(morearg) do 39 | table.insert(arg,v) 40 | end 41 | dofile 'main.lua' 42 | -------------------------------------------------------------------------------- /torch/exp/rgbnet_test.lua: -------------------------------------------------------------------------------- 1 | -- Action recognition experiment using rgb 2 | -- 3 | -- Purpose: evaluate a trained RGB model on the test set 4 | -- 5 | -- start torch 6 | -- Usage: dofile 'exp/rgbnet_test.lua' 7 | 8 | local info = debug.getinfo(1,'S'); 9 | name = info.source 10 | name = string.sub(name,1,#name-4) --remove ext 11 | local name = name:match( "([^/]+)$" ) --remove folders 12 | arg = arg or {} 13 | morearg = { 14 | '-name',name, 15 | '-netType','vgg16', 16 | '-dataset','charades', 17 | '-LR_decay_freq','30', 18 | '-LR','0.001', 19 | '-epochSize','0.1', 20 | '-testSize','0.1', 21 | '-nEpochs','10', 22 | '-conv1LR','1', 23 | '-conv2LR','1', 24 | '-conv3LR','1', 25 | '-conv4LR','1', 26 | '-conv5LR','1', 27 | '-batchSize','64', 28 | '-accumGrad','4', 29 | '-testOnly','true', 30 | '-retrain','/mnt/raid00/gunnars/cache/flownet/checkpoints/model_9.t7', -- path to the trained model to use 31 | '-cacheDir','/mnt/raid00/gunnars/cache/', 32 | '-data','/mnt/raid00/gunnars/Charades_v1_rgb/', 33 | '-trainfile','../Charades_v1_train.csv', 34 | '-testfile','../Charades_v1_test.csv', 35 | '-optnet','true', 36 | } 37 | for _,v in pairs(morearg) do 38 | table.insert(arg,v) 39 | end 40 | dofile 'main.lua' 41 | -------------------------------------------------------------------------------- /torch/get_alreadytrained.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to download pretrained models on Charades 3 | # Approximately equivalent to models obtained by running exp/rgbnet.lua and exp/flownet.lua 4 | # 5 | # The flow model was obtained after 31 epochs (epochSize=0.2) 6 | # The flow model has a classification accuracy of 15.4% mAP (via charades_v1_classify.m) 7 | # The rgb model was obtained after 6 epochs (epochSize=0.1) 8 | # The rgb model has a classification accuracy of 15.6% mAP (via charades_v1_classify.m) 9 | # 10 | # Combining the predictions (submission files) of those models using combine_rgb_flow.py 11 | # yields a final classification accuracy of 18.9% mAP (via charades_v1_classify.m) 12 | 13 | wget -O twostream_flow.t7 https://www.dropbox.com/s/o7afkhw52rqr48g/twostream_flow.t7?dl=1 14 | wget -O 
twostream_rgb.t7 https://www.dropbox.com/s/bo9rv32zaxojsmz/twostream_rgb.t7?dl=1 15 | -------------------------------------------------------------------------------- /torch/get_alreadytrained_lstm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to download pretrained lstm models on Charades 3 | # Approximately equivalent to models obtained by running exp/lstmrgbnet.lua and exp/lstmflownet.lua 4 | # 5 | # The flow model was obtained after 30 epochs (epochSize=0.6) 6 | # The flow model has a classification accuracy of 15.4% mAP (via charades_v1_classify.m) 7 | # The rgb model was obtained after 25 epochs (epochSize=0.3) 8 | # The rgb model has a classification accuracy of 16.6% mAP (via charades_v1_classify.m) 9 | # 10 | # Combining the predictions (submission files) of those models using combine_rgb_flow.py 11 | # yields a final classification accuracy of 19.8% mAP (via charades_v1_classify.m) 12 | 13 | wget -O lstm_flow.t7 https://www.dropbox.com/s/gj808t2izq2el4e/lstm_flow.t7?dl=1 14 | wget -O lstm_rgb.t7 https://www.dropbox.com/s/t7n0ivjj15v75kt/lstm_rgb.t7?dl=1 15 | -------------------------------------------------------------------------------- /torch/get_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to download models pretrained on ImageNet and UCF101 3 | # Those are used as the starting point for training on Charades 4 | 5 | wget -O VGG_ILSVRC_16_layers_deploy.prototxt https://www.dropbox.com/s/iycrzeruaf75soc/VGG_ILSVRC_16_layers_deploy.prototxt?dl=1 6 | wget -O VGG_UCF101_16_layers_deploy.prototxt https://www.dropbox.com/s/4ktxsdiiqm429j2/VGG_UCF101_16_layers_deploy.prototxt?dl=1 7 | wget -O VGG_ILSVRC_16_layers.caffemodel https://www.dropbox.com/s/rwo3iim5z2w07aa/VGG_ILSVRC_16_layers.caffemodel?dl=1 8 | wget -O VGG_UCF101_16_layers.caffemodel https://www.dropbox.com/s/d1n9emy0awzlwlr/VGG_UCF101_16_layers.caffemodel?dl=1 9 | -------------------------------------------------------------------------------- /torch/layers/CrossEntropyCriterion.lua: -------------------------------------------------------------------------------- 1 | local CrossEntropyCriterion, Criterion = torch.class('nn.CrossEntropyCriterion', 'nn.Criterion') 2 | 3 | function CrossEntropyCriterion:__init(weights) 4 | Criterion.__init(self) 5 | self.lsm = nn.LogSoftMax() 6 | self.nll = nn.ClassNLLCriterion(weights) 7 | end 8 | 9 | function CrossEntropyCriterion:updateOutput(input, target) 10 | input = input:squeeze() 11 | target = type(target) == 'number' and target or target:squeeze() 12 | self.lsm:updateOutput(input) 13 | self.nll:updateOutput(self.lsm.output, target) 14 | self.output = self.nll.output 15 | return self.output 16 | end 17 | 18 | function CrossEntropyCriterion:updateGradInput(input, target) 19 | local size = input:size() 20 | input = input:squeeze() 21 | target = type(target) == 'number' and target or target:squeeze() 22 | self.nll:updateGradInput(self.lsm.output, target) 23 | self.lsm:updateGradInput(input, self.nll.gradInput) 24 | self.gradInput:view(self.lsm.gradInput, size) 25 | return self.gradInput 26 | end 27 | 28 | return nn.CrossEntropyCriterion 29 | -------------------------------------------------------------------------------- /torch/main.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 
4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | require 'torch' 10 | require 'paths' 11 | require 'optim' 12 | require 'nn' 13 | local DataLoader = require 'dataloader' 14 | local models = require 'models/init' 15 | local Trainer 16 | local opts = require 'opts' 17 | local checkpoints = require 'checkpoints' 18 | 19 | torch.setdefaulttensortype('torch.FloatTensor') 20 | torch.setnumthreads(1) 21 | 22 | local opt = opts.parse(arg) 23 | torch.manualSeed(opt.manualSeed) 24 | cutorch.manualSeedAll(opt.manualSeed) 25 | 26 | Trainer = require 'train' 27 | 28 | -- Load previous checkpoint, if it exists 29 | local checkpoint, optimState = checkpoints.latest(opt) 30 | 31 | -- Data loading 32 | print('Creating Data Loader') 33 | local trainLoader, valLoader, val2Loader = DataLoader.create(opt) 34 | 35 | -- Create model 36 | local model, criterion = models.setup(opt, checkpoint) 37 | 38 | -- The trainer handles the training loop and evaluation on validation set 39 | print('Creating Trainer') 40 | local trainer = Trainer(model, criterion, opt, optimState) 41 | 42 | if opt.testOnly then 43 | --local top1Err, top5Err = trainer:test(opt, 0, valLoader) 44 | --print(string.format(' * Results top1: %6.3f top5: %6.3f', top1Err, top5Err)) 45 | 46 | local AP = trainer:test2(opt, 0, val2Loader) 47 | local mAP = AP:mean() 48 | print(string.format(' * Results mAP: %6.3f', mAP)) 49 | 50 | return 51 | end 52 | 53 | local startEpoch = checkpoint and checkpoint.epoch + 1 or opt.epochNumber 54 | local bestTop1 = math.huge 55 | local bestTop5 = math.huge 56 | local bestmAP = 0 57 | for epoch = startEpoch, opt.nEpochs do 58 | -- Train for a single epoch 59 | local trainTop1, trainTop5, trainLoss = trainer:train(opt, epoch, trainLoader) 60 | 61 | -- Run model on validation set evaluating on the whole video 62 | local AP = trainer:test2(opt, epoch, val2Loader) 63 | local mAP = AP:mean() 64 | 65 | -- Run model on validation set 66 | local testTop1, testTop5 = trainer:test(opt, epoch, valLoader) 67 | 68 | local bestModel = false 69 | if testTop1 < bestTop1 then 70 | bestModel = true 71 | bestTop1 = testTop1 72 | bestTop5 = testTop5 73 | bestmAP = mAP 74 | print(' * Best model ', testTop1, testTop5, mAP) 75 | end 76 | 77 | local score = {trainTop1, trainTop5, testTop1, testTop5, mAP} 78 | checkpoints.save(epoch, model, trainer.optimState, bestModel, opt, score) 79 | end 80 | 81 | print(string.format(' * Finished top1: %6.3f top5: %6.3f mAP: %6.3f', bestTop1, bestTop5, bestmAP)) 82 | -------------------------------------------------------------------------------- /torch/models/init.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- Generic model creating code. 
For the specific ResNet model see 10 | -- models/resnet.lua 11 | -- 12 | 13 | require 'nn' 14 | require 'cunn' 15 | require 'cudnn' 16 | 17 | local M = {} 18 | 19 | function M.setup(opt, checkpoint) 20 | local model 21 | local criterion 22 | if checkpoint then 23 | local modelPath = paths.concat(opt.resume, checkpoint.modelFile) 24 | assert(paths.filep(modelPath), 'Saved model not found: ' .. modelPath) 25 | print('=> Resuming model from ' .. modelPath) 26 | model,criterion = torch.load(modelPath):cuda() 27 | elseif opt.retrain ~= 'none' then 28 | assert(paths.filep(opt.retrain), 'File not found: ' .. opt.retrain) 29 | print('Loading model from file: ' .. opt.retrain) 30 | model,criterion = torch.load(opt.retrain):cuda() 31 | if not criterion then 32 | local _ 33 | _,criterion = require('models/' .. opt.netType)(opt) 34 | end 35 | else 36 | print('=> Creating model from file: models/' .. opt.netType .. '.lua') 37 | model,criterion = require('models/' .. opt.netType)(opt) 38 | end 39 | 40 | -- First remove any DataParallelTable 41 | if torch.type(model) == 'nn.DataParallelTable' then 42 | model = model:get(1) 43 | end 44 | 45 | -- optnet is a general library for reducing memory usage in neural networks 46 | if opt.optnet then 47 | local optnet = require 'optnet' 48 | local imsize = opt.dataset == 'cifar10' and 32 or 224 49 | local sampleInput = (opt.dataset == 'charadesflow') and torch.zeros(4,20,imsize,imsize):cuda() or torch.zeros(4,3,imsize,imsize):cuda() 50 | optnet.optimizeMemory(model, sampleInput, {inplace = false, mode = 'training'}) 51 | end 52 | 53 | -- This is useful for fitting ResNet-50 on 4 GPUs, but requires that all 54 | -- containers override backwards to call backwards recursively on submodules 55 | if opt.shareGradInput then 56 | M.shareGradInput(model) 57 | end 58 | 59 | -- For resetting the classifier when fine-tuning on a different dataset 60 | if opt.resetClassifier and not checkpoint then 61 | print(' => Replacing classifier with ' .. opt.nClasses .. '-way classifier') 62 | 63 | local orig = model:get(#model.modules) 64 | assert(torch.type(orig) == 'nn.Linear', 65 | 'expected last layer to be fully connected') 66 | 67 | local linear = nn.Linear(orig.weight:size(2), opt.nClasses) 68 | linear.bias:zero() 69 | 70 | model:remove(#model.modules) 71 | model:add(linear:cuda()) 72 | end 73 | 74 | -- Set the CUDNN flags 75 | if opt.cudnn == 'fastest' then 76 | cudnn.fastest = true 77 | cudnn.benchmark = true 78 | elseif opt.cudnn == 'deterministic' then 79 | -- Use a deterministic convolution implementation 80 | model:apply(function(m) 81 | if m.setMode then m:setMode(1, 1, 1) end 82 | end) 83 | end 84 | 85 | -- Wrap the model with DataParallelTable, if using more than one GPU 86 | if opt.nGPU > 1 then 87 | local gpus = torch.range(1, opt.nGPU):totable() 88 | local fastest, benchmark = cudnn.fastest, cudnn.benchmark 89 | 90 | local dpt = nn.DataParallelTable(1, true, true) 91 | :add(model, gpus) 92 | :threads(function() 93 | local cudnn = require 'cudnn' 94 | cudnn.fastest, cudnn.benchmark = fastest, benchmark 95 | end) 96 | dpt.gradInput = nil 97 | 98 | model = dpt:cuda() 99 | end 100 | 101 | if not criterion then 102 | criterion = nn.CrossEntropyCriterion():cuda() 103 | end 104 | return model, criterion 105 | end 106 | 107 | function M.shareGradInput(model) 108 | local function sharingKey(m) 109 | local key = torch.type(m) 110 | if m.__shareGradInputKey then 111 | key = key .. ':' .. 
m.__shareGradInputKey 112 | end 113 | return key 114 | end 115 | 116 | -- Share gradInput for memory efficient backprop 117 | local cache = {} 118 | model:apply(function(m) 119 | local moduleType = torch.type(m) 120 | if torch.isTensor(m.gradInput) and moduleType ~= 'nn.ConcatTable' then 121 | local key = sharingKey(m) 122 | if cache[key] == nil then 123 | cache[key] = torch.CudaStorage(1) 124 | end 125 | m.gradInput = torch.CudaTensor(cache[key], 1, 0) 126 | end 127 | end) 128 | for i, m in ipairs(model:findModules('nn.ConcatTable')) do 129 | if cache[i % 2] == nil then 130 | cache[i % 2] = torch.CudaStorage(1) 131 | end 132 | m.gradInput = torch.CudaTensor(cache[i % 2], 1, 0) 133 | end 134 | end 135 | 136 | return M 137 | -------------------------------------------------------------------------------- /torch/models/preresnet.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- The full pre-activation ResNet variation from the technical report 10 | -- "Identity Mappings in Deep Residual Networks" (http://arxiv.org/abs/1603.05027) 11 | -- 12 | 13 | local nn = require 'nn' 14 | require 'cunn' 15 | 16 | local Convolution = cudnn.SpatialConvolution 17 | local Avg = cudnn.SpatialAveragePooling 18 | local ReLU = cudnn.ReLU 19 | local Max = nn.SpatialMaxPooling 20 | local SBatchNorm = nn.SpatialBatchNormalization 21 | 22 | local function createModel(opt) 23 | local depth = opt.depth 24 | local shortcutType = opt.shortcutType or 'B' 25 | local iChannels 26 | 27 | -- The shortcut layer is either identity or 1x1 convolution 28 | local function shortcut(nInputPlane, nOutputPlane, stride) 29 | local useConv = shortcutType == 'C' or 30 | (shortcutType == 'B' and nInputPlane ~= nOutputPlane) 31 | if useConv then 32 | -- 1x1 convolution 33 | return nn.Sequential() 34 | :add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride)) 35 | elseif nInputPlane ~= nOutputPlane then 36 | -- Strided, zero-padded identity shortcut 37 | return nn.Sequential() 38 | :add(nn.SpatialAveragePooling(1, 1, stride, stride)) 39 | :add(nn.Concat(2) 40 | :add(nn.Identity()) 41 | :add(nn.MulConstant(0))) 42 | else 43 | return nn.Identity() 44 | end 45 | end 46 | 47 | -- Typically shareGradInput uses the same gradInput storage for all modules 48 | -- of the same type. This is incorrect for some SpatialBatchNormalization 49 | -- modules in this network b/c of the in-place CAddTable. 
This marks the 50 | -- module so that it's shared only with other modules with the same key 51 | local function ShareGradInput(module, key) 52 | assert(key) 53 | module.__shareGradInputKey = key 54 | return module 55 | end 56 | 57 | -- The basic residual layer block for 18 and 34 layer network, and the 58 | -- CIFAR networks 59 | local function basicblock(n, stride, type) 60 | local nInputPlane = iChannels 61 | iChannels = n 62 | 63 | local block = nn.Sequential() 64 | local s = nn.Sequential() 65 | if type == 'both_preact' then 66 | block:add(ShareGradInput(SBatchNorm(nInputPlane), 'preact')) 67 | block:add(ReLU(true)) 68 | elseif type ~= 'no_preact' then 69 | s:add(SBatchNorm(nInputPlane)) 70 | s:add(ReLU(true)) 71 | end 72 | s:add(Convolution(nInputPlane,n,3,3,stride,stride,1,1)) 73 | s:add(SBatchNorm(n)) 74 | s:add(ReLU(true)) 75 | s:add(Convolution(n,n,3,3,1,1,1,1)) 76 | 77 | return block 78 | :add(nn.ConcatTable() 79 | :add(s) 80 | :add(shortcut(nInputPlane, n, stride))) 81 | :add(nn.CAddTable(true)) 82 | end 83 | 84 | -- The bottleneck residual layer for 50, 101, and 152 layer networks 85 | local function bottleneck(n, stride, type) 86 | local nInputPlane = iChannels 87 | iChannels = n * 4 88 | 89 | local block = nn.Sequential() 90 | local s = nn.Sequential() 91 | if type == 'both_preact' then 92 | block:add(ShareGradInput(SBatchNorm(nInputPlane), 'preact')) 93 | block:add(ReLU(true)) 94 | elseif type ~= 'no_preact' then 95 | s:add(SBatchNorm(nInputPlane)) 96 | s:add(ReLU(true)) 97 | end 98 | s:add(Convolution(nInputPlane,n,1,1,1,1,0,0)) 99 | s:add(SBatchNorm(n)) 100 | s:add(ReLU(true)) 101 | s:add(Convolution(n,n,3,3,stride,stride,1,1)) 102 | s:add(SBatchNorm(n)) 103 | s:add(ReLU(true)) 104 | s:add(Convolution(n,n*4,1,1,1,1,0,0)) 105 | 106 | return block 107 | :add(nn.ConcatTable() 108 | :add(s) 109 | :add(shortcut(nInputPlane, n * 4, stride))) 110 | :add(nn.CAddTable(true)) 111 | end 112 | 113 | -- Creates count residual blocks with specified number of features 114 | local function layer(block, features, count, stride, type) 115 | local s = nn.Sequential() 116 | if count < 1 then 117 | return s 118 | end 119 | s:add(block(features, stride, 120 | type == 'first' and 'no_preact' or 'both_preact')) 121 | for i=2,count do 122 | s:add(block(features, 1)) 123 | end 124 | return s 125 | end 126 | 127 | local model = nn.Sequential() 128 | if opt.dataset == 'imagenet' then 129 | -- Configurations for ResNet: 130 | -- num. residual blocks, num features, residual block function 131 | local cfg = { 132 | [18] = {{2, 2, 2, 2}, 512, basicblock}, 133 | [34] = {{3, 4, 6, 3}, 512, basicblock}, 134 | [50] = {{3, 4, 6, 3}, 2048, bottleneck}, 135 | [101] = {{3, 4, 23, 3}, 2048, bottleneck}, 136 | [152] = {{3, 8, 36, 3}, 2048, bottleneck}, 137 | [200] = {{3, 24, 36, 3}, 2048, bottleneck}, 138 | } 139 | 140 | assert(cfg[depth], 'Invalid depth: ' .. tostring(depth)) 141 | local def, nFeatures, block = table.unpack(cfg[depth]) 142 | iChannels = 64 143 | print(' | ResNet-' .. depth .. 
' ImageNet') 144 | 145 | -- The ResNet ImageNet model 146 | model:add(Convolution(3,64,7,7,2,2,3,3)) 147 | model:add(SBatchNorm(64)) 148 | model:add(ReLU(true)) 149 | model:add(Max(3,3,2,2,1,1)) 150 | model:add(layer(block, 64, def[1], 1, 'first')) 151 | model:add(layer(block, 128, def[2], 2)) 152 | model:add(layer(block, 256, def[3], 2)) 153 | model:add(layer(block, 512, def[4], 2)) 154 | model:add(ShareGradInput(SBatchNorm(iChannels), 'last')) 155 | model:add(ReLU(true)) 156 | model:add(Avg(7, 7, 1, 1)) 157 | model:add(nn.View(nFeatures):setNumInputDims(3)) 158 | model:add(nn.Linear(nFeatures, 1000)) 159 | elseif opt.dataset == 'cifar10' then 160 | -- Model type specifies number of layers for CIFAR-10 model 161 | assert((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202') 162 | local n = (depth - 2) / 6 163 | iChannels = 16 164 | print(' | ResNet-' .. depth .. ' CIFAR-10') 165 | 166 | -- The ResNet CIFAR-10 model 167 | model:add(Convolution(3,16,3,3,1,1,1,1)) 168 | model:add(layer(basicblock, 16, n, 1)) 169 | model:add(layer(basicblock, 32, n, 2)) 170 | model:add(layer(basicblock, 64, n, 2)) 171 | model:add(ShareGradInput(SBatchNorm(iChannels), 'last')) 172 | model:add(ReLU(true)) 173 | model:add(Avg(8, 8, 1, 1)) 174 | model:add(nn.View(64):setNumInputDims(3)) 175 | model:add(nn.Linear(64, 10)) 176 | else 177 | error('invalid dataset: ' .. opt.dataset) 178 | end 179 | 180 | local function ConvInit(name) 181 | for k,v in pairs(model:findModules(name)) do 182 | local n = v.kW*v.kH*v.nOutputPlane 183 | v.weight:normal(0,math.sqrt(2/n)) 184 | if cudnn.version >= 4000 then 185 | v.bias = nil 186 | v.gradBias = nil 187 | else 188 | v.bias:zero() 189 | end 190 | end 191 | end 192 | local function BNInit(name) 193 | for k,v in pairs(model:findModules(name)) do 194 | v.weight:fill(1) 195 | v.bias:zero() 196 | end 197 | end 198 | 199 | ConvInit('cudnn.SpatialConvolution') 200 | ConvInit('nn.SpatialConvolution') 201 | BNInit('fbnn.SpatialBatchNormalization') 202 | BNInit('cudnn.SpatialBatchNormalization') 203 | BNInit('nn.SpatialBatchNormalization') 204 | for k,v in pairs(model:findModules('nn.Linear')) do 205 | v.bias:zero() 206 | end 207 | model:cuda() 208 | 209 | if opt.cudnn == 'deterministic' then 210 | model:apply(function(m) 211 | if m.setMode then m:setMode(1,1,1) end 212 | end) 213 | end 214 | 215 | model:get(1).gradInput = nil 216 | 217 | return model 218 | end 219 | 220 | return createModel 221 | -------------------------------------------------------------------------------- /torch/models/resnet.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
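--
-- A note on the shortcut options used here and in preresnet.lua above,
-- following the A/B/C naming of the original ResNet paper: with the default
-- shortcutType 'B' a 1x1 convolution is used only where the plane count
-- changes, 'C' always uses the convolution, and otherwise the shortcut is a
-- plain identity. Illustrative calls to the local shortcut() helper below:
--
--   shortcut(64, 64, 1)   -- 'B', same width: nn.Identity()
--   shortcut(64, 128, 2)  -- 'B', widening: 1x1 Convolution with stride 2 (plus SBatchNorm in this file)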
8 | -- 9 | -- The ResNet model definition 10 | -- 11 | 12 | local nn = require 'nn' 13 | require 'cunn' 14 | 15 | local Convolution = cudnn.SpatialConvolution 16 | local Avg = cudnn.SpatialAveragePooling 17 | local ReLU = cudnn.ReLU 18 | local Max = nn.SpatialMaxPooling 19 | local SBatchNorm = nn.SpatialBatchNormalization 20 | 21 | local function createModel(opt) 22 | local depth = opt.depth 23 | local shortcutType = opt.shortcutType or 'B' 24 | local iChannels 25 | 26 | -- The shortcut layer is either identity or 1x1 convolution 27 | local function shortcut(nInputPlane, nOutputPlane, stride) 28 | local useConv = shortcutType == 'C' or 29 | (shortcutType == 'B' and nInputPlane ~= nOutputPlane) 30 | if useConv then 31 | -- 1x1 convolution 32 | return nn.Sequential() 33 | :add(Convolution(nInputPlane, nOutputPlane, 1, 1, stride, stride)) 34 | :add(SBatchNorm(nOutputPlane)) 35 | elseif nInputPlane ~= nOutputPlane then 36 | -- Strided, zero-padded identity shortcut 37 | return nn.Sequential() 38 | :add(nn.SpatialAveragePooling(1, 1, stride, stride)) 39 | :add(nn.Concat(2) 40 | :add(nn.Identity()) 41 | :add(nn.MulConstant(0))) 42 | else 43 | return nn.Identity() 44 | end 45 | end 46 | 47 | -- The basic residual layer block for 18 and 34 layer network, and the 48 | -- CIFAR networks 49 | local function basicblock(n, stride) 50 | local nInputPlane = iChannels 51 | iChannels = n 52 | 53 | local s = nn.Sequential() 54 | s:add(Convolution(nInputPlane,n,3,3,stride,stride,1,1)) 55 | s:add(SBatchNorm(n)) 56 | s:add(ReLU(true)) 57 | s:add(Convolution(n,n,3,3,1,1,1,1)) 58 | s:add(SBatchNorm(n)) 59 | 60 | return nn.Sequential() 61 | :add(nn.ConcatTable() 62 | :add(s) 63 | :add(shortcut(nInputPlane, n, stride))) 64 | :add(nn.CAddTable(true)) 65 | :add(ReLU(true)) 66 | end 67 | 68 | -- The bottleneck residual layer for 50, 101, and 152 layer networks 69 | local function bottleneck(n, stride) 70 | local nInputPlane = iChannels 71 | iChannels = n * 4 72 | 73 | local s = nn.Sequential() 74 | s:add(Convolution(nInputPlane,n,1,1,1,1,0,0)) 75 | s:add(SBatchNorm(n)) 76 | s:add(ReLU(true)) 77 | s:add(Convolution(n,n,3,3,stride,stride,1,1)) 78 | s:add(SBatchNorm(n)) 79 | s:add(ReLU(true)) 80 | s:add(Convolution(n,n*4,1,1,1,1,0,0)) 81 | s:add(SBatchNorm(n * 4)) 82 | 83 | return nn.Sequential() 84 | :add(nn.ConcatTable() 85 | :add(s) 86 | :add(shortcut(nInputPlane, n * 4, stride))) 87 | :add(nn.CAddTable(true)) 88 | :add(ReLU(true)) 89 | end 90 | 91 | -- Creates count residual blocks with specified number of features 92 | local function layer(block, features, count, stride) 93 | local s = nn.Sequential() 94 | for i=1,count do 95 | s:add(block(features, i == 1 and stride or 1)) 96 | end 97 | return s 98 | end 99 | 100 | local model = nn.Sequential() 101 | if opt.dataset == 'imagenet' then 102 | -- Configurations for ResNet: 103 | -- num. residual blocks, num features, residual block function 104 | local cfg = { 105 | [18] = {{2, 2, 2, 2}, 512, basicblock}, 106 | [34] = {{3, 4, 6, 3}, 512, basicblock}, 107 | [50] = {{3, 4, 6, 3}, 2048, bottleneck}, 108 | [101] = {{3, 4, 23, 3}, 2048, bottleneck}, 109 | [152] = {{3, 8, 36, 3}, 2048, bottleneck}, 110 | } 111 | 112 | assert(cfg[depth], 'Invalid depth: ' .. tostring(depth)) 113 | local def, nFeatures, block = table.unpack(cfg[depth]) 114 | iChannels = 64 115 | print(' | ResNet-' .. depth .. 
' ImageNet') 116 | 117 | -- The ResNet ImageNet model 118 | model:add(Convolution(3,64,7,7,2,2,3,3)) 119 | model:add(SBatchNorm(64)) 120 | model:add(ReLU(true)) 121 | model:add(Max(3,3,2,2,1,1)) 122 | model:add(layer(block, 64, def[1])) 123 | model:add(layer(block, 128, def[2], 2)) 124 | model:add(layer(block, 256, def[3], 2)) 125 | model:add(layer(block, 512, def[4], 2)) 126 | model:add(Avg(7, 7, 1, 1)) 127 | model:add(nn.View(nFeatures):setNumInputDims(3)) 128 | model:add(nn.Linear(nFeatures, 1000)) 129 | elseif opt.dataset == 'cifar10' then 130 | -- Model type specifies number of layers for CIFAR-10 model 131 | assert((depth - 2) % 6 == 0, 'depth should be one of 20, 32, 44, 56, 110, 1202') 132 | local n = (depth - 2) / 6 133 | iChannels = 16 134 | print(' | ResNet-' .. depth .. ' CIFAR-10') 135 | 136 | -- The ResNet CIFAR-10 model 137 | model:add(Convolution(3,16,3,3,1,1,1,1)) 138 | model:add(SBatchNorm(16)) 139 | model:add(ReLU(true)) 140 | model:add(layer(basicblock, 16, n)) 141 | model:add(layer(basicblock, 32, n, 2)) 142 | model:add(layer(basicblock, 64, n, 2)) 143 | model:add(Avg(8, 8, 1, 1)) 144 | model:add(nn.View(64):setNumInputDims(3)) 145 | model:add(nn.Linear(64, 10)) 146 | else 147 | error('invalid dataset: ' .. opt.dataset) 148 | end 149 | 150 | local function ConvInit(name) 151 | for k,v in pairs(model:findModules(name)) do 152 | local n = v.kW*v.kH*v.nOutputPlane 153 | v.weight:normal(0,math.sqrt(2/n)) 154 | if cudnn.version >= 4000 then 155 | v.bias = nil 156 | v.gradBias = nil 157 | else 158 | v.bias:zero() 159 | end 160 | end 161 | end 162 | local function BNInit(name) 163 | for k,v in pairs(model:findModules(name)) do 164 | v.weight:fill(1) 165 | v.bias:zero() 166 | end 167 | end 168 | 169 | ConvInit('cudnn.SpatialConvolution') 170 | ConvInit('nn.SpatialConvolution') 171 | BNInit('fbnn.SpatialBatchNormalization') 172 | BNInit('cudnn.SpatialBatchNormalization') 173 | BNInit('nn.SpatialBatchNormalization') 174 | for k,v in pairs(model:findModules('nn.Linear')) do 175 | v.bias:zero() 176 | end 177 | model:cuda() 178 | 179 | if opt.cudnn == 'deterministic' then 180 | model:apply(function(m) 181 | if m.setMode then m:setMode(1,1,1) end 182 | end) 183 | end 184 | 185 | model:get(1).gradInput = nil 186 | 187 | return model 188 | end 189 | 190 | return createModel 191 | -------------------------------------------------------------------------------- /torch/models/vgg16.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- The RGB model definition 10 | -- 11 | -- Contributor: Gunnar Atli Sigurdsson 12 | 13 | local nn = require 'nn' 14 | require 'cunn' 15 | require 'loadcaffe' 16 | 17 | local function createModel(opt) 18 | local model = loadcaffe.load(opt.pretrainpath .. 'VGG_ILSVRC_16_layers_deploy.prototxt', opt.pretrainpath .. 'VGG_ILSVRC_16_layers.caffemodel','cudnn') 19 | 20 | print(' => Replacing classifier with ' .. opt.nClasses .. 
'-way classifier') 21 | 22 | model:remove(#model.modules) --remove softmax 23 | local orig = model:get(#model.modules) 24 | assert(torch.type(orig) == 'nn.Linear', 25 | 'expected last layer to be fully connected') 26 | 27 | local linear = nn.Linear(orig.weight:size(2), opt.nClasses) 28 | linear.name = "fc8" 29 | linear.bias:zero() 30 | 31 | model:remove(#model.modules) 32 | model:add(linear:cuda()) 33 | model:cuda() 34 | 35 | print(tostring(model)) 36 | if opt.cudnn == 'deterministic' then 37 | model:apply(function(m) 38 | if m.setMode then m:setMode(1,1,1) end 39 | end) 40 | end 41 | 42 | return model 43 | end 44 | 45 | return createModel 46 | -------------------------------------------------------------------------------- /torch/models/vgg16flow.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- The Flow model definition 10 | -- 11 | -- Contributor: Gunnar Atli Sigurdsson 12 | 13 | local nn = require 'nn' 14 | require 'cunn' 15 | require 'loadcaffe' 16 | 17 | local function createModel(opt) 18 | local model = loadcaffe.load(opt.pretrainpath .. 'VGG_UCF101_16_layers_deploy.prototxt', opt.pretrainpath .. 'VGG_UCF101_16_layers.caffemodel','cudnn') 19 | 20 | print(' => Replacing classifier with ' .. opt.nClasses .. '-way classifier') 21 | 22 | --model:remove(#model.modules) --remove softmax 23 | local orig = model:get(#model.modules) 24 | assert(torch.type(orig) == 'nn.Linear', 25 | 'expected last layer to be fully connected') 26 | 27 | local linear = nn.Linear(orig.weight:size(2), opt.nClasses) 28 | linear.name = "fc8" 29 | linear.bias:zero() 30 | 31 | model:remove(#model.modules) 32 | model:add(linear:cuda()) 33 | model:cuda() 34 | 35 | print(tostring(model)) 36 | if opt.cudnn == 'deterministic' then 37 | model:apply(function(m) 38 | if m.setMode then m:setMode(1,1,1) end 39 | end) 40 | end 41 | 42 | return model 43 | end 44 | 45 | return createModel 46 | -------------------------------------------------------------------------------- /torch/models/vgg16lstm.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- The LSTM RGB model definition 10 | -- 11 | -- Contributor: Gunnar Atli Sigurdsson 12 | 13 | local nn = require 'nn' 14 | require 'cunn' 15 | 16 | local function createModel(opt) 17 | local model = torch.load(opt.pretrainpath .. 'twostream_rgb.t7'):cuda() -- Load pretrained Two-Stream model 18 | 19 | print(' => Replacing classifier with ' .. opt.nClasses .. 
'-way classifier') 20 | 21 | local orig = model:get(#model.modules) 22 | assert(torch.type(orig) == 'nn.Linear', 23 | 'expected last layer to be fully connected') 24 | 25 | local lstm = cudnn.LSTM(4096,512,1,false) 26 | lstm.name = "fc8" 27 | local linear = nn.Linear(512, opt.nClasses) 28 | linear.name = "fc8" 29 | linear.bias:zero() 30 | 31 | model:remove(#model.modules) 32 | model:add(nn.View(1,4096)) 33 | model:add(lstm) 34 | model:add(nn.View(512)) 35 | model:add(linear:cuda()) 36 | model:cuda() 37 | 38 | print(tostring(model)) 39 | if opt.cudnn == 'deterministic' then 40 | model:apply(function(m) 41 | if m.setMode then m:setMode(1,1,1) end 42 | end) 43 | end 44 | 45 | return model 46 | end 47 | 48 | return createModel 49 | -------------------------------------------------------------------------------- /torch/models/vgg16lstmflow.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 8 | -- 9 | -- The LSTM Flow model definition 10 | -- 11 | -- Contributor: Gunnar Atli Sigurdsson 12 | 13 | local nn = require 'nn' 14 | require 'cunn' 15 | 16 | local function createModel(opt) 17 | local model = torch.load(opt.pretrainpath .. 'twostream_flow.t7'):cuda() -- Load pretrained Two-Stream model 18 | 19 | print(' => Replacing classifier with ' .. opt.nClasses .. '-way classifier') 20 | 21 | local orig = model:get(#model.modules) 22 | assert(torch.type(orig) == 'nn.Linear', 23 | 'expected last layer to be fully connected') 24 | 25 | local lstm = cudnn.LSTM(4096,512,1,false) 26 | lstm.name = "fc8" 27 | local linear = nn.Linear(512, opt.nClasses) 28 | linear.name = "fc8" 29 | linear.bias:zero() 30 | 31 | model:remove(#model.modules) 32 | model:add(nn.View(1,4096)) 33 | model:add(lstm) 34 | model:add(nn.View(512)) 35 | model:add(linear:cuda()) 36 | model:cuda() 37 | 38 | print(tostring(model)) 39 | if opt.cudnn == 'deterministic' then 40 | model:apply(function(m) 41 | if m.setMode then m:setMode(1,1,1) end 42 | end) 43 | end 44 | 45 | return model 46 | end 47 | 48 | return createModel 49 | -------------------------------------------------------------------------------- /torch/opts.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
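-- [Editor's note] In vgg16lstm.lua and vgg16lstmflow.lua above, the pretrained
-- two-stream classifier is replaced by an LSTM head:
--   fc7 (4096-d) -> nn.View(1,4096) -> cudnn.LSTM(4096,512,1) -> nn.View(512) -> nn.Linear(512, nClasses)
-- Both the LSTM and the new Linear are named "fc8", so the -fc8LR option defined
-- below scales the learning rate of the entire new head.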
8 | -- 9 | local M = { } 10 | 11 | function M.parse(arg) 12 | local cmd = torch.CmdLine() 13 | cmd:text() 14 | cmd:text('Torch-7 Charades Two-Stream Training Script') 15 | cmd:text('Check out the README file for an overview, and the exp/ folder for training examples') 16 | cmd:text('See https://github.com/facebook/fb.resnet.torch/blob/master/TRAINING.md for examples') 17 | cmd:text() 18 | cmd:text('Options:') 19 | ------------ General options -------------------- 20 | cmd:option('-data', '/mnt/raid00/gunnars/Charades_v1_jpg/', 'Path to dataset') 21 | cmd:option('-trainfile', './Charades_v1_train.csv', 'Path to training annotations') 22 | cmd:option('-testfile', './Charades_v1_test.csv', 'Path to testing annotations') 23 | cmd:option('-cacheDir', '/mnt/raid00/gunnars/cache/', 'Path to model caches') 24 | cmd:option('-name', 'test', 'Experiment name') 25 | cmd:option('-dataset', 'charades', 'Options: imagenet | cifar10 | charades | charadesflow | charadessync | charadessyncflow') 26 | cmd:option('-setup', 'softmax', 'Options: softmax | sigmoid') 27 | cmd:option('-manualSeed', 0, 'Manually set RNG seed') 28 | cmd:option('-nGPU', 1, 'Number of GPUs to use by default') 29 | cmd:option('-backend', 'cudnn', 'Options: cudnn | cunn') 30 | cmd:option('-cudnn', 'default', 'Options: fastest | default | deterministic') 31 | cmd:option('-gen', 'gen', 'Path to save generated files') 32 | ------------- Data options ------------------------ 33 | cmd:option('-nThreads', 1, 'number of data loading threads') 34 | ------------- Training options -------------------- 35 | cmd:option('-nEpochs', 1, 'Number of total epochs to run') 36 | cmd:option('-epochNumber', 1, 'Manual epoch number (useful on restarts)') 37 | cmd:option('-epochSize', 1, 'Epoch size (Int | [0,1])') 38 | cmd:option('-testSize', 1, 'Size of test set (Int | [0,1])') 39 | cmd:option('-batchSize', 64, 'mini-batch size (1 = pure stochastic)') 40 | cmd:option('-testOnly', 'false', 'Run on validation set only') 41 | cmd:option('-dumpLocalize','false', 'Output localization') 42 | cmd:option('-tenCrop', 'false', 'Ten-crop testing') 43 | cmd:option('-accumGrad', 4, 'Accumulate gradient across N batches (increases effective batch size)') 44 | cmd:option('-solver', 'sgd', 'Solver to use. Options: sgd | adam')
45 | ------------- Checkpointing options --------------- 46 | cmd:option('-save', 'checkpoints', 'Directory in which to save checkpoints') 47 | cmd:option('-resume', 'none', 'Resume from the latest checkpoint in this directory') 48 | ---------- Optimization options ---------------------- 49 | cmd:option('-LR', 0.001, 'initial learning rate') 50 | cmd:option('-LR_decay_freq', 6, 'decay the LR by 10x every N epochs') 51 | cmd:option('-momentum', 0.9, 'momentum') 52 | cmd:option('-weightDecay', 5e-4, 'weight decay') 53 | cmd:option('-conv1LR', 1.0, 'convolution layer LR modifier') 54 | cmd:option('-conv2LR', 1.0, 'convolution layer LR modifier') 55 | cmd:option('-conv3LR', 1.0, 'convolution layer LR modifier') 56 | cmd:option('-conv4LR', 1.0, 'convolution layer LR modifier') 57 | cmd:option('-conv5LR', 1.0, 'convolution layer LR modifier') 58 | cmd:option('-fc8LR', 1.0, 'fc8 layer LR modifier') 59 | ---------- Model options ---------------------------------- 60 | cmd:option('-netType', 'vgg16','Options: resnet | preresnet | vgg16') 61 | cmd:option('-pretrainpath', './', 'Path to pretrained models') 62 | cmd:option('-depth', 34, 'ResNet depth: 18 | 34 | 50 | 101 | ...', 'number') 63 | cmd:option('-fc7_dropout', 0.5, 'Dropout rate after fc7 [0,1]') 64 | cmd:option('-marginal', 'mean', 'Type of inference (mean | max)') 65 | cmd:option('-shortcutType', '', 'Options: A | B | C') 66 | cmd:option('-retrain', 'none', 'Path to model to retrain with') 67 | cmd:option('-optimState', 'none', 'Path to an optimState to reload from') 68 | ---------- Fine-tuning and memory options ----------------- 69 | cmd:option('-shareGradInput', 'false', 'Share gradInput tensors to reduce memory usage') 70 | cmd:option('-optnet', 'true', 'Use optnet to reduce memory usage') 71 | cmd:option('-resetClassifier', 'false', 'Reset the fully connected layer for fine-tuning') 72 | cmd:option('-nClasses', 157, 'Number of classes in the dataset') 73 | cmd:text() 74 | 75 | print(arg) 76 | local opt = cmd:parse(arg or {}) 77 | opt.cacheDir = opt.cacheDir .. opt.name .. '/' -- per-experiment cache directory 78 | 79 | if not paths.dirp(opt.cacheDir) and not paths.mkdir(opt.cacheDir) then 80 | cmd:error('error: unable to create cache directory: ' .. opt.cacheDir .. '\n') 81 | end 82 | cmd:log(opt.cacheDir .. '/log.txt', opt) --start logging 83 | cmd:addTime(opt.name,'%F %T') 84 | 85 | opt.save = opt.cacheDir .. opt.save 86 | if not (string.sub(opt.gen,1,1)=='/') then 87 | -- If the path is not absolute, put it under opt.cacheDir 88 | opt.gen = opt.cacheDir .. opt.gen 89 | end 90 | 91 | opt.testOnly = opt.testOnly ~= 'false' 92 | opt.tenCrop = opt.tenCrop ~= 'false' 93 | opt.shareGradInput = opt.shareGradInput ~= 'false' 94 | opt.optnet = opt.optnet ~= 'false' 95 | opt.resetClassifier = opt.resetClassifier ~= 'false' 96 | opt.dumpLocalize = opt.dumpLocalize ~= 'false' 97 | 98 | if not paths.dirp(opt.save) and not paths.mkdir(opt.save) then 99 | cmd:error('error: unable to create checkpoint directory: ' .. opt.save .. '\n') 100 | end 101 | if not paths.dirp(opt.gen) and not paths.mkdir(opt.gen) then 102 | cmd:error('error: unable to create gen directory: ' .. opt.gen ..
'\n') 103 | end 104 | 105 | if opt.dataset == 'imagenet' then 106 | -- Handle the most common case of missing -data flag 107 | local trainDir = paths.concat(opt.data, 'train') 108 | if not paths.dirp(opt.data) then 109 | cmd:error('error: missing ImageNet data directory') 110 | elseif not paths.dirp(trainDir) then 111 | cmd:error('error: ImageNet missing `train` directory: ' .. trainDir) 112 | end 113 | -- Default shortcutType=B and nEpochs=90 114 | opt.shortcutType = opt.shortcutType == '' and 'B' or opt.shortcutType 115 | opt.nEpochs = opt.nEpochs == 0 and 90 or opt.nEpochs 116 | elseif opt.dataset == 'cifar10' then 117 | -- Default shortcutType=A and nEpochs=164 118 | opt.shortcutType = opt.shortcutType == '' and 'A' or opt.shortcutType 119 | opt.nEpochs = opt.nEpochs == 0 and 164 or opt.nEpochs 120 | elseif opt.dataset == 'charades' then 121 | if not paths.dirp(opt.data) then 122 | cmd:error('error: missing Charades data directory') 123 | end 124 | opt.nEpochs = opt.nEpochs == 0 and 1 or opt.nEpochs 125 | elseif opt.dataset == 'charadesflow' then 126 | if not paths.dirp(opt.data) then 127 | cmd:error('error: missing Charadesflow data directory') 128 | end 129 | opt.nEpochs = opt.nEpochs == 0 and 1 or opt.nEpochs 130 | elseif opt.dataset == 'charadessync' then 131 | if not paths.dirp(opt.data) then 132 | cmd:error('error: missing Charades data directory') 133 | end 134 | opt.nEpochs = opt.nEpochs == 0 and 1 or opt.nEpochs 135 | elseif opt.dataset == 'charadessyncflow' then 136 | if not paths.dirp(opt.data) then 137 | cmd:error('error: missing Charadesflow data directory') 138 | end 139 | opt.nEpochs = opt.nEpochs == 0 and 1 or opt.nEpochs 140 | else 141 | cmd:error('unknown dataset: ' .. opt.dataset) 142 | end 143 | 144 | if opt.resetClassifier then 145 | if opt.nClasses == 0 then 146 | cmd:error('-nClasses required when resetClassifier is set') 147 | end 148 | end 149 | 150 | if opt.shareGradInput and opt.optnet then 151 | cmd:error('error: cannot use both -shareGradInput and -optnet') 152 | end 153 | 154 | return opt 155 | end 156 | 157 | return M 158 | -------------------------------------------------------------------------------- /torch/train.lua: -------------------------------------------------------------------------------- 1 | -- 2 | -- Copyright (c) 2016, Facebook, Inc. 3 | -- All rights reserved. 4 | -- 5 | -- This source code is licensed under the BSD-style license found in the 6 | -- LICENSE file in the root directory of this source tree. An additional grant 7 | -- of patent rights can be found in the PATENTS file in the same directory. 
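-- [Editor's note] Boolean flags in opts.lua above are parsed as strings and
-- converted with the pattern opt.flag = opt.flag ~= 'false', so any value other
-- than the literal string 'false' (including '1' or 'True') enables the flag, e.g.:
--   th main.lua -testOnly true -tenCrop false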
8 | -- 9 | -- The training loop and learning rate schedule 10 | -- 11 | -- Contributor: Gunnar Atli Sigurdsson 12 | 13 | local optim = require 'optim' 14 | 15 | local M = {} 16 | local Trainer = torch.class('resnet.Trainer', M) 17 | 18 | -- name of the modules in the same order as model:parameters() 19 | -- assumes a single nn.Sequential 20 | local function layer_names(model) 21 | local w = {} 22 | for i=1,#model.modules do 23 | local name = model.modules[i].name or "" 24 | local mw,_ = model.modules[i]:parameters() 25 | if mw then 26 | for k,_ in pairs(mw) do 27 | table.insert(w,name) 28 | end 29 | end 30 | end 31 | return w 32 | end 33 | 34 | function Trainer:__init(model, criterion, opt, optimState) 35 | self.model = model 36 | self.criterion = criterion 37 | optimState = optimState or { 38 | originalLR = opt.LR, 39 | learningRate = opt.LR, 40 | learningRateDecay = 0.0, 41 | momentum = opt.momentum, 42 | nesterov = true, 43 | dampening = 0.0, 44 | weightDecay = opt.weightDecay, 45 | } 46 | self.opt = opt 47 | self.params, self.gradParams = model:parameters() 48 | self.L = #self.params 49 | self.LR_decay_freq = opt.LR_decay_freq 50 | self.optimState = {} 51 | local names = layer_names(self.model) 52 | assert(#names==self.L) 53 | for i=1,self.L do 54 | local layername = names[i] or "" 55 | self.optimState[i] = {} 56 | for k,v in pairs(optimState) do 57 | self.optimState[i][k] = v 58 | end 59 | if string.find(layername, "conv1") then 60 | self.optimState[i].learningRate = opt.LR*opt.conv1LR 61 | end 62 | if string.find(layername, "conv2") then 63 | self.optimState[i].learningRate = opt.LR*opt.conv2LR 64 | end 65 | if string.find(layername, "conv3") then 66 | self.optimState[i].learningRate = opt.LR*opt.conv3LR 67 | end 68 | if string.find(layername, "conv4") then 69 | self.optimState[i].learningRate = opt.LR*opt.conv4LR 70 | end 71 | if string.find(layername, "conv5") then 72 | self.optimState[i].learningRate = opt.LR*opt.conv5LR 73 | end 74 | if string.find(layername, "fc8") then 75 | self.optimState[i].learningRate = opt.LR*opt.fc8LR 76 | end 77 | end 78 | end 79 | 80 | function Trainer:train(opt, epoch, dataloader) 81 | -- Trains the model for a single epoch 82 | 83 | local timer = torch.Timer() 84 | local dataTimer = torch.Timer() 85 | local LRM = self:learningRateModifier(epoch) 86 | for l=1,self.L do 87 | self.optimState[l].learningRate = self.optimState[l].originalLR*LRM 88 | end 89 | 90 | local function feval(i) 91 | return function () return self.criterion.output, self.gradParams[i] end 92 | end 93 | 94 | local trainSize = dataloader:size() 95 | local top1Sum, top5Sum, lossSum = 0.0, 0.0, 0.0 96 | local N = 0 97 | 98 | print('=> Training epoch # ' .. 
epoch) 99 | -- set the batch norm to training mode 100 | self.model:training() 101 | self.model:zeroGradParameters() 102 | for n, sample in dataloader:run() do 103 | local dataTime = dataTimer:time().real 104 | 105 | -- Copy input and target to the GPU 106 | self:copyInputs(sample) 107 | 108 | local output = self.model:forward(self.input):float() 109 | local batchSize = output:size(1) 110 | local loss = self.criterion:forward(self.model.output, self.target) 111 | 112 | if dataloader.synchronous then 113 | -- Sanity check: a synchronous batch must contain frames from a single video 114 | for i=1,batchSize-1 do 115 | assert(sample.ids[{{i}}]==sample.ids[{{i+1}}],"all samples in a synchronous batch must come from the same video") 116 | end 117 | end 118 | 119 | self.criterion:backward(self.model.output, self.target) 120 | self.model:backward(self.input, self.criterion.gradInput) 121 | --require('fb.debugger'):enter() 122 | 123 | if n % opt.accumGrad == 0 then -- apply the accumulated gradients every accumGrad batches 124 | for i=1,self.L do -- sgd on individual layers 125 | optim.sgd(feval(i), self.params[i], self.optimState[i]) 126 | end 127 | self.model:zeroGradParameters() 128 | end 129 | 130 | local top1, top5 = self:computeScore(output, sample.target, 1) 131 | top1Sum = top1Sum + top1*batchSize 132 | top5Sum = top5Sum + top5*batchSize 133 | lossSum = lossSum + loss*batchSize 134 | N = N + batchSize 135 | 136 | print(('%s | Epoch: [%d][%d/%d] Time %.3f Data %.3f Err %1.4f top1 %7.3f top5 %7.3f'):format( 137 | opt.name, epoch, n, trainSize, timer:time().real, dataTime, loss, top1, top5)) 138 | 139 | -- check that the storage didn't get changed due to an unfortunate getParameters call 140 | assert(self.params[1]:storage() == self.model:parameters()[1]:storage()) -- TODO: revisit whether this check is necessary
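      -- [Editor's note] On the -accumGrad option used above: with the defaults
      -- (-batchSize 64, -accumGrad 4), optim.sgd runs only every 4th mini-batch,
      -- on gradParams summed across those batches (zeroGradParameters is called
      -- only after the update), giving an effective batch size of 64*4 = 256.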
141 | 142 | timer:reset() 143 | dataTimer:reset() 144 | end 145 | 146 | return top1Sum / N, top5Sum / N, lossSum / N 147 | end 148 | 149 | function Trainer:test(opt, epoch, dataloader) 150 | -- Computes the top-1 and top-5 error on the validation set 151 | 152 | local timer = torch.Timer() 153 | local dataTimer = torch.Timer() 154 | local size = dataloader:size() 155 | 156 | local nCrops = self.opt.tenCrop and 10 or 1 157 | local top1Sum, top5Sum = 0.0, 0.0 158 | local N = 0 159 | 160 | self.model:evaluate() 161 | for n, sample in dataloader:run() do 162 | local dataTime = dataTimer:time().real 163 | 164 | -- Copy input and target to the GPU 165 | self:copyInputs(sample) 166 | 167 | local output = self.model:forward(self.input):float() 168 | local batchSize = output:size(1) / nCrops 169 | local loss = self.criterion:forward(self.model.output, self.target) 170 | 171 | local top1, top5 = self:computeScore(output, sample.target, nCrops) 172 | top1Sum = top1Sum + top1*batchSize 173 | top5Sum = top5Sum + top5*batchSize 174 | N = N + batchSize 175 | 176 | print(('%s | Test: [%d][%d/%d] Time %.3f Data %.3f top1 %7.3f (%7.3f) top5 %7.3f (%7.3f)'):format( 177 | opt.name, epoch, n, size, timer:time().real, dataTime, top1, top1Sum / N, top5, top5Sum / N)) 178 | 179 | timer:reset() 180 | dataTimer:reset() 181 | end 182 | self.model:training() 183 | 184 | print((' * Finished epoch # %d top1: %7.3f top5: %7.3f\n'):format( 185 | epoch, top1Sum / N, top5Sum / N)) 186 | 187 | return top1Sum / N, top5Sum / N 188 | end 189 | 190 | -- Torch port of THUMOSeventclspr in THUMOS'15 191 | local function mAP(conf, gt) 192 | local so,sortind = torch.sort(conf, 1, true) --descending order 193 | local tp = gt:index(1,sortind:view(-1)):eq(1):int() 194 | local fp = gt:index(1,sortind:view(-1)):eq(0):int() 195 | local npos = torch.sum(tp) 196 | 197 | fp = torch.cumsum(fp) 198 | tp = torch.cumsum(tp) 199 | local rec = tp:float()/npos 200 | local prec = torch.cdiv(tp:float(),(fp+tp):float()) 201 | 202 | local ap = 0 203 | local tmp = gt:index(1,sortind:view(-1)):eq(1):view(-1) 204 | for i=1,conf:size(1) do 205 | if tmp[i]==1 then 206 | ap = ap+prec[i] 207 | end 208 | end 209 | ap = ap/npos 210 | 211 | return rec,prec,ap 212 | end 213 | 214 | local function charades_ap(outputs, gt) 215 | -- approximate version of the Charades evaluation function 216 | -- For precise numbers, use the submission file with the official matlab script 217 | local conf = outputs:clone() 218 | conf[gt:sum(2):eq(0):expandAs(conf)] = -math.huge -- match the official matlab evaluation code, which omits videos with no annotations 219 | local ap = torch.Tensor(157,1) 220 | for i=1,157 do 221 | _,_,ap[{{i},{}}] = mAP(conf[{{},{i}}],gt[{{},{i}}]) 222 | end 223 | return ap 224 | end 225 | 226 | local function tensor2str(x) 227 | local str = "" 228 | for i=1,x:size(1) do 229 | if i == x:size(1) then 230 | str = str .. x[i] 231 | else 232 | str = str .. x[i] .. " "
" " 233 | end 234 | end 235 | return str 236 | end 237 | 238 | function Trainer:test2(opt, epoch, dataloader) 239 | -- Computes the mAP over the whole videos 240 | 241 | local timer = torch.Timer() 242 | local dataTimer = torch.Timer() 243 | local size = dataloader:size() 244 | 245 | local nCrops = 1 246 | local N = 0 247 | local outputs = torch.Tensor(2000,157) --allocate memory 248 | local gt = torch.Tensor(2000,157) --allocate memory 249 | local names = {} 250 | 251 | local frameoutputs, framenr, framenames, nframe 252 | if opt.dumpLocalize then 253 | frameoutputs = torch.Tensor(25*2000,157) 254 | framenames = {} 255 | framenr = {} 256 | nframe = 0 257 | end 258 | 259 | self.model:evaluate() 260 | n2 = 0 261 | for n, sample in dataloader:run() do 262 | n2 = n2 + 1 263 | local dataTime = dataTimer:time().real 264 | 265 | -- Copy input and target to the GPU 266 | self:copyInputs(sample) 267 | 268 | local output = self.model:forward(self.input):float() 269 | local batchSize = 25 270 | 271 | for i=1,25-1 do -- make sure there is no error in the loader, this should be one video 272 | assert(torch.all(torch.eq( 273 | sample.target[{{i},{}}], 274 | sample.target[{{i+1},{}}] 275 | ))) 276 | end 277 | 278 | local tmp = output:exp() 279 | tmp = tmp:cdiv(tmp:sum(2):expandAs(output)) 280 | outputs[{{n2},{}}] = tmp:mean(1) 281 | gt[{{n2},{}}] = sample.target[{{1},{}}] 282 | table.insert(names,sample.ids[1]) 283 | 284 | if opt.dumpLocalize then 285 | frameoutputs[{{nframe+1,nframe+25},{}}] = tmp 286 | for b=1,25 do 287 | framenames[nframe+b] = sample.ids[1] 288 | framenr[nframe+b] = b 289 | end 290 | nframe = nframe+25 291 | end 292 | 293 | print(('%s | Test2: [%d][%d/%d] Time %.3f Data %.3f'):format( 294 | opt.name, epoch, n, size, timer:time().real, dataTime)) 295 | 296 | timer:reset() 297 | dataTimer:reset() 298 | end 299 | self.model:training() 300 | outputs = outputs[{{1,n2},{}}] 301 | gt = gt[{{1,n2},{}}] 302 | ap = charades_ap(outputs, gt) 303 | 304 | print((' * Finished epoch # %d mAP: %7.3f\n'):format( 305 | epoch, torch.mean(ap))) 306 | 307 | print('dumping output to file') 308 | local out = assert(io.open(self.opt.save .. "/epoch" .. epoch .. ".txt", "w")) 309 | for i=1,outputs:size(1) do 310 | out:write(names[i] .. " " .. tensor2str(outputs[{{i},{}}]:view(-1)) .. "\n") 311 | end 312 | out:close() 313 | 314 | if opt.dumpLocalize then 315 | print('dumping localization output to file') 316 | frameoutputs = frameoutputs[{{1,nframe},{}}] 317 | local out = assert(io.open(self.opt.save .. "/localize" .. epoch .. ".txt", "w")) 318 | for i=1,frameoutputs:size(1) do 319 | f = framenr[i] 320 | vidid = framenames[i] 321 | out:write(vidid .. " " .. f .. " " .. tensor2str(frameoutputs[{{i},{}}]:view(-1)) .. 
"\n") 322 | end 323 | out:close() 324 | end 325 | 326 | return ap 327 | end 328 | 329 | 330 | function Trainer:computeScore(output, target, nCrops) 331 | if nCrops > 1 then 332 | -- Sum over crops 333 | output = output:view(output:size(1) / nCrops, nCrops, output:size(2)) 334 | --:exp() 335 | :sum(2):squeeze(2) 336 | end 337 | 338 | -- Coputes the top1 and top5 error rate 339 | local batchSize = output:size(1) 340 | 341 | local _ , predictions = output:float():sort(2, true) -- descending 342 | 343 | -- Find which predictions match the target 344 | local correct = predictions:eq( 345 | target:long():view(batchSize, 1):expandAs(output)) 346 | 347 | -- Top-1 score 348 | local top1 = 1.0 - (correct:narrow(2, 1, 1):sum() / batchSize) 349 | 350 | -- Top-5 score, if there are at least 5 classes 351 | local len = math.min(5, correct:size(2)) 352 | local top5 = 1.0 - (correct:narrow(2, 1, len):sum() / batchSize) 353 | 354 | return top1 * 100, top5 * 100 355 | end 356 | 357 | function Trainer:copyInputs(sample) 358 | -- Copies the input to a CUDA tensor, if using 1 GPU, or to pinned memory, 359 | -- if using DataParallelTable. The target is always copied to a CUDA tensor 360 | self.input = self.input or (self.opt.nGPU == 1 361 | and torch.CudaTensor() 362 | or cutorch.createCudaHostTensor()) 363 | self.target = self.target or torch.CudaTensor() 364 | 365 | self.input:resize(sample.input:size()):copy(sample.input) 366 | self.target:resize(sample.target:size()):copy(sample.target) 367 | end 368 | 369 | function Trainer:learningRateModifier(epoch) 370 | -- Training schedule 371 | local decay = 0 372 | if self.opt.dataset == 'charades' then 373 | decay = math.floor((epoch - 1) / self.LR_decay_freq) 374 | elseif self.opt.dataset == 'charadesflow' then 375 | decay = math.floor((epoch - 1) / self.LR_decay_freq) 376 | elseif self.opt.dataset == 'imagenet' then 377 | decay = math.floor((epoch - 1) / 30) 378 | elseif self.opt.dataset == 'cifar10' then 379 | decay = epoch >= 122 and 2 or epoch >= 81 and 1 or 0 380 | else 381 | decay = math.floor((epoch - 1) / self.LR_decay_freq) 382 | end 383 | return math.pow(0.1, decay) 384 | end 385 | 386 | 387 | 388 | return M.Trainer 389 | --------------------------------------------------------------------------------