├── .gitignore ├── .gitmodules ├── .theanorc.example ├── LICENSE ├── README.md ├── cocoeval.py ├── common.py ├── config.py ├── create_movies.py ├── data ├── README.md ├── __init__.py ├── create_dataset.py ├── create_msr_vtt.py ├── create_mvad_mpii_lsmdc.py ├── create_skip_vectors.py ├── create_tacos.py ├── create_trecvid.py ├── create_y2t.py ├── process_frames.py ├── process_pca.py ├── py3_process_features.py ├── subsect_videos.py ├── util.py └── validate_feats.py ├── data_engine.py ├── download.py ├── hyperband.py ├── metrics.py ├── model_attention.py ├── model_lstmdd.py ├── model_mtle.py ├── py2-vid-desc_requirements.txt ├── py2_pip_freeze.txt ├── py3-vid-desc_requirements.txt ├── py3_pip_freeze.txt └── train_model.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | .idea/* 60 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "coco-caption"] 2 | path = coco-caption 3 | url = https://github.com/tylin/coco-caption.git 4 | [submodule "jobman"] 5 | path = jobman 6 | url = https://github.com/crmne/jobman.git 7 | [submodule "data/skip-thoughts"] 8 | path = data/skip-thoughts 9 | url = https://github.com/olivernina/skip-thoughts.git 10 | -------------------------------------------------------------------------------- /.theanorc.example: -------------------------------------------------------------------------------- 1 | [global] 2 | device = gpu0 3 | floatX = float32 4 | 5 | [dnn] 6 | enabled = True 7 | 8 | [nvcc] 9 | flags = -D_FORCE_INLINES 10 | 11 | [lib] 12 | cnmem = 10500 13 | # Use 11200 for training anything with an entire dataset 14 | #cnmem = 11200 15 | # Use 5500 for HYPERBAND 16 | #cnmem=5500 17 | 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 OSU Photogammetric Computer Vision Lab 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice 
and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MTLE 2 | 3 | This is the latest version of our code described in our [paper](https://arxiv.org/abs/1809.07257). An earlier version of our code was used at LSMDC17 where we won the movie description task. 4 | 5 | ## Dependencies 6 | 7 | These are the general, high-level dependencies: 8 | 9 | - CUDA-capable GPU(s) 10 | - Large storage medium for dataset videos (for re-creating results) 11 | - Python 2.7 + Python 3.5 (both required, more info below) 12 | 13 | For Python 2.7: 14 | 15 | - `Theano 0.8.1` 16 | - `cuDNN 5.4` 17 | - `CNMeM Memory backend` (optional) 18 | - Working `.theanorc` config file (provided, more info below) 19 | 20 | For Python 3.5 and above: 21 | 22 | - `PyTorch 0.4.0` (torch + torchvision) 23 | - `pretrainedmodels` (Cadene repository) 24 | 25 | In-depth Python package information is provided for each respective environment: 26 | 27 | - `py2-vid-desc_requirements.txt` 28 | - `py2_pip_freeze.txt` 29 | - `py3-vid-desc_requirements.txt` 30 | - `py3_pip_freeze.txt` 31 | 32 | The rest of this guide assumes you are using Linux. 33 | 34 | We recommend the use of Anaconda to handle dependencies, as the files above can be used to easily re-create the necessary environments. 35 | 36 | Theano uses a file called `.theanorc` to configure certain options. This file goes in your home directory on Linux. We have provided one that we use on a working test system, called `.theanorc.example`. 37 | 38 | #### Why two Python versions? 39 | 40 | We use the `pretrainedmodels` package provided by GitHub user Cadene, due to its ease of use and better portability over Caffe. 41 | However, this means having to use Python 3 for this specific step. Everything else uses Python 2.7. 42 | We thought this was a worthwhile hurdle to take advantage of PyTorch's ease of installation. 43 | 44 | 45 | ## Installation 46 | 47 | We recommend Anaconda, available [here](https://www.anaconda.com/download/). 48 | 49 | Clone the code as follows: 50 | 51 | ` git clone https://github.com/OSUPCVLab/VideoToTextDNN.git --recursive` 52 | 53 | Once Anaconda is installed, you must create two anaconda environments: 54 | 55 | The general-purpose one: 56 | 57 | `conda create --name vid-desc python=2.7 --file py2-vid-desc_requirements.txt` 58 | 59 | and one for feature extraction: 60 | 61 | `conda create --name vid-desc-feats python=3.6 --file py3-vid-desc_requirements.txt` 62 | 63 | Use `conda activate ` to switch between environments. 
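As an optional sanity check (our suggestion, not a required step), the short script below can be run once in each environment to confirm which stack that environment provides. It assumes only the packages listed above; nothing in the repo depends on it.

```python
# check_env.py -- hypothetical helper: run once in each conda environment.
# Reports whether the Theano (py2) or PyTorch (py3) stack is importable there.
def report():
    try:
        import theano
        print('Theano %s, device=%s, floatX=%s' %
              (theano.__version__, theano.config.device, theano.config.floatX))
    except ImportError:
        print('Theano not importable here (expected in the feature-extraction env)')
    try:
        import torch
        import pretrainedmodels
        print('PyTorch %s, CUDA available: %s, %d pretrained models exposed' %
              (torch.__version__, torch.cuda.is_available(), len(pretrainedmodels.model_names)))
    except ImportError:
        print('PyTorch / pretrainedmodels not importable here (expected in the vid-desc env)')

if __name__ == '__main__':
    report()
```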
64 | 65 | If you have not already done so, clone the repo recursively so that the required submodules are also cloned: 66 | 67 | `git clone https://github.com/OSUPCVLab/VideoToTextDNN.git --recursive` 68 | 69 | Install the required packages for the project: 70 | 71 | `pip install -r py2_pip_freeze.txt` 72 | 73 | You might see some complaints about the following packages, so you will need to install them manually: 74 | 75 | `conda install -c conda-forge pyro4` 76 | 77 | For client installation, the following modules are also needed: 78 | 79 | `python -m pip install --upgrade mss` 80 | 81 | `conda install -c https://conda.anaconda.org/menpo opencv3` 82 | 83 | `pip install pyttsx3` 84 | 85 | `pip install pretrainedmodels` 86 | 87 | 88 | 89 | ## Data 90 | 91 | The data pipeline is handled under the `data/` directory. The `README.md` file there describes in detail how to download the necessary datasets and process them for consumption. 92 | 93 | 94 | 95 | ## Tutorial 96 | 97 | Since the data collection process can take from minutes to weeks, depending on your available hardware, we have split the tutorial into two paths. 98 | 99 | Visit the file `data/README.md` and follow the path most interesting to you to prepare the data. Once you have your data files ready, come back to this file to perform training or prediction on the path relevant to you. 100 | 101 | The rest of this tutorial assumes you have either 1) extracted feature files, or 2) created `.pkl` files, as described in `data/README.md`. If there are any problems, feel free to file an issue on this repo, as this release is still a work in progress. 102 | 103 | 104 | With your extracted features ready, you will need a pre-trained model. We have provided two checkpoint files, one trained on the 10k video MSVD Youtube-based dataset, and another on the 120k video LSMDC16 Movie dataset. 105 | 106 | MSVD and LSMDC Checkpoint: [download](https://uflorida-my.sharepoint.com/:f:/g/personal/w_garcia_ufl_edu/Ev7InIZkYc5Pn91wlU3oK1gB_NQ6BAArSll4iFELl8Hj2w?e=vad0K7) 107 | 108 | 109 | ## Demo 110 | For a live demo, we use a server running our system and a client that extracts and submits CNN features to the server. 111 | 112 | To start the server, just run the script: 113 | 114 | `python live_mtle_server.py ` 115 | 116 | The server will then output a `temp_uri` string, which you pass to the client so it knows where to send its input. 117 | 118 | To run the client, execute the following script with the mode you want. There are three modes: `live` (screen capture), `prompt` (you pass the path of a video), and `headless` (you pass a list of videos to process): 119 | 120 | `python live_mtle_client.py --mode ` 121 | 122 | 123 | ## Acknowledgements 124 | Big thanks to Li Yao and his original project [arctic-capgen-vid](https://github.com/yaoli/arctic-capgen-vid), from which this project derives. 125 | 126 | This work was sponsored by the SMART DOD program. 127 | 128 | We apologize for the delay in releasing the code. The main author encountered some difficulties and life events leading up to the public release of the paper, which made it difficult to release the paper and code sooner.
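As an optional follow-up to the Tutorial section above: before training, it can help to peek at the pickle files produced by the `data/` scripts. The snippet below is only a rough sketch, not a utility shipped with this repo. It assumes a Python 2 environment, and `pkl_dir` (the example path is hypothetical) should point at the output directory you passed to one of the `data/create_*.py` scripts; the file names follow the conventions those scripts use.

```python
# Minimal sketch: inspect the dataset pickles produced by the data/ pipeline.
import os
import cPickle as pickle

pkl_dir = 'data/vtt/pkls2016_googlenet'  # hypothetical path -- use your own output dir

def load(name):
    with open(os.path.join(pkl_dir, name), 'rb') as f:
        return pickle.load(f)

caps = load('CAP.pkl')           # {video_id: [{'caption', 'tokenized', 'cap_id', 'image_id'}, ...]}
train_ids = load('train.pkl')    # ['<video_id>_<cap_id>', ...]
worddict = load('worddict.pkl')  # vocabulary built by create_dictionary()

print('%d captioned videos, %d training pairs, %d vocabulary entries' %
      (len(caps), len(train_ids), len(worddict)))

vid = sorted(caps.keys())[0]
print('%s: %s' % (vid, caps[vid][0]['tokenized']))
```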
129 | -------------------------------------------------------------------------------- /cocoeval.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(1,'coco-caption') 3 | 4 | from pycocoevalcap.bleu.bleu import Bleu 5 | from pycocoevalcap.rouge.rouge import Rouge 6 | from pycocoevalcap.cider.cider import Cider 7 | from pycocoevalcap.meteor.meteor import Meteor 8 | from pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer 9 | import os, cPickle 10 | 11 | class COCOScorer(object): 12 | def __init__(self): 13 | print 'init COCO-EVAL scorer' 14 | 15 | def score(self, GT, RES, IDs): 16 | self.eval = {} 17 | self.imgToEval = {} 18 | gts = {} 19 | res = {} 20 | for ID in IDs: 21 | gts[ID] = GT[ID] 22 | res[ID] = RES[ID] 23 | print 'tokenization...' 24 | tokenizer = PTBTokenizer() 25 | gts = tokenizer.tokenize(gts) 26 | res = tokenizer.tokenize(res) 27 | 28 | # ================================================= 29 | # Set up scorers 30 | # ================================================= 31 | print 'setting up scorers...' 32 | scorers = [ 33 | (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), 34 | (Meteor(),"METEOR"), 35 | (Rouge(), "ROUGE_L"), 36 | (Cider(), "CIDEr") 37 | ] 38 | 39 | # ================================================= 40 | # Compute scores 41 | # ================================================= 42 | eval = {} 43 | for scorer, method in scorers: 44 | print 'computing %s score...'%(scorer.method()) 45 | score, scores = scorer.compute_score(gts, res) 46 | if type(method) == list: 47 | for sc, scs, m in zip(score, scores, method): 48 | self.setEval(sc, m) 49 | self.setImgToEvalImgs(scs, IDs, m) 50 | print "%s: %0.3f"%(m, sc) 51 | else: 52 | self.setEval(score, method) 53 | self.setImgToEvalImgs(scores, IDs, method) 54 | print "%s: %0.3f"%(method, score) 55 | 56 | for metric, score in self.eval.items(): 57 | print '%s: %.5f'%(metric, score) 58 | return self.eval 59 | 60 | def setEval(self, score, method): 61 | self.eval[method] = score 62 | 63 | def setImgToEvalImgs(self, scores, imgIds, method): 64 | for imgId, score in zip(imgIds, scores): 65 | if not imgId in self.imgToEval: 66 | self.imgToEval[imgId] = {} 67 | self.imgToEval[imgId]["image_id"] = imgId 68 | self.imgToEval[imgId][method] = score 69 | 70 | 71 | def load_pkl(path): 72 | f = open(path, 'rb') 73 | try: 74 | rval = cPickle.load(f) 75 | finally: 76 | f.close() 77 | return rval 78 | 79 | def score(ref, sample): 80 | # ref and sample are both dict 81 | scorers = [ 82 | (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), 83 | (Meteor(),"METEOR"), 84 | (Rouge(), "ROUGE_L"), 85 | (Cider(), "CIDEr") 86 | ] 87 | final_scores = {} 88 | for scorer, method in scorers: 89 | print 'computing %s score with COCO-EVAL...'%(scorer.method()) 90 | score, scores = scorer.compute_score(ref, sample) 91 | if type(score) == list: 92 | for m, s in zip(method, score): 93 | final_scores[m] = s 94 | else: 95 | final_scores[method] = score 96 | return final_scores 97 | 98 | def test_cocoscorer(): 99 | '''gts = { 100 | 184321:[ 101 | {u'image_id': 184321, u'id': 352188, u'caption': u'A train traveling down-tracks next to lights.'}, 102 | {u'image_id': 184321, u'id': 356043, u'caption': u"A blue and silver train next to train's station and trees."}, 103 | {u'image_id': 184321, u'id': 356382, u'caption': u'A blue train is next to a sidewalk on the rails.'}, 104 | {u'image_id': 184321, u'id': 361110, u'caption': u'A passenger train pulls into a train station.'}, 105 | 
{u'image_id': 184321, u'id': 362544, u'caption': u'A train coming down the tracks arriving at a station.'}], 106 | 81922: [ 107 | {u'image_id': 81922, u'id': 86779, u'caption': u'A large jetliner flying over a traffic filled street.'}, 108 | {u'image_id': 81922, u'id': 90172, u'caption': u'An airplane flies low in the sky over a city street. '}, 109 | {u'image_id': 81922, u'id': 91615, u'caption': u'An airplane flies over a street with many cars.'}, 110 | {u'image_id': 81922, u'id': 92689, u'caption': u'An airplane comes in to land over a road full of cars'}, 111 | {u'image_id': 81922, u'id': 823814, u'caption': u'The plane is flying over top of the cars'}] 112 | } 113 | 114 | samples = { 115 | 184321: [{u'image_id': 184321, 'id': 111, u'caption': u'train traveling down a track in front of a road'}], 116 | 81922: [{u'image_id': 81922, 'id': 219, u'caption': u'plane is flying through the sky'}], 117 | } 118 | ''' 119 | gts = { 120 | '184321':[ 121 | {u'image_id': '184321', u'cap_id': 0, u'caption': u'A train traveling down tracks next to lights.', 122 | 'tokenized': 'a train traveling down tracks next to lights'}, 123 | {u'image_id': '184321', u'cap_id': 1, u'caption': u'A train coming down the tracks arriving at a station.', 124 | 'tokenized': 'a train coming down the tracks arriving at a station'}], 125 | '81922': [ 126 | {u'image_id': '81922', u'cap_id': 0, u'caption': u'A large jetliner flying over a traffic filled street.', 127 | 'tokenized': 'a large jetliner flying over a traffic filled street'}, 128 | {u'image_id': '81922', u'cap_id': 1, u'caption': u'The plane is flying over top of the cars', 129 | 'tokenized': 'the plan is flying over top of the cars'},] 130 | } 131 | 132 | samples = { 133 | '184321': [{u'image_id': '184321', u'caption': u'train traveling down a track in front of a road'}], 134 | '81922': [{u'image_id': '81922', u'caption': u'plane is flying through the sky'}], 135 | } 136 | IDs = ['184321', '81922'] 137 | scorer = COCOScorer() 138 | scorer.score(gts, samples, IDs) 139 | 140 | if __name__ == '__main__': 141 | test_cocoscorer() 142 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | import cPickle, os 2 | 3 | import numpy 4 | from collections import OrderedDict 5 | import theano 6 | import theano.tensor as tensor 7 | from theano.sandbox.rng_mrg import MRG_RandomStreams 8 | from theano import config 9 | 10 | # the dir where there should be a subdir named 'youtube2text_iccv15' 11 | 12 | RAB_DATASET_BASE_PATH = './data/' 13 | # RAB_DATASET_BASE_PATH = '/media/onina/sea2/datasets/lsmdc/out_pkl/' 14 | 15 | 16 | # the dir where all the experiment data is dumped. 17 | RAB_EXP_PATH = 'results/' 18 | 19 | 20 | def numpy_floatX(data): 21 | return numpy.asarray(data, dtype=config.floatX) 22 | 23 | def get_two_rngs(seed=None): 24 | if seed is None: 25 | seed = 1234 26 | else: 27 | seed = seed 28 | rng_numpy = numpy.random.RandomState(seed) 29 | rng_theano = MRG_RandomStreams(seed) 30 | return rng_numpy, rng_theano 31 | 32 | rng_numpy, rng_theano = get_two_rngs() 33 | 34 | def concatenate(tensor_list, axis=0): 35 | """ 36 | Alternative implementation of `theano.tensor.concatenate`. 37 | This function does exactly the same thing, but contrary to Theano's own 38 | implementation, the gradient is implemented on the GPU. 
39 | Backpropagating through `theano.tensor.concatenate` yields slowdowns 40 | because the inverse operation (splitting) needs to be done on the CPU. 41 | This implementation does not have that problem. 42 | :usage: 43 | >>> x, y = theano.tensor.matrices('x', 'y') 44 | >>> c = concatenate([x, y], axis=1) 45 | :parameters: 46 | - tensor_list : list 47 | list of Theano tensor expressions that should be concatenated. 48 | - axis : int 49 | the tensors will be joined along this axis. 50 | :returns: 51 | - out : tensor 52 | the concatenated tensor expression. 53 | """ 54 | concat_size = sum(tt.shape[axis] for tt in tensor_list) 55 | 56 | output_shape = () 57 | for k in range(axis): 58 | output_shape += (tensor_list[0].shape[k],) 59 | output_shape += (concat_size,) 60 | for k in range(axis + 1, tensor_list[0].ndim): 61 | output_shape += (tensor_list[0].shape[k],) 62 | 63 | out = tensor.zeros(output_shape) 64 | offset = 0 65 | for tt in tensor_list: 66 | indices = () 67 | for k in range(axis): 68 | indices += (slice(None),) 69 | indices += (slice(offset, offset + tt.shape[axis]),) 70 | for k in range(axis + 1, tensor_list[0].ndim): 71 | indices += (slice(None),) 72 | 73 | out = tensor.set_subtensor(out[indices], tt) 74 | offset += tt.shape[axis] 75 | 76 | return out 77 | ''' 78 | Theano shared variables require GPUs, so to 79 | make this code more portable, these two functions 80 | push and pull variables between a shared 81 | variable dictionary and a regular numpy 82 | dictionary 83 | ''' 84 | # push parameters to Theano shared variables 85 | def zipp(params, tparams): 86 | for kk, vv in params.iteritems(): 87 | tparams[kk].set_value(vv) 88 | 89 | # pull parameters from Theano shared variables 90 | def unzip(zipped): 91 | new_params = OrderedDict() 92 | for kk, vv in zipped.iteritems(): 93 | new_params[kk] = vv.get_value() 94 | return new_params 95 | 96 | # get the list of parameters: Note that tparams must be OrderedDict 97 | def itemlist(tparams): 98 | return [vv for kk, vv in tparams.iteritems()] 99 | 100 | # dropout 101 | def dropout_layer(state_before, use_noise, trng): 102 | proj = tensor.switch(use_noise, 103 | state_before * 104 | trng.binomial(state_before.shape, p=0.5, n=1, dtype=state_before.dtype), 105 | state_before * 0.5) 106 | return proj 107 | 108 | 109 | # initialize Theano shared variables according to the initial parameters 110 | def init_tparams(params): 111 | tparams = OrderedDict() 112 | for kk, pp in params.iteritems(): 113 | tparams[kk] = theano.shared(params[kk], name=kk) 114 | return tparams 115 | 116 | # some utilities 117 | def ortho_weight(ndim): 118 | """ 119 | Random orthogonal weights, we take 120 | the right matrix in the SVD. 121 | 122 | Remember in SVD, u has the same # rows as W 123 | and v has the same # of cols as W. So we 124 | are ensuring that the rows are 125 | orthogonal. 
126 | """ 127 | W = rng_numpy.randn(ndim, ndim) 128 | u, _, _ = numpy.linalg.svd(W) 129 | return u.astype('float32') 130 | 131 | def norm_weight(nin,nout=None, scale=0.01, ortho=True): 132 | """ 133 | Random weights drawn from a Gaussian 134 | """ 135 | if nout == None: 136 | nout = nin 137 | if nout == nin and ortho: 138 | W = ortho_weight(nin) 139 | else: 140 | W = scale * rng_numpy.randn(nin, nout) 141 | return W.astype('float32') 142 | 143 | def tanh(x): 144 | return tensor.tanh(x) 145 | 146 | def rectifier(x): 147 | return tensor.maximum(0., x) 148 | 149 | def linear(x): 150 | return x 151 | 152 | # load parameters 153 | def load_params(path, params): 154 | pp = numpy.load(path) 155 | for kk, vv in params.iteritems(): 156 | if kk not in pp: 157 | raise Warning('%s is not in the archive'%kk) 158 | params[kk] = pp[kk] 159 | return params 160 | 161 | def grad_nan_report(grads, tparams): 162 | numpy.set_printoptions(precision=3) 163 | D = OrderedDict() 164 | i = 0 165 | NaN_keys = [] 166 | magnitude = [] 167 | assert len(grads) == len(tparams) 168 | for k, v in tparams.iteritems(): 169 | grad = grads[i] 170 | magnitude.append(numpy.abs(grad).mean()) 171 | if numpy.isnan(grad.sum()): 172 | NaN_keys.append(k) 173 | #assert v.get_value().shape == grad.shape 174 | D[k] = grad 175 | i += 1 176 | #norm = [numpy.sqrt(numpy.sum(grad**2)) for grad in grads] 177 | #print '\tgrad mean(abs(x))', numpy.array(magnitude) 178 | return D, NaN_keys 179 | 180 | # optimizers 181 | # name(hyperp, tparams, grads, inputs (list), cost) = f_grad_shared, f_update 182 | def adadelta(lr, tparams, grads, inp, cost, extra): 183 | zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad'%k) for k, p in tparams.iteritems()] 184 | running_up2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rup2'%k) for k, p in tparams.iteritems()] 185 | running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2'%k) for k, p in tparams.iteritems()] 186 | 187 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 188 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)] 189 | 190 | f_grad_shared = theano.function(inp, [cost] + extra, updates=zgup+rg2up, 191 | profile=False, on_unused_input='ignore',allow_input_downcast=True) 192 | 193 | updir = [-tensor.sqrt(ru2 + 1e-6) / tensor.sqrt(rg2 + 1e-6) * zg for zg, ru2, rg2 in zip(zipped_grads, running_up2, running_grads2)] 194 | ru2up = [(ru2, 0.95 * ru2 + 0.05 * (ud ** 2)) for ru2, ud in zip(running_up2, updir)] 195 | param_up = [(p, p + ud) for p, ud in zip(itemlist(tparams), updir)] 196 | 197 | f_update = theano.function([lr], [], updates=ru2up+param_up, on_unused_input='ignore', profile=False) 198 | 199 | return f_grad_shared, f_update 200 | 201 | def adam(lr, tparams, grads, inp, cost): 202 | gshared = [theano.shared(p.get_value() * 0., name='%s_grad'%k) for k, p in tparams.iteritems()] 203 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 204 | 205 | f_grad_shared = theano.function(inp, cost, updates=gsup) 206 | 207 | lr0 = 0.0002 208 | b1 = 0.1 209 | b2 = 0.001 210 | e = 1e-8 211 | 212 | updates = [] 213 | 214 | i = theano.shared(numpy_floatX(0.)) 215 | i_t = i + 1. 216 | fix1 = 1. - b1**(i_t) 217 | fix2 = 1. - b2**(i_t) 218 | lr_t = lr0 * (tensor.sqrt(fix2) / fix1) 219 | 220 | for p, g in zip(tparams.values(), gshared): 221 | m = theano.shared(p.get_value() * 0.) 222 | v = theano.shared(p.get_value() * 0.) 223 | m_t = (b1 * g) + ((1. - b1) * m) 224 | v_t = (b2 * tensor.sqr(g)) + ((1. 
- b2) * v) 225 | g_t = m_t / (tensor.sqrt(v_t) + e) 226 | p_t = p - (lr_t * g_t) 227 | updates.append((m, m_t)) 228 | updates.append((v, v_t)) 229 | updates.append((p, p_t)) 230 | updates.append((i, i_t)) 231 | 232 | f_update = theano.function([lr], [], updates=updates, on_unused_input='ignore') 233 | 234 | return f_grad_shared, f_update 235 | 236 | def rmsprop(lr, tparams, grads, inp, cost): 237 | zipped_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad'%k) for k, p in tparams.iteritems()] 238 | running_grads = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad'%k) for k, p in tparams.iteritems()] 239 | running_grads2 = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_rgrad2'%k) for k, p in tparams.iteritems()] 240 | 241 | zgup = [(zg, g) for zg, g in zip(zipped_grads, grads)] 242 | rgup = [(rg, 0.95 * rg + 0.05 * g) for rg, g in zip(running_grads, grads)] 243 | rg2up = [(rg2, 0.95 * rg2 + 0.05 * (g ** 2)) for rg2, g in zip(running_grads2, grads)] 244 | 245 | f_grad_shared = theano.function(inp, cost, updates=zgup+rgup+rg2up, profile=False) 246 | 247 | updir = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_updir'%k) for k, p in tparams.iteritems()] 248 | updir_new = [(ud, 0.9 * ud - 1e-4 * zg / tensor.sqrt(rg2 - rg ** 2 + 1e-4)) for ud, zg, rg, rg2 in zip(updir, zipped_grads, running_grads, running_grads2)] 249 | param_up = [(p, p + udn[1]) for p, udn in zip(itemlist(tparams), updir_new)] 250 | f_update = theano.function([lr], [], updates=updir_new+param_up, on_unused_input='ignore', profile=False) 251 | 252 | return f_grad_shared, f_update 253 | 254 | def sgd(lr, tparams, grads, inp, cost): 255 | gshared = [theano.shared(p.get_value() * numpy_floatX(0.), name='%s_grad'%k) for k, p in tparams.iteritems()] 256 | gsup = [(gs, g) for gs, g in zip(gshared, grads)] 257 | 258 | f_grad_shared = theano.function(inp, cost, updates=gsup, profile=False) 259 | 260 | pup = [(p, p - lr * g) for p, g in zip(itemlist(tparams), gshared)] 261 | f_update = theano.function([lr], [], updates=pup, profile=False) 262 | 263 | return f_grad_shared, f_update 264 | 265 | def load_pkl(path): 266 | """ 267 | Load a pickled file. 268 | 269 | :param path: Path to the pickled file. 270 | 271 | :return: The unpickled Python object. 272 | """ 273 | f = open(path, 'rb') 274 | try: 275 | rval = cPickle.load(f) 276 | finally: 277 | f.close() 278 | return rval 279 | 280 | def dump_pkl(obj, path): 281 | """ 282 | Save a Python object into a pickle file. 
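
:param obj: The Python object to pickle.
:param path: Path of the pickle file to write.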
283 | """ 284 | f = open(path, 'wb') 285 | try: 286 | cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL) 287 | finally: 288 | f.close() 289 | 290 | 291 | def generate_minibatch_idx(dataset_size, minibatch_size): 292 | # generate idx for minibatches SGD 293 | # output [m1, m2, m3, ..., mk] where mk is a list of indices 294 | assert dataset_size >= minibatch_size 295 | n_minibatches = dataset_size / minibatch_size 296 | leftover = dataset_size % minibatch_size 297 | idx = range(dataset_size) 298 | if leftover == 0: 299 | minibatch_idx = numpy.split(numpy.asarray(idx), n_minibatches) 300 | else: 301 | print 'uneven minibath chunking, overall %d, last one %d'%(minibatch_size, leftover) 302 | minibatch_idx = numpy.split(numpy.asarray(idx)[:-leftover], n_minibatches) 303 | minibatch_idx = minibatch_idx + [numpy.asarray(idx[-leftover:])] 304 | minibatch_idx = [idx_.tolist() for idx_ in minibatch_idx] 305 | return minibatch_idx 306 | 307 | def get_rab_dataset_base_path(): 308 | return RAB_DATASET_BASE_PATH 309 | 310 | def get_rab_exp_path(): 311 | return RAB_EXP_PATH 312 | 313 | def create_dir_if_not_exist(directory): 314 | if not os.path.exists(directory): 315 | print 'creating directory %s'%directory 316 | os.makedirs(directory) 317 | else: 318 | print "%s already exists!"%directory 319 | 320 | def flatten_list_of_list(l): 321 | # l is a list of list 322 | return [item for sublist in l for item in sublist] 323 | 324 | def load_txt_file(path): 325 | f = open(path,'r') 326 | lines = f.readlines() 327 | f.close() 328 | return lines 329 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | from jobman import DD 2 | import common 3 | 4 | exp_path = common.get_rab_exp_path() 5 | 6 | config = DD({ 7 | 'model': 'attention', 8 | 'random_seed': 1234, 9 | # ERASE everything under save_model_path 10 | 'erase_history': True, 11 | 'attention': DD({ 12 | 'reload_': False, 13 | 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 14 | 'from_dir': '', 15 | 'dataset': 'youtube2text',#'youtube2text',#'lsmdc',mvad. 
'ysvd' 16 | 'video_feature': 'googlenet', 17 | 'dim_word':468, # 474 18 | 'ctx_dim':-1,# auto set 19 | 'dim':3518, # lstm dim # 536 20 | 'n_layers_out':1, # for predicting next word 21 | 'n_layers_init':0, 22 | 'encoder_dim': 300, 23 | 'prev2out':True, 24 | 'ctx2out':True, 25 | 'patience':20, 26 | 'max_epochs':500, 27 | 'decay_c':1e-4, 28 | 'alpha_entropy_r': 0., 29 | 'alpha_c':0.70602, 30 | 'lrate':0.01, 31 | 'selector':True, 32 | 'n_words':20000, 33 | 'maxlen':30, # max length of the descprition 34 | 'optimizer':'adadelta', 35 | 'clip_c': 10., 36 | 'batch_size': 64, # for trees use 25 37 | # 'batch_size': 2, # for trees use 25 38 | 'valid_batch_size':200, 39 | # 'valid_batch_size':2, 40 | # in the unit of minibatches 41 | 'dispFreq':200, 42 | 'validFreq':2000, 43 | 'saveFreq':-1, # this is disabled, now use sampleFreq instead 44 | 'sampleFreq':100, 45 | # blue, meteor, or both 46 | 'metric': 'everything', # set to perplexity on DVS 47 | 'use_dropout':True, 48 | 'K':28, # 26 when compare 49 | 'OutOf':None, # used to be 240, for motionfeature use 26 50 | 'verbose': True, 51 | 'debug': False, 52 | 'dec':'standard', 53 | 'encoder':None, 54 | 'mode':'train', 55 | 'proc':'nostd', 56 | 'data_dir':'', 57 | 'feats_dir':'' 58 | }), 59 | 'iLSTM': DD({ 60 | 'reload_': False, 61 | 'save_model_dir': exp_path + 'attention_mod/', 62 | 'dec':'standard', 63 | 'valid_batch_size':200, 64 | 'dataset': 'youtube2text', 65 | 'encoder': None, 66 | 'max_epochs':500, 67 | 'from_dir': '', 68 | }), 69 | 'attention_mod': DD({ 70 | 'reload_': False, 71 | 'save_model_dir': exp_path + 'attention_mod/', 72 | 'dec':'multi-stdist' 73 | }), 74 | 'mtle': DD({ 75 | 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 76 | 'reload_': False, 77 | 'from_dir': '', 78 | 'dec':'multi-stdist', 79 | 'dim_word':468, # 474 80 | 'encoder':None, 81 | 'encoder_dim': 300, 82 | 'batch_size': 64, #64 for trees use 25 83 | 'valid_batch_size':200, 84 | 'dataset': 'vtt', 85 | 'dim':3518, # lstm dim # 536 86 | 'video_feature': 'googlenet', 87 | 'validFreq': 2000, 88 | 'max_epochs': 500, 89 | 'mode':'train', 90 | 'proc':'nostd', 91 | 'K':28, # 26 when compare 92 | 'lrate':0.0001, 93 | 'data_dir':'', 94 | 'dispFreq':10, 95 | 'feats_dir':'', 96 | 'cost_type':'v1' 97 | }), 98 | 'fcoupled': DD({ 99 | 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 100 | 'reload_': False, 101 | 'dec':'multi-random', 102 | 'encoder':None, 103 | 'encoder_dim': 300, 104 | 'batch_size': 64, # for trees use 25 105 | 'dataset': 'youtube2text', 106 | 'dim':3518, # lstm dim # 536 107 | 'from_dir': '', 108 | 'valid_batch_size':200, 109 | 'max_epochs':500, 110 | 'video_feature': 'googlenet', 111 | }), 112 | 'const': DD({ 113 | 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 114 | 'reload_': False, 115 | 'dec':'multi-random', 116 | 'encoder':None, 117 | 'encoder_dim': 300, 118 | 'batch_size': 64, # for trees use 25 119 | 'dataset': 'youtube2text', 120 | 'dim':3518, # lstm dim # 536 121 | 'from_dir': '', 122 | }), 123 | 'const2': DD({ 124 | 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 125 | 'reload_': False, 126 | 'dec':'multi-random', 127 | 'encoder':None, 128 | 'encoder_dim': 300, 129 | 'batch_size': 64, # for trees use 25 130 | 'dataset': 'youtube2text' 131 | }), 132 | 'LSTM': DD({ 133 | 'reload_': False, 134 | 'save_model_dir': exp_path + 'attention_mod/', 135 | 'dec':'standard', 136 | 'valid_batch_size':200, 137 | 'dataset': 'youtube2text', 138 | 'encoder': 'lstm_uni', 139 | 'max_epochs':500, 140 | 'from_dir': '', 141 | }), 142 | 
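# Configuration for the lstmdd model (see model_lstmdd.py); fields mirror the 'mtle' block above.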
'lstmdd': DD({ 143 | 'save_model_dir': exp_path + 'arctic-capgen-vid/test_non/', 144 | 'reload_': False, 145 | 'from_dir': '', 146 | 'dec':'multi-stdi', 147 | 'dim_word':468, # 474 148 | 'encoder':None, 149 | 'encoder_dim': 300, 150 | 'batch_size': 64, #64 for trees use 25 151 | 'valid_batch_size':200, 152 | 'dataset': 'vtt', 153 | 'dim':3518, # lstm dim # 536 154 | 'video_feature': 'googlenet', 155 | 'validFreq': 2000, 156 | 'max_epochs': 500, 157 | 'mode':'train', 158 | 'proc':'nostd', 159 | 'K':28, # 26 when compare 160 | 'lrate':0.0001, 161 | 'data_dir':'', 162 | 'dispFreq':10, 163 | 'feats_dir':'', 164 | 'cost_type':'v1' 165 | 166 | }), 167 | 'gru': DD({ 168 | 'reload_': False, 169 | 'save_model_dir': exp_path + 'gru_model2/', 170 | 'from_dir': '', 171 | 'dataset': 'youtube2text',#'youtube2text',#'lsmdc',mvad. 'ysvd' 172 | 'video_feature': 'googlenet', 173 | 'dim_word':468, # 474 174 | 'ctx_dim':-1,# auto set 175 | 'dim':3518, # lstm dim # 536 176 | 'n_layers_out':1, # for predicting next word 177 | 'n_layers_init':0, 178 | 'encoder_dim': 300, 179 | 'prev2out':True, 180 | 'ctx2out':True, 181 | 'patience':20, 182 | 'max_epochs':500, 183 | 'decay_c':1e-4, 184 | 'alpha_entropy_r': 0., 185 | 'alpha_c':0.70602, 186 | 'lrate':0.01, 187 | 'selector':True, 188 | 'n_words':20000, 189 | 'maxlen':30, # max length of the descprition 190 | 'optimizer':'adadelta', 191 | 'clip_c': 10., 192 | 'batch_size': 64, # for trees use 25 193 | # 'batch_size': 2, # for trees use 25 194 | 'valid_batch_size':200, 195 | # 'valid_batch_size':2, 196 | # in the unit of minibatches 197 | 'dispFreq':10, 198 | 'validFreq':2000, 199 | 'saveFreq':-1, # this is disabled, now use sampleFreq instead 200 | 'sampleFreq':100, 201 | # blue, meteor, or both 202 | 'metric': 'everything', # set to perplexity on DVS 203 | 'use_dropout':True, 204 | 'K':28, # 26 when compare 205 | 'OutOf':None, # used to be 240, for motionfeature use 26 206 | 'verbose': True, 207 | 'debug': False, 208 | 'dec':'standard', 209 | 'encoder':None, 210 | 'mode':'train', 211 | 'proc':'nostd' 212 | }), 213 | 'fc': DD({ 214 | 'reload_': False, 215 | 'save_model_dir': exp_path + 'attention_mod/', 216 | 'dec':'standard', 217 | 'dataset': 'youtube2text', 218 | 'encoder': None, 219 | 'from_dir': '', 220 | }), 221 | 'ic': DD({ 222 | 'reload_': False, 223 | 'save_model_dir': exp_path + 'attention_mod/', 224 | 'dec':'standard', 225 | 'dataset': 'youtube2text', 226 | 'encoder': None, 227 | 'from_dir': '', 228 | }), 229 | 'const_w': DD({ 230 | 'save_model_dir': exp_path + 'const_w/', 231 | 'reload_': False, 232 | 'dec':'multi-stdist', 233 | 'encoder':None, 234 | 'encoder_dim': 300, 235 | 'batch_size': 64, # for trees use 25 236 | 'dataset': 'youtube2text', 237 | 'video_feature': 'googlenet', 238 | }), 239 | 240 | 241 | }) 242 | -------------------------------------------------------------------------------- /create_movies.py: -------------------------------------------------------------------------------- 1 | __author__ = 'onina' 2 | 3 | import argparse 4 | import json 5 | import os 6 | import pickle 7 | import numpy as np 8 | 9 | # import process_frames 10 | from PIL import Image 11 | from PIL import ImageFont 12 | from PIL import ImageDraw 13 | from multiprocessing import Pool 14 | # import subprocess 15 | 16 | from data.util import mkdirs_safe 17 | 18 | 19 | def resizeImage(new_frame_path): 20 | print 'resizeImage: {}'.format(new_frame_path) 21 | command = 'magick {} -resize 1000x562\\! 
{}'.format(new_frame_path, new_frame_path) 22 | os.system(command) 23 | 24 | 25 | def drawOverlay(image, text): 26 | 27 | print 'drawOverlay: ' + image + ' text: ' + text 28 | img = Image.open(image) 29 | draw = ImageDraw.Draw(img) 30 | text_length = len(text)*10 31 | 32 | w = 1000 33 | y = 440 34 | 35 | font_size = 16 36 | if text_length >= w: 37 | font_size = 12 38 | text_length = len(text) * 7.2 39 | 40 | x = int((w - text_length) / 2) 41 | draw.rectangle((x,y, x+text_length,y+16*1.5), fill=(0,0,0)) 42 | 43 | font = ImageFont.truetype("DejaVuSans-Bold.ttf", font_size) 44 | draw.text((x, y),' '+text+' ',(0,255,0),font=font) 45 | img.save(image) 46 | 47 | 48 | def createVideo(inputdir, output_file, framerate): 49 | print 'createVideo: {} -> {}'.format(inputdir, output_file) 50 | command = 'magick -delay 7x100 -loop 0 ' + inputdir + '/*.jpg -layers OptimizePlus ' + output_file 51 | print 'command: {}'.format(command) 52 | os.system(command) 53 | print 'Finished job for {}.'.format(output_file) 54 | 55 | 56 | def main(args): 57 | # print params 58 | with open(args.rfile) as file: 59 | data = json.load(file) 60 | 61 | needed_files = os.listdir(args.vidpath) 62 | 63 | id_to_file_dict = {} 64 | 65 | if args.dataset != 'other': 66 | real_to_id_dict = pickle.load(open(os.path.join(args.dict_path))) 67 | 68 | # Hack to make lsmdc look like the other json files. 69 | if args.dataset == 'lsmdc16': 70 | data = {'result': data} 71 | print("Cut to {} videos".format(args.test)) 72 | print("Converted LSMDC16 to usable format") 73 | 74 | if args.test: 75 | data = {'result': np.random.choice(data['result'], args.test)} 76 | 77 | for i, desc in enumerate(data['result']): 78 | 79 | id = desc['video_id'] 80 | found_file = False 81 | print("Attempt {}".format(id)) 82 | 83 | for file in needed_files: 84 | if found_file: 85 | continue 86 | 87 | file_id = '.'.join(file.split('.')[:-1]) 88 | if args.dataset != 'other': 89 | if file_id not in real_to_id_dict: 90 | # print("{} not found in mapping dict.".format(file_id)) 91 | continue 92 | 93 | if real_to_id_dict[file_id] == id: 94 | id_to_file_dict[id] = file 95 | found_file = True 96 | elif file_id == id: 97 | id_to_file_dict[id] = file 98 | found_file = True 99 | 100 | if not found_file: 101 | print "Didn't find the file for {}.".format(id) 102 | 103 | getting_frames_jobs = [] 104 | video_create_jobs_after_frame_get = [] 105 | video_create_jobs = [] 106 | 107 | for i, desc in enumerate(data['result']): 108 | 109 | if desc['video_id'] not in id_to_file_dict.keys(): 110 | continue 111 | 112 | if desc['caption'] == '': 113 | continue 114 | 115 | # print result['video_id'] 116 | # print result['caption'] 117 | 118 | processed_vid_path = os.path.join(args.vidpath, id_to_file_dict[desc['video_id']]) 119 | 120 | out_path = os.path.join(args.rpath, 'frames') 121 | if not os.path.exists(out_path): 122 | mkdirs_safe(out_path) 123 | 124 | # out_path/out_frames_path.xxx/-> frames 125 | out_frames_path = os.path.join(out_path, id_to_file_dict[desc['video_id']]) 126 | 127 | framerate = 30 128 | 129 | if not os.path.exists(out_frames_path): 130 | if os.path.isdir(processed_vid_path): 131 | getting_frames_jobs.append((desc, out_frames_path, out_path, processed_vid_path)) 132 | 133 | rvid_path = os.path.join(args.rpath, 'vids') 134 | check_rvid_path(rvid_path) 135 | 136 | video_create_jobs_after_frame_get.append((desc, framerate, out_frames_path, rvid_path)) 137 | else: 138 | print 'No processed video directory found at {}!'.format(processed_vid_path) 139 | else: 140 | print 
"final frames already created" 141 | rvid_path = os.path.join(args.rpath, 'vids') 142 | check_rvid_path(rvid_path) 143 | 144 | video_create_jobs.append((desc, framerate, out_frames_path, rvid_path)) 145 | 146 | 147 | threadPoolWhenFramesNil = Pool() 148 | threadPoolFramesExisted = Pool() 149 | 150 | # Do frame getting jobs 151 | threadPoolWhenFramesNil.map(copy_frames_and_draw_overlay, getting_frames_jobs) 152 | # Also do video create jobs for frames already there 153 | threadPoolFramesExisted.map(prepare_path_and_create_video, video_create_jobs) 154 | 155 | threadPoolWhenFramesNil.close() 156 | threadPoolWhenFramesNil.join() 157 | 158 | threadPoolWhenFramesNil = Pool() 159 | 160 | # Now do video create jobs for previously nil frames 161 | threadPoolWhenFramesNil.map(prepare_path_and_create_video, video_create_jobs_after_frame_get) 162 | 163 | threadPoolWhenFramesNil.close() 164 | threadPoolWhenFramesNil.join() 165 | threadPoolFramesExisted.close() 166 | threadPoolFramesExisted.join() 167 | 168 | 169 | def check_rvid_path(rvid_path): 170 | if not os.path.exists(rvid_path): 171 | mkdirs_safe(rvid_path) 172 | 173 | 174 | def prepare_path_and_create_video((desc, framerate, out_frames_path, rvid_path)): 175 | new_vid_path = os.path.join(rvid_path, str(desc['video_id']) + '.gif') 176 | if not os.path.exists(new_vid_path): 177 | createVideo(out_frames_path, new_vid_path, framerate) 178 | 179 | 180 | def copy_frames_and_draw_overlay((desc, out_frames_path, out_path, processed_vid_path)): 181 | print 'Copying files {} -> {}'.format(processed_vid_path, out_path) 182 | command = "cp -r {} {}".format(processed_vid_path, out_path) 183 | os.system(command) 184 | frames = os.listdir(out_frames_path) 185 | print 'Creating Image Overlay For ' + str(len(frames)) + ' frames' 186 | for frame in frames: 187 | if frame.endswith('.jpg') or frame.endswith('.png'): 188 | new_frame_path = os.path.join(out_frames_path, frame) 189 | print new_frame_path 190 | print desc['caption'] 191 | resizeImage(new_frame_path) 192 | drawOverlay(new_frame_path, desc['caption']) 193 | 194 | 195 | def _validate(args): 196 | if args.dataset == 'msvd' or args.dataset == 'lsmdc16': 197 | if not args.dict_path: 198 | raise ValueError("Was given dataset={} but no annotations path was given.".format(args.dataset)) 199 | if not os.path.exists(args.dict_path): 200 | raise IOError("Was given dataset={} but dict_path={} does not exist.".format(args.dataset, args.dict_path)) 201 | 202 | 203 | if __name__=="__main__": 204 | #Run the script twice, the first time it will extract the frames the second time it will create the vids 205 | parser = argparse.ArgumentParser() 206 | parser.add_argument('-j','--rfile',dest='rfile', type=str, help='json file path with results',default='') 207 | parser.add_argument('-p','--vidpath',dest='vidpath',type=str,help='path where the processed videos reside', default='') 208 | parser.add_argument('-r','--rpath', dest='rpath',type=str, help='path where we will save vids',default='') 209 | parser.add_argument('-d', '--dataset', help="Dataset being processed. Some json files are written differently depending on the dataset.", default='other', choices=['msvd', 'lsmdc16', 'other']) 210 | parser.add_argument('-dp', '--dict_path', help="Path to msvd or lsmdc name mapping pkl file (containing mapping dict) [msdv & lsmdc16 only]", required=False) 211 | parser.add_argument('-t', '--test', type=int, help="Unit test/limit number movies to create to given arg. 
Default=0/OFF", default=0) 212 | parser.add_argument('-s', '--seed', help="Random seed.", default=9) 213 | 214 | args = parser.parse_args() 215 | np.random.seed(int(args.seed)) 216 | 217 | _validate(args) 218 | main(args) 219 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | # `vid-desc/data` 2 | 3 | ## What is here? 4 | 5 | Standalone scripts for generating evaluation data from these datasets: 6 | 7 | - MSR-VTT 8 | - M-VAD 9 | - MPII 10 | - LSMDC 2016 (M-VAD + MPII) 11 | - TRECVID 2016 12 | - MSVD (Youtube2text) 13 | - TACoS 14 | 15 | Multi-caption datasets rely on `pickle` generated files to store features, while single-caption datasets do not. 16 | This is mainly an artifact of the dataset sizes. Generating `pkl` files for LSMDC, MPII, and MVAD was usually unwieldy. 17 | 18 | Due to the modular nature, this pipeline can also be used as a general-purpose feature extractor for any videos. 19 | 20 | 21 | ## How are scripts run? 22 | 23 | The general pipeline is this: 24 | 25 | - `download videos to vid_dir` 26 | - `subsect_videos(vid_dir)`* 27 | - `process_frames(subsect_dir)` 28 | - `process_features(frames_dir)` 29 | - `create_dataset(feats_dir)`* 30 | 31 | `*` denotes a process that depends on dataset metadata. Everything else is dataset agnostic. 32 | 33 | Everything except `process_features` uses the Python 2 environment. `process_features` uses Python 3, which is a result of upgrading to PyTorch for feature extraction. 34 | For the time being, you will just need to switch between two conda environments during the pipeline. 35 | 36 | So long as you have a directory with videos in it, the usage of each step should be clear. If you are not trying to re-create the results for some dataset, you can omit `subsect_videos` and `create_dataset`, as those are for specific datasets. 37 | 38 | Here the pipeline will be described in detail following the two paths established in the root `README.md`. 39 | 40 | 41 | ## Tutorial 42 | 43 | #### "I just want to caption a couple of videos" 44 | 45 | 46 | 47 | 48 | 49 | 50 | #### "I want to re-create your results" (MSVD & LSMDC) 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSUPCVLab/VideoToTextDNN/a840172edf38e0a71d5e8feb130ab8f6c5eb19b6/data/__init__.py -------------------------------------------------------------------------------- /data/create_dataset.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dataset creation helper. Use this to generate command lines for lots of datasets. 3 | """ 4 | import logging 5 | import os 6 | import argparse 7 | 8 | from util import * 9 | 10 | from datetime import datetime 11 | 12 | logging.basicConfig() 13 | logger = logging.getLogger(__name__) 14 | logger.setLevel(logging.DEBUG) 15 | 16 | possible_features = ['resnet', 'googlenet', 'nasnetalarge', 'resnet152', 'pnasnet5large', 'densenet121', 'polynet', 'senet154'] 17 | 18 | SEED = 9 19 | 20 | 21 | # When inputting dir paths in dict, make sure trailing `/` is removed.
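# Each entry maps a dataset name to two relative paths: "data_dir" is where the
# generated pkl files are written (joined onto --base_path, and with the feature
# type appended for multi-caption datasets), and "base" is the dataset root that
# holds the features_<type>/ and annotations/ subdirectories.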
22 | dataset_to_meta = { 23 | 'mvad': 24 | {"data_dir": "mvad/pkls", 25 | "base": "mvad/"}, 26 | 'vtt16': 27 | {"data_dir": "vtt/pkls2016", 28 | "base": "vtt"}, 29 | 'vtt17': 30 | {"data_dir": "vtt/pkls2017", 31 | "base": "vtt"}, 32 | 'youtube2text': 33 | {"data_dir": "youtube2text/pkls_yao", 34 | "base": "youtube2text"}, 35 | 'mpii': 36 | {"data_dir": "mpii/full", 37 | "base": "mpii"}, 38 | 'lsmdc16': 39 | {"data_dir": "lsmdc16/pkls16", 40 | "base": "lsmdc16/"}, 41 | 'tacos': 42 | {"data_dir": "TACoS/pkls", 43 | "base": "TACoS/"}, 44 | 'trecvid': 45 | {"data_dir": "trecvid/pkls", 46 | "base": "trecvid"}, 47 | } 48 | 49 | 50 | feats_dir_prefix = "features_" 51 | test_feats_dir_prefix = "features_testing_" 52 | annots_dir_name = "annotations" 53 | 54 | 55 | def create_commands(args, datasets, features): 56 | """ 57 | Create command lines to generate dataset files. 58 | 59 | :param args: 60 | :param datasets: 61 | :param features: 62 | :return: nil 63 | """ 64 | 65 | lines = set() 66 | main_lines = set() 67 | counting = 0 68 | 69 | for ds in datasets: 70 | for ft in features: 71 | if ds == 'mvad' or ds == 'mpii' or ds == 'lsmdc16' or ds == 'tacos': 72 | # single-caption take feats from feats_dir 73 | data_dir = dataset_to_meta[ds]["data_dir"] 74 | else: 75 | # multi caption take feats from pkl dir 76 | data_dir = dataset_to_meta[ds]["data_dir"] + '_' + ft 77 | 78 | if args.test: 79 | data_dir += '_ut{}'.format(args.test) 80 | 81 | data_dir = os.path.join(args.base_path, data_dir) 82 | 83 | base_dir = dataset_to_meta[ds]["base"] 84 | feat_dir = os.path.join(base_dir, feats_dir_prefix + ft) 85 | test_feat_dir = os.path.join(base_dir, test_feats_dir_prefix + ft) 86 | annots_dir = os.path.join(base_dir, annots_dir_name) 87 | 88 | for p in (feat_dir, annots_dir): 89 | if not os.path.isdir(p): 90 | logger.warning("Did not find directory at {}.".format(p)) 91 | 92 | if 'vtt' in ds: 93 | if not os.path.isdir(test_feat_dir): 94 | logger.warning("Did not find directory at {}.".format(test_feat_dir)) 95 | 96 | main_cmd = create_line(args.seed, ds, annots_dir, ft, data_dir, feat_dir, test_feat_dir, args.test, args.skip_thoughts) 97 | 98 | if main_cmd in main_lines: 99 | continue 100 | 101 | main_lines.add(main_cmd) 102 | 103 | lines.add(main_cmd) 104 | 105 | create_command_files(args, lines) 106 | 107 | 108 | def create_command_files(args, lines): 109 | out_txt_path = os.path.join(args.out, 'commands.txt') 110 | with open(out_txt_path, 'w') as f: 111 | for l in lines: 112 | f.write(l) 113 | f.write('\n') 114 | 115 | logger.info("Created list of dataset creation commands at {}".format(out_txt_path)) 116 | 117 | 118 | if __name__ == '__main__': 119 | ap = argparse.ArgumentParser() 120 | 121 | creation_args = ap.add_argument_group("Creation Args") 122 | creation_args.add_argument('-bp', '--base_path', help="Base path to prepend onto the dataset_to_meta dict values defined above", default="") 123 | creation_args.add_argument('-ds', '--dataset_select', help="Select a dataset rather than all.", nargs='+', required=False,default=None, choices=dataset_to_meta.keys()) 124 | creation_args.add_argument('-fs', '--feature_select', help="Select a feature type...", nargs='+', required=False,default=None, choices=possible_features) 125 | creation_args.add_argument('-s', '--seed', help="Random seed", required=False, default=SEED) 126 | creation_args.add_argument('-t', '--test', help="Create unit-test dataset. 
0=Off, otherwise size of unittest dataset, in samples.", default=0) 127 | creation_args.add_argument('-st', '--skip_thoughts', help="Perform skip-thoughts as SDM.", action='store_true', default=False) 128 | 129 | file_args = ap.add_argument_group("FileArgs") 130 | file_args.add_argument("-o", "--out", help="Output file for generated commands from this script.", required=True) 131 | 132 | args = ap.parse_args() 133 | 134 | mkdirs_safe(args.out) 135 | 136 | if args.dataset_select: 137 | if type(args.dataset_select) == list: 138 | datasets = args.dataset_select 139 | else: 140 | datasets = [args.dataset_select] 141 | else: 142 | datasets = dataset_to_meta.keys() 143 | 144 | if args.feature_select: 145 | if type(args.feature_select) == list: 146 | features = args.feature_select 147 | else: 148 | features = [args.feature_select] 149 | else: 150 | features = possible_features 151 | 152 | create_commands(args, datasets, features) 153 | -------------------------------------------------------------------------------- /data/create_msr_vtt.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import argparse 4 | import json 5 | import nltk 6 | import logging 7 | 8 | from util import * 9 | 10 | logging.basicConfig() 11 | logger = logging.getLogger(__name__) 12 | logger.setLevel(logging.DEBUG) 13 | 14 | SEED = 9 15 | 16 | 17 | def get_annots_vtt(feats_pool, train_val_list_path, test_list_path, test_sens_path, annotations, unittest=0): 18 | with open(train_val_list_path, 'r') as data_file: 19 | train_val_data = json.load(data_file) 20 | 21 | with open(test_list_path, 'r') as data_file: 22 | test_data = json.load(data_file) 23 | 24 | test_sens = None 25 | if test_sens_path: 26 | with open(test_sens_path, 'r') as data_file: 27 | test_sens = json.load(data_file) 28 | 29 | annotations, vids_train, vids_val, all_vids = get_annots_train_val_vtt(feats_pool, train_val_data, annotations, {}, unittest) 30 | annotations, vids_test, all_vids = get_annots_test_vtt(feats_pool, test_data, test_sens, annotations, all_vids, unittest) 31 | 32 | return annotations, vids_train, vids_val, vids_test, all_vids 33 | 34 | 35 | def get_annots_train_val_vtt(feats_pool, train_val_data, annotations, all_vids, unittest=0): 36 | vids_train = [] 37 | vids_val = [] 38 | 39 | logger.info('Retrieving annotations for train-val...') 40 | 41 | videos_getting = [i['video_id'] for i in train_val_data['videos']] 42 | 43 | if unittest: 44 | num_videos = unittest 45 | logger.debug('UNIT TEST: On') 46 | np.random.shuffle(videos_getting) 47 | videos_getting = videos_getting[:num_videos] 48 | 49 | sentences = train_val_data['sentences'] 50 | 51 | for sent in sentences: 52 | vid_name = sent['video_id'] 53 | if vid_name not in videos_getting: 54 | continue 55 | 56 | if vid_name not in feats_pool: 57 | logger.warn("feature was missing for video_id={}".format(vid_name)) 58 | continue 59 | 60 | if vid_name not in all_vids: 61 | all_vids[vid_name] = 1 62 | else: 63 | all_vids[vid_name] += 1 64 | 65 | ocaption = sent['caption'] 66 | ocaption = ocaption.strip().encode('utf-8', 'ignore') 67 | 68 | tokens = nltk.word_tokenize(ocaption) 69 | tokenized = ' '.join(tokens) 70 | tokenized = tokenized.lower() 71 | 72 | if vid_name in annotations: 73 | cap_id = str(len(annotations[vid_name])) 74 | annotations[vid_name].append({'tokenized':tokenized,'image_id':vid_name,'cap_id':cap_id,'caption':ocaption}) 75 | else: 76 | annotations[vid_name] = [] 77 | cap_id = str(0) 78 | 
annotations[vid_name].append({'tokenized':tokenized,'image_id':vid_name,'cap_id':cap_id,'caption':ocaption}) 79 | 80 | vid_and_cap = vid_name + '_' + cap_id 81 | 82 | vid_id = int(vid_name.split('video')[1]) 83 | 84 | gt_id = train_val_data['videos'][vid_id]['id'] 85 | assert gt_id == vid_id, 'Got an ID mis-match: vid_id={}, json_id={}'.format(vid_id, gt_id) 86 | 87 | if train_val_data['videos'][vid_id]['split'] == 'train': 88 | vids_train.append(vid_and_cap) 89 | elif train_val_data['videos'][vid_id]['split'] == 'validate': 90 | vids_val.append(vid_and_cap) 91 | else: 92 | raise ValueError("Video ID {} is not in train or valid split. Correct json file given?".format(vid_id)) 93 | 94 | np.random.shuffle(vids_train) # If we don't shuffle performance deminishes 95 | np.random.shuffle(vids_val) 96 | 97 | return annotations, vids_train, vids_val, all_vids 98 | 99 | 100 | def get_annots_test_vtt(feats_pool, test_list_data, test_sens, annotations, all_vids, unittest=0): 101 | vids_test = [] 102 | 103 | logger.info('Retrieving annotations for test...') 104 | 105 | videos_getting = [i['video_id'] for i in test_list_data['videos']] 106 | 107 | if unittest: 108 | num_videos = unittest 109 | logger.debug( 'UNIT TEST: On') 110 | np.random.shuffle(videos_getting) 111 | videos_getting = videos_getting[:num_videos] 112 | 113 | for vid_name in videos_getting: 114 | if vid_name not in videos_getting: 115 | continue 116 | 117 | if vid_name not in feats_pool: 118 | logger.warn("feature was missing for video_id={}".format(vid_name)) 119 | continue 120 | 121 | if vid_name not in all_vids: 122 | all_vids[vid_name] = 1 123 | else: 124 | all_vids[vid_name] += 1 125 | 126 | if test_sens: 127 | # Use the released test sentences 128 | vid_sens = [s for s in test_sens['sentences'] if s['video_id'] == vid_name] 129 | 130 | for sent in vid_sens: 131 | ocaption = sent['caption'] 132 | ocaption = ocaption.strip().encode('utf-8', 'ignore') 133 | 134 | tokens = nltk.word_tokenize(ocaption) 135 | tokenized = ' '.join(tokens) 136 | tokenized = tokenized.lower() 137 | 138 | if vid_name in annotations: 139 | cap_id = str(len(annotations[vid_name])) 140 | annotations[vid_name].append( 141 | {'tokenized': tokenized, 'image_id': vid_name, 'cap_id': cap_id, 'caption': ocaption}) 142 | else: 143 | annotations[vid_name] = [] 144 | cap_id = str(0) 145 | annotations[vid_name].append( 146 | {'tokenized': tokenized, 'image_id': vid_name, 'cap_id': cap_id, 'caption': ocaption}) 147 | 148 | vid_and_cap = vid_name + '_' + cap_id 149 | vids_test.append(vid_and_cap) 150 | 151 | else: 152 | ocaption = 'no caption' 153 | ocaption = ocaption.strip().encode('utf-8', 'ignore') 154 | 155 | tokens = nltk.word_tokenize(ocaption) 156 | tokenized = ' '.join(tokens) 157 | tokenized = tokenized.lower() 158 | 159 | annotations[vid_name] = [] 160 | cap_id = str(0) 161 | annotations[vid_name].append( 162 | {'tokenized': tokenized, 'image_id': vid_name, 'cap_id': cap_id, 'caption': ocaption}) 163 | 164 | vid_and_cap = vid_name + '_' + cap_id 165 | vids_test.append(vid_and_cap) 166 | 167 | np.random.shuffle(vids_test) 168 | 169 | return annotations, vids_test, all_vids 170 | 171 | 172 | def load_annots_vtt(cap_path): 173 | return load_pkl(cap_path) 174 | 175 | 176 | def get_features_from_dir(vid_frame_folder_names, feats_dir, feats_2017_test_dir, feat_type): 177 | 178 | feats = {} 179 | progress_checking = int(len(vid_frame_folder_names) / 10) 180 | 181 | logger.info("Extracting features...") 182 | 183 | for i, files in enumerate(vid_frame_folder_names): 184 
| ext = '.' + files.split('.')[-1] 185 | feat_filename = files.split('/')[-1].split(ext)[0] 186 | 187 | vid_id = int(files.split('video')[1]) 188 | if vid_id >= 10000: 189 | feat_file_path = os.path.join(feats_2017_test_dir, feat_filename) 190 | else: 191 | feat_file_path = os.path.join(feats_dir, feat_filename) 192 | 193 | if feat_type == 'c3d': 194 | feats[feat_filename]=load_c3d_feat(feat_file_path) 195 | logger.info('features extracted successfuly: ' + feat_file_path) 196 | else: 197 | if os.path.exists(feat_file_path): 198 | feat = np.load(feat_file_path) 199 | feats[feat_filename] = feat 200 | # print('features extracted successfuly: ' + feat_file_path) 201 | else: 202 | logger.info('No features found!: ' + feat_file_path) 203 | 204 | if i % progress_checking == 0: 205 | logger.info("Processed " + str(i) + '/' + str(len(vid_frame_folder_names))) 206 | 207 | return feats 208 | 209 | 210 | def validate(vids_train, vids_val, vids_test): 211 | ntr = len(vids_train) 212 | logger.info("Have {} train samples".format(ntr)) 213 | assert ntr > 0 214 | 215 | nva = len(vids_val) 216 | logger.info("Have {} val samples".format(nva)) 217 | assert nva > 0 218 | 219 | nts = len(vids_test) 220 | logger.info("Have {} test samples.".format(nts)) 221 | assert nts > 0 222 | 223 | tr_s = set(vids_train) 224 | va_s = set(vids_val) 225 | ts_s = set(vids_test) 226 | 227 | inter = tr_s.intersection(va_s) 228 | assert len(inter) == 0, 'Validation contaminated with training data.' 229 | inter = va_s.intersection(ts_s) 230 | assert len(inter) == 0, 'Testing contaminated with validation data.' 231 | inter = tr_s.intersection(ts_s) 232 | assert len(inter) == 0, 'Testing contaminated with training data.' 233 | 234 | 235 | def vtt(params): 236 | pkl_dir = params.pkl_dir 237 | feats_dir = params.feats_dir 238 | feats_testing_dir = params.feats_testing_dir 239 | json_dir = params.json_dir 240 | unittest = params.test 241 | feat_type = params.type 242 | protocol = params.protocol 243 | version = params.version 244 | 245 | annotations = {} 246 | 247 | if not os.path.exists(pkl_dir): 248 | os.mkdir(pkl_dir) 249 | 250 | train_path = os.path.join(pkl_dir,'train.pkl') 251 | valid_path = os.path.join(pkl_dir,'valid.pkl') 252 | test_path = os.path.join(pkl_dir,'test.pkl') 253 | cap_path = os.path.join(pkl_dir,'CAP.pkl') 254 | dict_path = os.path.join(pkl_dir,'worddict.pkl') 255 | 256 | if protocol != '': 257 | filename = 'FEATS_{}_{}.pkl'.format(feat_type, protocol) 258 | else: 259 | filename = 'FEATS_{}.pkl'.format(feat_type) 260 | 261 | feats_path = os.path.join(pkl_dir, filename) 262 | 263 | if os.path.exists(train_path) or os.path.exists(valid_path) or os.path.exists(test_path): 264 | var = raw_input("Pickle files found in [{}]. Do you want to erase them? type: [yes] [no] ".format(pkl_dir)) 265 | 266 | if var == 'yes': 267 | logger.info('Removing old pkls...') 268 | remove_pickle_files(cap_path, dict_path, feats_path, test_path, train_path, valid_path) 269 | 270 | else: 271 | logger.info('Loading previous pickle files and creating new FEATS_ file at path: {}'.format(feats_path)) 272 | if os.path.exists(feats_path): 273 | os.remove(feats_path) 274 | 275 | annotations = load_annots_vtt(cap_path) 276 | 277 | features = get_features_from_dir(annotations.keys(), feats_dir, feats_testing_dir, feat_type) 278 | dump_pkl(features, feats_path) 279 | logger.info('FEAT file created! 
Path: {}'.format(feats_path)) 280 | sys.exit(0) 281 | 282 | vid_feats_dirs = os.listdir(feats_dir) 283 | vid_feats_dirs = sorted(vid_feats_dirs, key=lambda x: float(x.split('video')[-1])) #This is to sort the videos 284 | 285 | vid_testing_feats_dirs = os.listdir(feats_testing_dir) 286 | vid_testing_feats_dirs = sorted(vid_testing_feats_dirs, key=lambda x: float(x.split('video')[-1])) 287 | 288 | feats_pool = vid_feats_dirs + vid_testing_feats_dirs 289 | 290 | test_sens_path = None 291 | 292 | if version == '2016': 293 | test_list_path = os.path.join(json_dir, 'test_videodatainfo_nosen_2016.json') 294 | train_val_list_path = os.path.join(json_dir, 'train_val_videodatainfo.json') 295 | if args.with_sentences: test_sens_path = os.path.join(json_dir, 'videodatainfo_2017.json') 296 | else: 297 | test_list_path = os.path.join(json_dir, 'test_videodatainfo_nosen_2017.json') 298 | train_val_list_path = os.path.join(json_dir, 'videodatainfo_2017.json') 299 | if args.with_sentences: test_sens_path = os.path.join(json_dir, 'test_videodatainfo_2017.json') 300 | 301 | annotations, vids_train, vids_val, vids_test, all_vids = get_annots_vtt(feats_pool, train_val_list_path, 302 | test_list_path, test_sens_path, annotations, unittest) 303 | 304 | logger.info('Validating generated lists...') 305 | validate(vids_train, vids_val, vids_test) 306 | 307 | dump_pkl(vids_test, test_path) 308 | logger.info('test.pkl created') 309 | 310 | dump_pkl(vids_train,train_path) 311 | logger.info('train.pkl created') 312 | 313 | dump_pkl(vids_val,valid_path) 314 | logger.info('valid.pkl created') 315 | 316 | dump_pkl(all_vids.keys(), os.path.join(pkl_dir,'allvids.pkl')) 317 | dump_pkl(annotations, cap_path) 318 | logger.info('CAP.pkl created') 319 | worddict = create_dictionary(annotations,dict_path) 320 | dump_pkl(worddict,dict_path) 321 | logger.info('worddict.pkl created') 322 | 323 | features = get_features_from_dir(annotations.keys(), feats_dir, feats_testing_dir, feat_type) 324 | dump_pkl(features,feats_path) 325 | logger.info('FEAT file created! 
Path: {}'.format(feats_path)) 326 | 327 | if params.do_skip_thoughts: 328 | logger.info("Generating skip-thoughts...") 329 | import create_skip_vectors 330 | class ArgsFaker(): 331 | captions_file = cap_path 332 | output_file = os.path.join(pkl_dir, 'skip_vectors.pkl') 333 | 334 | fake_args = ArgsFaker() 335 | create_skip_vectors.main(fake_args) 336 | 337 | 338 | def remove_pickle_files(cap_path, dict_path, feats_path, test_path, train_path, valid_path): 339 | if os.path.exists(train_path): 340 | os.remove(train_path) 341 | if os.path.exists(valid_path): 342 | os.remove(valid_path) 343 | if os.path.exists(test_path): 344 | os.remove(test_path) 345 | if os.path.exists(cap_path): 346 | os.remove(cap_path) 347 | if os.path.exists(dict_path): 348 | os.remove(dict_path) 349 | if os.path.exists(feats_path): 350 | os.remove(feats_path) 351 | # if os.path.exists('allvids.pkl'): 352 | # os.remove('allvids.pkl') 353 | 354 | 355 | def _validate(args): 356 | if args.version == '2016' and args.with_sentences: 357 | logger.info("2016 version test sentences were made available in 2017 dataset.") 358 | sens_path = os.path.join(args.json_dir, "videodatainfo_2017.json") 359 | if os.path.exists(sens_path): 360 | logger.info("Found ground truth captions for 2016 test sentences") 361 | else: 362 | logger.critical("Did not find ground truth captions for 2016 test sentences: {}".format(sens_path)) 363 | sys.exit(1) 364 | 365 | if args.type not in args.feats_dir or args.type not in args.feats_testing_dir: 366 | logger.critical("Requested feature type {}, but directories are something else:\tfeats_dir={}\tfeats_testing_dir={}".format(args.type, args.feats_dir, args.feats_testing_dir)) 367 | sys.exit(1) 368 | 369 | 370 | if __name__=='__main__': 371 | arg_parser = argparse.ArgumentParser() 372 | 373 | creation_args = arg_parser.add_argument_group("CreationArgs") 374 | creation_args.add_argument('-s', '--seed', type=int, help="Random seed.", default=SEED, required=False) 375 | creation_args.add_argument('-f','--feats_dir',dest ='feats_dir',type=str, required=True) 376 | creation_args.add_argument('-ft', '--feats_testing_dir', dest='feats_testing_dir', type=str, required=True) 377 | creation_args.add_argument('-j','--json_dir',dest ='json_dir',type=str, required=True) 378 | creation_args.add_argument('-p','--pkl_dir',dest ='pkl_dir',type=str, required=True) 379 | creation_args.add_argument('-type','--type',dest ='type',type=str, required=True) 380 | creation_args.add_argument('-t', '--test', dest='test', type=int, default=0, 381 | help='perform small unit test. 
If value 0 not unit test if greater than 0 gets a dataset with that numbers of videos') 382 | creation_args.add_argument('-proc', '--protocol', dest='protocol', type=str, default='') 383 | creation_args.add_argument('-st', '--do_skip_thoughts', dest='do_skip_thoughts', action='store_true', default=False) 384 | 385 | vtt_args = arg_parser.add_argument_group("VTTArgs") 386 | vtt_args.add_argument('-v', '--version', dest='version', type=str, default='2016', help="Which MSR-VTT version to create.", choices=['2016', '2017']) 387 | vtt_args.add_argument('-ws', '--with_sentences', dest='with_sentences', default=False, action='store_true', help='Use the available test set sentences.') 388 | 389 | args = arg_parser.parse_args() 390 | 391 | np.random.seed(args.seed) 392 | 393 | if not len(sys.argv) > 1: 394 | print(arg_parser.print_help()) 395 | sys.exit(0) 396 | 397 | _validate(args) 398 | 399 | vtt(args) 400 | 401 | 402 | 403 | -------------------------------------------------------------------------------- /data/create_mvad_mpii_lsmdc.py: -------------------------------------------------------------------------------- 1 | __author__ = 'oliver' 2 | 3 | import argparse 4 | import nltk 5 | import shutil 6 | import numpy as np 7 | import logging 8 | from util import * 9 | 10 | import collections 11 | from collections import OrderedDict 12 | 13 | logging.basicConfig() 14 | logger = logging.getLogger(__name__) 15 | logger.setLevel(logging.DEBUG) 16 | 17 | SEED = 9 18 | 19 | 20 | def get_annots_lsmdc(filename, annotations, num_test): 21 | vids_names = {} 22 | 23 | with open(filename) as csvfile: 24 | rows = csvfile.readlines() 25 | for row in rows[:num_test]: 26 | row = row.split('\t') 27 | vid_name = row[0] 28 | 29 | if len(row) > 5: 30 | ocaption = row[5] 31 | 32 | ocaption = ocaption.replace('\n', '') 33 | udata = ocaption.decode("utf-8") 34 | caption = udata.encode("ascii", "ignore") 35 | 36 | tokens = nltk.word_tokenize(caption) 37 | tokenized = ' '.join(tokens) 38 | tokenized = tokenized.lower() 39 | 40 | if vids_names.has_key(vid_name): 41 | vids_names[vid_name] += 1 42 | logger.info('other annots') 43 | else: 44 | feat_path = '/PATH/TO/lsmdc16/videos/' + vid_name + '.avi' 45 | if not os.path.exists(feat_path): 46 | logger.warning('video not found ' + feat_path) 47 | vids_names[vid_name] = 1 48 | 49 | annotations[vid_name] = [ 50 | {'tokenized': tokenized, 'image_id': vid_name, 'cap_id': vids_names[vid_name], 'caption': ocaption}] 51 | 52 | return annotations, vids_names 53 | 54 | 55 | def get_blind_lsmdc(filename, num_test): 56 | vids_names = OrderedDict() 57 | # annotations = OrderedDict() 58 | 59 | with open(filename) as csvfile: 60 | rows = csvfile.readlines() 61 | for row in rows: 62 | row = row.split('\t') 63 | vid_name = row[0] 64 | 65 | if vids_names.has_key(vid_name): 66 | vids_names[vid_name] += 1 67 | logger.info('other annots') 68 | else: 69 | # feat_path = '/media/onina/sea2/datasets/lsmdc/features_chal/'+vid_name 70 | # if not os.path.exists(feat_path): 71 | # print 'features not found '+feat_path 72 | vids_names[vid_name] = 1 73 | 74 | # annotations[vid_name]=[{'tokenized':tokenized,'image_id':vid_name,'cap_id':1,'caption':''}] 75 | 76 | return vids_names 77 | 78 | 79 | def get_annots_mvad(rows, annots_corpus, annotations, feats_dir): 80 | vids_names = {} 81 | 82 | for i, row in enumerate(rows): 83 | 84 | # row = row.split('\t') 85 | vid_name = row.split('/')[-1].split('.')[0] 86 | caption = annots_corpus[i] 87 | caption = caption.replace('\n', '') 88 | 89 | udata = 
caption.decode("utf-8") 90 | caption = udata.encode("ascii", "ignore") 91 | 92 | tokens = nltk.word_tokenize(caption) 93 | tokenized = ' '.join(tokens) 94 | tokenized = tokenized.lower() 95 | 96 | if vids_names.has_key(vid_name): 97 | vids_names[vid_name] += 1 98 | logger.info('other annots, there should be only 1') 99 | # sys.exit(0) 100 | else: 101 | vids_names[vid_name] = 1 102 | 103 | annotations[vid_name] = [ 104 | {'tokenized': tokenized, 'image_id': vid_name, 'cap_id': vids_names[vid_name], 'caption': caption}] 105 | 106 | return annotations, vids_names 107 | 108 | 109 | def create_dictionary(annotations, pkl_dir): 110 | worddict = collections.OrderedDict() 111 | word_idx = 2 112 | for a in annotations: 113 | caps = annotations[a] 114 | 115 | for cap in caps: 116 | tokens = cap['tokenized'].split() 117 | for token in tokens: 118 | if token not in ['', '\t', '\n', ' ']: 119 | if not worddict.has_key(token): 120 | worddict[token] = word_idx 121 | word_idx += 1 122 | 123 | return worddict 124 | 125 | 126 | def lsmdc16(args): 127 | data_dir = args.data_dir 128 | pkl_dir = args.pkl_dir 129 | 130 | num_train, num_valid, num_test, num_blind = 9999999999, 9999999999, 9999999999, 9999999999 131 | 132 | test_mode = int(args.unit_test) 133 | 134 | train_list_path = 'LSMDC16_annos_training.csv' 135 | valid_list_path = 'LSMDC16_annos_val.csv' 136 | test_list_path = 'LSMDC16_annos_test.csv' 137 | btest_list_path = 'LSMDC16_annos_blindtest.csv' 138 | 139 | if test_mode: 140 | num_train = int(0.50 * test_mode) 141 | num_test = int(0.15 * test_mode) 142 | num_valid = int(0.25 * test_mode) 143 | num_blind = test_mode - (num_test + num_train + num_valid) 144 | 145 | annotations = {} 146 | 147 | if not os.path.exists(pkl_dir): 148 | os.mkdir(pkl_dir) 149 | 150 | all_vids = [] 151 | 152 | train_path = os.path.join(pkl_dir, 'train.pkl') 153 | if not os.path.exists(train_path): 154 | train_file = os.path.join(data_dir, train_list_path) 155 | logger.info(train_file) 156 | annotations, vids_names = get_annots_lsmdc(train_file, annotations, num_train) 157 | training_list = vids_names.keys() 158 | dump_pkl(training_list, train_path) 159 | else: 160 | training_list = load_pkl(train_path) 161 | 162 | all_vids = all_vids + training_list 163 | 164 | valid_path = os.path.join(pkl_dir, 'valid.pkl') 165 | if not os.path.exists(valid_path): 166 | valid_file = os.path.join(data_dir, valid_list_path) 167 | annotations, vids_names = get_annots_lsmdc(valid_file, annotations, num_valid) 168 | valid_list = vids_names.keys() 169 | dump_pkl(valid_list, valid_path) 170 | else: 171 | valid_list = load_pkl(valid_path) 172 | 173 | all_vids = all_vids + valid_list 174 | 175 | test_path = os.path.join(pkl_dir, 'test.pkl') 176 | if not os.path.exists(test_path): 177 | test_file = os.path.join(data_dir, test_list_path) 178 | annotations, vids_names = get_annots_lsmdc(test_file, annotations, num_test) 179 | test_list = vids_names.keys() 180 | dump_pkl(test_list, test_path) 181 | else: 182 | test_list = load_pkl(test_path) 183 | 184 | all_vids = all_vids + test_list 185 | 186 | cap_path = os.path.join(pkl_dir, 'CAP.pkl') 187 | if not os.path.exists(cap_path): 188 | dump_pkl(annotations, cap_path) 189 | 190 | dict_path = os.path.join(pkl_dir, 'worddict.pkl') 191 | if not os.path.exists(dict_path): 192 | worddict = create_dictionary(annotations, dict_path) 193 | dump_pkl(worddict, dict_path) 194 | 195 | btest_path = os.path.join(pkl_dir, 'blindtest.pkl') 196 | if not os.path.exists(btest_path): 197 | btest_file = os.path.join(data_dir, 
btest_list_path) 198 | vids_names = get_blind_lsmdc(btest_file, num_blind) 199 | btest_list = vids_names.keys() 200 | dump_pkl(btest_list, btest_path) 201 | else: 202 | btest_list = load_pkl(btest_path) 203 | 204 | logger.info('done creating dataset') 205 | 206 | 207 | def mpii(params): 208 | data_dir = params.data_dir 209 | pkl_dir = params.pkl_dir 210 | testing = params.unit_test 211 | local_dir = params.local_dir 212 | feats_dir = params.feats_dir 213 | 214 | f = open(os.path.join(data_dir, 'lists', 'downloadLinksAvi.txt'), 'rb') 215 | files = f.readlines() 216 | f.close() 217 | f = open(os.path.join(data_dir, 'lists', 'annotations-someone.csv'), 'rb') 218 | annots = f.readlines() 219 | f.close() 220 | f = open(os.path.join(data_dir, 'lists', 'dataSplit.txt'), 'rb') 221 | splits_file = f.readlines() 222 | splits = {} 223 | 224 | annotations = {} 225 | train_clip_names = [] 226 | valid_clip_names = [] 227 | test_clip_names = [] 228 | 229 | if testing: 230 | tuples = [(f, a) for f, a in zip(files, annots)] 231 | np.random.shuffle(tuples) 232 | tuples = tuples[:testing] 233 | files = [a[0] for a in tuples] 234 | annots = [b[1] for b in tuples] 235 | 236 | train_path = os.path.join(pkl_dir, 'train.pkl') 237 | if not os.path.exists(train_path): 238 | for line in splits_file: 239 | film_name = line.split('\t')[0] 240 | split = line.split('\t')[1] 241 | splits[film_name] = split.replace('\r\n', '') 242 | 243 | for i, file in enumerate(files): 244 | parts = file.split('/') 245 | 246 | film_name = parts[6] 247 | clip_name = parts[7].replace('\n', '') 248 | clip_name = clip_name.split('.avi')[0] 249 | caption = annots[i].split('\t')[1] 250 | caption = caption.replace('\n', '') 251 | 252 | udata = caption.decode("utf-8") 253 | caption = udata.encode("ascii", "ignore") 254 | 255 | tokens = nltk.word_tokenize(caption) 256 | tokenized = ' '.join(tokens) 257 | tokenized = tokenized.lower() 258 | 259 | annotations[clip_name] = [{'tokenized': tokenized, 'image_id': clip_name, 'cap_id': 1, 'caption': caption}] 260 | 261 | if splits[film_name] == 'training': 262 | train_clip_names.append(clip_name) 263 | elif splits[film_name] == 'validation': 264 | valid_clip_names.append(clip_name) 265 | elif splits[film_name] == 'test': 266 | test_clip_names.append(clip_name) 267 | 268 | if not os.path.exists(pkl_dir): 269 | os.mkdir(pkl_dir) 270 | 271 | all_vids = [] 272 | 273 | train_path = os.path.join(pkl_dir, 'train.pkl') 274 | if not os.path.exists(train_path): 275 | dump_pkl(train_clip_names, train_path) 276 | else: 277 | train_clip_names = load_pkl(train_path) 278 | 279 | all_vids = all_vids + train_clip_names 280 | 281 | valid_path = os.path.join(pkl_dir, 'valid.pkl') 282 | if not os.path.exists(valid_path): 283 | dump_pkl(valid_clip_names, valid_path) 284 | else: 285 | valid_clip_names = load_pkl(valid_path) 286 | 287 | all_vids = all_vids + valid_clip_names 288 | 289 | test_path = os.path.join(pkl_dir, 'test.pkl') 290 | if not os.path.exists(test_path): 291 | dump_pkl(test_clip_names, test_path) 292 | else: 293 | test_clip_names = load_pkl(test_path) 294 | 295 | all_vids = all_vids + test_clip_names 296 | 297 | cap_path = os.path.join(pkl_dir, 'CAP.pkl') 298 | if not os.path.exists(cap_path): 299 | dump_pkl(annotations, cap_path) 300 | 301 | dict_path = os.path.join(pkl_dir, 'worddict.pkl') 302 | if not os.path.exists(dict_path): 303 | worddict = create_dictionary(annotations, dict_path) 304 | dump_pkl(worddict, dict_path) 305 | 306 | if testing and local_dir: 307 | logger.info("Copying required 
features...") 308 | if not os.path.isdir(local_dir): 309 | os.makedirs(local_dir) 310 | 311 | for vid_name in all_vids: 312 | ft_path = os.path.join(feats_dir, vid_name) 313 | local_ft_path = os.path.join(local_dir, vid_name) 314 | shutil.copy2(ft_path, local_ft_path) 315 | 316 | logger.info('done creating dataset') 317 | 318 | 319 | def mvad(params): 320 | feats_dir = params.feats_dir 321 | data_dir = params.data_dir 322 | pkl_dir = params.pkl_dir 323 | 324 | testing = params.unit_test 325 | local_dir = params.local_dir 326 | 327 | annotations = {} 328 | 329 | if not os.path.exists(pkl_dir): 330 | os.mkdir(pkl_dir) 331 | 332 | all_vids = [] 333 | 334 | s_paths = [os.path.join(pkl_dir, 'train.pkl'), 335 | os.path.join(pkl_dir, 'valid.pkl'), 336 | os.path.join(pkl_dir, 'test.pkl') 337 | ] 338 | l_paths = [os.path.join(data_dir, 'lists/TrainList.txt'), 339 | os.path.join(data_dir, 'lists/ValidList.txt'), 340 | os.path.join(data_dir, 'lists/TestList.txt') 341 | ] 342 | c_paths = [os.path.join(data_dir, 'lists/TrainCorpus.txt'), 343 | os.path.join(data_dir, 'lists/ValidCorpus.txt'), 344 | os.path.join(data_dir, 'lists/TestCorpus.txt') 345 | ] 346 | 347 | for i, s_path in enumerate(s_paths): 348 | if not os.path.exists(s_path): 349 | _rows = open(l_paths[i], 'rw').readlines() 350 | _corpus = open(c_paths[i], 'rw').readlines() 351 | 352 | if testing: 353 | _pairs = [(r, c) for r, c in zip(_rows, _corpus)] 354 | np.random.shuffle(_pairs) 355 | num = int(testing * params.split[i]) 356 | _rows = [p[0] for p in _pairs[:num]] 357 | _corpus = [p[1] for p in _pairs[:num]] 358 | 359 | annotations, vids_names = get_annots_mvad(_rows, _corpus, annotations, feats_dir) 360 | _list = vids_names.keys() 361 | dump_pkl(_list, s_path) 362 | else: 363 | _list = load_pkl(s_path) 364 | 365 | all_vids = all_vids + _list 366 | 367 | cap_path = os.path.join(pkl_dir, 'CAP.pkl') 368 | if not os.path.exists(cap_path): 369 | dump_pkl(annotations, cap_path) 370 | 371 | dict_path = os.path.join(pkl_dir, 'worddict.pkl') 372 | if not os.path.exists(dict_path): 373 | worddict = create_dictionary(annotations, dict_path) 374 | dump_pkl(worddict, dict_path) 375 | 376 | if testing and local_dir: 377 | logger.info("Copying required features...") 378 | if not os.path.isdir(local_dir): 379 | os.makedirs(local_dir) 380 | 381 | for vid_name in all_vids: 382 | ft_path = os.path.join(feats_dir, vid_name) 383 | local_ft_path = os.path.join(local_dir, vid_name) 384 | shutil.copy2(ft_path, local_ft_path) 385 | 386 | logger.info('done creating dataset') 387 | 388 | 389 | def get_human_annotations(data_dir): 390 | hannot_path = os.path.join(data_dir, 'human_annotations', 'HumanCaps.csv') 391 | import csv 392 | 393 | hannot = {} 394 | with open(hannot_path, 'rb') as csvfile: 395 | spamreader = csv.reader(csvfile, delimiter=',', quotechar='\"') 396 | for row in spamreader: 397 | logger.info(', '.join(row)) 398 | hannot[row[0]] = row[1] 399 | return hannot 400 | 401 | 402 | def tokenize_cap(caption): 403 | udata = caption.decode("utf-8") 404 | caption = udata.encode("ascii", "ignore") 405 | 406 | tokens = nltk.word_tokenize(caption) 407 | tokenized = ' '.join(tokens) 408 | tokenized = tokenized.lower() 409 | return tokenized 410 | 411 | 412 | if __name__ == '__main__': 413 | 414 | parser = argparse.ArgumentParser() 415 | creation_args = parser.add_argument_group("CreationArgs") 416 | creation_args.add_argument('-s', '--seed', type=int, help="Random seed.", default=SEED, required=False) 417 | creation_args.add_argument('-d', '--data_dir', 
dest='data_dir', help='Example: /path/to/dataset/annotations', 418 | required=True) 419 | creation_args.add_argument('-p', '--pkl_dir', dest='pkl_dir', help='Example: /path/to/dataset/pkls', required=True) 420 | creation_args.add_argument('-dbname', '--dbname', dest='dbname', help='Dataset type.', required=True, 421 | choices=['mvad', 'mpii', 'lsmdc16']) 422 | creation_args.add_argument('-st', '--do_skip_thoughts', dest='do_skip_thoughts', action='store_true', default=False) 423 | 424 | ut_args = parser.add_argument_group("UnitTestArgs") 425 | ut_args.add_argument('-t', '--unit_test', dest='unit_test', type=int, default=0, 426 | help='Perform small test. Takes number of samples in unit test dataset.') 427 | ut_args.add_argument('-l', '--local_dir', dest='local_dir', help="Where to copy unit_test features.", default=None) 428 | ut_args.add_argument('-sp', '--split', dest='split', nargs='+', 429 | help='Space delimited [train val test] Data split to use in unit test dataset', 430 | default=[0.50, 0.25, 0.25], type=float) 431 | ut_args.add_argument('-feat', '--feats_dir', dest='feats_dir', help='Example: /path/to/dataset/features_googlenet', 432 | required=False) 433 | 434 | args = parser.parse_args() 435 | 436 | if not len(sys.argv) > 1: 437 | parser.print_help() 438 | sys.exit(0) 439 | 440 | np.random.seed(args.seed) 441 | 442 | if not args.feats_dir: 443 | if args.local_dir: 444 | logger.critical( 445 | "You must provide an argument for --feats_dir to create a local copy of features (--local_dir)") 446 | sys.exit(1) 447 | 448 | if args.dbname == 'mvad': 449 | mvad(args) 450 | if args.dbname == 'mpii': 451 | mpii(args) 452 | if args.dbname == 'lsmdc16': 453 | lsmdc16(args) 454 | -------------------------------------------------------------------------------- /data/create_skip_vectors.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import sys 3 | import util 4 | sys.path.append('skip-thoughts') 5 | import skipthoughts 6 | 7 | 8 | def main(params): 9 | captions_file = params.captions_file 10 | output_file = params.output_file 11 | 12 | vids = util.load_pkl(captions_file) 13 | st_model = skipthoughts.load_model() 14 | 15 | skip_vectors = {} 16 | for vid in vids.keys(): 17 | 18 | caps = vids[vid] 19 | num_caps = len(caps) 20 | 21 | raw_caps = [ '' for x in range(num_caps)] 22 | 23 | for cap in caps: 24 | raw_caps[int(cap['cap_id'])]=cap['tokenized'] 25 | 26 | vector = skipthoughts.encode(st_model, raw_caps, verbose=False) 27 | 28 | skip_vectors[vid] = vector 29 | 30 | util.dump_pkl(skip_vectors, output_file) 31 | 32 | 33 | if __name__=='__main__': 34 | arg_parser = argparse.ArgumentParser() 35 | 36 | arg_parser.add_argument('-i','--input',dest ='captions_file',type=str, required=True) 37 | arg_parser.add_argument('-o','--output',dest ='output_file',type=str, required=True, help="/path/to/dataset/skip_vectors.pkl") 38 | 39 | args = arg_parser.parse_args() 40 | 41 | main(args) 42 | -------------------------------------------------------------------------------- /data/create_tacos.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import nltk 3 | 4 | from util import * 5 | 6 | 7 | SEED = 9 8 | 9 | 10 | def get_annots_tacos(vid_feat_files, id_to_cap_dict, unittest, splits): 11 | vids_train = [] 12 | vids_val = [] 13 | vids_test = [] 14 | all_vids = {} 15 | annotations = {} 16 | 17 | print 'Retrieving annotations...' 
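    # Split sizes below are derived from the comma-separated `splits` ratios and the
    # clips are assigned greedily: the first num_train clips encountered go to train,
    # the next num_valid to valid, and the remainder to test, before each list is
    # shuffled. Illustrative example: splits='0.61,0.05,0.34' over 100 clips gives
    # 61 train / 5 valid / 34 test.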
18 | if unittest: 19 | print 'UNIT TEST: On' 20 | n = unittest 21 | else: 22 | n = len(id_to_cap_dict) 23 | 24 | # We are going to create the valid and test datasets ourselves. 25 | train_split, valid_split, test_split = splits.split(',') 26 | 27 | n_as_float = float(n) 28 | 29 | num_train = int(n_as_float * float(train_split)) 30 | num_valid = int(n_as_float * float(valid_split)) 31 | num_test = int(n - (num_valid + num_train)) 32 | assert n == num_train + num_valid + num_test 33 | 34 | count_train = 0 35 | count_valid = 0 36 | count_test = 0 37 | 38 | for enum, vid_id in enumerate(vid_feat_files): 39 | if unittest and enum > unittest: 40 | break 41 | 42 | cap = id_to_cap_dict[vid_id] 43 | 44 | if vid_id not in all_vids: 45 | all_vids[vid_id] = 1 46 | else: 47 | all_vids[vid_id] += 1 48 | 49 | ocaption = cap 50 | ocaption = ocaption.replace('\n', '') 51 | ocaption = ocaption.strip() 52 | 53 | udata = ocaption.decode("utf-8", "ignore") 54 | ocaption = udata.encode("ascii", "ignore") 55 | 56 | tokens = nltk.word_tokenize(ocaption.replace('.', '')) 57 | 58 | if len(tokens) == 0: 59 | continue 60 | 61 | tokenized = ' '.join(tokens) 62 | tokenized = tokenized.lower() 63 | 64 | if annotations.has_key(vid_id): 65 | cap_id = str(len(annotations[vid_id])) 66 | annotations[vid_id].append({'tokenized': tokenized, 'image_id': vid_id, 'cap_id': cap_id, 'caption': ocaption}) 67 | else: 68 | annotations[vid_id]= [] 69 | cap_id = str(0) 70 | annotations[vid_id].append({'tokenized': tokenized, 'image_id': vid_id, 'cap_id': cap_id, 'caption': ocaption}) 71 | 72 | if count_train < num_train: 73 | vids_train.append(vid_id) 74 | count_train += 1 75 | elif count_valid < num_valid: 76 | vids_val.append(vid_id) 77 | count_valid += 1 78 | elif count_test < num_test: 79 | vids_test.append(vid_id) 80 | count_test += 1 81 | 82 | np.random.shuffle(vids_train) 83 | np.random.shuffle(vids_val) 84 | np.random.shuffle(vids_test) 85 | 86 | return annotations, vids_train, vids_val, vids_test, all_vids 87 | 88 | 89 | def build_ground_truth_dict(gt_dir): 90 | csv_file = open(os.path.join(gt_dir, 'index.tsv'), 'r') 91 | 92 | id_to_cap_dict = {} 93 | for line in csv_file: 94 | groups = line.replace('\n', '').split('\t') 95 | dest_vid = groups[0] 96 | sentence = groups[1] 97 | 98 | # vidID -> sentence 99 | id_to_cap_dict[dest_vid] = sentence 100 | 101 | return id_to_cap_dict 102 | 103 | 104 | def tacos(params): 105 | pkl_dir = params.pkl_dir 106 | feats_dir = params.feats_dir 107 | gt_dir = params.gt_dir 108 | unittest = params.test 109 | splits = params.splits 110 | 111 | if not os.path.exists(pkl_dir): 112 | os.mkdir(pkl_dir) 113 | 114 | train_path = os.path.join(pkl_dir, 'train.pkl') 115 | valid_path = os.path.join(pkl_dir, 'valid.pkl') 116 | test_path = os.path.join(pkl_dir, 'test.pkl') 117 | cap_path = os.path.join(pkl_dir, 'CAP.pkl') 118 | dict_path = os.path.join(pkl_dir, 'worddict.pkl') 119 | 120 | id_to_cap_dict = build_ground_truth_dict(gt_dir) 121 | vid_feat_files = os.listdir(feats_dir) 122 | 123 | annotations, vids_train, vids_val, vids_test, all_vids = get_annots_tacos(vid_feat_files, id_to_cap_dict, unittest, splits) 124 | 125 | dump_pkl(vids_train, train_path) 126 | print('train.pkl created') 127 | dump_pkl(vids_val, valid_path) 128 | print('valid.pkl created') 129 | dump_pkl(vids_test, test_path) 130 | print('test.pkl created') 131 | 132 | dump_pkl(all_vids.keys(), os.path.join(pkl_dir, 'allvids.pkl')) 133 | dump_pkl(annotations, cap_path) 134 | print('CAP.pkl created') 135 | 136 | worddict = 
create_dictionary(annotations, dict_path) 137 | dump_pkl(worddict, dict_path) 138 | print('worddict.pkl created') 139 | 140 | 141 | if __name__=='__main__': 142 | arg_parser = argparse.ArgumentParser() 143 | 144 | arg_parser.add_argument('-f', '--feats_dir', dest='feats_dir', type=str, default='') 145 | arg_parser.add_argument('-gt','--gt_dir',dest ='gt_dir',type=str, default='') 146 | arg_parser.add_argument('-p','--pkl_dir',dest ='pkl_dir',type=str, default='') 147 | arg_parser.add_argument('-t','--test',dest = 'test', type=int, default=0, 148 | help='perform small unit test. If value 0 not unit test if greater than 0 gets a dataset with that numbers of videos') 149 | arg_parser.add_argument('-sp', '--splits', dest='splits', type=str, default='0.61,0.05,0.34', 150 | help='Create validation and test datasets. Usage: floats delimited by commas, ' 151 | 'of the form Tr,Val. ex: {-s 0.60,0.20,0.20}. Default: 0.61,0.05,0.34') 152 | arg_parser.add_argument('-s', '--seed', type=int, help="Random seed.", default=SEED, required=False) 153 | arg_parser.add_argument('-st', '--do_skip_thoughts', dest='do_skip_thoughts', action='store_true', default=False) 154 | 155 | args = arg_parser.parse_args() 156 | 157 | np.random.seed(args.seed) 158 | 159 | if not len(sys.argv) > 1: 160 | print arg_parser.print_help() 161 | sys.exit(0) 162 | 163 | np.random.seed(args.seed) 164 | tacos(args) 165 | -------------------------------------------------------------------------------- /data/create_trecvid.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import nltk 4 | import sys 5 | import numpy as np 6 | 7 | from util import * 8 | 9 | SEED = 9 10 | 11 | 12 | def get_annots_trecvid(vid_feat_files, id_to_A_B_cap_dict, unittest, splits): 13 | vids_train = [] 14 | vids_val = [] 15 | vids_test = [] 16 | all_vids = {} 17 | annotations = {} 18 | 19 | print 'Retrieving annotations...' 20 | if unittest: 21 | print 'UNIT TEST: On' 22 | id_to_A_B_cap_dict = {i: id_to_A_B_cap_dict[i] for enum, i in enumerate(id_to_A_B_cap_dict) if enum < unittest} 23 | 24 | n = len(id_to_A_B_cap_dict) 25 | 26 | # We are going to create the valid and test datasets ourselves. 
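    # The released ground truth read by build_ground_truth_dict below is a mapping
    # file (vtt.gt) plus two caption files, A and B, for a single testing set, so
    # this script carves its own train/valid/test partitions out of that set using
    # the comma-separated `splits` ratios, filled in order and shuffled afterwards.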
27 | train_split, valid_split, test_split = splits.split(',') 28 | 29 | n_as_float = float(n) 30 | 31 | num_train = int(n_as_float * float(train_split)) 32 | num_valid = int(n_as_float * float(valid_split)) 33 | num_test = int(n - (num_valid + num_train)) 34 | assert n == num_train + num_valid + num_test 35 | 36 | count_train = 0 37 | count_valid = 0 38 | count_test = 0 39 | 40 | for vid_id in vid_feat_files: 41 | if vid_id not in id_to_A_B_cap_dict: 42 | continue 43 | 44 | for enum, cap in enumerate(id_to_A_B_cap_dict[vid_id]): 45 | if not all_vids.has_key(vid_id): 46 | all_vids[vid_id] = 1 47 | else: 48 | all_vids[vid_id] += 1 49 | 50 | ocaption = cap 51 | ocaption = ocaption.replace('\n', '') 52 | ocaption = ocaption.strip() 53 | 54 | udata = ocaption.decode("utf-8", "ignore") 55 | ocaption = udata.encode("ascii", "ignore") 56 | 57 | tokens = nltk.word_tokenize(ocaption.replace('.', '')) 58 | 59 | if len(tokens) == 0: 60 | continue 61 | 62 | tokenized = ' '.join(tokens) 63 | tokenized = tokenized.lower() 64 | 65 | if annotations.has_key(vid_id): 66 | annotations[vid_id].append({'tokenized': tokenized, 'image_id': vid_id, 'cap_id': str(enum), 'caption': ocaption}) 67 | else: 68 | annotations[vid_id]= [] 69 | annotations[vid_id].append({'tokenized': tokenized, 'image_id': vid_id, 'cap_id': str(enum), 'caption': ocaption}) 70 | 71 | if count_train < num_train: 72 | vids_train.extend([vid_id + '_' + str(enum) for enum, i in enumerate(annotations[vid_id])]) 73 | count_train += 1 74 | elif count_valid < num_valid: 75 | vids_val.extend([vid_id + '_' + str(enum) for enum, i in enumerate(annotations[vid_id])]) 76 | count_valid += 1 77 | elif count_test < num_test: 78 | vids_test.extend([vid_id + '_' + str(enum) for enum, i in enumerate(annotations[vid_id])]) 79 | count_test += 1 80 | 81 | np.random.shuffle(vids_train) 82 | np.random.shuffle(vids_val) 83 | np.random.shuffle(vids_test) 84 | 85 | return annotations, vids_train, vids_val, vids_test, all_vids 86 | 87 | 88 | def get_features_from_dir(vid_ids, feats_dir, feat_type): 89 | feats = {} 90 | 91 | for i, vid_id in enumerate(vid_ids): 92 | feat_file_path = os.path.join(feats_dir, vid_id.split('vid')[-1]) 93 | 94 | if feat_type == 'c3d': 95 | feats[vid_id] = load_c3d_feat(feat_file_path) 96 | print('features extracted successfuly: ' + feat_file_path) 97 | else: 98 | if os.path.exists(feat_file_path): 99 | feat = np.load(feat_file_path) 100 | feats[vid_id] = feat 101 | print('features extracted successfuly: ' + feat_file_path) 102 | else: 103 | print('No features found!: ' + feat_file_path) 104 | 105 | print str(i) + '/' + str(len(vid_ids)) 106 | return feats 107 | 108 | 109 | def build_ground_truth_dict(gt_dir): 110 | gt_map_file = open(os.path.join(gt_dir, 'vtt.gt'), 'r') 111 | gt_A_file = open(os.path.join(gt_dir, 'vines.textDescription.A.testingSet'), 'r') 112 | gt_B_file = open(os.path.join(gt_dir, 'vines.textDescription.B.testingSet'), 'r') 113 | 114 | gt_A_index_to_cap_dict = {} 115 | for line in gt_A_file: 116 | cap_id, cap = line.replace('\n', '').split(' ') 117 | gt_A_index_to_cap_dict[cap_id] = cap 118 | gt_B_index_to_cap_dict = {} 119 | for line in gt_B_file: 120 | cap_id, cap = line.replace('\n', '').split(' ') 121 | gt_B_index_to_cap_dict[cap_id] = cap 122 | id_to_A_B_cap_dict = {} 123 | for line in gt_map_file: 124 | vid_id, cap_id_A, cap_id_B = line.replace('\n', '').split(' ') 125 | # vidID -> (capA, capB) 126 | id_to_A_B_cap_dict['vid' + vid_id] = (gt_A_index_to_cap_dict[cap_id_A], gt_B_index_to_cap_dict[cap_id_B]) 127 | 128 | 
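    # Resulting structure (values illustrative):
    #   id_to_A_B_cap_dict = {'vid1234': ('caption from set A', 'caption from set B'), ...}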
return id_to_A_B_cap_dict 129 | 130 | 131 | def trecvid(params): 132 | pkl_dir = params.pkl_dir 133 | feats_dir = params.feats_dir 134 | gt_dir = params.gt_dir 135 | unittest = params.test 136 | splits = params.splits 137 | feat_type = params.type 138 | protocol = params.protocol 139 | 140 | if not os.path.exists(pkl_dir): 141 | os.mkdir(pkl_dir) 142 | 143 | train_path = os.path.join(pkl_dir, 'train.pkl') 144 | valid_path = os.path.join(pkl_dir, 'valid.pkl') 145 | test_path = os.path.join(pkl_dir, 'test.pkl') 146 | cap_path = os.path.join(pkl_dir, 'CAP.pkl') 147 | dict_path = os.path.join(pkl_dir, 'worddict.pkl') 148 | 149 | if protocol != '': 150 | filename = 'FEATS_{}_{}.pkl'.format(feat_type, protocol) 151 | else: 152 | filename = 'FEATS_{}.pkl'.format(feat_type) 153 | 154 | feats_path = os.path.join(pkl_dir, filename) 155 | 156 | id_to_A_B_cap_dict = build_ground_truth_dict(gt_dir) 157 | vid_feat_files = ['vid' + i for i in os.listdir(feats_dir)] 158 | 159 | annotations, vids_train, vids_val, vids_test, all_vids = get_annots_trecvid(vid_feat_files, id_to_A_B_cap_dict, unittest, splits) 160 | 161 | dump_pkl(vids_train, train_path) 162 | print('train.pkl created') 163 | dump_pkl(vids_val, valid_path) 164 | print('valid.pkl created') 165 | dump_pkl(vids_test, test_path) 166 | print('test.pkl created') 167 | 168 | dump_pkl(all_vids.keys(), os.path.join(pkl_dir, 'allvids.pkl')) 169 | dump_pkl(annotations, cap_path) 170 | print('CAP.pkl created') 171 | 172 | worddict = create_dictionary(annotations, dict_path) 173 | dump_pkl(worddict, dict_path) 174 | print('worddict.pkl created') 175 | 176 | features = get_features_from_dir(annotations.keys(), feats_dir, feat_type) 177 | dump_pkl(features, feats_path) 178 | 179 | print 'FEAT file created! Path: {}'.format(feats_path) 180 | 181 | if params.do_skip_thoughts: 182 | logger.info("Generating skip-thoughts...") 183 | import create_skip_vectors 184 | class ArgsFaker(): 185 | captions_file = cap_path 186 | output_file = os.path.join(pkl_dir, 'skip_vectors.pkl') 187 | 188 | fake_args = ArgsFaker() 189 | create_skip_vectors.main(fake_args) 190 | 191 | 192 | if __name__=='__main__': 193 | arg_parser = argparse.ArgumentParser() 194 | 195 | arg_parser.add_argument('-s', '--seed', type=int, help="Random seed.", default=SEED, required=False) 196 | arg_parser.add_argument('-f','--feats_dir',dest ='feats_dir',type=str,default='') 197 | arg_parser.add_argument('-gt','--gt_dir',dest ='gt_dir',type=str,default='') 198 | arg_parser.add_argument('-p','--pkl_dir',dest ='pkl_dir',type=str,default='') 199 | arg_parser.add_argument('-type','--type',dest ='type',type=str,default='googlenet') 200 | arg_parser.add_argument('-t','--test',dest = 'test',type=int,default=0, 201 | help='perform small unit test. If value 0 not unit test if greater than 0 gets a dataset with that numbers of videos') 202 | arg_parser.add_argument('-sp', '--splits', dest='splits', type=str, default='0.61,0.05,0.34', 203 | help='Create validation and test datasets. Usage: floats delimited by commas, ' 204 | 'of the form Tr,Val. ex: {-s 0.60,0.20,0.20}. 
Default: 0.61,0.05,0.34') 205 | arg_parser.add_argument('-proc', '--protocol', dest='protocol', type=str, default='') 206 | arg_parser.add_argument('-st', '--do_skip_thoughts', dest='do_skip_thoughts', action='store_true', default=False) 207 | 208 | args = arg_parser.parse_args() 209 | 210 | np.random.seed(args.seed) 211 | 212 | if not len(sys.argv) > 1: 213 | print arg_parser.print_help() 214 | sys.exit(0) 215 | 216 | trecvid(args) 217 | -------------------------------------------------------------------------------- /data/create_y2t.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import nltk 4 | import cPickle 5 | import sys 6 | import numpy as np 7 | 8 | from util import * 9 | import create_msr_vtt 10 | 11 | 12 | SEED = 9 13 | 14 | def get_features_from_dir(vid_frame_folder_names, feats_dir, feat_type): 15 | feats = {} 16 | 17 | for i, files in enumerate(vid_frame_folder_names): 18 | ext = '.' + files.split('.')[-1] 19 | feat_filename = files.split('/')[-1].split(ext)[0] 20 | 21 | feat_file_path = os.path.join(feats_dir, feat_filename) 22 | 23 | if feat_type == 'c3d': 24 | feats[feat_filename] = load_c3d_feat(feat_file_path) 25 | print('features extracted successfuly: ' + feat_file_path) 26 | else: 27 | if os.path.exists(feat_file_path): 28 | feat = np.load(feat_file_path) 29 | feats[feat_filename] = feat 30 | print('features extracted successfuly: ' + feat_file_path) 31 | else: 32 | print('No features found!: ' + feat_file_path) 33 | 34 | print str(i) + '/' + str(len(vid_frame_folder_names)) 35 | return feats 36 | 37 | 38 | def get_annots_y2t(vid_caption_dict, youtube_map_dict, unittest=0, splits=''): 39 | vids_train = [] 40 | vids_val = [] 41 | vids_test = [] 42 | all_vids = {} 43 | annotations = {} 44 | 45 | print 'Retrieving annotations...' 
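    # youtube_map_dict maps raw YouTube clip keys of the form 'xxxxxxxxxx_##_##'
    # (video id plus start/end second) to the internal 'vidNNNN' names used for the
    # feature files, while vid_caption_dict maps the same keys to their caption lists.
    # Illustrative example (mapping value assumed): 'mv89psg6zh4_33_46' -> 'vid1'.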
46 | 47 | pkl = youtube_map_dict 48 | if unittest: 49 | print 'UNIT TEST: On' 50 | keys = pkl.keys() 51 | np.random.shuffle(keys) 52 | keys = keys[:unittest] 53 | pkl = {key: pkl[key] for key in keys} 54 | 55 | n = len(pkl) 56 | 57 | if splits == 'yao': 58 | num_train = 1201 59 | num_valid = 100 60 | num_test = 670 61 | else: 62 | train_split, valid_split, test_split = splits.split(',') 63 | 64 | n_as_float = float(n) 65 | 66 | num_train = int(n_as_float * float(train_split)) 67 | num_valid = int(n_as_float * float(valid_split)) 68 | num_test = int(n_as_float * float(test_split)) 69 | assert n == num_train + num_valid + num_test 70 | 71 | count_train = 0 72 | count_valid = 0 73 | count_test = 0 74 | 75 | for vid_name in pkl.keys(): 76 | vid = youtube_map_dict[vid_name] 77 | 78 | for cap_id, cap in enumerate(vid_caption_dict[vid_name]): 79 | if not all_vids.has_key(vid_name): 80 | all_vids[vid_name] = 1 81 | else: 82 | all_vids[vid_name] += 1 83 | 84 | ocaption = cap 85 | ocaption = ocaption.replace('\n', '') 86 | ocaption = ocaption.strip() 87 | 88 | udata = ocaption.decode("utf-8") 89 | ocaption = udata.encode("ascii", "ignore") 90 | 91 | tokens = nltk.word_tokenize(ocaption.replace('.', '')) 92 | 93 | if len(tokens) == 0: 94 | continue 95 | 96 | tokenized = ' '.join(tokens) 97 | tokenized = tokenized.lower() 98 | 99 | if annotations.has_key(vid): 100 | annotations[vid].append({'tokenized': tokenized, 'image_id': vid, 'cap_id': str(cap_id), 'caption': ocaption}) 101 | else: 102 | annotations[vid]= [] 103 | annotations[vid].append({'tokenized': tokenized, 'image_id': vid, 'cap_id': str(cap_id), 'caption': ocaption}) 104 | 105 | if count_train < num_train: 106 | vids_train.extend([vid + '_' + str(enum) for enum, i in enumerate(annotations[vid])]) 107 | count_train += 1 108 | elif count_valid < num_valid: 109 | vids_val.extend([vid + '_' + str(enum) for enum, i in enumerate(annotations[vid])]) 110 | count_valid += 1 111 | elif count_test < num_test: 112 | vids_test.extend([vid + '_' + str(enum) for enum, i in enumerate(annotations[vid])]) 113 | count_test += 1 114 | 115 | np.random.shuffle(vids_train) 116 | np.random.shuffle(vids_val) 117 | np.random.shuffle(vids_test) 118 | 119 | return annotations, vids_train, vids_val, vids_test, all_vids 120 | 121 | 122 | def get_features_from_pkl(from_pkl_file, all_vids_dict, youtube_map_dict): 123 | pkl = cPickle.load(open(from_pkl_file)) 124 | feats = {} 125 | 126 | for key in all_vids_dict: 127 | # key is going to be of the form xxxxxxxxxx_##_## but we want vid#### 128 | vid = youtube_map_dict[key] 129 | feats[vid] = pkl[vid] 130 | 131 | return feats 132 | 133 | 134 | def fix_feature_file_names(youtube_map_dict, feats_dir, pkl_dir): 135 | feat_files = os.listdir(feats_dir) 136 | work_order = [] 137 | for original in feat_files: 138 | if original not in youtube_map_dict.values(): 139 | new_name = youtube_map_dict[original] 140 | did = "{} to {}".format(original, new_name) 141 | work_order.append(did) 142 | #print did 143 | orig_path = os.path.join(feats_dir, original) 144 | new_path = os.path.join(feats_dir, new_name) 145 | os.rename(orig_path, new_path) 146 | 147 | # Print to file a record of what names were changed 148 | work_order_path = os.path.join(pkl_dir, 'feat_name_changes.txt') 149 | f = open(work_order_path, 'w') 150 | for i in work_order: 151 | f.write(i + '\n') 152 | 153 | print "Saved name changes to {}".format(work_order_path) 154 | 155 | 156 | def y2t(params): 157 | pkl_dir = params.pkl_dir 158 | feats_dir = params.feats_dir 159 | 
json_dir = params.json_dir 160 | unittest = params.test 161 | splits = 'yao' if params.yao else params.splits 162 | feat_type = params.type 163 | protocol = params.protocol 164 | from_pkl = params.from_pkl 165 | 166 | if not os.path.exists(pkl_dir): 167 | os.mkdir(pkl_dir) 168 | 169 | if splits == 'yao': 170 | print("Using Yao2015 splits.") 171 | 172 | f = open(os.path.join(json_dir, 'dict_movieID_caption.pkl'), 'r') 173 | vid_caption_dict = cPickle.load(f) 174 | 175 | f = open(os.path.join(json_dir, 'dict_youtube_mapping.pkl'), 'r') 176 | youtube_map_dict = cPickle.load(f) 177 | 178 | if os.path.isdir(feats_dir): 179 | feat_files = set(os.listdir(feats_dir)) 180 | vidX_formatted_files = set(youtube_map_dict.values()) 181 | 182 | diff = feat_files - vidX_formatted_files 183 | if len(diff) > 0 and not from_pkl: 184 | print "Found mismatch of feature file names and youtube_mapping_dict." \ 185 | "Feature files will be re-named according to youtube_map_dict.pkl" 186 | fix_feature_file_names(youtube_map_dict, feats_dir, pkl_dir) 187 | 188 | else: 189 | print "Feature directroy not found at {}.\nExiting.".format(feats_dir) 190 | sys.exit(0) 191 | 192 | train_path = os.path.join(pkl_dir, 'train.pkl') 193 | valid_path = os.path.join(pkl_dir, 'valid.pkl') 194 | test_path = os.path.join(pkl_dir, 'test.pkl') 195 | cap_path = os.path.join(pkl_dir, 'CAP.pkl') 196 | dict_path = os.path.join(pkl_dir, 'worddict.pkl') 197 | 198 | if protocol != '': 199 | filename = 'FEATS_{}_{}.pkl'.format(feat_type, protocol) 200 | else: 201 | filename = 'FEATS_{}.pkl'.format(feat_type) 202 | 203 | feats_path = os.path.join(pkl_dir, filename) 204 | 205 | if os.path.exists(train_path) or os.path.exists(valid_path) or os.path.exists(test_path): 206 | var = raw_input("Pickle files found in [{}]. Do you want to erase them? type: yes/[no] ".format(pkl_dir)) 207 | 208 | if var == 'yes': 209 | print 'Removing old pkls...' 210 | create_msr_vtt.remove_pickle_files(cap_path, dict_path, feats_path, test_path, train_path, valid_path) 211 | 212 | else: 213 | print('Loading previous pickle files and creating new FEATS_ file at path: {}'.format(feats_path)) 214 | if os.path.exists(feats_path): 215 | os.remove(feats_path) 216 | 217 | annotations = create_msr_vtt.load_annots_vtt(cap_path) 218 | 219 | features = get_features_from_dir(annotations.keys(), feats_dir, feat_type) 220 | create_msr_vtt.dump_pkl(features, feats_path) 221 | print 'FEAT file created! Path: {}'.format(feats_path) 222 | sys.exit(0) 223 | 224 | annotations, vids_train, vids_val, vids_test, all_vids = get_annots_y2t(vid_caption_dict, youtube_map_dict, 225 | unittest, splits) 226 | 227 | dump_pkl(vids_train, train_path) 228 | print('train.pkl created') 229 | dump_pkl(vids_val, valid_path) 230 | print('valid.pkl created') 231 | dump_pkl(vids_test, test_path) 232 | print('test.pkl created') 233 | 234 | dump_pkl(all_vids.keys(), os.path.join(pkl_dir, 'allvids.pkl')) 235 | dump_pkl(annotations, cap_path) 236 | print('CAP.pkl created') 237 | 238 | worddict = create_dictionary(annotations, dict_path) 239 | dump_pkl(worddict, dict_path) 240 | print('worddict.pkl created') 241 | 242 | if from_pkl: 243 | # Getting features from pkl file. 244 | from_pkl_file = os.path.join(feats_dir, 'FEAT_key_vidID_value_features.pkl') 245 | print "Loading features from pkl file." 
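        # from_pkl path: features come from a single pre-built pickle
        # (FEAT_key_vidID_value_features.pkl, keyed by 'vidNNNN') instead of from
        # per-video files under feats_dir; get_features_from_pkl simply selects the
        # entries for the videos present in all_vids and re-keys them by vid id.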
246 | features = get_features_from_pkl(from_pkl_file, all_vids, youtube_map_dict) 247 | else: 248 | features = get_features_from_dir(annotations.keys(), feats_dir, feat_type) 249 | dump_pkl(features, feats_path) 250 | print 'FEAT file created! Path: {}'.format(feats_path) 251 | 252 | if params.do_skip_thoughts: 253 | print("Generating skip-thoughts...") 254 | import create_skip_vectors 255 | class ArgsFaker(): 256 | captions_file = cap_path 257 | output_file = os.path.join(pkl_dir, 'skip_vectors.pkl') 258 | 259 | fake_args = ArgsFaker() 260 | create_skip_vectors.main(fake_args) 261 | 262 | 263 | def _validate(args): 264 | if args.type not in args.feats_dir: 265 | print("FATAL : Requested feature type {}, but directories are something else:\tfeats_dir={}".format(args.type, args.feats_dir)) 266 | sys.exit(0) 267 | 268 | 269 | if __name__=='__main__': 270 | arg_parser = argparse.ArgumentParser() 271 | 272 | arg_parser.add_argument('-s', '--seed', type=int, help="Random seed.", default=SEED, required=False) 273 | arg_parser.add_argument('-f','--feats_dir',dest ='feats_dir',type=str, required=True) 274 | arg_parser.add_argument('-j','--json_dir',dest ='json_dir',type=str,required=True) 275 | arg_parser.add_argument('-p','--pkl_dir',dest ='pkl_dir',type=str,required=True) 276 | arg_parser.add_argument('-type','--type',dest ='type',type=str, choices=['resnet', 'googlenet', 'nasnetalarge', 'resnet152', 'pnasnet5large', 'polynet', 'senet154']) 277 | arg_parser.add_argument('-t','--test',dest = 'test',type=int,default=0, 278 | help='perform small unit test. If value 0 not unit test if greater than 0 gets a dataset with that numbers of videos') 279 | arg_parser.add_argument('-sp', '--splits', dest='splits', type=str, default='0.61,0.05,0.34', 280 | help='Create validation and test datasets. Usage: floats delimited by commas, ' 281 | 'of the form Tr,Val. ex: {-s 0.60,0.40}. Off by default.', required=False) 282 | arg_parser.add_argument('-proc', '--protocol', dest='protocol', type=str, default='') 283 | arg_parser.add_argument('-from_pkl', '--from_pkl', dest='from_pkl', type=int, default=0, 284 | help='If >=1, load features from pickle file instead of raw feature files.' 
285 | 'Note that this is negated if loading pre-existing pickle files.') 286 | arg_parser.add_argument('-st', '--do_skip_thoughts', dest='do_skip_thoughts', action='store_true', default=False) 287 | arg_parser.add_argument('-y', '--yao', dest='yao', action='store_true', default=False, help='Use Yao2015 split.') 288 | 289 | args = arg_parser.parse_args() 290 | 291 | np.random.seed(args.seed) 292 | 293 | if not len(sys.argv) > 1: 294 | print arg_parser.print_help() 295 | sys.exit(0) 296 | 297 | _validate(args) 298 | 299 | y2t(args) 300 | -------------------------------------------------------------------------------- /data/process_frames.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import argparse 4 | import time 5 | from multiprocessing import Pool 6 | 7 | 8 | def main(args): 9 | src_dir = args.src_dir 10 | dst_dir = args.dst_dir 11 | start = int(args.start) 12 | end = int(args.end) 13 | PREPEND = args.prepend 14 | 15 | src_files = os.listdir(src_dir) 16 | 17 | if not os.path.isdir(dst_dir): 18 | os.mkdir(dst_dir) 19 | 20 | tuple_list = [] 21 | 22 | for video_file in src_files[start:end]: 23 | src_path = os.path.join(src_dir, video_file) 24 | dst_path = os.path.join(dst_dir, video_file) 25 | 26 | tuple_list.append((PREPEND, video_file, src_path, dst_path)) 27 | 28 | pool = Pool() # Default to number cores 29 | pool.map(process_vid, tuple_list) 30 | pool.close() 31 | pool.join() 32 | 33 | 34 | def process_vid(args): 35 | (PREPEND, video_file, src_path, dst_path) = args 36 | if not os.path.isdir(dst_path): 37 | os.mkdir(dst_path) 38 | # command = 'ffmpeg -i '+ src_path+' -s 256x256 '+ dst_path + '/%5d.jpg' #with resize 39 | command = PREPEND + 'ffmpeg -i '+ src_path+' -r 20 '+ dst_path + '/%6d.jpg > /dev/null 2>&1' #6 is to be in accordance with C3D features. 
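        # Illustrative expansion of the command built above (paths hypothetical):
        #   ffmpeg -i /path/to/videos/video1.mp4 -r 20 /path/to/frames/video1.mp4/%6d.jpg > /dev/null 2>&1
        # i.e. sample frames at 20 fps and write them as numbered JPEGs with a
        # 6-digit field width (the C3D convention noted above), discarding ffmpeg's
        # console output.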
40 | print(command) 41 | 42 | os.system(command) 43 | else: 44 | print("Frames directory already found at {}".format(dst_path)) 45 | 46 | 47 | if __name__=='__main__': 48 | arg_parser = argparse.ArgumentParser() 49 | arg_parser.add_argument( 50 | 'src_dir', 51 | help='directory where videos are' 52 | ) 53 | arg_parser.add_argument( 54 | 'dst_dir', 55 | help='directory where to store frames' 56 | ) 57 | arg_parser.add_argument( 58 | 'start', 59 | help='start index (inclusive)' 60 | ) 61 | arg_parser.add_argument( 62 | 'end', 63 | help='end index (noninclusive)' 64 | ) 65 | arg_parser.add_argument( 66 | '--prepend', 67 | default='', 68 | help='optional prepend to start of ffmpeg command (in case you want to use a non-system wide version of ffmpeg)' 69 | 'For example: --prepend ~/anaconda2/bin/ will use ffmpeg installed in anaconda2' 70 | ) 71 | 72 | if not len(sys.argv) > 1: 73 | print(arg_parser.print_help()) 74 | sys.exit(0) 75 | 76 | args = arg_parser.parse_args() 77 | 78 | start_time = time.time() 79 | main(args) 80 | print("Job took %s mins" % ((time.time() - start_time)/60)) -------------------------------------------------------------------------------- /data/process_pca.py: -------------------------------------------------------------------------------- 1 | from sklearn.decomposition import PCA 2 | 3 | import sys 4 | import argparse 5 | import numpy as np 6 | import os 7 | import shutil 8 | from data import create_msr_vtt 9 | 10 | 11 | def gather_feats(feats_dir, unittest): 12 | sampling = True 13 | 14 | feats_orig = os.listdir(feats_dir) 15 | if unittest: 16 | feats_orig = feats_orig[:unittest] 17 | 18 | assert len(feats_orig) >= 2 19 | 20 | # Get first feature so np.concatenate has something to use 21 | with open(os.path.join(feats_dir, feats_orig[0])) as f: 22 | feats = np.load(f) 23 | if sampling: 24 | feats = create_msr_vtt.get_sub_frames(feats) 25 | counter = 1 26 | 27 | for key in feats_orig[1:]: 28 | with open(os.path.join(feats_dir, key)) as f: 29 | feat = np.load(f) 30 | if sampling: 31 | feat = create_msr_vtt.get_sub_frames(feat) 32 | feats = np.concatenate((feats, feat), axis=0) 33 | sys.stdout.write('\r' + '{' + key + '} ' + str(counter) + '/' + str(len(feats_orig)) + '\n') 34 | sys.stdout.flush() 35 | counter+=1 36 | print "saving concatenated feats.." 37 | 38 | return feats 39 | 40 | 41 | def main(): 42 | ap = argparse.ArgumentParser() 43 | ap.add_argument('-f', '--feats_dir', dest='feats_dir', type=str, default='') 44 | ap.add_argument('-ft', '--feats_testing_dir', dest='feats_testing_dir', type=str, default='') 45 | ap.add_argument('-pca', '--pca_dir', dest='pca_dir', type=str, default='') 46 | ap.add_argument('-pca_test', '--pca_test_dir', dest='pca_test_dir', type=str, default='') 47 | ap.add_argument('-type', '--type', dest='type', type=str, default='googlenet') 48 | ap.add_argument('-t', '--test', dest='test', type=int, default=0, 49 | help='perform small unit test. 
If value 0 not unit test if greater than 0 gets a dataset with that numbers of videos') 50 | ap.add_argument('-train_pkl', '--training_pkl', dest='train_pkl', type=str, default='') 51 | ap.add_argument('-test_pkl', '--testing_pkl', dest='test_pkl', type=str, default='') 52 | 53 | if not len(sys.argv) > 1: 54 | print ap.print_help() 55 | sys.exit(0) 56 | 57 | args = ap.parse_args() 58 | 59 | feats_dir = args.feats_dir 60 | feats_test_dir = args.feats_testing_dir 61 | pca_dir = args.pca_dir 62 | pca_test_dir = args.pca_test_dir 63 | type = args.type 64 | unittest = args.test 65 | 66 | given_train_pkl = args.train_pkl 67 | given_test_pkl = args.test_pkl 68 | 69 | print "Extracting regular feature files..." 70 | extract_and_write_pca(feats_dir, feats_dir, pca_dir, type, unittest, given_train_pkl) 71 | 72 | #print "Extracting test feature files..." 73 | #extract_and_write_pca(feats_test_dir, feats_test_dir, pca_test_dir, type, unittest) 74 | 75 | 76 | def extract_and_write_pca(transforming_feats_dir, fit_feats_dir, pca_dir, type, unittest, given_train_pkl): 77 | if given_train_pkl: 78 | pca = create_msr_vtt.load_pkl(given_train_pkl) 79 | else: 80 | # Refactor later to allow for mixing of fit feat files 81 | feats = gather_feats(fit_feats_dir, unittest) 82 | pca = PCA(n_components=1024).fit(feats) 83 | 84 | #dump_pkl(pca, os.path.join(pca_dir, 'pca_{}.pkl'.format(type))) 85 | 86 | if os.path.isdir(pca_dir): 87 | if raw_input("Found PCA folder, remove? [y/n]") == 'y': 88 | shutil.rmtree(pca_dir) 89 | else: 90 | print "Bye" 91 | sys.exit(0) 92 | 93 | os.mkdir(pca_dir) 94 | 95 | t_feat_files = os.listdir(transforming_feats_dir) 96 | if unittest: 97 | t_feat_files = t_feat_files[:unittest] 98 | 99 | for i, key in enumerate(t_feat_files, start=1): 100 | orig_feat_path = os.path.join(transforming_feats_dir, key) 101 | pca_feat_path = os.path.join(pca_dir, key) 102 | 103 | if type == 'c3d': 104 | feat = create_msr_vtt.load_c3d_feat(orig_feat_path) 105 | pca_feat = pca.transform(feat) 106 | 107 | elif type == 'resnet': 108 | with open(orig_feat_path) as f: 109 | feat = np.load(f) 110 | pca_feat = pca.transform(feat) 111 | else: 112 | print "Invalid feature type. Exiting." 113 | sys.exit(0) 114 | 115 | np.save(open(pca_feat_path, 'wb'), pca_feat) 116 | 117 | print str(i) + '/' + str(len(t_feat_files)) 118 | print 'processed: ' + str(len(t_feat_files)) + " features." 
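
# Workflow sketch for this script: features under fit_feats_dir are subsampled with
# create_msr_vtt.get_sub_frames, concatenated, and used to fit PCA(n_components=1024)
# (unless a pre-fit PCA pickle is supplied via -train_pkl); each feature file in
# transforming_feats_dir is then projected and written under pca_dir with the same
# filename. Illustrative invocation (paths assumed):
#   python process_pca.py -f /data/feats_resnet -pca /data/feats_resnet_pca -type resnet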
119 | 120 | 121 | if __name__ == '__main__': 122 | main() -------------------------------------------------------------------------------- /data/py3_process_features.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import torch 3 | import pretrainedmodels 4 | import pretrainedmodels.utils as utils 5 | import torch.nn as nn 6 | import argparse 7 | import time 8 | import data.validate_feats as validate_feats 9 | import os 10 | import numpy as np 11 | import logging 12 | import shutil 13 | 14 | from multiprocessing import Pool 15 | 16 | logging.basicConfig() 17 | logger = logging.getLogger(__name__) 18 | logger.setLevel(logging.DEBUG) 19 | 20 | available_features = ['nasnetalarge', 'resnet152', 'pnasnet5large', 'densenet121', 'senet154', 'polynet'] 21 | 22 | args = None 23 | 24 | 25 | def init_model(gpu_ids, model_name): 26 | # model_name = 'pnasnet5large' 27 | # could be fbresnet152 or inceptionresnetv2 28 | model = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet') 29 | model.eval() 30 | load_img = utils.LoadImage() 31 | 32 | # transformations depending on the model 33 | # rescale, center crop, normalize, and others (ex: ToBGR, ToRange255) 34 | tf_img = utils.TransformImage(model) 35 | 36 | """ 37 | TODO(WG): Would be nice to use something like DataParallel, but that only does forward pass on given module. 38 | Need to stop before logits step. 39 | Should create wrapper for pretrainedmodels that does the MPI-like ops across GPUs on model.features modules: 40 | 1) replicated 41 | 2) scatter 42 | 3) parallel_apply 43 | 4) gather 44 | Would have to know what layers are being used on each model. 45 | """ 46 | if torch.cuda.is_available(): 47 | model = model.cuda(device=gpu_ids[0]) 48 | 49 | return load_img, tf_img, model 50 | 51 | 52 | def extract_features(args): 53 | root_frames_dir = args.frames_dir 54 | root_feats_dir = args.feats_dir 55 | work = args.work 56 | autofill = int(args.autofill) 57 | ftype = args.type 58 | gpu_list = args.gpu_list 59 | 60 | frames_dirs = os.listdir(root_frames_dir) 61 | 62 | if not os.path.isdir(root_feats_dir): 63 | os.mkdir(root_feats_dir) 64 | # else: 65 | # if autofill: 66 | # logger.info('AUTOFILL ON: Attempting to autofill missing features.') 67 | # frames_dirs = validate_feats.go(featsd=root_feats_dir, framesd=root_frames_dir) 68 | 69 | # Difficulty of each job is measured by # of frames to process in each chunk. 70 | # Can't be randomized since autofill list woudld be no longer valid. 71 | # np.random.shuffle(frames_dirs) 72 | work = len(frames_dirs) if not work else work 73 | 74 | load_img, tf_img, model = init_model(args.gpu_list, args.type) 75 | 76 | work_done = 0 77 | while work_done != work: 78 | frames_dirs_avail = diff_feats(root_frames_dir, root_feats_dir) 79 | if len(frames_dirs_avail) == 0: 80 | break 81 | 82 | frames_dir = np.random.choice(frames_dirs_avail) 83 | ext = '.' + frames_dir.split('.')[-1] 84 | feat_filename = frames_dir.split('/')[-1].split(ext)[0] 85 | video_feats_path = os.path.join(args.feats_dir, feat_filename) 86 | 87 | if os.path.exists(video_feats_path): 88 | logger.info('Features already extracted:\t{}'.format(video_feats_path)) 89 | continue 90 | 91 | try: 92 | frames_to_do = [os.path.join(args.frames_dir, frames_dir, p) for p in 93 | os.listdir(os.path.join(args.frames_dir, frames_dir))] 94 | except Exception as e: 95 | logger.exception(e) 96 | continue 97 | 98 | # Must sort so frames follow numerical order. 
os.listdir does not guarantee order. 99 | frames_to_do.sort() 100 | 101 | if len(frames_to_do) == 0: 102 | logger.warning("Frame folder has no frames! Skipping...") 103 | continue 104 | 105 | # Save a flag copy 106 | with open(video_feats_path, 'wb') as pf: 107 | np.save(pf, []) 108 | 109 | try: 110 | batches = create_batches(frames_to_do, load_img, tf_img, batch_size=args.batch_size) 111 | except OSError as e: 112 | logger.exception(e) 113 | logger.warning("Corrupt image file. Skipping...") 114 | os.remove(video_feats_path) 115 | continue 116 | 117 | logger.debug("Start video {}".format(work_done)) 118 | 119 | feats = process_batches(batches, ftype, gpu_list, model) 120 | 121 | with open(video_feats_path, 'wb') as pf: 122 | np.save(pf, feats) 123 | logger.info('Saved complete features to {}.'.format(video_feats_path)) 124 | work_done += 1 125 | 126 | 127 | def process_batches(batches, ftype, gpu_list, model): 128 | done_batches = [] 129 | for i, batch in enumerate(batches): 130 | if torch.cuda.is_available(): 131 | batch = batch.cuda(device=gpu_list[0]) 132 | 133 | output_features = model.features(batch) 134 | output_features = output_features.data.cpu() 135 | 136 | conv_size = output_features.shape[-1] 137 | 138 | if ftype == 'nasnetalarge' or ftype == 'pnasnet5large': 139 | relu = nn.ReLU() 140 | rf = relu(output_features) 141 | avg_pool = nn.AvgPool2d(conv_size, stride=1, padding=0) 142 | out_feats = avg_pool(rf) 143 | else: 144 | avg_pool = nn.AvgPool2d(conv_size, stride=1, padding=0) 145 | out_feats = avg_pool(output_features) 146 | 147 | out_feats = out_feats.view(out_feats.size(0), -1) 148 | logger.info('Processed {}/{} batches.\r'.format(i + 1, len(batches))) 149 | 150 | done_batches.append(out_feats) 151 | feats = np.concatenate(done_batches, axis=0) 152 | return feats 153 | 154 | 155 | def create_batches(frames_to_do, load_img_fn, tf_img_fn, batch_size=8): 156 | n = len(frames_to_do) 157 | if n < batch_size: 158 | logger.warning("Sample size less than batch size: Cutting batch size.") 159 | batch_size = n 160 | 161 | logger.info("Generating {} batches...".format(n // batch_size)) 162 | batches = [] 163 | frames_to_do = np.array(frames_to_do) 164 | 165 | for idx in range(0, n, batch_size): 166 | frames_idx = list(range(idx, min(idx+batch_size, n))) 167 | batch_frame_paths = frames_to_do[frames_idx] 168 | 169 | batch_tensor = torch.zeros((len(batch_frame_paths),) + tuple(tf_img_fn.input_size)) 170 | for i, frame_path in enumerate(batch_frame_paths): 171 | input_img = load_img_fn(frame_path) 172 | input_tensor = tf_img_fn(input_img) # 3x400x225 -> 3x299x299 size may differ 173 | # input_tensor = input_tensor.unsqueeze(0) # 3x299x299 -> 1x3x299x299 174 | batch_tensor[i] = input_tensor 175 | 176 | batch_ag = torch.autograd.Variable(batch_tensor, requires_grad=False) 177 | batches.append(batch_ag) 178 | 179 | return batches 180 | 181 | 182 | def diff_feats(frames_dir, feats_dir): 183 | feats = set(os.listdir(feats_dir)) 184 | frames_to_ext = {'.'.join(i.split('.')[:-1]): i.split('.')[-1] for i in os.listdir(frames_dir)} 185 | frames = set(frames_to_ext.keys()) 186 | needed_feats = frames - feats 187 | needed_feats = [i + '.' 
+ frames_to_ext[i] for i in needed_feats] 188 | return needed_feats 189 | 190 | 191 | if __name__ == '__main__': 192 | arg_parser = argparse.ArgumentParser() 193 | arg_parser.add_argument('frames_dir',help = 'Directory where there are frame directories.') 194 | arg_parser.add_argument('feats_dir',help = 'Root directory of dataset\'s processed videos.') 195 | arg_parser.add_argument('-w', '--work', help = 'Number of features to process. Defaults to all.', default=0, type=int) 196 | arg_parser.add_argument('-gl', '--gpu_list', required=True, nargs='+', type=int, help="Space delimited list of GPU indices to use. Example for 4 GPUs: -gl 0 1 2 3") 197 | arg_parser.add_argument('-bs', '--batch_size', type=int, help="Batch size to use during feature extraction. Larger batch size = more VRAM usage", default=8) 198 | arg_parser.add_argument('--type', required=True, help = 'ConvNet to use for processing features.', choices=available_features) 199 | arg_parser.add_argument('--autofill', action='store_true', default=False, help="Perform diff between frames_dir and feats_dir and fill them in.") 200 | 201 | args = arg_parser.parse_args() 202 | 203 | start_time = time.time() 204 | 205 | logger.info("Found {} GPUs, using {}.".format(torch.cuda.device_count(), len(args.gpu_list))) 206 | 207 | extract_features(args) 208 | 209 | logger.info("Job took %s mins" % ((time.time() - start_time)/60)) 210 | -------------------------------------------------------------------------------- /data/subsect_videos.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import re 5 | import argparse 6 | from math import floor 7 | from multiprocessing import Pool 8 | 9 | 10 | def do_command(command): 11 | os.system(command) 12 | 13 | 14 | def general_case(args): 15 | if args.annots_path.endswith('.json'): 16 | # Load user specified json file. 17 | json_file = open(args.annots_path) 18 | else: 19 | json_file = open(os.path.join(args.annots_path, 'videodatainfo_2017.json')) 20 | 21 | json_str = json_file.read() 22 | json_data = json.loads(json_str) 23 | 24 | src_dir = args.src_dir 25 | dst_dir = args.dst_dir 26 | start = int(args.start) 27 | end = int(args.end) 28 | 29 | src_files = os.listdir(src_dir) 30 | 31 | 32 | if not os.path.isdir(dst_dir): 33 | os.mkdir(dst_dir) 34 | 35 | command_list = [] 36 | 37 | for video_file in src_files[start:end]: 38 | # Get index from video file name 39 | video_index = int(re.findall('\d+', video_file)[0]) 40 | 41 | # Two scenarios: 42 | # Subsecting training videos, which go video0 to video9999 43 | # Subsecting test videos, which go video10000 to vieo12999 44 | # To account for either case, take mod 10000 to get the correct 0-based index to use in json lookup. 
45 | video_index %= 10000 46 | 47 | start_time = float(json_data['videos'][video_index]['start time']) 48 | end_time = float(json_data['videos'][video_index]['end time']) 49 | duration = end_time - start_time 50 | 51 | src_path = os.path.join(src_dir, video_file) 52 | 53 | dst_path = os.path.join(dst_dir, video_file) 54 | 55 | if os.path.isfile(dst_path): 56 | print 'File at {} already exists!'.format(dst_path) 57 | continue 58 | 59 | ffmpeg_subsection_cmd = "ffmpeg -ss {} -i {} -t {} -vcodec copy -acodec copy {}".format( 60 | start_time, src_path, duration, dst_path) 61 | command_list.append(ffmpeg_subsection_cmd) 62 | 63 | threadPool = Pool() 64 | threadPool.map(do_command, command_list) 65 | threadPool.close() 66 | threadPool.join() 67 | 68 | 69 | def tacos(args): 70 | def frame_to_timeestamp(frame_rate, frame_num): 71 | return float("%.3f" % (float(frame_num) / float(frame_rate))) 72 | 73 | if args.annots_path.endswith('.tsv'): 74 | # Load user specified json file. 75 | tsv_file = open(args.annots_path) 76 | else: 77 | tsv_file = open(os.path.join(args.annots_path, 'index.tsv')) 78 | 79 | data = [i for i in tsv_file] 80 | 81 | src_dir = args.src_dir 82 | dst_dir = args.dst_dir 83 | start = int(args.start) 84 | end = int(args.end) 85 | 86 | if not os.path.isdir(dst_dir): 87 | os.makedirs(dst_dir) 88 | 89 | command_list = [] 90 | 91 | for line in data: 92 | groups = line.replace('\n', '').split('\t') 93 | dest_vid = groups[0] 94 | sentence = groups[1] 95 | src_vid = groups[2] 96 | start_frame = float(groups[3]) 97 | end_frame = float(groups[4]) 98 | 99 | start_time = frame_to_timeestamp(29.40, start_frame) 100 | duration_time = frame_to_timeestamp(29.40, end_frame - start_frame) 101 | src_path = os.path.join(src_dir, src_vid + '.avi') 102 | dst_path = os.path.join(dst_dir, dest_vid + '.avi') 103 | 104 | if os.path.isfile(dst_path): 105 | print 'File at {} already exists!'.format(dst_path) 106 | continue 107 | 108 | ffmpeg_subsection_cmd = "ffmpeg -ss {} -i {} -t {} -vcodec copy -acodec copy {}".format( 109 | start_time, src_path, duration_time, dst_path) 110 | command_list.append(ffmpeg_subsection_cmd) 111 | 112 | threadPool = Pool() 113 | threadPool.map(do_command, command_list) 114 | threadPool.close() 115 | threadPool.join() 116 | 117 | 118 | if __name__ == '__main__': 119 | arg_parser = argparse.ArgumentParser() 120 | arg_parser.add_argument('src_dir', help='directory where to get full videos') 121 | arg_parser.add_argument('dst_dir',help = 'directory where to store subsections') 122 | arg_parser.add_argument('annots_path', help='directory where annotations file is stored') 123 | arg_parser.add_argument('start',help = 'start video index') 124 | arg_parser.add_argument('end',help = 'end video index') 125 | arg_parser.add_argument('--dataset', help='dataset being worked on') 126 | 127 | args = arg_parser.parse_args() 128 | 129 | if args.dataset == 'tacos': 130 | tacos(args) 131 | else: 132 | general_case(args) 133 | -------------------------------------------------------------------------------- /data/util.py: -------------------------------------------------------------------------------- 1 | import cPickle 2 | import os 3 | import numpy as np 4 | import sys 5 | import logging 6 | 7 | from collections import OrderedDict 8 | 9 | 10 | logging.basicConfig() 11 | logger = logging.getLogger(__name__) 12 | logger.setLevel(logging.DEBUG) 13 | 14 | 15 | def dump_pkl(obj, path): 16 | """ 17 | Save a Python object into a pickle file. 
18 | """ 19 | f = open(path, 'wb') 20 | try: 21 | cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL) 22 | 23 | finally: 24 | f.close() 25 | print path+' created' 26 | 27 | 28 | def load_pkl(path): 29 | """ 30 | Load a pickled file. 31 | 32 | :param path: Path to the pickled file. 33 | 34 | :return: The unpickled Python object. 35 | """ 36 | f = open(path, 'rb') 37 | try: 38 | rval = cPickle.load(f) 39 | finally: 40 | f.close() 41 | return rval 42 | 43 | def create_dictionary(annotations,pkl_dir): 44 | worddict = OrderedDict() 45 | word_idx = 2 46 | for a in annotations: 47 | caps = annotations[a] 48 | 49 | for cap in caps: 50 | tokens = cap['tokenized'].split() 51 | for token in tokens: 52 | if token not in ['','\t','\n',' ']: 53 | if not worddict.has_key(token): 54 | worddict[token]=word_idx 55 | word_idx+=1 56 | 57 | return worddict 58 | 59 | 60 | def pad_frames(frames, limit): 61 | last_frame = frames[-1] 62 | padding = np.asarray([last_frame * 0.]*(limit-len(frames))) 63 | frames_padded = np.concatenate([frames, padding], axis=0) 64 | return frames_padded 65 | 66 | 67 | def extract_frames_equally_spaced(frames, K): 68 | # chunk frames into 'how_many' segments and use the first frame 69 | # from each segment 70 | n_frames = len(frames) 71 | splits = np.array_split(range(n_frames), K) 72 | idx_taken = [s[0] for s in splits] 73 | sub_frames = frames[idx_taken] 74 | return sub_frames 75 | 76 | 77 | def get_sub_frames(frames): 78 | 79 | K=28 80 | if len(frames) < K: 81 | frames_ = pad_frames(frames, K) 82 | else: 83 | frames_ = extract_frames_equally_spaced(frames, K) 84 | 85 | return frames_ 86 | 87 | 88 | def load_c3d_feat(feat_file_path): 89 | if os.path.exists(feat_file_path): 90 | files = os.listdir(feat_file_path) 91 | files.sort() 92 | allftrs = np.zeros((len(files), 4101),dtype=np.float32) 93 | 94 | for j in range(0, len(files)): 95 | feat = np.fromfile(os.path.join(feat_file_path, files[j]),dtype=np.float32) 96 | allftrs[j,:] = feat 97 | allftrs = get_sub_frames(allftrs) 98 | 99 | return allftrs 100 | else: 101 | print 'error feature file doesnt exist'+feat_file_path 102 | sys.exit(0) 103 | 104 | 105 | def mkdirs_safe(dir): 106 | try: 107 | if not os.path.isdir(dir): 108 | os.makedirs(dir) 109 | except OSError as e: 110 | logger.exception(e) 111 | 112 | 113 | def create_line(seed, dataset, annots_dir, feature_type, pickle_dir, feature_dir, feature_test_dir, ut=0, st=False): 114 | if dataset == 'mvad' or dataset == 'mpii' or dataset == 'lsmdc16': 115 | line = "python create_mvad_mpii_lsmdc.py " 116 | line += "-s {} ".format(seed) 117 | line += "-d {} ".format(annots_dir) 118 | line += "-p {} ".format(pickle_dir) 119 | line += "-dbname {} ".format(dataset) 120 | elif dataset == 'tacos': 121 | line = "python create_tacos.py " 122 | line += "-s {} ".format(seed) 123 | line += "-f {} ".format(feature_dir) 124 | line += "-gt {} ".format(annots_dir) 125 | line += "-p {} ".format(pickle_dir) 126 | elif dataset == 'youtube2text': 127 | line = "python create_y2t.py " 128 | line += "-s {} ".format(seed) 129 | line += "-f {} ".format(feature_dir) 130 | line += "-j {} ".format(annots_dir) 131 | line += "-p {} ".format(pickle_dir) 132 | line += "-type {} ".format(feature_type) 133 | elif dataset == 'vtt16': 134 | line = "python create_msr_vtt.py " 135 | line += "-s {} ".format(seed) 136 | line += "-f {} ".format(feature_dir) 137 | line += "-ft {} ".format(feature_test_dir) 138 | line += "-j {} ".format(annots_dir) 139 | line += "-p {} ".format(pickle_dir) 140 | line += "-type {} 
".format(feature_type) 141 | line += "-v 2016 " 142 | line += "-ws " 143 | elif dataset == 'vtt17': 144 | line = "python create_msr_vtt.py " 145 | line += "-s {} ".format(seed) 146 | line += "-f {} ".format(feature_dir) 147 | line += "-ft {} ".format(feature_test_dir) 148 | line += "-j {} ".format(annots_dir) 149 | line += "-p {} ".format(pickle_dir) 150 | line += "-type {} ".format(feature_type) 151 | line += "-v 2017 " 152 | line += "-ws " 153 | elif dataset == 'trecvid': 154 | line = "python create_trecvid.py " 155 | line += "-s {} ".format(seed) 156 | line += "-f {} ".format(feature_dir) 157 | line += "-gt {} ".format(annots_dir) 158 | line += "-p {} ".format(pickle_dir) 159 | line += "-type {} ".format(feature_type) 160 | else: 161 | raise NotImplementedError("Dataset not implemented: {}".format(dataset)) 162 | 163 | if ut: 164 | line += "-t {} ".format(ut) 165 | if st: 166 | line += "-st " 167 | 168 | return line 169 | -------------------------------------------------------------------------------- /data/validate_feats.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import logging 4 | 5 | logging.basicConfig() 6 | logger = logging.getLogger(__name__) 7 | logger.setLevel(logging.DEBUG) 8 | 9 | 10 | def go(args=None, featsd=None, framesd=None): 11 | 12 | logger.info("\nParsing frame and feature directories.") 13 | if args is not None: 14 | feats_dir = args.feats_dir 15 | frames_dir = args.frames_dir 16 | else: 17 | feats_dir = featsd 18 | frames_dir = framesd 19 | 20 | feats = set(os.listdir(feats_dir)) 21 | # '.'.join(i.split('.')[:-1]): Get video name up to the extension (last group) 22 | frames_to_ext = {'.'.join(i.split('.')[:-1]): i.split('.')[-1] for i in os.listdir(frames_dir)} 23 | frames = set(frames_to_ext.keys()) 24 | 25 | logger.info('There are {} feature files and {} frame folders.'.format(len(feats), len(frames))) 26 | assert len(frames) >= len(feats) 27 | 28 | logger.info("Validate existing features...") 29 | bad_feats = set() 30 | invalid_paths = [] 31 | sizes = {} 32 | 33 | for feat in feats: 34 | fpath = os.path.join(feats_dir, feat) 35 | stat = os.stat(fpath) 36 | sizes[fpath] = stat.st_size 37 | 38 | if stat.st_size <= 130: # Empty npy file is usually 80 bytes. Flag file is 128 39 | bad_feats.add(feat) 40 | invalid_paths.append(fpath) 41 | 42 | if bad_feats: 43 | logger.warning("There are {} nil features.".format(len(bad_feats))) 44 | feats = feats - bad_feats 45 | logger.info("Invalid paths start:") 46 | for fpath in invalid_paths: 47 | print("-> " + fpath) 48 | if args.rm_nil: 49 | os.remove(fpath) 50 | print("--> Removed!") 51 | else: 52 | logger.info("Existing features are valid (filesize > 130B).") 53 | 54 | if sizes: 55 | logger.info("Smallest feature was {} Bytes\n------------------".format(min(sizes.values()))) 56 | 57 | logger.info("In total, there are {} missing features.".format(len(frames - feats))) 58 | 59 | if args is None: 60 | needed_feats = frames - feats 61 | # Put back together extension since intersection is finished 62 | needed_feats = [i + '.' 
+ frames_to_ext[i] for i in needed_feats] 63 | return needed_feats 64 | 65 | 66 | if __name__ == '__main__': 67 | ap = argparse.ArgumentParser() 68 | ap.add_argument('frames_dir', help='Frames directory') 69 | ap.add_argument('feats_dir', help='Features directory') 70 | ap.add_argument('-rm', '--rm_nil', help="Remove nil/invalid features.", default=False, action='store_true') 71 | 72 | args = ap.parse_args() 73 | 74 | go(args=args) 75 | -------------------------------------------------------------------------------- /data_engine.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import os, socket, shutil 3 | import sys, re 4 | import time 5 | from collections import OrderedDict 6 | import numpy 7 | # import tables 8 | import theano 9 | import theano.tensor as T 10 | import common 11 | import numpy as np 12 | 13 | # sys.path.append('skip-thoughts') 14 | # import skipthoughts 15 | from scipy import spatial 16 | from nltk.corpus import stopwords 17 | 18 | from multiprocessing import Process, Queue, Manager 19 | 20 | hostname = socket.gethostname() 21 | 22 | 23 | class Movie2Caption(object): 24 | 25 | def __init__(self, model_type, signature, video_feature, 26 | mb_size_train, mb_size_test, maxlen, n_words,dec,proc, 27 | n_frames=None, outof=None, data_dir='', feats_dir='' 28 | ): 29 | self.signature = signature 30 | self.model_type = model_type 31 | self.video_feature = video_feature 32 | self.maxlen = maxlen 33 | self.n_words = n_words 34 | self.K = n_frames 35 | self.OutOf = outof 36 | self.dec = dec 37 | 38 | self.mb_size_train = mb_size_train 39 | self.mb_size_test = mb_size_test 40 | self.non_pickable = [] 41 | self.proc = proc 42 | self.host = socket.gethostname() 43 | self.data_dir=data_dir 44 | self.feats_dir = feats_dir 45 | 46 | # self.test_mode = 0 #don't chage this when in production 47 | self.load_data() 48 | 49 | 50 | 51 | if dec=='multi-stdist': 52 | # self.st_model = skipthoughts.load_model() #refactoring ... 
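# Illustrative sketch (not part of the original code): how the skip-thought
# caption distances cached just below (self.cap_distances) are used later in
# get_z_seq() to pick the caption farthest from the query caption.
# The random matrix stands in for one video's entry in skip_vectors.pkl.
import numpy as np
from scipy import spatial

def most_distant_caption(vectors, query_id):
    # Pairwise cosine distances between every caption embedding of one video.
    dists = spatial.distance.cdist(vectors, vectors, 'cosine')
    # Drop the query itself, then return the index of the farthest caption.
    others = [j for j in range(len(vectors)) if j != query_id]
    return others[int(np.argmax(dists[query_id, others]))]

# fake_vectors = np.random.rand(5, 2400)   # 5 captions, toy embedding size
# most_distant_caption(fake_vectors, 0)    # index of the most dissimilar caption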
53 | # vectors = skipthoughts.encode(engine.st_model, captions) 54 | 55 | self.cap_distances = {} 56 | 57 | 58 | def _filter_feature(self, vidID): 59 | feat = self.FEAT[vidID] 60 | # print vidID 61 | # print feat 62 | feat = self.get_sub_frames(feat) 63 | return feat 64 | 65 | def _filter_c3d_resnet(self, vidID): 66 | feat = self.FEAT[vidID] 67 | feat2 = self.FEAT2[vidID] 68 | # print vidID 69 | # print feat 70 | feat = self.get_sub_frames(feat) 71 | feat2 = self.get_sub_frames(feat2) 72 | 73 | cfeat =np.concatenate((feat,feat2),axis=1) 74 | return cfeat 75 | 76 | def _load_feat_file(self, vidID): 77 | 78 | # feats_dir =os.path.join(data_dir,'features_chal') 79 | feat = [] 80 | feats_dir = self.feats_dir 81 | 82 | feat_filename = vidID#files.split('/')[-1].split('.avi')[0] 83 | feat_file_path = os.path.join(feats_dir,feat_filename) 84 | 85 | if os.path.exists(feat_file_path): 86 | feat = np.load(feat_file_path) 87 | 88 | if len(feat) > 0: 89 | feat = self.get_sub_frames(feat) 90 | else: 91 | print 'feature file is empty '+feat_file_path 92 | print feat 93 | else: 94 | print 'error feature file doesnt exist'+feat_file_path 95 | 96 | 97 | return feat 98 | 99 | def _load_c3d_feat_file(self,vidID): 100 | feats_dir = 'vid-desc/vtt/features_c3d' 101 | feat_filename = vidID 102 | feat_file_path = os.path.join(feats_dir,feat_filename) 103 | 104 | if os.path.exists(feat_file_path): 105 | files = os.listdir(feat_file_path) 106 | files.sort() 107 | allftrs = np.zeros((len(files), 4101),dtype=np.float32) 108 | 109 | for j in range(0, len(files)): 110 | 111 | feat = np.fromfile(os.path.join(feat_file_path, files[j]),dtype=np.float32) 112 | allftrs[j,:] = feat 113 | allftrs = self.get_sub_frames(allftrs) 114 | 115 | return allftrs 116 | else: 117 | print 'error feature file doesnt exist'+feat_file_path 118 | sys.exit(0) 119 | 120 | 121 | def get_video_features(self, vidID): 122 | # hack to be fixed 123 | available_features = ['googlenet', 'resnet', 'c3d', 'resnet152', 'nasnetalarge', 'pnasnet5large', 'densenet152', 'polynet', 'senet154'] 124 | if self.video_feature in available_features: 125 | if self.signature == 'youtube2text' or self.signature == 'ysvd' or self.signature == 'vtt16' or self.signature == 'vtt17' or self.signature == 'trecvid': 126 | y = self._filter_feature(vidID) 127 | elif self.signature == 'lsmdc' or self.signature == 'lsmdc16' or self.signature == 'mpii' or self.signature == 'mvad' or self.signature == 'tacos': 128 | y = self._load_feat_file(vidID) #this is for large datasets, needs to be fixed with something better. Mpii might need this.. 
129 | # elif self.signature == 'vtt': 130 | # y = self._load_c3d_feat_file(vidID) 131 | else: 132 | raise NotImplementedError() 133 | elif self.video_feature == 'c3d_resnet': 134 | y = self._filter_c3d_resnet(vidID) 135 | else: 136 | raise NotImplementedError() 137 | return y 138 | 139 | def pad_frames(self, frames, limit, jpegs): 140 | # pad frames with 0, compatible with both conv and fully connected layers 141 | last_frame = frames[-1] 142 | if jpegs: 143 | frames_padded = frames + [last_frame]*(limit-len(frames)) 144 | else: 145 | padding = numpy.asarray([last_frame * 0.]*(limit-len(frames))) 146 | frames_padded = numpy.concatenate([frames, padding], axis=0) 147 | return frames_padded 148 | 149 | def extract_frames_equally_spaced(self, frames, how_many): 150 | # chunk frames into 'how_many' segments and use the first frame 151 | # from each segment 152 | n_frames = len(frames) 153 | splits = numpy.array_split(range(n_frames), self.K) 154 | idx_taken = [s[0] for s in splits] 155 | sub_frames = frames[idx_taken] 156 | return sub_frames 157 | 158 | def add_end_of_video_frame(self, frames): 159 | if len(frames.shape) == 4: 160 | # feat from conv layer 161 | _,a,b,c = frames.shape 162 | eos = numpy.zeros((1,a,b,c),dtype='float32') - 1. 163 | elif len(frames.shape) == 2: 164 | # feat from full connected layer 165 | _,b = frames.shape 166 | eos = numpy.zeros((1,b),dtype='float32') - 1. 167 | else: 168 | import pdb; pdb.set_trace() 169 | raise NotImplementedError() 170 | frames = numpy.concatenate([frames, eos], axis=0) 171 | return frames 172 | 173 | def get_sub_frames(self, frames, jpegs=False): 174 | # from all frames, take K of them, then add end of video frame 175 | # jpegs: to be compatible with visualizations 176 | if self.OutOf: 177 | raise NotImplementedError('OutOf has to be None') 178 | frames_ = frames[:self.OutOf] 179 | if len(frames_) < self.OutOf: 180 | frames_ = self.pad_frames(frames_, self.OutOf, jpegs) 181 | else: 182 | if len(frames) < self.K: 183 | #frames_ = self.add_end_of_video_frame(frames) 184 | 185 | frames_ = self.pad_frames(frames, self.K, jpegs) 186 | 187 | else: 188 | 189 | frames_ = self.extract_frames_equally_spaced(frames, self.K) 190 | #frames_ = self.add_end_of_video_frame(frames_) 191 | if jpegs: 192 | frames_ = numpy.asarray(frames_) 193 | return frames_ 194 | 195 | def prepare_data_for_blue(self, whichset): 196 | # assume one-to-one mapping between ids and features 197 | feats = [] 198 | feats_mask = [] 199 | if whichset == 'valid': 200 | ids = self.valid_ids 201 | elif whichset == 'test': 202 | ids = self.test_ids 203 | elif whichset == 'train': 204 | ids = self.train_ids 205 | elif whichset == 'blind': 206 | ids = self.btest_ids 207 | 208 | for i, vidID in enumerate(ids): 209 | feat = self.get_video_features(vidID) 210 | feats.append(feat) 211 | feat_mask = self.get_ctx_mask(feat) 212 | feats_mask.append(feat_mask) 213 | # print i, vidID 214 | return feats, feats_mask 215 | 216 | def get_ctx_mask(self, ctx): 217 | if ctx.ndim == 3: 218 | rval = (ctx[:,:,:self.ctx_dim].sum(axis=-1) != 0).astype('int32').astype('float32') 219 | elif ctx.ndim == 2: 220 | rval = (ctx[:,:self.ctx_dim].sum(axis=-1) != 0).astype('int32').astype('float32') 221 | elif ctx.ndim == 5 or ctx.ndim == 4: 222 | assert self.video_feature == 'oxfordnet_conv3_512' 223 | # in case of oxfordnet features 224 | # (m, 26, 512, 14, 14) 225 | rval = (ctx.sum(-1).sum(-1).sum(-1) != 0).astype('int32').astype('float32') 226 | else: 227 | import pdb; pdb.set_trace() 228 | raise 
NotImplementedError() 229 | 230 | return rval 231 | 232 | def load_feats(self,dataset_path): 233 | if self.video_feature=='c3d': 234 | if self.proc=='pca': 235 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d_'+self.proc+'.pkl')) 236 | elif self.proc=='pca512': 237 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d_'+self.proc+'.pkl')) 238 | elif self.proc=='pca_c3d': 239 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d_pca.pkl')) 240 | else: 241 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d.pkl')) 242 | 243 | elif self.video_feature=='c3d_resnet': 244 | if self.proc=='pca': 245 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d_'+self.proc+'.pkl')) 246 | self.FEAT2 = common.load_pkl(os.path.join(dataset_path , 'FEATS_resnet_'+self.proc+'.pkl')) 247 | elif self.proc=='pca512': 248 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d_'+self.proc+'.pkl')) 249 | self.FEAT2 = common.load_pkl(os.path.join(dataset_path ,'FEATS_resnet_'+self.proc+'.pkl')) 250 | elif self.proc=='pca_c3d': 251 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d_pca.pkl')) 252 | self.FEAT2 = common.load_pkl(os.path.join(dataset_path ,'FEATS_resnet_nostd.pkl')) 253 | else: 254 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_c3d.pkl')) 255 | self.FEAT2 = common.load_pkl(os.path.join(dataset_path ,'FEATS_resnet.pkl')) 256 | 257 | elif self.video_feature == 'googlenet': 258 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_googlenet.pkl')) 259 | elif self.video_feature == 'resnet': 260 | if self.proc=='pca': 261 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_resnet_'+self.proc+'.pkl')) 262 | else: 263 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_resnet.pkl')) 264 | elif self.video_feature == 'nasnetalarge': 265 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_nasnetalarge.pkl')) 266 | elif self.video_feature == 'resnet152': 267 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_resnet152.pkl')) 268 | elif self.video_feature == 'pnasnet5large': 269 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_pnasnet5large.pkl')) 270 | elif self.video_feature == 'polynet': 271 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_polynet.pkl')) 272 | elif self.video_feature == 'senet154': 273 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEATS_senet154.pkl')) 274 | else: 275 | self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEATS_'+self.proc+'.pkl')) 276 | return self 277 | 278 | def load_data(self): 279 | 280 | 281 | if self.signature == 'youtube2text' or self.signature == 'trecvid': 282 | print 'loading {} {} features'.format(self.signature, self.video_feature) 283 | if self.data_dir=='': 284 | dataset_path = common.get_rab_dataset_base_path()+'youtube2text/'+self.video_feature 285 | else: 286 | dataset_path = self.data_dir 287 | 288 | # dataset_path = common.get_rab_dataset_base_path() 289 | self.train = common.load_pkl(os.path.join(dataset_path ,'train.pkl')) 290 | self.valid = common.load_pkl(os.path.join(dataset_path ,'valid.pkl')) 291 | self.test = common.load_pkl(os.path.join(dataset_path ,'test.pkl')) 292 | self.CAP = common.load_pkl(os.path.join(dataset_path , 'CAP.pkl')) 293 | 294 | 295 | # self.FEAT = common.load_pkl(os.path.join(dataset_path , 'FEAT_key_vidID_value_features_'+self.proc+'.pkl')) 296 | self.load_feats(dataset_path) 297 | 298 | 
self.train_ids = list(set(self.train[i].split('_')[0] for i in range(len(self.train)))) 299 | self.valid_ids = list(set(self.valid[i].split('_')[0] for i in range(len(self.valid)))) 300 | self.test_ids = list(set(self.test[i].split('_')[0] for i in range(len(self.test)))) 301 | 302 | 303 | elif self.signature == 'lsmdc' or self.signature == 'lsmdc16' or self.signature == 'mvad' or self.signature == 'mpii' or self.signature == 'tacos': 304 | print 'loading {} {} features'.format(self.signature, self.video_feature) 305 | dataset_path = self.data_dir 306 | self.train = common.load_pkl(os.path.join(dataset_path, 'train.pkl')) 307 | self.valid = common.load_pkl(os.path.join(dataset_path, 'valid.pkl')) 308 | self.test = common.load_pkl(os.path.join(dataset_path, 'test.pkl')) 309 | self.CAP = common.load_pkl(os.path.join(dataset_path, 'CAP.pkl')) 310 | 311 | self.train_ids = self.train 312 | self.valid_ids = self.valid 313 | self.test_ids = self.test 314 | 315 | if self.signature == 'lsmdc16': 316 | self.btest = common.load_pkl(os.path.join(dataset_path, 'blindtest.pkl')) 317 | self.btest_ids = self.btest 318 | 319 | 320 | elif self.signature == 'ysvd': 321 | print 'loading ysvd %s features'%self.video_feature 322 | dataset_path = common.get_rab_dataset_base_path()+'ysvd/' 323 | 324 | self.all = common.load_pkl(os.path.join(dataset_path, 'all_vids.pkl')) 325 | self.CAP = common.load_pkl(os.path.join(dataset_path, 'CAP.pkl')) 326 | self.FEAT = common.load_pkl(os.path.join(dataset_path, 'FEAT_key_vidID_value_features.pkl')) 327 | 328 | self.train = self.all[0:500] 329 | self.valid = self.all[501:750] 330 | self.test = self.all[751:1000] 331 | 332 | self.train_ids = self.train 333 | self.valid_ids = self.valid 334 | self.test_ids = self.test 335 | 336 | elif self.signature == 'vtt16' or self.signature == 'vtt17': 337 | print 'loading {} {} features'.format(self.signature, self.video_feature) 338 | 339 | if self.data_dir=='': 340 | dataset_path = common.get_rab_dataset_base_path()+'vtt/'+self.video_feature 341 | else: 342 | dataset_path = self.data_dir 343 | 344 | self.train = common.load_pkl(os.path.join(dataset_path, 'train.pkl')) 345 | self.valid = common.load_pkl(os.path.join(dataset_path, 'valid.pkl')) 346 | self.test = common.load_pkl(os.path.join(dataset_path, 'test.pkl')) 347 | self.CAP = common.load_pkl(os.path.join(dataset_path, 'CAP.pkl')) 348 | 349 | 350 | self.load_feats(dataset_path) 351 | 352 | # Get list of just the videoID, instead of videoID_CapID. 
Use set to ignore duplicates, then recast to list 353 | self.train_ids = list(set(self.train[i].split('_')[0] for i in range(len(self.train)))) 354 | self.valid_ids = list(set(self.valid[i].split('_')[0] for i in range(len(self.valid)))) 355 | self.test_ids = list(set(self.test[i].split('_')[0] for i in range(len(self.test)))) 356 | 357 | self.test_ids = self.test_ids #only for testing 358 | 359 | else: 360 | raise NotImplementedError() 361 | 362 | self.worddict = common.load_pkl(os.path.join(dataset_path ,'worddict.pkl')) 363 | self.word_idict = dict() 364 | # wordict start with index 2 365 | for kk, vv in self.worddict.iteritems(): 366 | self.word_idict[vv] = kk 367 | self.word_idict[0] = '' 368 | self.word_idict[1] = 'UNK' 369 | 370 | if self.video_feature == 'googlenet': 371 | self.ctx_dim = 1024 372 | elif self.video_feature == 'resnet' or self.video_feature == 'resnet152': 373 | if self.proc=='nostd': 374 | self.ctx_dim = 2048 375 | elif self.proc=='pca': 376 | self.ctx_dim=1024 377 | elif self.video_feature == 'nasnetalarge': 378 | self.ctx_dim = 4032 379 | elif self.video_feature == 'pnasnet5large': 380 | self.ctx_dim = 4320 381 | elif self.video_feature == 'polynet': 382 | self.ctx_dim = 2048 383 | elif self.video_feature == 'senet154': 384 | self.ctx_dim = 2048 385 | elif self.video_feature == 'densenet121': 386 | raise NotImplementedError() 387 | elif self.video_feature == 'c3d': 388 | if self.proc=='nostd': 389 | self.ctx_dim = 4101 390 | elif self.proc=='pca': 391 | self.ctx_dim=1024 392 | elif self.video_feature == 'c3d_resnet': 393 | if self.proc=='nostd': 394 | self.ctx_dim = 6149 395 | elif self.proc=='pca': 396 | self.ctx_dim=2048 397 | elif self.proc=='pca512': 398 | self.ctx_dim=1024 399 | elif self.proc=='pca_c3d': 400 | self.ctx_dim=3072 401 | else: 402 | raise NotImplementedError() 403 | 404 | print "ctx_dim: "+str(self.ctx_dim) 405 | self.kf_train = common.generate_minibatch_idx( 406 | len(self.train), self.mb_size_train) 407 | self.kf_valid = common.generate_minibatch_idx( 408 | len(self.valid), self.mb_size_test) 409 | self.kf_test = common.generate_minibatch_idx( 410 | len(self.test), self.mb_size_test) 411 | 412 | if self.dec == 'multi-stdist': 413 | self.skip_vectors = common.load_pkl(os.path.join(dataset_path,'skip_vectors.pkl')) 414 | 415 | 416 | def prepare_data(engine, IDs): 417 | # print "Preparing engine "+engine.dec 418 | seqs = [] 419 | z_seqs = [] 420 | feat_list = [] 421 | 422 | def get_words(vidID, capID): 423 | rval = None 424 | if engine.signature == 'youtube2text' or engine.signature == 'vtt16' or engine.signature == 'vtt17' or engine.signature == 'trecvid': 425 | caps = engine.CAP[vidID] 426 | for cap in caps: 427 | if cap['cap_id'] == capID: 428 | rval = cap['tokenized'].split(' ') 429 | break 430 | elif engine.signature == 'lsmdc' or engine.signature == 'lsmdc16': 431 | cap = engine.CAP[vidID][0] 432 | rval = cap['tokenized'].split() 433 | elif engine.signature == 'mvad' or engine.signature == 'tacos': 434 | cap = engine.CAP[vidID][0] 435 | rval = cap['tokenized'].split() 436 | elif engine.signature == 'mpii': 437 | cap = engine.CAP[vidID][0] 438 | rval = cap['tokenized'].split() 439 | elif engine.signature == 'ysvd': 440 | cap = engine.CAP[vidID][capID] 441 | rval = cap['tokenized'].split() 442 | 443 | assert rval is not None 444 | return rval 445 | 446 | def get_z_seq(): 447 | caps = engine.CAP[vidID] 448 | num_caps = len(caps) 449 | #print vidID+" "+str(num_caps) 450 | 451 | if engine.dec == 'multi-stdist': #'stdist' 452 | 453 | # 
common.dump_pkl(caps,'/media/onina/SSD/projects/skip-thoughts/caps') 454 | 455 | if not engine.cap_distances.has_key(vidID): 456 | 457 | captions = [ caps[0]['caption'] for x in range(num_caps)] #initialized all with the firs caption 458 | for i in range(0,num_caps): 459 | cap = caps[i] 460 | 461 | if engine.signature != 'vtt16' or engine.signature != 'vtt17': 462 | id = int(cap['cap_id']) 463 | 464 | caption = cap['caption'] 465 | # print str(id)+" "+caption 466 | # print len(captions) 467 | # print vidID 468 | udata=caption.decode("utf-8") 469 | 470 | # if id>=num_caps: 471 | # continue 472 | captions[id] = udata.encode("ascii","ignore") 473 | 474 | if captions[id].isspace(): 475 | captions[id] = captions[0] 476 | else: 477 | captions[i] = cap['tokenized'] 478 | # print captions[id] 479 | 480 | # common.dump_pkl(captions,'captions') 481 | # vectors = skipthoughts.encode(engine.st_model,captions) #refactoring this line 482 | vectors = engine.skip_vectors[vidID] 483 | caps_dist = spatial.distance.cdist(vectors, vectors, 'cosine') 484 | engine.cap_distances[vidID] = caps_dist 485 | 486 | caps_dist = engine.cap_distances[vidID] 487 | query_id = int(capID) 488 | js =range(0, query_id) + range(query_id+1,num_caps) 489 | 490 | 491 | if len(js)>0 and engine.signature != 'mvad': 492 | # print js,query_id 493 | most_distant = np.argmax(caps_dist[query_id,js]) 494 | else: 495 | most_distant = 0 496 | 497 | z_words = get_words(vidID, str(most_distant)) 498 | z_seq = [engine.worddict[w] if engine.worddict[w] < engine.n_words else 1 for w in z_words] 499 | 500 | 501 | elif engine.dec == 'generative': 502 | z_words = get_words(vidID, str(1)) 503 | z_words = [word for word in z_words if word not in stopwords.words('english')] 504 | z_seq = [engine.worddict[w] if engine.worddict[w] < engine.n_words else 1 for w in z_words] 505 | 506 | elif engine.dec == 'generative.2': 507 | 508 | z_words = get_words(vidID, str(1)) 509 | z_words = [word for word in z_words if word not in stopwords.words('english')] 510 | # print z_words 511 | 512 | def get_hypernyms(z_words): 513 | 514 | from nltk.corpus import wordnet 515 | new_z_words = [] 516 | for word in z_words: 517 | hypernyms = wordnet.synsets(word) 518 | if len(hypernyms) > 1 : 519 | h = hypernyms[0].hypernyms() 520 | if len(h) >0: 521 | nwords = h[0].lemma_names() 522 | nword = str(nwords[0]) 523 | if '_' not in nword and '-' not in nword and engine.worddict.has_key(nword): 524 | new_z_words.append(nword) 525 | # print word+' replaced with '+ nword 526 | else: 527 | new_z_words.append(word) 528 | else: 529 | new_z_words.append(word) 530 | else: 531 | new_z_words.append(word) 532 | 533 | return new_z_words 534 | 535 | import random 536 | if random.randint(0,1): #only change to hypernyms every .5 percent the time 537 | z_words = get_hypernyms(z_words) 538 | # print z_words 539 | 540 | z_seq = [engine.worddict[w] if engine.worddict[w] < engine.n_words else 1 for w in z_words] 541 | 542 | 543 | # print new_z_words 544 | 545 | return z_seq 546 | 547 | def clean_sequences(seqs,z_seqs,feat_list): 548 | 549 | if engine.dec=="standard": 550 | 551 | lengths = [len(s) for s in seqs] 552 | if engine.maxlen != None: 553 | new_seqs = [] 554 | new_feat_list = [] 555 | new_lengths = [] 556 | new_caps = [] 557 | for l, s, y, c in zip(lengths, seqs, feat_list, IDs): 558 | # sequences that have length >= maxlen will be thrown away 559 | if l < engine.maxlen: 560 | new_seqs.append(s) 561 | new_feat_list.append(y) 562 | new_lengths.append(l) 563 | new_caps.append(c) 564 | lengths = 
new_lengths 565 | feat_list = new_feat_list 566 | seqs = new_seqs 567 | 568 | return seqs,None,feat_list,lengths 569 | 570 | else: 571 | lengths = [len(s) for s in seqs] 572 | z_lengths = [len(s) for s in z_seqs] 573 | if engine.maxlen != None: 574 | new_seqs = [] 575 | new_zseqs = [] 576 | new_feat_list = [] 577 | new_lengths = [] 578 | new_caps = [] 579 | new_zlengths = [] 580 | for l,z_l, s, y, c in zip(lengths,z_lengths, seqs, feat_list, IDs): 581 | # sequences that have length >= maxlen will be thrown away 582 | if l < engine.maxlen and z_l < engine.maxlen : 583 | new_seqs.append(s) 584 | new_zseqs.append(s) 585 | new_feat_list.append(y) 586 | new_lengths.append(l) 587 | new_caps.append(c) 588 | lengths = new_lengths 589 | feat_list = new_feat_list 590 | seqs = new_seqs 591 | z_seqs = new_zseqs 592 | 593 | return seqs,z_seqs,feat_list,lengths 594 | 595 | for i, ID in enumerate(IDs): 596 | #print 'processed %d/%d caps'%(i,len(IDs)) 597 | # print ID 598 | if engine.signature == 'youtube2text' or engine.signature == 'vtt16' or engine.signature == 'vtt17' or engine.signature == 'trecvid': 599 | # load GNet feature 600 | vidID, capID = ID.split('_') 601 | elif engine.signature == 'tacos': 602 | vidID = ID 603 | capID = 0 604 | elif engine.signature == 'lsmdc' or engine.signature == 'lsmdc16': 605 | # t = ID.split('_') 606 | # vidID = '_'.join(t[:-1]) 607 | # capID = t[-1] 608 | vidID = ID 609 | capID = 1 610 | elif engine.signature == 'mvad': 611 | # t = ID.split('_') 612 | # vidID = '_'.join(t[:-1]) 613 | # capID = t[-1] 614 | vidID = ID 615 | capID = 1 616 | elif engine.signature == 'ysvd': 617 | # t = ID.split('_') 618 | # vidID = '_'.join(t[:-1]) 619 | # capID = t[-1] 620 | vidID = ID 621 | capID = 0 622 | elif engine.signature == 'mpii': 623 | vidID = ID 624 | capID = 1 625 | else: 626 | raise NotImplementedError() 627 | 628 | feat = engine.get_video_features(vidID) 629 | 630 | # if len(feat[0])!= engine.ctx_dim: 631 | # print 'dim error on '+vidID 632 | # sys.exit(0) 633 | 634 | feat_list.append(feat) 635 | words = get_words(vidID, capID) 636 | # print words 637 | seqs.append([engine.worddict[w] if engine.worddict[w] < engine.n_words else 1 for w in words]) 638 | 639 | # print engine.dec 640 | if engine.dec != "standard": 641 | z_seq = get_z_seq() 642 | z_seqs.append(z_seq) 643 | 644 | 645 | seqs,z_seqs,feat_list,lengths = clean_sequences(seqs,z_seqs,feat_list) 646 | 647 | if len(lengths) < 1: 648 | return None, None, None, None 649 | 650 | y = numpy.asarray(feat_list) 651 | # print len(y[1,1]) 652 | y_mask = engine.get_ctx_mask(y) 653 | 654 | n_samples = len(seqs) 655 | maxlen = numpy.max(lengths)+1 656 | 657 | x = numpy.zeros((maxlen, n_samples)).astype('int64') 658 | x_mask = numpy.zeros((maxlen, n_samples)).astype('float32') 659 | for idx, s in enumerate(seqs): 660 | x[:lengths[idx],idx] = s 661 | x_mask[:lengths[idx]+1,idx] = 1. 662 | 663 | if engine.dec=="standard": 664 | return x, x_mask, y, y_mask 665 | else: 666 | z = numpy.zeros((maxlen, n_samples)).astype('int64') #This is the other label 667 | z_mask = numpy.zeros((maxlen, n_samples)).astype('float32') 668 | for idx, s in enumerate(z_seqs): 669 | z[:lengths[idx],idx] = s 670 | z_mask[:lengths[idx]+1,idx] = 1. 
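# Illustrative sketch (not part of the pipeline): the layout produced above.
# Column j of the (maxlen, n_samples) matrix holds caption j padded with
# zeros, and its mask stays 1 for one extra step so the model sees an
# explicit end-of-sequence position.
import numpy as np

def pad_sequences(seqs):
    lengths = [len(s) for s in seqs]
    maxlen = max(lengths) + 1
    x = np.zeros((maxlen, len(seqs)), dtype='int64')
    x_mask = np.zeros((maxlen, len(seqs)), dtype='float32')
    for j, s in enumerate(seqs):
        x[:lengths[j], j] = s
        x_mask[:lengths[j] + 1, j] = 1.
    return x, x_mask

# pad_sequences([[4, 9, 7], [5, 2]]) returns a 4x2 matrix whose second column
# is [5, 2, 0, 0] with mask [1, 1, 1, 0].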
671 | 672 | return x, x_mask, y, y_mask,z,z_mask 673 | 674 | 675 | def test_data_engine(): 676 | video_feature = 'googlenet' 677 | out_of = None 678 | maxlen = 100 679 | mb_size_train = 64 680 | mb_size_test = 128 681 | maxlen = 50 682 | n_words = 30000 # 25770 683 | signature = 'youtube2text' #'youtube2text' 684 | engine = Movie2Caption('attention', signature, video_feature, 685 | mb_size_train, mb_size_test, maxlen, 686 | n_words,'standard','nostd', 687 | n_frames=26, 688 | outof=out_of) 689 | i = 0 690 | t = time.time() 691 | for idx in engine.kf_train: 692 | t0 = time.time() 693 | i += 1 694 | ids = [engine.train[index] for index in idx] 695 | x, mask, ctx, ctx_mask = prepare_data(engine, ids) 696 | print 'seen %d minibatches, used time %.2f '%(i,time.time()-t0) 697 | if i == 10: 698 | break 699 | 700 | print 'used time %.2f'%(time.time()-t) 701 | 702 | 703 | if __name__ == '__main__': 704 | test_data_engine() 705 | 706 | 707 | -------------------------------------------------------------------------------- /download.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup as Soup, SoupStrainer 2 | import urllib 3 | import os 4 | import shutil 5 | import json 6 | import argparse 7 | import sys 8 | from multiprocessing import Pool 9 | 10 | 11 | def download_mvad(command): 12 | os.system(command) 13 | 14 | 15 | def video_mvad(args): 16 | dst_dir = args.dst_dir 17 | json_dir = args.json_path 18 | start = int(args.start) 19 | end = int(args.end) 20 | 21 | base_url = 'http://courvila_contact:59db938f6d@lisaweb.iro.umontreal.ca/transfert/lisa/users/courvila' 22 | 23 | with open(os.path.join(json_dir, 'TrainList.txt'), 'r') as f: 24 | train_list = [i.replace('\n', '') for i in f] 25 | with open(os.path.join(json_dir, 'TestList.txt'), 'r') as f: 26 | test_list = [i.replace('\n', '') for i in f] 27 | with open(os.path.join(json_dir, 'ValidList.txt'), 'r') as f: 28 | valid_list = [i.replace('\n', '') for i in f] 29 | 30 | big_list = train_list + test_list + valid_list 31 | big_list = big_list[start:end] 32 | print "There are {} videos to get.".format(len(big_list)) 33 | 34 | if not os.path.exists(dst_dir): 35 | os.mkdir(dst_dir) 36 | 37 | present_vids = os.listdir(dst_dir) 38 | print "There are currently {} videos in dst_dir.".format(len(present_vids)) 39 | 40 | count = 0 41 | 42 | if int(args.filter): 43 | print "FILTER: ON" 44 | filter_dir = os.path.join(dst_dir, '../trash/') 45 | if not os.path.isdir(filter_dir): 46 | os.makedirs(filter_dir) 47 | big_list_names = [i.split('/')[-1] for i in big_list] 48 | vids_to_move = [] 49 | for i in present_vids: 50 | if i not in big_list_names: 51 | vids_to_move.append(i) 52 | 53 | for v in vids_to_move: 54 | print "Move {} -> {}".format(v, filter_dir) 55 | shutil.move(os.path.join(dst_dir, v), os.path.join(filter_dir, v)) 56 | 57 | present_vids = os.listdir(dst_dir) 58 | print "There are now {} videos in dst_dir.".format(len(present_vids)) 59 | 60 | command_list = [] 61 | for i in big_list: 62 | video_name = i.split('/')[-1] 63 | if video_name not in present_vids: 64 | count += 1 65 | dst_path = os.path.join(dst_dir, video_name) 66 | #print video_name 67 | command_list.append('wget -O {} {}'.format(dst_path, base_url + i)) 68 | 69 | threadPool = Pool() 70 | 71 | try: 72 | threadPool.map(download_mvad, command_list) 73 | threadPool.close() 74 | threadPool.join() 75 | except Exception: 76 | threadPool.close() 77 | threadPool.join() 78 | raise Exception 79 | 80 | 81 | def 
video_mpii(video_dir,video_name,video_clip): 82 | 83 | 84 | # url='http://courvila_contact:59db938f6d@lisaweb.iro.umontreal.ca/transfert/lisa/users/courvila/data/lisatmp2/torabi/DVDtranscription/'+video_name+'/video/'+video_clip 85 | url='http://97H5:thoNohyee7@datasets.d2.mpi-inf.mpg.de/movieDescription/protected/avi/'+video_name+'/'+video_clip 86 | 87 | 88 | 89 | u2 = urllib.urlopen(url) 90 | video_dir_dst = os.path.join(video_dir,video_name) 91 | if not os.path.exists(video_dir_dst): 92 | os.mkdir(video_dir_dst) 93 | 94 | f = open(video_dir_dst+'/'+video_clip, 'wb') 95 | meta = u2.info() 96 | file_size = int(meta.getheaders("Content-Length")[0]) 97 | print "Downloading: %s Bytes: %s" % (video_name, file_size) 98 | 99 | file_size_dl = 0 100 | block_sz = 8192 101 | while True: 102 | buffer = u2.read(block_sz) 103 | if not buffer: 104 | break 105 | 106 | file_size_dl += len(buffer) 107 | f.write(buffer) 108 | status = r"%10d [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size) 109 | status = status + chr(8)*(len(status)+1) 110 | print status, 111 | 112 | f.close() 113 | 114 | 115 | def download_video((video_id, video_url)): 116 | dst_dir = args.dst_dir 117 | mp4_dst_path = "{}/{}.mp4".format(dst_dir, video_id) 118 | webm_dst_path = "{}/{}.webm".format(dst_dir, video_id) 119 | mkv_dst_path = "{}/{}.mkv".format(dst_dir, video_id) 120 | 121 | # Don't know the extension beforehand so check all of them 122 | if os.path.isfile(mp4_dst_path) or os.path.isfile(webm_dst_path) or os.path.isfile(mkv_dst_path): 123 | print 'File already downloaded!' 124 | return 125 | 126 | dst_path = "\'{}/{}.%(ext)s\'".format(dst_dir, video_id) 127 | cmd = "youtube-dl " + video_url + " -o {}".format(dst_path) 128 | os.system(cmd) 129 | 130 | 131 | def video_vtt(args): 132 | 133 | def fill_info_list(videoID_to_info_tuple_list): 134 | if args.json_path.endswith('.json'): 135 | # Load user-specified json file 136 | json_file = open(args.json_path) 137 | else: 138 | json_file = open(os.path.join(args.json_path, 'videodatainfo_2017.json')) 139 | 140 | json_str = json_file.read() 141 | json_data = json.loads(json_str) 142 | 143 | start = int(args.start) 144 | end = int(args.end) # Max vids to do 145 | 146 | for vid_meta in json_data['videos'][start:end]: 147 | video_id = vid_meta['video_id'] 148 | video_url = vid_meta['url'] 149 | 150 | videoID_to_info_tuple_list.append((video_id, video_url)) 151 | 152 | dst_dir = args.dst_dir 153 | 154 | videoID_to_info_tuple_list = [] 155 | 156 | fill_info_list(videoID_to_info_tuple_list) 157 | 158 | if not os.path.isdir(dst_dir): 159 | os.mkdir(dst_dir) 160 | 161 | threadPool = Pool(1) # Bottlenecked by network. Change to blank if otherwise 162 | threadPool.map(download_video, videoID_to_info_tuple_list) 163 | threadPool.close() 164 | threadPool.join() 165 | 166 | 167 | def download_vine(command): 168 | print command 169 | os.system(command) 170 | 171 | 172 | def video_trecvid(args): 173 | def fill_info_list(command_list): 174 | f = open(os.path.join(args.json_path, 'vines.url.testingSet')) 175 | 176 | start = int(args.start) 177 | end = int(args.end) # Max vids to do 178 | dst_dir = args.dst_dir 179 | 180 | f = [l for l in f][start:end] 181 | 182 | for line in f: 183 | id, url = line.replace('\n', '').split(' ') 184 | dst_path = os.path.join(dst_dir, id + '.mp4') 185 | if not os.path.isfile(dst_path): 186 | command_list.append('wget -O {} {}'.format(dst_path, url)) 187 | else: 188 | print "File already found! 
{}".format(dst_path) 189 | dst_dir = args.dst_dir 190 | 191 | command_list = [] 192 | 193 | fill_info_list(command_list) 194 | 195 | if not os.path.isdir(dst_dir): 196 | os.mkdir(dst_dir) 197 | 198 | threadPool = Pool() 199 | threadPool.map(download_vine, command_list) 200 | threadPool.close() 201 | threadPool.join() 202 | 203 | 204 | if __name__== '__main__': 205 | arg_parser = argparse.ArgumentParser() 206 | arg_parser.add_argument('dst_dir',help = 'directory where to store videos') 207 | arg_parser.add_argument('json_path', help='directory where json file is stored') 208 | arg_parser.add_argument('start',help = 'start video index') 209 | arg_parser.add_argument('end',help = 'end video index') 210 | arg_parser.add_argument('dataset', help = 'Which dataset to download. ' 211 | 'Options: vtt | trecvid | mvad') 212 | arg_parser.add_argument('--filter', help = 'Special mode which will filter out videos present in dst_dir but not in json file to dst_dir/../trash' 213 | 'Options: 0 or 1', default=0) 214 | args = arg_parser.parse_args() 215 | 216 | if not len(sys.argv) > 1: 217 | print arg_parser.print_help() 218 | sys.exit(0) 219 | 220 | try: 221 | if args.dataset == 'vtt': 222 | video_vtt(args) 223 | elif args.dataset == 'trecvid': 224 | video_trecvid(args) 225 | elif args.dataset == 'mvad': 226 | video_mvad(args) 227 | except KeyboardInterrupt: 228 | print 'Interrupted' 229 | try: 230 | sys.exit(0) 231 | except SystemExit: 232 | os._exit(0) 233 | 234 | -------------------------------------------------------------------------------- /hyperband.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(1,'jobman') 3 | sys.path.insert(1,'coco-caption') 4 | 5 | import os 6 | import random 7 | import copy 8 | import subprocess 9 | import numpy as np 10 | 11 | from math import * 12 | from numpy import argsort 13 | from multiprocessing import Pool 14 | 15 | 16 | def args_as_typed(args): 17 | result = "" 18 | for key in args: 19 | result += key 20 | result += "=" 21 | result += str(args[key]) 22 | result += " " 23 | 24 | return result 25 | 26 | 27 | def get_random_hyperparameter_configuration(): 28 | hp_dict = {'dim_word': int(random.uniform(100, 1000)), 29 | 'dim': int(random.uniform(100, 5000)), 30 | 'encoder_dim': int(random.uniform(100, 900)), 31 | 'cost_type': np.random.choice(['v1', 'v3', 'v4', 'v5', 'v6'])} 32 | 33 | return hp_dict 34 | 35 | 36 | def run_then_return_val_loss(args, num_iters, hyperparameters, gpu_id): 37 | # -7: BLEU1 38 | # -6: BLEU2 39 | # -5: BLEU3 40 | # -4: BLEU4 41 | # -3: Meteor 42 | # -2: Rouge 43 | # -1: Cider 44 | colnum = -4 45 | 46 | # Parse through arguments and replace as necessary 47 | model = args['model'].replace('\'', '') 48 | 49 | # Do save_model_dir and logging for this run 50 | save_model_key = model + '.save_model_dir' 51 | save_model_dir = args[save_model_key].replace('\'', '') 52 | 53 | run_name = model + '_' 54 | run_name += 'HYPERBAND_{}-iters-{}'\ 55 | .format('_'.join(['{}-{}'.format(k, hyperparameters[k]) for k in hyperparameters]), num_iters) 56 | 57 | logging_dir = os.path.join(save_model_dir, 'logs', run_name) 58 | if not os.path.isdir(logging_dir): 59 | os.makedirs(logging_dir) 60 | 61 | save_model_dir = os.path.join(save_model_dir, run_name) 62 | if not os.path.isdir(save_model_dir): 63 | os.makedirs(save_model_dir) 64 | 65 | args[save_model_key] = '\'' + save_model_dir + '\'' 66 | 67 | # Do Epochs 68 | num_epochs_key = model + '.max_epochs' 69 | args[num_epochs_key] = num_iters 70 | 
71 | # Set hyper-parameters 72 | for k in hyperparameters: 73 | args[model + '.' + k] = hyperparameters[k] 74 | 75 | theano_flag = "THEANO_FLAGS=\'device=gpu{}\'".format(gpu_id) 76 | # "/dev/null 2>&1" 77 | command = "{} {} {} > {} 2>&1".format(theano_flag, "python train_model.py", args_as_typed(args), os.path.join(logging_dir, 'record.txt')) 78 | print " ----- \n{}".format(command) 79 | 80 | os.system(command) 81 | 82 | print " %%%%% Job finished! \n{}".format(args_as_typed(args)) 83 | train_loss_path = os.path.join(save_model_dir, 'train_valid_test.txt') 84 | if os.path.isfile(train_loss_path): 85 | train_loss_file = open(train_loss_path) 86 | lines = [i.replace('\n', '').split(' ') for i in train_loss_file] 87 | return float(lines[-1][colnum]) 88 | else: 89 | print "Validation results were not found for this run! validFreq value must be lowered, or the training crashed." 90 | return 0.000 91 | 92 | 93 | def HYPERBAND(args): 94 | """ 95 | Adapted from: 96 | https://people.eecs.berkeley.edu/~kjamieson/hyperband.html 97 | 98 | Performs HYPERBAND across available GPUs using Theano flags. 99 | This version uses BLEU4 as the score. 100 | :param args: 101 | :return: 102 | """ 103 | max_iter = 81 # maximum iterations/epochs per configuration 104 | eta = 3 # defines downsampling rate (default=3) 105 | logeta = lambda x: log(x) / log(eta) 106 | s_max = int(logeta(max_iter)) # number of unique executions of Successive Halving (minus one) 107 | B = (s_max + 1) * max_iter # total number of iterations (without reuse) per execution of Succesive Halving (n,r) 108 | 109 | # Modify this for your needs 110 | models_per_gpu = 2 111 | avail_gpus = [0, 1] 112 | #avail_gpus = range(num_gpu) 113 | 114 | num_gpu = len(avail_gpus) 115 | 116 | #### Begin Finite Horizon Hyperband outlerloop. Repeat indefinetely. 
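# Illustrative sketch (not part of the original file): a standalone view of the
# standard Hyperband bracket schedule for max_iter=81, eta=3, which is what the
# loop below walks through (before its "+3 epochs" and "skip r_i > 60" tweaks).
# Each bracket s starts n random configurations at r epochs and keeps the best
# 1/eta of them every successive-halving round.
from math import ceil, log

def hyperband_schedule(max_iter=81, eta=3):
    s_max = int(round(log(max_iter) / log(eta)))   # rounded to avoid float truncation
    B = (s_max + 1) * max_iter
    schedule = []
    for s in reversed(range(s_max + 1)):
        n = int(ceil(float(B) / max_iter / (s + 1) * eta ** s))  # initial configs
        r = max_iter // (eta ** s)                               # initial epochs
        rounds = [(n // (eta ** i), r * (eta ** i)) for i in range(s + 1)]
        schedule.append((s, rounds))
    return schedule

# hyperband_schedule()[0] == (4, [(81, 1), (27, 3), (9, 9), (3, 27), (1, 81)])
# i.e. the most aggressive bracket tries 81 configurations for 1 epoch first.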
117 | for s in reversed(range(s_max + 1)): 118 | n = int(ceil(B / max_iter / (s + 1) * eta ** s)) # initial number of configurations 119 | r = max_iter * eta ** (-s) # initial number of iterations to run configurations for 120 | 121 | #### Begin Finite Horizon Successive Halving with (n,r) 122 | T = [get_random_hyperparameter_configuration() for _ in range(n)] 123 | 124 | for i in range(s + 1): 125 | val_losses = [] 126 | 127 | # Run each of the n_i configs for r_i iterations and keep best n_i/eta 128 | n_i = n * eta ** (-i) 129 | r_i = int(floor(int(r * eta ** (i)))) 130 | r_i += 3 # Add 3 iterations since only see results after 4-8 epochs 131 | if r_i > 60: 132 | continue 133 | 134 | print ' ---- \nAt s: {}, i: {}, r_i: {}, T is: {}'.format(s, i, r_i, T) 135 | #val_losses = [run_then_return_val_loss(args=copy.deepcopy(args), num_iters=r_i, hyperparameters=t) for t in T] 136 | # First figure out what runs must be done 137 | runs = [(copy.deepcopy(args), r_i, t) for t in T] 138 | 139 | # Now tag runs with a GPU id and add to pending jobs, until no more runs 140 | while len(runs) > 0: 141 | gpuPool = Pool(num_gpu * models_per_gpu) 142 | gpu_subprocess_params_list = [] 143 | 144 | for gpu_id in avail_gpus: 145 | # First build the params by tagging on correct gpu_id 146 | model_params_per_gpu = [runs.pop() + (gpu_id,) 147 | for i in range(models_per_gpu) if len(runs) != 0] 148 | # Use params to build list of async functions on new threads 149 | model_params_per_gpu = [gpuPool.apply_async(run_then_return_val_loss, i) 150 | for i in model_params_per_gpu] 151 | 152 | gpu_subprocess_params_list.extend(model_params_per_gpu) 153 | 154 | # Execute all pending jobs, getting results as jobs finish 155 | val_losses = map(lambda x: x.get(), gpu_subprocess_params_list) 156 | gpuPool.close() 157 | gpuPool.join() 158 | 159 | print 'val_losses was: {}'.format(val_losses) 160 | T = [T[i] for i in argsort(val_losses)[0:int(n_i / eta)]] 161 | 162 | 163 | #### End Finite Horizon Successive Halving with (n,r) 164 | 165 | if __name__ == '__main__': 166 | args = {} 167 | try: 168 | for arg in sys.argv[1:]: 169 | k, v = arg.split('=') 170 | args[k] = v 171 | except: 172 | print 'args must be like a=X b.c=X' 173 | exit(1) 174 | 175 | HYPERBAND(args) 176 | -------------------------------------------------------------------------------- /metrics.py: -------------------------------------------------------------------------------- 1 | import argparse, os, pdb, sys, time 2 | import numpy 3 | import cPickle as pkl 4 | import copy 5 | import glob 6 | import subprocess 7 | from multiprocessing import Process, Queue, Manager 8 | from collections import OrderedDict 9 | 10 | import data_engine 11 | from cocoeval import COCOScorer 12 | import common 13 | 14 | MAXLEN = 50 15 | 16 | 17 | def gen_model(queue, rqueue, pid, model, options, beam, 18 | model_params, shared_params): 19 | import theano 20 | from theano import tensor 21 | from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams 22 | 23 | trng = RandomStreams(1234, use_cuda=False) 24 | # this makes sure it allocates on CPU 25 | use_noise = theano.tensor._shared(numpy.asarray(numpy.float32(0.)), 26 | name='use_noise') 27 | 28 | params = model.init_params(options) 29 | for kk, vv in params.iteritems(): 30 | if kk not in model_params: 31 | raise Exception('%s is not in the archive' % kk) 32 | assert params[kk].shape == model_params[kk].shape 33 | params[kk] = model_params[kk] 34 | if params[kk].shape == (): 35 | # theano.tensor._shared only takes ndarray 36 | # 
thus, converting numpy.float32 to numpy.adarray first 37 | params[kk] = numpy.asarray(params[kk]) 38 | tparams = model.init_tparams(params, force_cpu=True) 39 | mode = theano.compile.get_default_mode().excluding('gpu') 40 | f_init, f_next = model.build_sampler(tparams, options, use_noise, trng, mode=mode) 41 | 42 | curridx = shared_params['id'] 43 | 44 | def _gencap(ctx, ctx_mask): 45 | sample, score, next_state, next_memory = model.gen_sample( 46 | tparams, f_init, f_next, ctx, ctx_mask, 47 | options, 48 | trng=trng, k=k, maxlen=MAXLEN, stochastic=False) 49 | 50 | sidx = numpy.argmin(score) 51 | return sample[sidx], next_state, next_memory 52 | 53 | while True: 54 | req = queue.get() 55 | if req == None: 56 | break 57 | idx, context, context_mask = req[0], req[1], req[2] 58 | if curridx < shared_params['id']: 59 | print 'Updating parameters...' 60 | for kk in shared_params.keys(): 61 | if kk in tparams: 62 | tparams[kk].set_value(shared_params[kk]) 63 | curridx = shared_params['id'] 64 | 65 | print pid, '-', idx 66 | seq, next_state, next_memory = _gencap(context, context_mask) 67 | 68 | rqueue.put((idx, seq, next_state, next_memory)) 69 | 70 | return 71 | 72 | 73 | manager = Manager() 74 | 75 | 76 | def update_params(shared_params, model_params): 77 | for kk, vv in model_params.iteritems(): 78 | shared_params[kk] = vv 79 | shared_params['id'] = shared_params['id'] + 1 80 | 81 | 82 | def build_sample_pairs(samples, vidIDs): 83 | D = OrderedDict() 84 | for sample, vidID in zip(samples, vidIDs): 85 | D[vidID] = [{'image_id': vidID, 'caption': sample}] 86 | return D 87 | 88 | def save_test_samples_youtube2text(samples_test, engine): 89 | 90 | out_dir = 'predictions/' + engine.signature + '_' + engine.video_feature + '_' + engine.model_type + '/' 91 | 92 | if not os.path.exists('predictions/'): 93 | os.mkdir('predictions/') 94 | if not os.path.exists(out_dir): 95 | os.mkdir(out_dir) 96 | 97 | f = open(out_dir + 'samplestest.csv', 'wr') 98 | 99 | gts_test = OrderedDict() 100 | 101 | results = OrderedDict() 102 | results['version'] = "1.2" 103 | D = None 104 | 105 | if engine.signature == 'youtube2text': 106 | import cPickle 107 | d = open(os.path.join(engine.data_dir,'dict_youtube_mapping.pkl'), 'rb') 108 | D = cPickle.load(d) 109 | D = dict((y, x) for x, y in D.iteritems()) 110 | 111 | samples = [] 112 | for vidID in sorted(engine.test_ids): 113 | gts_test[vidID] = engine.CAP[vidID] 114 | # print samples_test[vidID] 115 | sample = OrderedDict() 116 | sample['video_id'] = vidID 117 | sample['caption'] = samples_test[vidID][0]['caption'] 118 | samples.append(sample) 119 | 120 | if engine.signature == 'youtube2text': 121 | f.write(D[vidID] + ',' + samples_test[vidID][0]['caption'] + ',' + gts_test[vidID][0]['caption'] + '\n') 122 | # elif engine.signature == 'trecvid': 123 | # f.write(vidID + ' ' + samples_test[vidID][0]['caption'] + '\n') 124 | else: 125 | f.write(vidID + ',' + samples_test[vidID][0]['caption'] + ',' + gts_test[vidID][0]['caption'] + '\n') 126 | 127 | f.close() 128 | 129 | results['result'] = samples 130 | results['external_data'] = {'used': 'true', 'details': 'Resnet trained on Imagenet.'} 131 | 132 | import json 133 | with open(out_dir + 'prediction.json', 'w') as outfile: 134 | json.dump(results, outfile, indent=4) 135 | 136 | 137 | def save_test_samples_acm_trecvid_y2t(samples_test, engine): # for acm/trecvid/y2t challenge 138 | 139 | out_dir = 'predictions/' + engine.signature + '_' + engine.video_feature + '_' + engine.model_type + '/' 140 | 141 | if not 
os.path.exists('predictions/'): 142 | os.mkdir('predictions/') 143 | if not os.path.exists(out_dir): 144 | os.mkdir(out_dir) 145 | 146 | if engine.signature == 'trecvid': 147 | f = open(out_dir + 'trecvid.txt', 'wr') 148 | else: 149 | f = open(out_dir + 'samplestest.csv', 'wr') 150 | 151 | gts_test = OrderedDict() 152 | 153 | results = OrderedDict() 154 | results['version'] = "1.2" 155 | # D = None 156 | # if engine.signature == 'youtube2text': 157 | # import cPickle 158 | # d = open('data/youtube2text_iccv15/original/dict_youtube_mapping.pkl', 'rb') 159 | # D = cPickle.load(d) 160 | # D = dict((y, x) for x, y in D.iteritems()) 161 | 162 | samples = [] 163 | for vidID in sorted(engine.test_ids): 164 | gts_test[vidID] = engine.CAP[vidID] 165 | # print samples_test[vidID] 166 | sample = OrderedDict() 167 | sample['video_id'] = vidID 168 | sample['caption'] = samples_test[vidID][0]['caption'] 169 | samples.append(sample) 170 | 171 | # if engine.signature == 'youtube2text': 172 | # f.write(D[vidID] + ',' + samples_test[vidID][0]['caption'] + ',' + gts_test[vidID][0]['caption'] + '\n') 173 | # if engine.signature == 'trecvid': 174 | # f.write(vidID + ' ' + samples_test[vidID][0]['caption'] + '\n') 175 | # else: 176 | f.write(vidID + ',' + samples_test[vidID][0]['caption'] + ',' + gts_test[vidID][0]['caption'] + '\n') 177 | 178 | f.close() 179 | 180 | results['result'] = samples 181 | results['external_data'] = {'used': 'true', 'details': 'Resnet trained on Imagenet.'} 182 | 183 | import json 184 | with open(out_dir + 'submission.json', 'w') as outfile: 185 | json.dump(results, outfile, indent=4) 186 | 187 | def save_test_samples_vtt(samples_test, engine): # for acm/trecvid/y2t challenge 188 | 189 | out_dir = 'predictions/' + engine.signature + '_' + engine.video_feature + '_' + engine.model_type + '/' 190 | 191 | if not os.path.exists('predictions/'): 192 | os.mkdir('predictions/') 193 | if not os.path.exists(out_dir): 194 | os.mkdir(out_dir) 195 | 196 | # if engine.signature == 'trecvid': 197 | # f = open(out_dir + 'trecvid.txt', 'wr') 198 | # else: 199 | f = open(out_dir + 'samplestest.csv', 'wr') 200 | 201 | gts_test = OrderedDict() 202 | 203 | results = OrderedDict() 204 | results['version'] = "1.2" 205 | # D = None 206 | # if engine.signature == 'youtube2text': 207 | # import cPickle 208 | # d = open('data/youtube2text_iccv15/original/dict_youtube_mapping.pkl', 'rb') 209 | # D = cPickle.load(d) 210 | # D = dict((y, x) for x, y in D.iteritems()) 211 | 212 | samples = [] 213 | for vidID in sorted(engine.test_ids): 214 | gts_test[vidID] = engine.CAP[vidID] 215 | # print samples_test[vidID] 216 | sample = OrderedDict() 217 | sample['video_id'] = vidID 218 | sample['caption'] = samples_test[vidID][0]['caption'] 219 | samples.append(sample) 220 | 221 | # if engine.signature == 'youtube2text': 222 | # f.write(D[vidID] + ',' + samples_test[vidID][0]['caption'] + ',' + gts_test[vidID][0]['caption'] + '\n') 223 | # if engine.signature == 'trecvid': 224 | # f.write(vidID + ' ' + samples_test[vidID][0]['caption'] + '\n') 225 | # else: 226 | f.write(vidID + ',' + samples_test[vidID][0]['caption'] + ',' + gts_test[vidID][0]['caption'] + '\n') 227 | 228 | f.close() 229 | 230 | results['result'] = samples 231 | results['external_data'] = {'used': 'true', 'details': 'Resnet trained on Imagenet.'} 232 | 233 | import json 234 | with open(out_dir + 'submission.json', 'w') as outfile: 235 | json.dump(results, outfile, indent=4) 236 | 237 | def save_test_samples_lsmdc(samples_test, engine): # for lsmdc16 
challenge 238 | 239 | out_dir = 'predictions/' + engine.signature + '_' + engine.video_feature + '_' + engine.model_type + '/' 240 | 241 | if not os.path.exists('predictions/'): 242 | os.mkdir('predictions/') 243 | if not os.path.exists(out_dir): 244 | os.mkdir(out_dir) 245 | 246 | f = open(out_dir + 'samplestest.csv', 'wr') 247 | 248 | gts_test = OrderedDict() 249 | 250 | results = OrderedDict() 251 | results['version'] = "1" 252 | 253 | dict_path = os.path.join('/PATH/TO/lsmdc16/pkls16', 'dict_vids_mapping.pkl') 254 | vids_names = common.load_pkl(dict_path) 255 | # D= None 256 | # if engine.signature=='youtube2text': 257 | # import cPickle 258 | # d= open('data/youtube2text_iccv15/original/dict_youtube_mapping.pkl','rb') 259 | # D = cPickle.load(d) 260 | # D = dict((y,x) for x,y in D.iteritems()) 261 | 262 | samples = [] 263 | # for vidID in engine.test_ids: 264 | for vidID in samples_test.keys(): 265 | gts_test[vidID] = engine.CAP[vidID] 266 | # print samples_test[vidID] 267 | sample = OrderedDict() 268 | sample['video_id'] = vids_names[vidID] 269 | # sample['ovid_id']=vidID 270 | sample['caption'] = samples_test[vidID][0]['caption'] 271 | # sample['ocaption']=gts_test[vidID][0]['caption'] 272 | samples.append(sample) 273 | 274 | # if engine.signature=='youtube2text': 275 | # f.write(D[vidID]+','+ samples_test[vidID][0]['caption']+','+gts_test[vidID][0]['caption']+'\n') 276 | # else: 277 | f.write(vidID + ',' + samples_test[vidID][0]['caption'] + ',' + gts_test[vidID][0]['caption'] + '\n') 278 | 279 | f.close() 280 | 281 | # results['result']= samples 282 | # results['external_data']={'used': 'true','details':'First fully connected of C3D pretrained on Sports1M'} 283 | 284 | samples = sorted(samples, key=lambda x: x['video_id']) 285 | 286 | import json 287 | with open(out_dir + 'publictest_burka_results.json', 'w') as outfile: 288 | json.dump(samples, outfile, indent=4) 289 | 290 | 291 | def save_blind_test_samples(samples_test, engine): # for lsmdc16 challenge 292 | 293 | out_dir = 'submissions/' + engine.signature + '_' + engine.video_feature + '_' + engine.model_type + '/' 294 | 295 | if not os.path.exists('submissions/'): 296 | os.mkdir('submissions/') 297 | if not os.path.exists(out_dir): 298 | os.mkdir(out_dir) 299 | 300 | # f=open(out_dir+'samplesbtest.csv','wr') 301 | 302 | gts_test = OrderedDict() 303 | 304 | results = OrderedDict() 305 | results['version'] = "1" 306 | 307 | dict_path = os.path.join('data/lsmdc16/', 'dict_bvids_mapping.pkl') 308 | vids_names = common.load_pkl(dict_path) 309 | 310 | samples = [] 311 | # for vidID in engine.test_ids: 312 | for vidID in samples_test.keys(): 313 | # gts_test[vidID] = engine.CAP[vidID] 314 | sample = OrderedDict() 315 | sample['video_id'] = vids_names[vidID] 316 | sample['caption'] = samples_test[vidID][0]['caption'] 317 | samples.append(sample) 318 | # f.write(vidID+','+ samples_test[vidID][0]['caption']+','+gts_test[vidID][0]['caption']+'\n') 319 | 320 | # f.close() 321 | 322 | samples = sorted(samples, key=lambda x: x['video_id']) 323 | 324 | import json 325 | with open(out_dir + 'blindtest_burka_results.json', 'w') as outfile: 326 | json.dump(samples, outfile, indent=4) 327 | 328 | 329 | def score_with_cocoeval(samples_valid, samples_test, engine): 330 | scorer = COCOScorer() 331 | if samples_valid: 332 | gts_valid = OrderedDict() 333 | for vidID in engine.valid_ids: 334 | # TODO(WG) Check for sampling type 335 | gts_valid[vidID] = engine.CAP[vidID] 336 | valid_score = scorer.score(gts_valid, samples_valid, engine.valid_ids) 337 
| else: 338 | valid_score = None 339 | 340 | if samples_test: 341 | gts_test = OrderedDict() 342 | for vidID in engine.test_ids: 343 | gts_test[vidID] = engine.CAP[vidID] 344 | test_score = scorer.score(gts_test, samples_test, engine.test_ids) 345 | 346 | else: 347 | test_score = None 348 | return valid_score, test_score 349 | 350 | 351 | def generate_sample_gpu_single_process( 352 | model_type, model_archive, options, engine, model, 353 | f_init, f_next, 354 | save_dir='./samples', beam=5, 355 | whichset='both'): 356 | def _seqs2words(caps): 357 | capsw = [] 358 | for cc in caps: 359 | ww = [] 360 | for w in cc: 361 | if w == 0: 362 | break 363 | ww.append(engine.word_idict[1] 364 | if w > len(engine.word_idict) else engine.word_idict[w]) 365 | capsw.append(' '.join(ww)) 366 | return capsw 367 | 368 | def sample(whichset): 369 | samples = [] 370 | ctxs, ctx_masks = engine.prepare_data_for_blue(whichset) 371 | # i = 0 372 | for i, ctx, ctx_mask in zip(range(len(ctxs)), ctxs, ctx_masks): 373 | print 'sampling %d/%d' % (i, len(ctxs)) 374 | sample, score, _, _ = model.gen_sample( 375 | None, f_init, f_next, ctx, ctx_mask, options, 376 | None, beam, maxlen=MAXLEN) 377 | 378 | sidx = numpy.argmin(score) 379 | sample = sample[sidx] 380 | # print _seqs2words([sample])[0] 381 | samples.append(sample) 382 | 383 | # if i>10: # hack to test it is working OK 384 | # samples = _seqs2words(samples) 385 | # return samples 386 | # i+=1 387 | 388 | # print "finished sampling" 389 | samples = _seqs2words(samples) 390 | # print 'finished _seq2words' 391 | return samples 392 | 393 | samples_valid = None 394 | samples_test = None 395 | samples_btest = None 396 | 397 | if whichset == 'valid' or whichset == 'both': 398 | print 'Valid Set...', 399 | samples_valid = sample('valid') 400 | with open(save_dir + '/valid_samples.txt', 'w') as f: 401 | print >> f, '\n'.join(samples_valid) 402 | if whichset == 'test' or whichset == 'both': 403 | print 'Test Set...', 404 | samples_test = sample('test') 405 | with open(save_dir + '/test_samples.txt', 'w') as f: 406 | print >> f, '\n'.join(samples_test) 407 | if whichset == 'blind': 408 | print 'Blind Test Set...', 409 | samples_btest = sample('blind') 410 | with open(save_dir + '/blind_test_samples.txt', 'w') as f: 411 | print >> f, '\n'.join(samples_btest) 412 | 413 | if samples_valid != None: 414 | samples_valid = build_sample_pairs(samples_valid, engine.valid_ids) 415 | if samples_test != None: 416 | samples_test = build_sample_pairs(samples_test, engine.test_ids) 417 | if samples_btest != None: 418 | # print 'build sample pairs' 419 | samples_btest = build_sample_pairs(samples_btest, engine.btest_ids) 420 | 421 | return samples_valid, samples_test, samples_btest 422 | 423 | 424 | def compute_score( 425 | model_type, model_archive, options, engine, save_dir, 426 | beam, n_process, 427 | whichset='both', on_cpu=True, 428 | processes=None, queue=None, rqueue=None, shared_params=None, 429 | one_time=False, metric=None, 430 | f_init=None, f_next=None, model=None): 431 | assert metric != 'perplexity' 432 | if on_cpu: 433 | raise NotImplementedError() 434 | else: 435 | assert model is not None 436 | samples_valid, samples_test, samples_btest = generate_sample_gpu_single_process( 437 | model_type, model_archive, options, 438 | engine, model, f_init, f_next, 439 | save_dir=save_dir, 440 | beam=beam, 441 | whichset=whichset) 442 | 443 | valid_score, test_score = score_with_cocoeval(samples_valid, samples_test, engine) 444 | 445 | scores_final = {} 446 | scores_final['valid'] = 
valid_score 447 | scores_final['test'] = test_score 448 | 449 | if one_time: 450 | return scores_final 451 | 452 | return scores_final, processes, queue, rqueue, shared_params 453 | 454 | 455 | def save_samples( 456 | model_type, model_archive, options, engine, save_dir, 457 | beam, n_process, 458 | whichset='both', on_cpu=True, 459 | processes=None, queue=None, rqueue=None, shared_params=None, 460 | one_time=False, metric=None, 461 | f_init=None, f_next=None, model=None): 462 | assert metric != 'perplexity' 463 | if on_cpu: 464 | raise NotImplementedError() 465 | else: 466 | assert model is not None 467 | samples_valid, samples_test, samples_btest = generate_sample_gpu_single_process( 468 | model_type, model_archive, options, 469 | engine, model, f_init, f_next, 470 | save_dir=save_dir, 471 | beam=beam, 472 | whichset=whichset) 473 | print samples_test 474 | 475 | if whichset == 'test': 476 | if engine.signature == 'trecvid': 477 | save_test_samples_acm_trecvid_y2t(samples_test, engine) 478 | elif engine.signature == 'youtube2text': 479 | save_test_samples_youtube2text(samples_test, engine) 480 | elif engine.signature == 'vtt': 481 | save_test_samples_vtt(samples_test, engine) 482 | elif engine.signature == 'lsmdc16': 483 | save_test_samples_lsmdc(samples_test, engine) 484 | else: 485 | save_test_samples_acm_trecvid_y2t(samples_test, engine) 486 | elif whichset == 'blind': 487 | save_blind_test_samples(samples_btest, engine) 488 | 489 | 490 | def test_cocoeval(): 491 | engine = data_engine.Movie2Caption('attention', 'lsmdc16', 492 | video_feature='googlenet', 493 | mb_size_train=20, 494 | mb_size_test=20, 495 | maxlen=50, n_words=20000, 496 | dec='standard', proc='nostd', 497 | n_frames=20, outof=None) 498 | # samples_valid = common.load_txt_file('./test/valid_samples.txt') 499 | # samples_test = common.load_txt_file('./test/test_samples.txt') 500 | samples_valid = common.load_txt_file('/PATH/TO/valid_samples.txt') 501 | samples_test = common.load_txt_file('/PATH/TO/test_samples.txt') 502 | samples_valid = [sample.strip() for sample in samples_valid] 503 | samples_test = [sample.strip() for sample in samples_test] 504 | 505 | samples_valid = build_sample_pairs(samples_valid, engine.valid_ids) 506 | samples_test = build_sample_pairs(samples_test, engine.test_ids) 507 | valid_score, test_score = score_with_cocoeval(samples_valid, samples_test, engine) 508 | print valid_score, test_score 509 | 510 | 511 | def test_cocoeval_vtt(): 512 | engine = data_engine.Movie2Caption('attention', 'lsmdc16', 513 | video_feature='googlenet', 514 | mb_size_train=20, 515 | mb_size_test=20, 516 | maxlen=50, n_words=20000, 517 | dec='standard', proc='nostd', 518 | n_frames=20, outof=None, 519 | data_dir='/PATH/TO/data/lsmdc16/pkls/', 520 | feats_dir='/PATH/TO/lsmdc16/features_googlenet') 521 | samples_valid = common.load_txt_file( 522 | '/PATH/TO/valid_samples.txt') 523 | samples_test = common.load_txt_file( 524 | '/PATH/TO/test_samples.txt') 525 | samples_valid = [sample.strip() for sample in samples_valid] 526 | samples_test = [sample.strip() for sample in samples_test] 527 | 528 | samples_valid = build_sample_pairs(samples_valid, engine.valid_ids) 529 | samples_test = build_sample_pairs(samples_test, engine.test_ids) 530 | valid_score, test_score = score_with_cocoeval(samples_valid, samples_test, engine) 531 | print valid_score, test_score 532 | 533 | 534 | if __name__ == '__main__': 535 | test_cocoeval_vtt() 536 | --------------------------------------------------------------------------------
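Note: the prediction.json and submission.json files written by save_test_samples_youtube2text, save_test_samples_acm_trecvid_y2t, and save_test_samples_vtt in metrics.py above all share the structure sketched below (shown as the equivalent Python dict; the video IDs and captions are illustrative placeholders, not values from the repository):

    {
        'version': '1.2',
        'result': [
            {'video_id': 'vid1', 'caption': 'a man is playing a guitar'},
            {'video_id': 'vid2', 'caption': 'a woman is slicing an onion'},
        ],
        'external_data': {'used': 'true', 'details': 'Resnet trained on Imagenet.'}
    }

The LSMDC writers (save_test_samples_lsmdc and save_blind_test_samples) instead dump only the sorted 'result' list itself, without the 'version' and 'external_data' wrapper.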
/py2-vid-desc_requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | ca-certificates=2018.03.07=0 5 | certifi=2018.4.16=py27_0 6 | intel-openmp=2018.0.3=0 7 | libedit=3.1.20170329=h6b74fdf_2 8 | libffi=3.2.1=hd88cf55_4 9 | libgcc-ng=7.2.0=hdf63c60_3 10 | libgfortran-ng=7.2.0=hdf63c60_3 11 | libopenblas=0.2.20=h9ac9557_7 12 | libstdcxx-ng=7.2.0=hdf63c60_3 13 | mkl=2018.0.3=1 14 | mkl_fft=1.0.1=py27h3010b51_0 15 | mkl_random=1.0.1=py27h629b387_0 16 | ncurses=6.1=hf484d3e_0 17 | nltk=3.3.0=py27_0 18 | openssl=1.0.2o=h20670df_0 19 | pip=10.0.1=py27_0 20 | python=2.7.15=h1571d57_0 21 | readline=7.0=ha6073c6_4 22 | scikit-learn=0.19.1=py27h445a80a_0 23 | setuptools=39.2.0=py27_0 24 | six=1.11.0=py27h5f960f1_1 25 | sqlite=3.23.1=he433501_0 26 | tk=8.6.7=hc745277_3 27 | wheel=0.31.1=py27_0 28 | zlib=1.2.11=ha838bed_2 29 | -------------------------------------------------------------------------------- /py2_pip_freeze.txt: -------------------------------------------------------------------------------- 1 | backports.functools-lru-cache==1.5 2 | beautifulsoup4==4.6.0 3 | certifi==2018.4.16 4 | cloudpickle==0.5.3 5 | cycler==0.10.0 6 | Cython==0.28.3 7 | dask==0.17.5 8 | decorator==4.3.0 9 | kiwisolver==1.0.1 10 | matplotlib==2.2.2 11 | mkl-fft==1.0.0 12 | mkl-random==1.0.1 13 | networkx==2.1 14 | nltk==3.3 15 | numpy==1.14.4 16 | Pillow==5.1.0 17 | protobuf==3.5.2.post1 18 | pyparsing==2.2.0 19 | python-dateutil==2.7.3 20 | pytz==2018.4 21 | PyWavelets==0.5.2 22 | scikit-image==0.14.0 23 | scikit-learn==0.19.1 24 | scipy==1.1.0 25 | six==1.11.0 26 | subprocess32==3.5.2 27 | Theano==0.8.1 28 | toolz==0.9.0 29 | -------------------------------------------------------------------------------- /py3-vid-desc_requirements.txt: -------------------------------------------------------------------------------- 1 | # This file may be used to create an environment using: 2 | # $ conda create --name --file 3 | # platform: linux-64 4 | blas=1.0=mkl 5 | ca-certificates=2018.03.07=0 6 | certifi=2018.4.16=py36_0 7 | cffi=1.11.5=py36h9745a5d_0 8 | coverage=4.5.1=py36h14c3975_0 9 | cudatoolkit=8.0=3 10 | cudnn=7.0.5=cuda8.0_0 11 | freetype=2.8=hab7d2ae_1 12 | intel-openmp=2018.0.0=8 13 | java-jre=8.45.14=0 14 | jpeg=9b=h024ee3a_2 15 | libedit=3.1.20170329=h6b74fdf_2 16 | libffi=3.2.1=hd88cf55_4 17 | libgcc-ng=7.2.0=hdf63c60_3 18 | libgfortran-ng=7.2.0=hdf63c60_3 19 | libpng=1.6.34=hb9fc6fc_0 20 | libstdcxx-ng=7.2.0=hdf63c60_3 21 | libtiff=4.0.9=he85c1e1_1 22 | mkl=2018.0.2=1 23 | mkl_fft=1.0.1=py36h3010b51_0 24 | mkl_random=1.0.1=py36h629b387_0 25 | nccl=1.3.4=cuda8.0_1 26 | ncurses=6.1=hf484d3e_0 27 | ninja=1.8.2=py36h6bb024c_1 28 | numpy=1.14.3=py36hcd700cb_1 29 | numpy-base=1.14.3=py36h9be14a7_1 30 | olefile=0.45.1=py36_0 31 | openssl=1.0.2o=h20670df_0 32 | pep8=1.7.1=py36_0 33 | pillow=5.1.0=py36h3deb7b8_0 34 | pip=10.0.1=py36_0 35 | pycparser=2.18=py36hf9f622e_1 36 | python=3.6.5=hc3d631a_2 37 | pytorch=0.4.0=py36_cuda8.0.61_cudnn7.1.2_1 38 | pyyaml=3.12=py36hafb9ca4_1 39 | readline=7.0=ha6073c6_4 40 | setuptools=39.1.0=py36_0 41 | six=1.11.0=py36h372c433_1 42 | sqlite=3.23.1=he433501_0 43 | tk=8.6.7=hc745277_3 44 | torchvision=0.2.1=py36_1 45 | wheel=0.31.1=py36_0 46 | xz=5.2.4=h14c3975_4 47 | yaml=0.1.7=had09818_2 48 | zlib=1.2.11=ha838bed_2 49 | -------------------------------------------------------------------------------- /py3_pip_freeze.txt: 
-------------------------------------------------------------------------------- 1 | backcall==0.1.0 2 | certifi==2018.4.16 3 | cffi==1.11.5 4 | chardet==3.0.4 5 | coverage==4.5.1 6 | Cython==0.28.2 7 | decorator==4.3.0 8 | easydict==1.7 9 | idna==2.6 10 | ipaddress==1.0.22 11 | ipython==6.4.0 12 | ipython-genutils==0.2.0 13 | jedi==0.12.0 14 | mkl-fft==1.0.0 15 | mkl-random==1.0.1 16 | munch==2.3.2 17 | numpy==1.14.3 18 | olefile==0.45.1 19 | parso==0.2.1 20 | pep8==1.7.1 21 | pexpect==4.6.0 22 | pickleshare==0.7.4 23 | Pillow==5.1.0 24 | pretrainedmodels==0.7.0 25 | prompt-toolkit==1.0.15 26 | protobuf==3.5.2.post1 27 | ptyprocess==0.5.2 28 | pycparser==2.18 29 | Pygments==2.2.0 30 | pyre==0.3.2 31 | PyYAML==3.12 32 | pyzmq==17.0.0 33 | requests==2.18.4 34 | simplegeneric==0.8.1 35 | six==1.11.0 36 | skipthoughts==0.0.0 37 | torch==0.4.0 38 | torchtext==0.2.3 39 | torchvision==0.2.1 40 | tqdm==4.23.4 41 | traitlets==4.3.2 42 | urllib3==1.22 43 | wcwidth==0.1.7 44 | xmlrunner==1.7.7 45 | -------------------------------------------------------------------------------- /train_model.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | sys.path.insert(1,'jobman') 4 | sys.path.insert(1,'coco-caption') 5 | 6 | import numpy 7 | import os, sys, socket 8 | import time 9 | import logging 10 | from config import config 11 | from jobman import DD, expand 12 | import common 13 | import numpy as np 14 | 15 | import model_attention 16 | import model_lstmdd 17 | import model_mtle 18 | 19 | logging.basicConfig() 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | def set_config(conf, args, add_new_key=False): 24 | # add_new_key: if conf does not contain the key, creates it 25 | for key in args: 26 | if key != 'jobman': 27 | v = args[key] 28 | if isinstance(v, DD): 29 | set_config(conf[key], v) 30 | else: 31 | if conf.has_key(key): 32 | conf[key] = convert_from_string(v) 33 | elif add_new_key: 34 | # create a new key in conf 35 | conf[key] = convert_from_string(v) 36 | else: 37 | raise KeyError(key) 38 | 39 | def convert_from_string(x): 40 | """ 41 | Convert a string that may represent a Python item to its proper data type. 42 | It consists in running `eval` on x, and if an error occurs, returning the 43 | string itself. 
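For example, '1e-4' becomes the float 0.0001 and '[300, 512]' becomes a list, while a value such as 'adam' that eval cannot resolve is returned unchanged as a string.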
44 | """ 45 | try: 46 | return eval(x, {}, {}) 47 | except Exception: 48 | return x 49 | 50 | def train_from_scratch(config, state, channel): 51 | # Model options 52 | save_model_dir = config[config.model].save_model_dir 53 | 54 | np.random.seed(int(config.random_seed)) 55 | 56 | if save_model_dir == 'current': 57 | config[config.model].save_model_dir = './' 58 | save_model_dir = './' 59 | # to facilitate the use of cluster for multiple jobs 60 | save_path = './model_config.pkl' 61 | else: 62 | # run locally, save locally 63 | save_path = os.path.join(save_model_dir ,'model_config.pkl') 64 | print 'current save dir ',save_model_dir 65 | common.create_dir_if_not_exist(save_model_dir) 66 | 67 | reload_ = config[config.model].reload_ 68 | if reload_: 69 | print 'preparing reload' 70 | save_dir_backup = config[config.model].save_model_dir 71 | from_dir_backup = config[config.model].from_dir 72 | # never start retrain in the same folder 73 | assert save_dir_backup != from_dir_backup 74 | print 'save dir ',save_dir_backup 75 | print 'from_dir ',from_dir_backup 76 | print 'setting current model config with the old one' 77 | 78 | 79 | if config[config.model].mode=='train': 80 | model_config_old = common.load_pkl(from_dir_backup+'/model_config.pkl') 81 | set_config(config, model_config_old) 82 | config[config.model].save_model_dir = save_dir_backup 83 | config[config.model].from_dir = from_dir_backup 84 | config[config.model].reload_ = True 85 | if config.erase_history: 86 | print 'erasing everything in ',save_model_dir 87 | os.system('rm %s/*'%save_model_dir) 88 | 89 | 90 | 91 | # for stdout file logging 92 | #sys.stdout = Unbuffered(sys.stdout, state.save_model_path + 'stdout.log') 93 | print 'saving model config into %s'%save_path 94 | common.dump_pkl(config, save_path) 95 | # Also copy back from config into state. 96 | for key in config: 97 | setattr(state, key, config[key]) 98 | 99 | 100 | model_type = config.model 101 | print 'Model Type: %s'%model_type 102 | print 'Host: %s' % socket.gethostname() 103 | print 'Command: %s' % ' '.join(sys.argv) 104 | 105 | if config.model == 'attention': 106 | model_attention.train_from_scratch(state, channel) 107 | elif config.model == 'lstmdd': 108 | model_lstmdd.train_from_scratch(state, channel) 109 | elif config.model == 'mtle': 110 | model_mtle.train_from_scratch(state, channel) 111 | else: 112 | raise NotImplementedError() 113 | 114 | 115 | def main(state, channel=None): 116 | set_config(config, state) 117 | train_from_scratch(config, state, channel) 118 | 119 | 120 | if __name__ == '__main__': 121 | args = {} 122 | try: 123 | for arg in sys.argv[1:]: 124 | k, v = arg.split('=') 125 | args[k] = v 126 | except: 127 | print 'args must be like a=X b.c=X' 128 | exit(1) 129 | 130 | state = expand(args) 131 | 132 | try: 133 | main(state) 134 | except Exception as e: 135 | logger.exception(e) 136 | --------------------------------------------------------------------------------