├── .gitignore ├── Crawler ├── Kinetics │ ├── LICENSE │ ├── README.md │ ├── data │ │ ├── kinetics_train.csv │ │ └── kinetics_val.csv │ ├── download.py │ ├── environment.yml │ └── process_download_report.py ├── README ├── activity_net.v1-3.min.json ├── command_list.txt ├── fetch_activitynet_videos.sh └── run_crosscheck.py ├── Evaluation ├── README.md ├── check_lmdb.py ├── eval_classification.py ├── eval_detection.py ├── eval_kinetics.py ├── eval_proposal.py ├── frame_prediction.py ├── frame_prediction_BG.py ├── get_classification_performance.py ├── get_detection_performance.py ├── get_kinetics_performance.py ├── get_proposal_performance.py ├── hog.xml ├── localization.py ├── optical_flow.py ├── taxonomy.py ├── test_data_meta_info.json ├── testing.py ├── training.py ├── training_data_meta_info.json ├── training_model_hog.py ├── training_model_m2.py ├── training_model_m3.py ├── training_model_m4.py ├── training_model_svm.py ├── utils.py ├── val_data_meta_info.json └── vid_probs.csv ├── LICENSE ├── Notebooks ├── ActivityNet-Release1.2-Classification.ipynb ├── ActivityNet-Release1.2-Detection.ipynb ├── ActivityNet-Release1.3.Proposals.ipynb └── ActivityNet-Temporal-Proposals.ipynb ├── README.md └── caffe_models ├── c3d_fc_net.prototxt ├── c3d_fc_net_solver.prototxt ├── deploy_OF_alexnet_mirror.prototxt ├── deploy_c3d_fc_net.prototxt ├── deploy_hog_fc_net.prototxt ├── frames_alexMir_step_80k.log ├── hog_fc_net.prototxt ├── hog_fc_net_solver.prototxt ├── mean_c3d.binaryproto ├── mean_c3d_10k.binaryproto ├── mean_c3d_4k.binaryproto ├── mean_hog_4k.binaryproto ├── optical_flow_alexnet_mirror.prototxt ├── optical_flow_alexnet_mirror_solver.prototxt └── snapshots ├── c3d_10k_2500_adam_e4 ├── c3d_fc_net_snap_iter_400000.caffemodel ├── c3d_fc_net_snap_iter_400000.solverstate └── c3d_train_10k_adam_1e-4.log ├── c3d_4k_1k ├── c3d_fc_net_snap_iter_400000.caffemodel ├── c3d_fc_net_snap_iter_400000.solverstate └── c3d_train.log ├── c3d_4k_1k_adam_e4 ├── c3d_fc_net_snap_iter_200000.caffemodel ├── c3d_fc_net_snap_iter_200000.solverstate └── c3d_train_adam_1e-4.log └── c3d_train_adam_1e-3.log /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.mp4 3 | *.jpeg 4 | *.pkl 5 | trained_models/ 6 | Crawler/videos/ 7 | Evaluation/samples_*/ 8 | Evaluation/data/ 9 | Evaluation/dumps/ 10 | Evaluation/submission_* 11 | Evaluation/sub_t* 12 | Evaluation/mbh_subs/ 13 | Evaluation/RF/ 14 | Evaluation/samples_*/ 15 | Evaluation/val_samples_* 16 | Evaluation/tr_samples_* 17 | my_subs/ 18 | caffe_models/snapshots/hog_4k_1k/ 19 | -------------------------------------------------------------------------------- /Crawler/Kinetics/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Fabian Caba H. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Crawler/Kinetics/README.md: -------------------------------------------------------------------------------- 1 | # Kinetics - Downloader 2 | 3 | ## Usage 4 | First, clone this repository and make sure that all the submodules are also cloned properly. 5 | ``` 6 | git clone https://github.com/activitynet/ActivityNet.git 7 | cd ActivityNet/Crawler/Kinetics 8 | ``` 9 | 10 | Next, setup your environment 11 | ``` 12 | conda env create -f environment.yml 13 | source activate kinetics 14 | pip install --upgrade youtube-dl 15 | ``` 16 | 17 | Finally, download a dataset split by calling: 18 | ``` 19 | mkdir ; python download.py {dataset_split}.csv 20 | ``` 21 | -------------------------------------------------------------------------------- /Crawler/Kinetics/download.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import fnmatch 3 | import glob 4 | import json 5 | import os 6 | import shutil 7 | import subprocess 8 | import uuid 9 | 10 | from joblib import delayed 11 | from joblib import Parallel 12 | import pandas as pd 13 | 14 | 15 | def create_video_folders(dataset, output_dir, tmp_dir): 16 | """Creates a directory for each label name in the dataset.""" 17 | if not os.path.exists(output_dir): 18 | os.makedirs(output_dir) 19 | if not os.path.exists(tmp_dir): 20 | os.makedirs(tmp_dir) 21 | 22 | label_to_dir = {} 23 | for label_name in dataset['label-name'].unique(): 24 | this_dir = os.path.join(output_dir, label_name) 25 | if not os.path.exists(this_dir): 26 | os.makedirs(this_dir) 27 | label_to_dir[label_name] = this_dir 28 | return label_to_dir 29 | 30 | 31 | def construct_video_filename(row, label_to_dir, trim_format='%06d'): 32 | """Given a dataset row, this function constructs the 33 | output filename for a given video. 34 | """ 35 | basename = '%s_%s_%s.mp4' % (row['video-id'], 36 | trim_format % row['start-time'], 37 | trim_format % row['end-time']) 38 | output_filename = os.path.join(label_to_dir[row['label-name']], basename) 39 | return output_filename 40 | 41 | 42 | def download_clip(video_identifier, output_filename, 43 | start_time, end_time, 44 | tmp_dir='/tmp/kinetics', 45 | num_attempts=5, 46 | url_base='https://www.youtube.com/watch?v='): 47 | """Download a video from youtube if exists and is not blocked. 48 | 49 | arguments: 50 | --------- 51 | video_identifier: str 52 | Unique YouTube video identifier (11 characters) 53 | output_filename: str 54 | File path where the video will be stored. 55 | start_time: float 56 | Indicates the begining time in seconds from where the video 57 | will be trimmed. 58 | end_time: float 59 | Indicates the ending time in seconds of the trimmed video. 60 | """ 61 | # Defensive argument checking. 
62 | assert isinstance(video_identifier, str), 'video_identifier must be string' 63 | assert isinstance(output_filename, str), 'output_filename must be string' 64 | assert len(video_identifier) == 11, 'video_identifier must have length 11' 65 | 66 | status = False 67 | # Construct command line for getting the direct video link. 68 | tmp_filename = os.path.join(tmp_dir, 69 | '%s.%%(ext)s' % uuid.uuid4()) 70 | command = ['youtube-dl', 71 | '--quiet', '--no-warnings', 72 | '-f', 'mp4', 73 | '-o', '"%s"' % tmp_filename, 74 | '"%s"' % (url_base + video_identifier)] 75 | command = ' '.join(command) 76 | attempts = 0 77 | while True: 78 | try: 79 | output = subprocess.check_output(command, shell=True, 80 | stderr=subprocess.STDOUT) 81 | except subprocess.CalledProcessError as err: 82 | attempts += 1 83 | if attempts == num_attempts: 84 | return status, err.output 85 | else: 86 | break 87 | 88 | tmp_filename = glob.glob('%s*' % tmp_filename.split('.')[0])[0] 89 | # Construct command to trim the videos (ffmpeg required). 90 | command = ['ffmpeg', 91 | '-i', '"%s"' % tmp_filename, 92 | '-ss', str(start_time), 93 | '-t', str(end_time - start_time), 94 | '-c:v', 'libx264', '-c:a', 'copy', 95 | '-threads', '1', 96 | '-loglevel', 'panic', 97 | '"%s"' % output_filename] 98 | command = ' '.join(command) 99 | try: 100 | output = subprocess.check_output(command, shell=True, 101 | stderr=subprocess.STDOUT) 102 | except subprocess.CalledProcessError as err: 103 | return status, err.output 104 | 105 | # Check if the video was successfully saved. 106 | status = os.path.exists(output_filename) 107 | os.remove(tmp_filename) 108 | return status, 'Downloaded' 109 | 110 | 111 | def download_clip_wrapper(row, label_to_dir, trim_format, tmp_dir): 112 | """Wrapper for parallel processing purposes.""" 113 | output_filename = construct_video_filename(row, label_to_dir, 114 | trim_format) 115 | clip_id = os.path.basename(output_filename).split('.mp4')[0] 116 | if os.path.exists(output_filename): 117 | status = tuple([clip_id, True, 'Exists']) 118 | return status 119 | 120 | downloaded, log = download_clip(row['video-id'], output_filename, 121 | row['start-time'], row['end-time'], 122 | tmp_dir=tmp_dir) 123 | status = tuple([clip_id, downloaded, log]) 124 | return status 125 | 126 | 127 | def parse_kinetics_annotations(input_csv): 128 | """Returns a parsed DataFrame. 129 | 130 | arguments: 131 | --------- 132 | input_csv: str 133 | Path to CSV file containing the following columns: 134 | 'YouTube Identifier,Start time,End time,Class label' 135 | 136 | returns: 137 | ------- 138 | dataset: DataFrame 139 | Pandas with the following columns: 140 | 'video-id', 'start-time', 'end-time', 'label-name' 141 | """ 142 | df = pd.read_csv(input_csv) 143 | df.rename(columns={'youtube_id': 'video-id', 144 | 'time_start': 'start-time', 145 | 'time_end': 'end-time', 146 | 'label': 'label-name', 147 | 'is_cc': 'is-cc'}, inplace=True) 148 | return df 149 | 150 | def main(input_csv, output_dir, 151 | trim_format='%06d', num_jobs=24, tmp_dir='/tmp/kinetics'): 152 | 153 | # Reading and parsing Kinetics. 154 | dataset = parse_kinetics_annotations(input_csv) 155 | 156 | # Creates folders where videos will be saved later. 157 | label_to_dir = create_video_folders(dataset, output_dir, tmp_dir) 158 | 159 | # Download all clips. 
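# (Note on the dispatch below: with num_jobs == 1 the clips are fetched serially;
# otherwise joblib.Parallel runs download_clip_wrapper on num_jobs workers, one CSV
# row per call. Clips already present on disk are reported as 'Exists' and skipped.)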
160 | if num_jobs==1: 161 | status_lst = [] 162 | for i, row in dataset.iterrows(): 163 | status_lst.append(download_clip_wrapper(row, label_to_dir, 164 | trim_format, tmp_dir)) 165 | else: 166 | status_lst = Parallel(n_jobs=num_jobs)(delayed(download_clip_wrapper)( 167 | row, label_to_dir, 168 | trim_format, tmp_dir) for i, row in dataset.iterrows()) 169 | 170 | # Clean tmp dir. 171 | shutil.rmtree(tmp_dir) 172 | 173 | # Save download report. 174 | with open('download_report.json', 'w') as fobj: 175 | fobj.write(json.dumps(status_lst)) 176 | 177 | 178 | if __name__ == '__main__': 179 | description = 'Helper script for downloading and trimming kinetics videos.' 180 | p = argparse.ArgumentParser(description=description) 181 | p.add_argument('input_csv', type=str, 182 | help=('CSV file containing the following format: ' 183 | 'YouTube Identifier,Start time,End time,Class label')) 184 | p.add_argument('output_dir', type=str, 185 | help='Output directory where videos will be saved.') 186 | p.add_argument('-f', '--trim-format', type=str, default='%06d', 187 | help=('This will be the format for the ' 188 | 'filename of trimmed videos: ' 189 | 'videoid_%0xd(start_time)_%0xd(end_time).mp4')) 190 | p.add_argument('-n', '--num-jobs', type=int, default=24) 191 | p.add_argument('-t', '--tmp-dir', type=str, default='/tmp/kinetics') 192 | main(**vars(p.parse_args())) 193 | -------------------------------------------------------------------------------- /Crawler/Kinetics/environment.yml: -------------------------------------------------------------------------------- 1 | name: kinetics 2 | channels: !!python/tuple 3 | - !!python/unicode 4 | 'defaults' 5 | dependencies: 6 | - joblib=0.9.4=py27_0 7 | - menpo::ffmpeg=3.1.3=0 8 | - mkl=2017.0.1=0 9 | - numpy=1.12.1=py27_0 10 | - openssl=1.0.2k=1 11 | - pandas=0.19.2=np112py27_1 12 | - pip=9.0.1=py27_1 13 | - python=2.7.13=0 14 | - python-dateutil=2.6.0=py27_0 15 | - pytz=2017.2=py27_0 16 | - readline=6.2=2 17 | - setuptools=27.2.0=py27_0 18 | - six=1.10.0=py27_0 19 | - sqlite=3.13.0=0 20 | - tk=8.5.18=0 21 | - wheel=0.29.0=py27_0 22 | - zlib=1.2.8=3 23 | - pip: 24 | - decorator==4.0.11 25 | - olefile==0.44 26 | - youtube-dl==2017.6.5 27 | prefix: /home/cabaf/.conda/envs/kinetics 28 | 29 | -------------------------------------------------------------------------------- /Crawler/Kinetics/process_download_report.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | import argparse 4 | 5 | 6 | status_and_reason_to_message_dict = { 7 | ('Downloaded', ''): ['Downloaded', 8 | 'Exists'], 9 | 10 | ('Network',''): ['unable to download video data', 11 | 'The read operation timed out', 12 | 'Did not get any data blocks', 13 | 'giving up after 10 retries', 14 | 'Network is unreachable', 15 | 'content too short'], 16 | 17 | ('Unavailable','User-Removed'): ['This video is no longer available because the uploader has closed their YouTube account.', 18 | 'account associated with this video has been terminated', 19 | 'This video has been removed by the user.', 20 | 'This video is not available.', 21 | 'This video does not exist.'], 22 | 23 | ('Unavailable','Copyright'): ['multiple third-party notifications of copyright infringement.', 24 | 'This video is no longer available due to a copyright claim', 25 | 'blocked it on copyright grounds', 26 | 'a duplicate of a previously uploaded video'], 27 | 28 | ('Unavailable','Country-Block'): ['The uploader has not made this video available in your country.', 29 
| 'who has blocked it in your country on copyright grounds.'], 30 | 31 | ('Unavailable','Spam'): ['policy on spam, deceptive practices, and scams.'], 32 | ('Unavailable','Nudity'): ['policy on nudity or sexual content.'], 33 | ('Unavailable','Sign-In'): ['Please sign in to view this video.'], 34 | ('Unavailable','Private'): ['This video is private.'], 35 | ('Unavailable','Guidelines'): ['Community Guidelines.'], 36 | ('Unavailable','Harassment and Bullying'): ['policy on harassment and bullying.'], 37 | ('Unavailable','Service-Terms'): ['Terms of Service.'], 38 | ('Unavailable','Harmful'): ['policy on harmful or dangerous content'], 39 | } 40 | 41 | def get_status_and_reason(msg): 42 | for s_r, lst in status_and_reason_to_message_dict.iteritems(): 43 | if any([x in msg for x in lst]): 44 | return s_r 45 | 46 | print(": error message is not matched with a status and a reason. message:", msg) 47 | 48 | return ('Other', msg) 49 | 50 | def process_download_report(report): 51 | output = [] 52 | for r in report: 53 | name, b, msg = r[0], r[1], r[2] 54 | output += [(name, get_status_and_reason(msg))] 55 | return output 56 | 57 | 58 | def wrapper_process_download_reports(json_files): 59 | all_ouputs = [] 60 | for f in json_files: 61 | with open(f, 'r') as fobj: 62 | report = json.load(fobj) 63 | all_ouputs += process_download_report(report) 64 | return all_ouputs 65 | 66 | def main(input_csv, input_json, output_file, trim_format='%06d', num_input=1): 67 | json_files = [] 68 | if num_input <= 1: 69 | json_files += [input_json] 70 | else: 71 | for i in range(num_input): 72 | json_files +=[input_json + ("-%02d" % (i+1))] 73 | 74 | all_ouputs = wrapper_process_download_reports(json_files) 75 | all_ouputs = dict(all_ouputs) 76 | 77 | dataset = pd.read_csv(input_csv) 78 | 79 | status_lst = [] 80 | reason_lst = [] 81 | for indx, row in dataset.iterrows(): 82 | name = '%s_%s_%s' % (row['youtube_id'], 83 | trim_format % row['time_start'], 84 | trim_format % row['time_end']) 85 | 86 | s, r = all_ouputs[name] 87 | status_lst += [s] 88 | reason_lst += [r] 89 | if indx % 10000 == 0: 90 | print(indx) 91 | print("Done!!") 92 | dataset["status"] = status_lst 93 | dataset["reason"] = reason_lst 94 | 95 | dataset.to_csv(output_file, index=False) 96 | 97 | if __name__ == '__main__': 98 | description = 'Helper script for processing the reports from downloading and trimming kinetics videos.' 99 | p = argparse.ArgumentParser(description=description) 100 | p.add_argument('input_csv', type=str, 101 | help=('CSV file containing the following format: ' 102 | 'label, youtube_id, time_start, time_end, split, is_cc')) 103 | p.add_argument('input_json', type=str, 104 | help=('base name for download report json files'), 105 | default='download_report.json') 106 | p.add_argument('output_file', type=str, 107 | help='Output csv file with statuses and reasons.') 108 | p.add_argument('-f', '--trim-format', type=str, default='%06d', 109 | help=('This will be the format for the ' 110 | 'filename of trimmed videos: ' 111 | 'videoid_%0xd(start_time)_%0xd(end_time).mp4')) 112 | p.add_argument('-n', '--num_input', 113 | help=('number of input json files with the same base name input_json.'), 114 | type=int, default=1) 115 | main(**vars(p.parse_args())) 116 | -------------------------------------------------------------------------------- /Crawler/README: -------------------------------------------------------------------------------- 1 | ActivityNet Tools 2 | ================= 3 | 4 | Requirements 5 | ------------ 6 | 1. 
youtube-dl (https://github.com/rg3/youtube-dl/) 7 | 8 | Fetch ActivityNet 9 | ----------------- 10 | To download all the ActivityNet videos run the following command line: 11 | $ mkdir $VIDEO_PATH 12 | $ chmod +x fetch_activitynet_videos.sh 13 | $ ./fetch_activitynet_videos.sh $VIDEO_PATH activity_net.v1-X.json 14 | 15 | Where $VIDEO_PATH is the path where the videos will be located. If you already 16 | have a subset of the videos, input that directory. 17 | -------------------------------------------------------------------------------- /Crawler/fetch_activitynet_videos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIDEOPATH=$1 4 | JSON_FILE=$2 5 | TEMP_FILE="command_list.txt" 6 | 7 | if [ -d $VIDEOPATH ]; then 8 | python run_crosscheck.py $VIDEOPATH $JSON_FILE $TEMP_FILE 9 | if [ -f $TEMP_FILE ]; then 10 | bash $TEMP_FILE 11 | else 12 | echo "File $TEMP_FILE does not exist." 13 | fi 14 | else 15 | echo "Directory does not exist." 16 | exit 0 17 | fi 18 | 19 | #rm $TEMP_FILE 20 | echo "Have a good day!" 21 | -------------------------------------------------------------------------------- /Crawler/run_crosscheck.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import glob 3 | import json 4 | import os 5 | 6 | def crosscheck_videos(video_path, ann_file): 7 | # Get existing videos 8 | existing_vids = glob.glob("%s/*.mp4" % video_path) 9 | for idx, vid in enumerate(existing_vids): 10 | basename = os.path.basename(vid).split(".mp4")[0] 11 | if len(basename) == 13: 12 | existing_vids[idx] = basename[2:] 13 | elif len(basename) == 11: 14 | existing_vids[idx] = basename 15 | else: 16 | raise RuntimeError("Unknown filename format: %s", vid) 17 | # Read and get video IDs from annotation file 18 | with open(ann_file, "r") as fobj: 19 | anet_v_1_0 = json.load(fobj) 20 | all_vids = anet_v_1_0["database"].keys() 21 | non_existing_videos = [] 22 | for vid in all_vids: 23 | if vid in existing_vids: 24 | continue 25 | else: 26 | non_existing_videos.append(vid) 27 | return non_existing_videos 28 | 29 | def main(video_path, ann_file, output_filename): 30 | non_existing_videos = crosscheck_videos(video_path, ann_file) 31 | print "No of non-existing videos = {}" .format(len(non_existing_videos)) 32 | filename = os.path.join(video_path, "v_%s.mp4") 33 | cmd_base = "youtube-dl -f best -f mp4 " 34 | cmd_base += '"https://www.youtube.com/watch?v=%s" ' 35 | cmd_base += '-o "%s"' % filename 36 | with open(output_filename, "w") as fobj: 37 | for vid in non_existing_videos: 38 | cmd = cmd_base % (vid, vid) 39 | fobj.write("%s\n" % cmd) 40 | 41 | if __name__ == "__main__": 42 | parser = ArgumentParser(description="Script to double check video content.") 43 | parser.add_argument("video_path", help="Where are the videos located? (Full path)") 44 | parser.add_argument("ann_file", help="Where is the annotation file?") 45 | parser.add_argument("output_filename", help="Output script location.") 46 | args = vars(parser.parse_args()) 47 | main(**args) 48 | -------------------------------------------------------------------------------- /Evaluation/README.md: -------------------------------------------------------------------------------- 1 | # ActivityNet Large Scale Activity Recognition Challenge - Evaluation Toolkit 2 | This file is taken as is from the source ActivityNet repository.
3 | This is the documentation of the ActivityNet Large Scale Activity Recognition 4 | Challenge Evaluation Toolkit. It includes APIs to evaluate the performance of a method in the two different tasks in the challenge: *untrimmed video classification* and *activity detection*. For more information about the challenge competitions, please read the [guidelines](http://activity-net.org/challenges/2016/guidelines.html). 5 | 6 | ## Dependencies 7 | The Evaluation Toolkit is written purely in Python (>=2.7) and requires the 8 | following third-party libraries: 9 | * [Numpy](http://www.numpy.org/) 10 | * [Pandas](http://pandas.pydata.org/) 11 | 12 | ## Getting started 13 | We include sample prediction files in the `data` folder to show how to evaluate your prediction results. Please follow these steps to obtain the performance evaluation on the provided sample files: 14 | * Clone this repository with `git clone`. 15 | * To evaluate classification performance call: `python get_classification_performance.py data/activity_net.v1-3.min.json sample_classification_prediction.json` 16 | * To evaluate detection performance call: `python get_detection_performance.py data/activity_net.v1-3.min.json sample_detection_prediction.json` 17 | 18 | ## Contributions and Troubleshooting 19 | Contributions are welcome; please keep your pull request simple so we can get back to you quickly. If you find a bug, please open a new issue and describe the problem. 20 | -------------------------------------------------------------------------------- /Evaluation/check_lmdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jun 21 22:20:12 2017 5 | 6 | @author: hadoop 7 | Description: Read LMDB 8 | 9 | """ 10 | import lmdb 11 | import numpy as np 12 | import caffe 13 | import cv2 14 | 15 | #LMDB_PATH = "/home/hadoop/VisionWorkspace/KTH_OpticalFlow/dataset/kth_actions_train/LMDB/OF_lmdb" 16 | LMDB_PATH = '/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/train_hog_lmdb' 17 | 18 | def waitTillEscPressed(): 19 | while(True): 20 | if cv2.waitKey(10)==27: 21 | print("Esc Pressed") 22 | return 23 | 24 | if __name__ == '__main__': 25 | 26 | env = lmdb.open(LMDB_PATH, readonly=True) 27 | print env.stat() 28 | i,j = 0, 0 29 | with env.begin() as txn: 30 | cursor = txn.cursor() 31 | datum = caffe.proto.caffe_pb2.Datum() 32 | for k,v in cursor: 33 | datum.ParseFromString(v) 34 | lab = datum.label 35 | #print "Shape : {}" .format(datum.width) 36 | #flat_x = np.fromstring(datum.data, dtype=np.uint8) 37 | flat_x = np.array(datum.float_data) 38 | x = flat_x.reshape(datum.channels, datum.height, datum.width) 39 | y = datum.label 40 | #print "sum(x) = {} " .format(np.sum(x)) 41 | #print "y = %d " %y 42 | j += 1 43 | if np.sum(x) == 0: 44 | print j 45 | print "class %d " %y 46 | i += 1 47 | #raw_datum = txn.get(b'00000000') 48 | 49 | print 'No of 0s are %d ' %i 50 | # 51 | 52 | 53 | #label = datum.label 54 | # data = caffe.io.datum_to_array(datum) 55 | # for l, d in zip(label, data): 56 | # print l, d 57 | 58 | # Iterate over the LMDB values 59 | 60 | #with env.begin() as txn: 61 | # cursor = txn.cursor() 62 | # datum = caffe.proto.caffe_pb2.Datum() 63 | # for key, value in cursor: 64 | # datum.ParseFromString(value) 65 | # label = datum.label 66 | # flat_x = np.fromstring(datum.data, dtype=np.uint8) 67 | # x = flat_x.reshape(datum.channels, datum.height, datum.width) 68 | # img = convert_to_bgr(x) 69 | # cv2.imshow("BGR_OF",
img) 70 | # print "Label = "+str(label) 71 | # keyPressed = waitTillEscPressed() 72 | # if keyPressed==0: # write to file 73 | # cv2.imwrite(os.path.join(curr_path,key+"_"+str(label)+".jpg"),img) 74 | #if key == '00000099': 75 | # print(key, value) 76 | -------------------------------------------------------------------------------- /Evaluation/eval_classification.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | 10 | class ANETclassification(object): 11 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 12 | PREDICTION_FIELDS = ['results', 'version', 'external_data'] 13 | 14 | def __init__(self, ground_truth_filename=None, prediction_filename=None, 15 | ground_truth_fields=GROUND_TRUTH_FIELDS, 16 | prediction_fields=PREDICTION_FIELDS, 17 | subset='validation', verbose=False, top_k=3, 18 | check_status=True): 19 | if not ground_truth_filename: 20 | raise IOError('Please input a valid ground truth file.') 21 | if not prediction_filename: 22 | raise IOError('Please input a valid prediction file.') 23 | self.subset = subset 24 | self.verbose = verbose 25 | self.gt_fields = ground_truth_fields 26 | self.pred_fields = prediction_fields 27 | self.top_k = top_k 28 | self.ap = None 29 | self.hit_at_k = None 30 | self.check_status = check_status 31 | # Retrieve blocked videos from server. 32 | if self.check_status: 33 | self.blocked_videos = get_blocked_videos() 34 | else: 35 | self.blocked_videos = list() 36 | # Import ground truth and predictions. 37 | self.ground_truth, self.activity_index = self._import_ground_truth( 38 | ground_truth_filename) 39 | self.prediction = self._import_prediction(prediction_filename) 40 | 41 | if self.verbose: 42 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 43 | nr_gt = len(self.ground_truth) 44 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 45 | nr_pred = len(self.prediction) 46 | print '\tNumber of predictions: {}'.format(nr_pred) 47 | 48 | def _import_ground_truth(self, ground_truth_filename): 49 | """Reads ground truth file, checks if it is well formatted, and returns 50 | the ground truth instances and the activity classes. 51 | 52 | Parameters 53 | ---------- 54 | ground_truth_filename : str 55 | Full path to the ground truth json file. 56 | 57 | Outputs 58 | ------- 59 | ground_truth : df 60 | Data frame containing the ground truth instances. 61 | activity_index : dict 62 | Dictionary containing class index. 
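Note: the JSON is expected to expose the fields listed in GROUND_TRUTH_FIELDS
('database', 'taxonomy', 'version'); each entry of 'database' provides a 'subset'
tag and a list of 'annotations' whose 'label' values define the activity classes
read here.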
63 | """ 64 | with open(ground_truth_filename, 'r') as fobj: 65 | data = json.load(fobj) 66 | # Checking format 67 | if not all([field in data.keys() for field in self.gt_fields]): 68 | raise IOError('Please input a valid ground truth file.') 69 | 70 | # Initialize data frame 71 | activity_index, cidx = {}, 0 72 | video_lst, label_lst = [], [] 73 | for videoid, v in data['database'].iteritems(): 74 | if self.subset != v['subset']: 75 | continue 76 | if videoid in self.blocked_videos: 77 | continue 78 | for ann in v['annotations']: 79 | if ann['label'] not in activity_index: 80 | activity_index[ann['label']] = cidx 81 | cidx += 1 82 | video_lst.append(videoid) 83 | label_lst.append(activity_index[ann['label']]) 84 | ground_truth = pd.DataFrame({'video-id': video_lst, 85 | 'label': label_lst}) 86 | ground_truth = ground_truth.drop_duplicates().reset_index(drop=True) 87 | return ground_truth, activity_index 88 | 89 | def _import_prediction(self, prediction_filename): 90 | """Reads prediction file, checks if it is well formatted, and returns 91 | the prediction instances. 92 | 93 | Parameters 94 | ---------- 95 | prediction_filename : str 96 | Full path to the prediction json file. 97 | 98 | Outputs 99 | ------- 100 | prediction : df 101 | Data frame containing the prediction instances. 102 | """ 103 | with open(prediction_filename, 'r') as fobj: 104 | data = json.load(fobj) 105 | # Checking format... 106 | if not all([field in data.keys() for field in self.pred_fields]): 107 | raise IOError('Please input a valid prediction file.') 108 | 109 | # Initialize data frame 110 | video_lst, label_lst, score_lst = [], [], [] 111 | for videoid, v in data['results'].iteritems(): 112 | if videoid in self.blocked_videos: 113 | continue 114 | for result in v: 115 | label = self.activity_index[result['label']] 116 | video_lst.append(videoid) 117 | label_lst.append(label) 118 | score_lst.append(result['score']) 119 | prediction = pd.DataFrame({'video-id': video_lst, 120 | 'label': label_lst, 121 | 'score': score_lst}) 122 | return prediction 123 | 124 | def wrapper_compute_average_precision(self): 125 | """Computes average precision for each class in the subset. 126 | """ 127 | ap = np.zeros(len(self.activity_index.items())) 128 | for activity, cidx in self.activity_index.iteritems(): 129 | gt_idx = self.ground_truth['label'] == cidx 130 | pred_idx = self.prediction['label'] == cidx 131 | ap[cidx] = compute_average_precision_classification( 132 | self.ground_truth.loc[gt_idx].reset_index(drop=True), 133 | self.prediction.loc[pred_idx].reset_index(drop=True)) 134 | return ap 135 | 136 | def evaluate(self): 137 | """Evaluates a prediction file. For the detection task we measure the 138 | interpolated mean average precision to measure the performance of a 139 | method. 
140 | """ 141 | ap = self.wrapper_compute_average_precision() 142 | hit_at_k = compute_video_hit_at_k(self.ground_truth, 143 | self.prediction, top_k=self.top_k) 144 | avg_hit_at_k = compute_video_hit_at_k( 145 | self.ground_truth, self.prediction, top_k=self.top_k, avg=True) 146 | if self.verbose: 147 | print ('[RESULTS] Performance on ActivityNet untrimmed video ' 148 | 'classification task.') 149 | print '\tMean Average Precision: {}'.format(ap.mean()) 150 | print '\tHit@{}: {}'.format(self.top_k, hit_at_k) 151 | print '\tAvg Hit@{}: {}'.format(self.top_k, avg_hit_at_k) 152 | self.ap = ap 153 | self.hit_at_k = hit_at_k 154 | self.avg_hit_at_k = avg_hit_at_k 155 | 156 | ################################################################################ 157 | # Metrics 158 | ################################################################################ 159 | 160 | def compute_average_precision_classification(ground_truth, prediction): 161 | """Compute average precision (classification task) between ground truth and 162 | predictions data frames. If multiple predictions occurs for the same 163 | predicted segment, only the one with highest score is matched as 164 | true positive. This code is greatly inspired by Pascal VOC devkit. 165 | 166 | Parameters 167 | ---------- 168 | ground_truth : df 169 | Data frame containing the ground truth instances. 170 | Required fields: ['video-id'] 171 | prediction : df 172 | Data frame containing the prediction instances. 173 | Required fields: ['video-id, 'score'] 174 | 175 | Outputs 176 | ------- 177 | ap : float 178 | Average precision score. 179 | """ 180 | npos = float(len(ground_truth)) 181 | lock_gt = np.ones(len(ground_truth)) * -1 182 | # Sort predictions by decreasing score order. 183 | sort_idx = prediction['score'].values.argsort()[::-1] 184 | prediction = prediction.loc[sort_idx].reset_index(drop=True) 185 | 186 | # Initialize true positive and false positive vectors. 187 | tp = np.zeros(len(prediction)) 188 | fp = np.zeros(len(prediction)) 189 | 190 | # Assigning true positive to truly grount truth instances. 191 | for idx in range(len(prediction)): 192 | this_pred = prediction.loc[idx] 193 | gt_idx = ground_truth['video-id'] == this_pred['video-id'] 194 | # Check if there is at least one ground truth in the video associated. 195 | if not gt_idx.any(): 196 | fp[idx] = 1 197 | continue 198 | this_gt = ground_truth.loc[gt_idx].reset_index() 199 | if lock_gt[this_gt['index']] >= 0: 200 | fp[idx] = 1 201 | else: 202 | tp[idx] = 1 203 | lock_gt[this_gt['index']] = idx 204 | 205 | # Computing prec-rec 206 | tp = np.cumsum(tp).astype(np.float) 207 | fp = np.cumsum(fp).astype(np.float) 208 | rec = tp / npos 209 | prec = tp / (tp + fp) 210 | return interpolated_prec_rec(prec, rec) 211 | 212 | def compute_video_hit_at_k(ground_truth, prediction, top_k=3, avg=False): 213 | """Compute accuracy at k prediction between ground truth and 214 | predictions data frames. This code is greatly inspired by evaluation 215 | performed in Karpathy et al. CVPR14. 216 | 217 | Parameters 218 | ---------- 219 | ground_truth : df 220 | Data frame containing the ground truth instances. 221 | Required fields: ['video-id', 'label'] 222 | prediction : df 223 | Data frame containing the prediction instances. 224 | Required fields: ['video-id, 'label', 'score'] 225 | 226 | Outputs 227 | ------- 228 | acc : float 229 | Top k accuracy score. 
230 | """ 231 | video_ids = np.unique(ground_truth['video-id'].values) 232 | avg_hits_per_vid = np.zeros(video_ids.size) 233 | for i, vid in enumerate(video_ids): 234 | pred_idx = prediction['video-id'] == vid 235 | if not pred_idx.any(): 236 | continue 237 | this_pred = prediction.loc[pred_idx].reset_index(drop=True) 238 | # Get top K predictions sorted by decreasing score. 239 | sort_idx = this_pred['score'].values.argsort()[::-1][:top_k] 240 | this_pred = this_pred.loc[sort_idx].reset_index(drop=True) 241 | # Get labels and compare against ground truth. 242 | pred_label = this_pred['label'].tolist() 243 | gt_idx = ground_truth['video-id'] == vid 244 | gt_label = ground_truth.loc[gt_idx]['label'].tolist() 245 | avg_hits_per_vid[i] = np.mean([1 if this_label in pred_label else 0 246 | for this_label in gt_label]) 247 | if not avg: 248 | avg_hits_per_vid[i] = np.ceil(avg_hits_per_vid[i]) 249 | return float(avg_hits_per_vid.mean()) 250 | -------------------------------------------------------------------------------- /Evaluation/eval_detection.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | from utils import segment_iou 10 | 11 | class ANETdetection(object): 12 | 13 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 14 | PREDICTION_FIELDS = ['results', 'version', 'external_data'] 15 | 16 | def __init__(self, ground_truth_filename=None, prediction_filename=None, 17 | ground_truth_fields=GROUND_TRUTH_FIELDS, 18 | prediction_fields=PREDICTION_FIELDS, 19 | tiou_thresholds=np.linspace(0.5, 0.95, 10), 20 | subset='validation', verbose=False, 21 | check_status=True): 22 | if not ground_truth_filename: 23 | raise IOError('Please input a valid ground truth file.') 24 | if not prediction_filename: 25 | raise IOError('Please input a valid prediction file.') 26 | self.subset = subset 27 | self.tiou_thresholds = tiou_thresholds 28 | self.verbose = verbose 29 | self.gt_fields = ground_truth_fields 30 | self.pred_fields = prediction_fields 31 | self.ap = None 32 | self.check_status = check_status 33 | # Retrieve blocked videos from server. 34 | if self.check_status: 35 | self.blocked_videos = get_blocked_videos() 36 | else: 37 | self.blocked_videos = list() 38 | # Import ground truth and predictions. 39 | self.ground_truth, self.activity_index = self._import_ground_truth( 40 | ground_truth_filename) 41 | self.prediction = self._import_prediction(prediction_filename) 42 | 43 | if self.verbose: 44 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 45 | nr_gt = len(self.ground_truth) 46 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 47 | nr_pred = len(self.prediction) 48 | print '\tNumber of predictions: {}'.format(nr_pred) 49 | print '\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds) 50 | 51 | def _import_ground_truth(self, ground_truth_filename): 52 | """Reads ground truth file, checks if it is well formatted, and returns 53 | the ground truth instances and the activity classes. 54 | 55 | Parameters 56 | ---------- 57 | ground_truth_filename : str 58 | Full path to the ground truth json file. 59 | 60 | Outputs 61 | ------- 62 | ground_truth : df 63 | Data frame containing the ground truth instances. 64 | activity_index : dict 65 | Dictionary containing class index. 
66 | """ 67 | with open(ground_truth_filename, 'r') as fobj: 68 | data = json.load(fobj) 69 | # Checking format 70 | if not all([field in data.keys() for field in self.gt_fields]): 71 | raise IOError('Please input a valid ground truth file.') 72 | 73 | # Read ground truth data. 74 | activity_index, cidx = {}, 0 75 | video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], [] 76 | for videoid, v in data['database'].iteritems(): 77 | if self.subset != v['subset']: 78 | continue 79 | if videoid in self.blocked_videos: 80 | continue 81 | for ann in v['annotations']: 82 | if ann['label'] not in activity_index: 83 | activity_index[ann['label']] = cidx 84 | cidx += 1 85 | video_lst.append(videoid) 86 | t_start_lst.append(ann['segment'][0]) 87 | t_end_lst.append(ann['segment'][1]) 88 | label_lst.append(activity_index[ann['label']]) 89 | 90 | ground_truth = pd.DataFrame({'video-id': video_lst, 91 | 't-start': t_start_lst, 92 | 't-end': t_end_lst, 93 | 'label': label_lst}) 94 | return ground_truth, activity_index 95 | 96 | def _import_prediction(self, prediction_filename): 97 | """Reads prediction file, checks if it is well formatted, and returns 98 | the prediction instances. 99 | 100 | Parameters 101 | ---------- 102 | prediction_filename : str 103 | Full path to the prediction json file. 104 | 105 | Outputs 106 | ------- 107 | prediction : df 108 | Data frame containing the prediction instances. 109 | """ 110 | with open(prediction_filename, 'r') as fobj: 111 | data = json.load(fobj) 112 | # Checking format... 113 | if not all([field in data.keys() for field in self.pred_fields]): 114 | raise IOError('Please input a valid prediction file.') 115 | 116 | # Read predicitons. 117 | video_lst, t_start_lst, t_end_lst = [], [], [] 118 | label_lst, score_lst = [], [] 119 | for videoid, v in data['results'].iteritems(): 120 | if videoid in self.blocked_videos: 121 | continue 122 | for result in v: 123 | label = self.activity_index[result['label']] 124 | video_lst.append(videoid) 125 | t_start_lst.append(result['segment'][0]) 126 | t_end_lst.append(result['segment'][1]) 127 | label_lst.append(label) 128 | score_lst.append(result['score']) 129 | prediction = pd.DataFrame({'video-id': video_lst, 130 | 't-start': t_start_lst, 131 | 't-end': t_end_lst, 132 | 'label': label_lst, 133 | 'score': score_lst}) 134 | return prediction 135 | 136 | def wrapper_compute_average_precision(self): 137 | """Computes average precision for each class in the subset. 138 | """ 139 | ap = np.zeros((len(self.tiou_thresholds), len(self.activity_index.items()))) 140 | for activity, cidx in self.activity_index.iteritems(): 141 | gt_idx = self.ground_truth['label'] == cidx 142 | pred_idx = self.prediction['label'] == cidx 143 | ap[:,cidx] = compute_average_precision_detection( 144 | self.ground_truth.loc[gt_idx].reset_index(drop=True), 145 | self.prediction.loc[pred_idx].reset_index(drop=True), 146 | tiou_thresholds=self.tiou_thresholds) 147 | return ap 148 | 149 | def evaluate(self): 150 | """Evaluates a prediction file. For the detection task we measure the 151 | interpolated mean average precision to measure the performance of a 152 | method. 153 | """ 154 | self.ap = self.wrapper_compute_average_precision() 155 | self.mAP = self.ap.mean(axis=1) 156 | if self.verbose: 157 | print '[RESULTS] Performance on ActivityNet detection task.' 
158 | print '\tAverage-mAP: {}'.format(self.mAP.mean()) 159 | 160 | def compute_average_precision_detection(ground_truth, prediction, tiou_thresholds=np.linspace(0.5, 0.95, 10)): 161 | """Compute average precision (detection task) between ground truth and 162 | predictions data frames. If multiple predictions occurs for the same 163 | predicted segment, only the one with highest score is matches as 164 | true positive. This code is greatly inspired by Pascal VOC devkit. 165 | 166 | Parameters 167 | ---------- 168 | ground_truth : df 169 | Data frame containing the ground truth instances. 170 | Required fields: ['video-id', 't-start', 't-end'] 171 | prediction : df 172 | Data frame containing the prediction instances. 173 | Required fields: ['video-id, 't-start', 't-end', 'score'] 174 | tiou_thresholds : 1darray, optional 175 | Temporal intersection over union threshold. 176 | 177 | Outputs 178 | ------- 179 | ap : float 180 | Average precision score. 181 | """ 182 | npos = float(len(ground_truth)) 183 | lock_gt = np.ones((len(tiou_thresholds),len(ground_truth))) * -1 184 | # Sort predictions by decreasing score order. 185 | sort_idx = prediction['score'].values.argsort()[::-1] 186 | prediction = prediction.loc[sort_idx].reset_index(drop=True) 187 | 188 | # Initialize true positive and false positive vectors. 189 | tp = np.zeros((len(tiou_thresholds), len(prediction))) 190 | fp = np.zeros((len(tiou_thresholds), len(prediction))) 191 | 192 | # Adaptation to query faster 193 | ground_truth_gbvn = ground_truth.groupby('video-id') 194 | 195 | # Assigning true positive to truly grount truth instances. 196 | for idx, this_pred in prediction.iterrows(): 197 | 198 | try: 199 | # Check if there is at least one ground truth in the video associated. 200 | ground_truth_videoid = ground_truth_gbvn.get_group(this_pred['video-id']) 201 | except Exception as e: 202 | fp[:, idx] = 1 203 | continue 204 | 205 | this_gt = ground_truth_videoid.reset_index() 206 | tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values, 207 | this_gt[['t-start', 't-end']].values) 208 | # We would like to retrieve the predictions with highest tiou score. 209 | tiou_sorted_idx = tiou_arr.argsort()[::-1] 210 | for tidx, tiou_thr in enumerate(tiou_thresholds): 211 | for jdx in tiou_sorted_idx: 212 | if tiou_arr[jdx] < tiou_thr: 213 | fp[tidx, idx] = 1 214 | break 215 | if lock_gt[tidx, this_gt.loc[jdx]['index']] >= 0: 216 | continue 217 | # Assign as true positive after the filters above. 
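# (lock_gt[tidx, gt_index] records the index of the prediction that claimed this
# ground-truth segment, so each segment is matched at most once per tIoU threshold;
# later predictions that only overlap already-claimed segments fall through the loop
# and are marked as false positives below.)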
218 | tp[tidx, idx] = 1 219 | lock_gt[tidx, this_gt.loc[jdx]['index']] = idx 220 | break 221 | 222 | if fp[tidx, idx] == 0 and tp[tidx, idx] == 0: 223 | fp[tidx, idx] = 1 224 | 225 | ap = np.zeros(len(tiou_thresholds)) 226 | 227 | for tidx in range(len(tiou_thresholds)): 228 | # Computing prec-rec 229 | this_tp = np.cumsum(tp[tidx,:]).astype(np.float) 230 | this_fp = np.cumsum(fp[tidx,:]).astype(np.float) 231 | rec = this_tp / npos 232 | prec = this_tp / (this_tp + this_fp) 233 | ap[tidx] = interpolated_prec_rec(prec, rec) 234 | 235 | return ap 236 | -------------------------------------------------------------------------------- /Evaluation/eval_kinetics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | 10 | class ANETclassification(object): 11 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 12 | PREDICTION_FIELDS = ['results', 'version', 'external_data'] 13 | 14 | def __init__(self, ground_truth_filename=None, prediction_filename=None, 15 | ground_truth_fields=GROUND_TRUTH_FIELDS, 16 | prediction_fields=PREDICTION_FIELDS, 17 | subset='validation', verbose=False, top_k=3, 18 | check_status=True): 19 | if not ground_truth_filename: 20 | raise IOError('Please input a valid ground truth file.') 21 | if not prediction_filename: 22 | raise IOError('Please input a valid prediction file.') 23 | self.subset = subset 24 | self.verbose = verbose 25 | self.gt_fields = ground_truth_fields 26 | self.pred_fields = prediction_fields 27 | self.top_k = top_k 28 | self.ap = None 29 | self.hit_at_k = None 30 | self.check_status = check_status 31 | # Retrieve blocked videos from server. 32 | if self.check_status: 33 | self.blocked_videos = get_blocked_videos() 34 | else: 35 | self.blocked_videos = list() 36 | # Import ground truth and predictions. 37 | self.ground_truth, self.activity_index = self._import_ground_truth( 38 | ground_truth_filename) 39 | self.prediction = self._import_prediction(prediction_filename) 40 | 41 | if self.verbose: 42 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 43 | nr_gt = len(self.ground_truth) 44 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 45 | nr_pred = len(self.prediction) 46 | print '\tNumber of predictions: {}'.format(nr_pred) 47 | 48 | def _import_ground_truth(self, ground_truth_filename): 49 | """Reads ground truth file, checks if it is well formatted, and returns 50 | the ground truth instances and the activity classes. 51 | 52 | Parameters 53 | ---------- 54 | ground_truth_filename : str 55 | Full path to the ground truth json file. 56 | 57 | Outputs 58 | ------- 59 | ground_truth : df 60 | Data frame containing the ground truth instances. 61 | activity_index : dict 62 | Dictionary containing class index. 
63 | """ 64 | with open(ground_truth_filename, 'r') as fobj: 65 | data = json.load(fobj) 66 | # Checking format 67 | if not all([field in data.keys() for field in self.gt_fields]): 68 | raise IOError('Please input a valid ground truth file.') 69 | 70 | # Initialize data frame 71 | activity_index, cidx = {}, 0 72 | video_lst, label_lst = [], [] 73 | for videoid, v in data['database'].iteritems(): 74 | if self.subset != v['subset']: 75 | continue 76 | if videoid in self.blocked_videos: 77 | continue 78 | for ann in v['annotations']: 79 | if ann['label'] not in activity_index: 80 | activity_index[ann['label']] = cidx 81 | cidx += 1 82 | video_lst.append(videoid) 83 | label_lst.append(activity_index[ann['label']]) 84 | ground_truth = pd.DataFrame({'video-id': video_lst, 85 | 'label': label_lst}) 86 | ground_truth = ground_truth.drop_duplicates().reset_index(drop=True) 87 | return ground_truth, activity_index 88 | 89 | def _import_prediction(self, prediction_filename): 90 | """Reads prediction file, checks if it is well formatted, and returns 91 | the prediction instances. 92 | 93 | Parameters 94 | ---------- 95 | prediction_filename : str 96 | Full path to the prediction json file. 97 | 98 | Outputs 99 | ------- 100 | prediction : df 101 | Data frame containing the prediction instances. 102 | """ 103 | with open(prediction_filename, 'r') as fobj: 104 | data = json.load(fobj) 105 | # Checking format... 106 | if not all([field in data.keys() for field in self.pred_fields]): 107 | raise IOError('Please input a valid prediction file.') 108 | 109 | # Initialize data frame 110 | video_lst, label_lst, score_lst = [], [], [] 111 | for videoid, v in data['results'].iteritems(): 112 | if videoid in self.blocked_videos: 113 | continue 114 | for result in v: 115 | label = self.activity_index[result['label']] 116 | video_lst.append(videoid) 117 | label_lst.append(label) 118 | score_lst.append(result['score']) 119 | prediction = pd.DataFrame({'video-id': video_lst, 120 | 'label': label_lst, 121 | 'score': score_lst}) 122 | return prediction 123 | 124 | def wrapper_compute_average_precision(self): 125 | """Computes average precision for each class in the subset. 126 | """ 127 | ap = np.zeros(len(self.activity_index.items())) 128 | for activity, cidx in self.activity_index.iteritems(): 129 | gt_idx = self.ground_truth['label'] == cidx 130 | pred_idx = self.prediction['label'] == cidx 131 | ap[cidx] = compute_average_precision_classification( 132 | self.ground_truth.loc[gt_idx].reset_index(drop=True), 133 | self.prediction.loc[pred_idx].reset_index(drop=True)) 134 | return ap 135 | 136 | def evaluate(self): 137 | """Evaluates a prediction file. For the detection task we measure the 138 | interpolated mean average precision to measure the performance of a 139 | method. 
140 | """ 141 | ap = self.wrapper_compute_average_precision() 142 | hit_at_k = compute_video_hit_at_k(self.ground_truth, 143 | self.prediction, top_k=self.top_k) 144 | avg_hit_at_k = compute_video_hit_at_k( 145 | self.ground_truth, self.prediction, top_k=self.top_k, avg=True) 146 | if self.verbose: 147 | print ('[RESULTS] Performance on ActivityNet untrimmed video ' 148 | 'classification task.') 149 | print '\tMean Average Precision: {}'.format(ap.mean()) 150 | print '\tError@{}: {}'.format(self.top_k, 1.0 - hit_at_k) 151 | #print '\tAvg Hit@{}: {}'.format(self.top_k, avg_hit_at_k) 152 | self.ap = ap 153 | self.hit_at_k = hit_at_k 154 | self.avg_hit_at_k = avg_hit_at_k 155 | 156 | ################################################################################ 157 | # Metrics 158 | ################################################################################ 159 | 160 | def compute_average_precision_classification(ground_truth, prediction): 161 | """Compute average precision (classification task) between ground truth and 162 | predictions data frames. If multiple predictions occurs for the same 163 | predicted segment, only the one with highest score is matched as 164 | true positive. This code is greatly inspired by Pascal VOC devkit. 165 | 166 | Parameters 167 | ---------- 168 | ground_truth : df 169 | Data frame containing the ground truth instances. 170 | Required fields: ['video-id'] 171 | prediction : df 172 | Data frame containing the prediction instances. 173 | Required fields: ['video-id, 'score'] 174 | 175 | Outputs 176 | ------- 177 | ap : float 178 | Average precision score. 179 | """ 180 | npos = float(len(ground_truth)) 181 | lock_gt = np.ones(len(ground_truth)) * -1 182 | # Sort predictions by decreasing score order. 183 | sort_idx = prediction['score'].values.argsort()[::-1] 184 | prediction = prediction.loc[sort_idx].reset_index(drop=True) 185 | 186 | # Initialize true positive and false positive vectors. 187 | tp = np.zeros(len(prediction)) 188 | fp = np.zeros(len(prediction)) 189 | 190 | # Assigning true positive to truly grount truth instances. 191 | for idx in range(len(prediction)): 192 | this_pred = prediction.loc[idx] 193 | gt_idx = ground_truth['video-id'] == this_pred['video-id'] 194 | # Check if there is at least one ground truth in the video associated. 195 | if not gt_idx.any(): 196 | fp[idx] = 1 197 | continue 198 | this_gt = ground_truth.loc[gt_idx].reset_index() 199 | if lock_gt[this_gt['index']] >= 0: 200 | fp[idx] = 1 201 | else: 202 | tp[idx] = 1 203 | lock_gt[this_gt['index']] = idx 204 | 205 | # Computing prec-rec 206 | tp = np.cumsum(tp).astype(np.float) 207 | fp = np.cumsum(fp).astype(np.float) 208 | rec = tp / npos 209 | prec = tp / (tp + fp) 210 | return interpolated_prec_rec(prec, rec) 211 | 212 | def compute_video_hit_at_k(ground_truth, prediction, top_k=3, avg=False): 213 | """Compute accuracy at k prediction between ground truth and 214 | predictions data frames. This code is greatly inspired by evaluation 215 | performed in Karpathy et al. CVPR14. 216 | 217 | Parameters 218 | ---------- 219 | ground_truth : df 220 | Data frame containing the ground truth instances. 221 | Required fields: ['video-id', 'label'] 222 | prediction : df 223 | Data frame containing the prediction instances. 224 | Required fields: ['video-id, 'label', 'score'] 225 | 226 | Outputs 227 | ------- 228 | acc : float 229 | Top k accuracy score. 
230 | """ 231 | video_ids = np.unique(ground_truth['video-id'].values) 232 | avg_hits_per_vid = np.zeros(video_ids.size) 233 | for i, vid in enumerate(video_ids): 234 | pred_idx = prediction['video-id'] == vid 235 | if not pred_idx.any(): 236 | continue 237 | this_pred = prediction.loc[pred_idx].reset_index(drop=True) 238 | # Get top K predictions sorted by decreasing score. 239 | sort_idx = this_pred['score'].values.argsort()[::-1][:top_k] 240 | this_pred = this_pred.loc[sort_idx].reset_index(drop=True) 241 | # Get labels and compare against ground truth. 242 | pred_label = this_pred['label'].tolist() 243 | gt_idx = ground_truth['video-id'] == vid 244 | gt_label = ground_truth.loc[gt_idx]['label'].tolist() 245 | avg_hits_per_vid[i] = np.mean([1 if this_label in pred_label else 0 246 | for this_label in gt_label]) 247 | if not avg: 248 | avg_hits_per_vid[i] = np.ceil(avg_hits_per_vid[i]) 249 | return float(avg_hits_per_vid.mean()) 250 | -------------------------------------------------------------------------------- /Evaluation/eval_proposal.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | from utils import segment_iou 10 | from utils import wrapper_segment_iou 11 | 12 | class ANETproposal(object): 13 | 14 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 15 | PROPOSAL_FIELDS = ['results', 'version', 'external_data'] 16 | 17 | def __init__(self, ground_truth_filename=None, proposal_filename=None, 18 | ground_truth_fields=GROUND_TRUTH_FIELDS, 19 | proposal_fields=PROPOSAL_FIELDS, 20 | tiou_thresholds=np.linspace(0.5, 0.95, 10), 21 | max_avg_nr_proposals=None, 22 | subset='validation', verbose=False, 23 | check_status=True): 24 | if not ground_truth_filename: 25 | raise IOError('Please input a valid ground truth file.') 26 | if not proposal_filename: 27 | raise IOError('Please input a valid proposal file.') 28 | self.subset = subset 29 | self.tiou_thresholds = tiou_thresholds 30 | self.max_avg_nr_proposals = max_avg_nr_proposals 31 | self.verbose = verbose 32 | self.gt_fields = ground_truth_fields 33 | self.pred_fields = proposal_fields 34 | self.recall = None 35 | self.avg_recall = None 36 | self.proposals_per_video = None 37 | self.check_status = check_status 38 | # Retrieve blocked videos from server. 39 | if self.check_status: 40 | self.blocked_videos = get_blocked_videos() 41 | else: 42 | self.blocked_videos = list() 43 | # Import ground truth and proposals. 44 | self.ground_truth, self.activity_index = self._import_ground_truth( 45 | ground_truth_filename) 46 | self.proposal = self._import_proposal(proposal_filename) 47 | 48 | if self.verbose: 49 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 50 | nr_gt = len(self.ground_truth) 51 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 52 | nr_pred = len(self.proposal) 53 | print '\tNumber of proposals: {}'.format(nr_pred) 54 | print '\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds) 55 | 56 | def _import_ground_truth(self, ground_truth_filename): 57 | """Reads ground truth file, checks if it is well formatted, and returns 58 | the ground truth instances and the activity classes. 59 | 60 | Parameters 61 | ---------- 62 | ground_truth_filename : str 63 | Full path to the ground truth json file. 
64 | 65 | Outputs 66 | ------- 67 | ground_truth : df 68 | Data frame containing the ground truth instances. 69 | activity_index : dict 70 | Dictionary containing class index. 71 | """ 72 | with open(ground_truth_filename, 'r') as fobj: 73 | data = json.load(fobj) 74 | # Checking format 75 | if not all([field in data.keys() for field in self.gt_fields]): 76 | raise IOError('Please input a valid ground truth file.') 77 | 78 | # Read ground truth data. 79 | activity_index, cidx = {}, 0 80 | video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], [] 81 | for videoid, v in data['database'].iteritems(): 82 | if self.subset != v['subset']: 83 | continue 84 | if videoid in self.blocked_videos: 85 | continue 86 | for ann in v['annotations']: 87 | if ann['label'] not in activity_index: 88 | activity_index[ann['label']] = cidx 89 | cidx += 1 90 | video_lst.append(videoid) 91 | t_start_lst.append(ann['segment'][0]) 92 | t_end_lst.append(ann['segment'][1]) 93 | label_lst.append(activity_index[ann['label']]) 94 | 95 | ground_truth = pd.DataFrame({'video-id': video_lst, 96 | 't-start': t_start_lst, 97 | 't-end': t_end_lst, 98 | 'label': label_lst}) 99 | return ground_truth, activity_index 100 | 101 | def _import_proposal(self, proposal_filename): 102 | """Reads proposal file, checks if it is well formatted, and returns 103 | the proposal instances. 104 | 105 | Parameters 106 | ---------- 107 | proposal_filename : str 108 | Full path to the proposal json file. 109 | 110 | Outputs 111 | ------- 112 | proposal : df 113 | Data frame containing the proposal instances. 114 | """ 115 | with open(proposal_filename, 'r') as fobj: 116 | data = json.load(fobj) 117 | # Checking format... 118 | if not all([field in data.keys() for field in self.pred_fields]): 119 | raise IOError('Please input a valid proposal file.') 120 | 121 | # Read predictions. 122 | video_lst, t_start_lst, t_end_lst = [], [], [] 123 | score_lst = [] 124 | for videoid, v in data['results'].iteritems(): 125 | if videoid in self.blocked_videos: 126 | continue 127 | for result in v: 128 | video_lst.append(videoid) 129 | t_start_lst.append(result['segment'][0]) 130 | t_end_lst.append(result['segment'][1]) 131 | score_lst.append(result['score']) 132 | proposal = pd.DataFrame({'video-id': video_lst, 133 | 't-start': t_start_lst, 134 | 't-end': t_end_lst, 135 | 'score': score_lst}) 136 | return proposal 137 | 138 | def evaluate(self): 139 | """Evaluates a proposal file. To measure the performance of a 140 | method for the proposal task, we computes the area under the 141 | average recall vs average number of proposals per video curve. 142 | """ 143 | recall, avg_recall, proposals_per_video = average_recall_vs_avg_nr_proposals( 144 | self.ground_truth, self.proposal, 145 | max_avg_nr_proposals=self.max_avg_nr_proposals, 146 | tiou_thresholds=self.tiou_thresholds) 147 | print "Average Recall: {} " .format(avg_recall) 148 | area_under_curve = np.trapz(avg_recall, proposals_per_video) 149 | 150 | if self.verbose: 151 | print '[RESULTS] Performance on ActivityNet proposal task.' 152 | print '\tArea Under the AR vs AN curve: {}%'.format(100.*float(area_under_curve)/proposals_per_video[-1]) 153 | 154 | self.recall = recall 155 | self.avg_recall = avg_recall 156 | self.proposals_per_video = proposals_per_video 157 | 158 | def average_recall_vs_avg_nr_proposals(ground_truth, proposals, 159 | max_avg_nr_proposals=None, 160 | tiou_thresholds=np.linspace(0.5, 0.95, 10)): 161 | """ Computes the average recall given an average number 162 | of proposals per video. 
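This is the curve that ANETproposal.evaluate() integrates (np.trapz of the average
recall over the average number of proposals per video) to report the area under
the AR-AN curve.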
163 | 164 | Parameters 165 | ---------- 166 | ground_truth : df 167 | Data frame containing the ground truth instances. 168 | Required fields: ['video-id', 't-start', 't-end'] 169 | proposal : df 170 | Data frame containing the proposal instances. 171 | Required fields: ['video-id, 't-start', 't-end', 'score'] 172 | tiou_thresholds : 1darray, optional 173 | array with tiou thresholds. 174 | 175 | Outputs 176 | ------- 177 | recall : 2darray 178 | recall[i,j] is recall at ith tiou threshold at the jth average number of average number of proposals per video. 179 | average_recall : 1darray 180 | recall averaged over a list of tiou threshold. This is equivalent to recall.mean(axis=0). 181 | proposals_per_video : 1darray 182 | average number of proposals per video. 183 | """ 184 | 185 | # Get list of videos. 186 | video_lst = ground_truth['video-id'].unique() 187 | 188 | if not max_avg_nr_proposals: 189 | max_avg_nr_proposals = float(proposals.shape[0])/video_lst.shape[0] 190 | 191 | ratio = max_avg_nr_proposals*float(video_lst.shape[0])/proposals.shape[0] 192 | 193 | # Adaptation to query faster 194 | ground_truth_gbvn = ground_truth.groupby('video-id') 195 | proposals_gbvn = proposals.groupby('video-id') 196 | 197 | # For each video, computes tiou scores among the retrieved proposals. 198 | score_lst = [] 199 | total_nr_proposals = 0 200 | for videoid in video_lst: 201 | 202 | # Get proposals for this video. 203 | proposals_videoid = proposals_gbvn.get_group(videoid) 204 | this_video_proposals = proposals_videoid.loc[:, ['t-start', 't-end']].values 205 | 206 | # Sort proposals by score. 207 | sort_idx = proposals_videoid['score'].argsort()[::-1] 208 | this_video_proposals = this_video_proposals[sort_idx, :] 209 | 210 | # Get ground-truth instances associated to this video. 211 | ground_truth_videoid = ground_truth_gbvn.get_group(videoid) 212 | this_video_ground_truth = ground_truth_videoid.loc[:,['t-start', 't-end']].values 213 | 214 | if this_video_proposals.shape[0] == 0: 215 | n = this_video_ground_truth.shape[0] 216 | score_lst.append(np.zeros((n, 1))) 217 | continue 218 | 219 | if this_video_proposals.ndim != 2: 220 | this_video_proposals = np.expand_dims(this_video_proposals, axis=0) 221 | if this_video_ground_truth.ndim != 2: 222 | this_video_ground_truth = np.expand_dims(this_video_ground_truth, axis=0) 223 | 224 | nr_proposals = np.minimum(int(this_video_proposals.shape[0] * ratio), this_video_proposals.shape[0]) 225 | total_nr_proposals += nr_proposals 226 | this_video_proposals = this_video_proposals[:nr_proposals, :] 227 | 228 | # Compute tiou scores. 229 | tiou = wrapper_segment_iou(this_video_proposals, this_video_ground_truth) 230 | score_lst.append(tiou) 231 | 232 | # Given that the length of the videos is really varied, we 233 | # compute the number of proposals in terms of a ratio of the total 234 | # proposals retrieved, i.e. average recall at a percentage of proposals 235 | # retrieved per video. 236 | 237 | # Computes average recall. 238 | pcn_lst = np.arange(1, 101) / 100.0 *(max_avg_nr_proposals*float(video_lst.shape[0])/total_nr_proposals) 239 | matches = np.empty((video_lst.shape[0], pcn_lst.shape[0])) 240 | positives = np.empty(video_lst.shape[0]) 241 | recall = np.empty((tiou_thresholds.shape[0], pcn_lst.shape[0])) 242 | # Iterates over each tiou threshold. 243 | for ridx, tiou in enumerate(tiou_thresholds): 244 | 245 | # Inspect positives retrieved per video at different 246 | # number of proposals (percentage of the total retrieved). 
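        # Illustrative walk-through (assumed numbers, not from the dataset): if a
        # video has 2 ground-truth instances and 10 retrieved proposals, its entry
        # in score_lst is a 2x10 tiou matrix.  At threshold tiou=0.5,
        # true_positives_tiou flags the (instance, proposal) pairs with enough
        # overlap; for a percentage pcn=0.5 only the 5 highest-scoring proposals
        # are kept, and matches[i, j] counts how many of the 2 instances are hit
        # by at least one of them.  Summing over videos and dividing by the total
        # number of instances yields recall[ridx, :].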
247 | for i, score in enumerate(score_lst): 248 | # Total positives per video. 249 | positives[i] = score.shape[0] 250 | # Find proposals that satisfies minimum tiou threshold. 251 | true_positives_tiou = score >= tiou 252 | # Get number of proposals as a percentage of total retrieved. 253 | pcn_proposals = np.minimum((score.shape[1] * pcn_lst).astype(np.int), score.shape[1]) 254 | 255 | for j, nr_proposals in enumerate(pcn_proposals): 256 | # Compute the number of matches for each percentage of the proposals 257 | matches[i, j] = np.count_nonzero((true_positives_tiou[:, :nr_proposals]).sum(axis=1)) 258 | 259 | # Computes recall given the set of matches per video. 260 | recall[ridx, :] = matches.sum(axis=0) / positives.sum() 261 | 262 | # Recall is averaged. 263 | avg_recall = recall.mean(axis=0) 264 | 265 | # Get the average number of proposals per video. 266 | proposals_per_video = pcn_lst * (float(total_nr_proposals) / video_lst.shape[0]) 267 | 268 | return recall, avg_recall, proposals_per_video 269 | 270 | -------------------------------------------------------------------------------- /Evaluation/frame_prediction_BG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jul 3 14:49:28 2017 5 | 6 | @author: Arpan 7 | Description: Use c3d trained model for prediction. To be executed after 8 | training_model_m4.py 9 | """ 10 | 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cv2 19 | import caffe 20 | from joblib import Parallel, delayed 21 | 22 | 23 | # Temporal Proposals : Pretrained 24 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 25 | #ANNOTATION_FILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 26 | #PROPOSALS_FILENAME = '/home/arpan/DATA_Drive/ActivityNet/extra_features/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 27 | #SHUFFLE = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 28 | #MBH = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_features.h5" 29 | #MBH_IDS = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_quids.txt" 30 | #C3D = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D/sub_activitynet_v1-3.c3d.hdf5" 31 | #C3D_PCA = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D/PCA_activitynet_v1-3.hdf5" 32 | #SHUFFLE_IDS = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 33 | #MODEL = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/deploy_c3d_fc_net.prototxt" 34 | #PRETRAINED = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel" 35 | #MEANFILE = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 36 | #SUBSET = 'validation' 37 | 38 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 39 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 40 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 41 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle 
Features/ImageNetShuffle2016_features.h5' 42 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 43 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 44 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 45 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 46 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 47 | MODEL = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/deploy_c3d_fc_net.prototxt" 48 | PRETRAINED = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel" 49 | MEANFILE = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 50 | MEANFILE = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 51 | SUBSET = 'validation' 52 | 53 | 54 | def get_c3d_feature(fc3d, vid, pos, vfps): 55 | ''' 56 | Read the feature vector that is near the pos of video 57 | c3d features are taken for every 8th frame 58 | ''' 59 | row = int(pos/8) 60 | while not row <= fc3d[vid]['c3d_features'].shape[0]: 61 | print "Decrement by 1" 62 | row -= 1 63 | assert row <= fc3d[vid]['c3d_features'].shape[0] 64 | vec = fc3d[vid]['c3d_features'][row,:] 65 | return vec 66 | 67 | 68 | def get_predictions(net, test_vids, category_names): 69 | fc3d = h5py.File(C3D, 'r') 70 | fpca = h5py.File(C3D_PCA, 'r') 71 | train_mean = get_training_mean(MEANFILE) 72 | pred = {} 73 | c3d_lev2 = pd.DataFrame(np.zeros((len(test_vids), len(category_names))), \ 74 | index=test_vids, columns=category_names) 75 | bgThresh = 500000 76 | 77 | print "Calculate frames being ignored ..." 78 | result = Parallel(n_jobs=4)(delayed(get_rows_ignored) \ 79 | (test_vids[j], bgThresh, j) \ 80 | for j in range(len(test_vids))) 81 | 82 | for i, vid in enumerate(test_vids): 83 | print "{} --> For video : {}" .format(i, vid) 84 | vid_data = fc3d['v_'+vid]['c3d_features'][:] 85 | frms_ignored = result[i] 86 | # get the c3d features that need to be ignored. Note that c3d features 87 | # are sampled every 8 frames, therefore position is divided by 8 88 | rows_ignored = [int(r/8) for r in frms_ignored] 89 | print "Rows ignored : {}" .format(set(rows_ignored)) 90 | not_rows_ig = list(set(range(vid_data.shape[0])) - set(rows_ignored)) 91 | (rows, cols) = vid_data.shape 92 | # get predictions for each row of c3d feature 93 | vid_probs = pd.DataFrame(np.zeros((rows, len(category_names))), \ 94 | columns=category_names) 95 | #print frms_ignored 96 | for row in not_rows_ig: 97 | #print "Dims of vid_data[row,:] = {}" .format(vid_data[row,:].shape) 98 | #print "Values = {}" .format(vid_data[row,:]) 99 | f = vid_data[row,:].reshape(cols, 1, 1) 100 | # Subtract mean 101 | f = f - train_mean 102 | out = net.forward_all(data = np.asarray([f])) 103 | predicted_label = out['prob'][0].argmax(axis=0) 104 | #print "Predicted Label : {} :: Name : {}" .format(predicted_label, category_names[predicted_label]) 105 | #print "Rows :: " 106 | vid_probs.iloc[row,:] = out['prob'][0] 107 | #print vid_probs.iloc[row,:] 108 | # returns a list of dict like [{'score': score, 'label':labels[idx]}...] 
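        # Illustrative shape of the result (hypothetical scores; only
        # 'Applying sunscreen' is a label used elsewhere in this project):
        # pred[vid] -> [{'score': 41.2, 'label': 'Applying sunscreen'},
        #               {'score': 3.7, 'label': '<some other class>'},
        #               {'score': 1.9, 'label': '<a third class>'}]
        # vprobs    -> pandas Series of per-class probability sums over all c3d rows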
109 | pred[vid], vprobs = globalPrediction(vid, category_names, vid_probs) 110 | print pred[vid] 111 | c3d_lev2.loc[vid,:] = vprobs 112 | #break 113 | fc3d.close() 114 | fpca.close() 115 | return pred, c3d_lev2 116 | 117 | 118 | def globalPrediction(vid, category_names, vid_probs): 119 | """ 120 | Get a matrix of probabilities over the classes for the c3d features of 121 | a video. Generate the top 3 predictions from the prob matrix 122 | """ 123 | anno_list = [] 124 | # Idea 1 : To form the hist over the categories, each bin has sum of probs 125 | vprobs_sum = vid_probs.sum(axis=0) 126 | top_n = vprobs_sum.sort_values(ascending = False)[:3] 127 | labels = top_n.index.tolist() 128 | scores = top_n.values.tolist() 129 | for idx,score in enumerate(scores): 130 | anno_list.append({'score': score, 'label':labels[idx]}) 131 | 132 | 133 | # Idea 2 : Detect temporal continuity of category predicted. Longer the better 134 | 135 | # Idea 3 : Count the number of highest votes for top category. (Worse than 1) 136 | # If equal votes for >1 category then use Idea 1 137 | # finds the max val index among the columns for each row and the freq of the 138 | # occurrence of the column names (in decreasing order) 139 | # labels = vid_probs.idxmax(axis=1).value_counts()[:3].index.tolist() 140 | # scores = probs_sum[labels].tolist() 141 | # for idx,score in enumerate(scores): 142 | # anno_list.append({'score': score, 'label':labels[idx]}) 143 | 144 | return anno_list, vprobs_sum 145 | 146 | def get_rows_ignored(vid, bgThresh, v_no): 147 | """ 148 | Use background subtraction to decide which frames to ignore while prediction 149 | """ 150 | # process the video frame by frame 151 | print "For video : {} " .format(v_no) 152 | W, H = 160, 120 153 | vpath = os.path.join(VIDEOPATH, 'v_'+vid+'.mp4') 154 | cap = cv2.VideoCapture(vpath) 155 | if not cap.isOpened(): 156 | raise IOError("Capture object not opened !") 157 | #fps = cap.get(cv2.CAP_PROP_FPS) 158 | frms_ig = [] 159 | frms_msec = [] 160 | fgbg = cv2.createBackgroundSubtractorMOG2() #bg subtractor 161 | ret, prev_frame = cap.read() 162 | prev_frame = cv2.resize(prev_frame, (W, H) ) 163 | fgmask = fgbg.apply(prev_frame) 164 | # convert frame to GRAYSCALE 165 | prev_frame = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY) 166 | # iterate over the frames 167 | count = 0 168 | while cap.isOpened(): 169 | ret, frame = cap.read() 170 | if not ret: 171 | break 172 | frame = cv2.resize(frame, (W, H)) 173 | curr_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 174 | # To find the background mask and skip the frame if foreground is absent 175 | fgmask = fgbg.apply(frame) 176 | if np.sum(fgmask) 2 | 3 | 4 | 5 | 160 120 6 | 7 | 16 16 8 | 9 | 8 8 10 | 11 | 8 8 12 | 9 13 | 1 14 | 4. 
15 | 0 16 | 2.0000000000000001e-01 17 | 0 18 | 64 19 | 0 20 | 21 | -------------------------------------------------------------------------------- /Evaluation/localization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jul 13 14:13:27 2017 5 | 6 | @author: Arpan 7 | Description : For Localization Task 8 | """ 9 | import json 10 | import os 11 | import utils 12 | import numpy as np 13 | import h5py 14 | import pandas as pd 15 | import collections 16 | import cPickle 17 | import caffe 18 | from joblib import Parallel, delayed 19 | 20 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 21 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 22 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 23 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 24 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 25 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 26 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 27 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 28 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 29 | MODEL = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/deploy_c3d_fc_net.prototxt" 30 | PRETRAINED = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel" 31 | MEANFILE = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 32 | SUBSET = 'validation' 33 | 34 | #def get_temporalProps(net, test_vids, meta_info, category_names, n): 35 | # fc3d = h5py.File(C3D, 'r') 36 | # fpca = h5py.File(C3D_PCA, 'r') 37 | # train_mean = get_training_mean(MEANFILE) 38 | # pred = {} 39 | # c3d_lev2 = pd.DataFrame(np.zeros((len(test_vids), len(category_names))), \ 40 | # index=test_vids, columns=category_names) 41 | # for i, vid in enumerate(test_vids): 42 | # print "{} --> For video : {}" .format(i, vid) 43 | # (rows, cols) = fc3d['v_'+vid]['c3d_features'].shape 44 | # vid_data = fc3d['v_'+vid]['c3d_features'][:] 45 | # # get predictions for each row of c3d feature of vid 46 | # vid_probs = pd.DataFrame(np.zeros((rows, len(category_names))), \ 47 | # columns=category_names) 48 | # predicted_labels = [] 49 | # for row in range(rows): 50 | # #print "Dims of vid_data[row,:] = {}" .format(vid_data[row,:].shape) 51 | # #print "Values = {}" .format(vid_data[row,:]) 52 | # f = vid_data[row,:].reshape(cols, 1, 1) 53 | # #print "Values = {}" .format(f) 54 | # f = f - train_mean 55 | # #pr = net.forward() 56 | # out = net.forward_all(data = np.asarray([f])) 57 | # predicted_labels.append(out['prob'][0].argmax(axis=0)) 58 | # #print "Predicted Label : {} :: Name : {}" .format(predicted_label, category_names[predicted_label]) 59 | # #print "Rows :: " 60 | # vid_probs.iloc[row,:] = out['prob'][0] 61 | # #print vid_probs.iloc[row,:] 62 | # # returns a list of dict like [{'score': score, 'label':labels[idx]}...] 
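# Timing note for the (c3d feature row -> seconds) conversion used by
# get_vidLocalization further below: c3d features are sampled every 8 frames,
# so segment boundaries are mapped as (row_index + 1) * 8 / fps.  For example
# (hypothetical row), with the fallback median fps of 29.970029970029969,
# row 37 maps to (37 + 1) * 8 / 29.97 ~= 10.1 seconds.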
63 | # 64 | # pred[vid], vprobs = get_vidProposal(vid, vid_probs, predicted_labels,\ 65 | # meta_info, category_names, n) 66 | # c3d_lev2.loc[vid,:] = vprobs 67 | # #break 68 | # fc3d.close() 69 | # fpca.close() 70 | # return pred, c3d_lev2 71 | 72 | def get_vidLocalization(vid, vid_probs, vid_preds, meta_info, category_names, n): 73 | """Get a matrix of probabilities over the classes for the c3d features of 74 | a video. Generate the top 3 predictions from the prob matrix 75 | vid_probs: matrix of probs t x C. where t is the no of c3d features per vid 76 | and C is the no of classes 77 | vid_preds: list of predictions for that video (vid). len(vid_preds) = t 78 | and predicted class at position i is category_names[vid_preds[i]] 79 | 80 | """ 81 | anno_list = [] 82 | #n = 1 # Taking top n categories 83 | vprobs_sum = vid_probs.sum(axis=0) 84 | top_n = vprobs_sum.sort_values(ascending = False)[:n] 85 | topn_labels = top_n.index.tolist() 86 | topn_idx = [category_names.index(l) for l in topn_labels] 87 | # Idea 2 : Detect temporal continuity of category predicted. Longer the better 88 | #print "Predictions list : {}" .format(vid_preds) 89 | # find the max number of occurences for any class 90 | #counter = collections.Counter(vid_preds) 91 | #top_n = counter.most_common(3) # get a list of tuples 92 | #fps = 27.673616877683916 # mean fps of all vids in training set 93 | fps = 29.970029970029969 # median 29.970029970029969 3018 times 94 | if vid in meta_info.keys(): 95 | fps = meta_info[vid]['fps'] 96 | for idx in range(n): 97 | # get list of tuples (beg_pos, end_pos) 98 | segments = get_segments_for_cat(vid_preds, topn_idx[idx]) 99 | ##### get time in sec from video info 100 | if len(segments)>0: 101 | for (beg,end) in segments: 102 | begtime = (beg+1)*8./fps 103 | endtime = (end+1)*8./fps 104 | # taking score as the temporal extent of the activity of interest 105 | anno_list.append({'label': topn_labels[idx] ,'score': end-beg,\ 106 | 'segment': [begtime, endtime]}) 107 | 108 | # Find the top predicted label 109 | return anno_list, vprobs_sum 110 | 111 | #def get_segments_for_cat(pred_lst, cat_id, nth_val): 112 | # """Retrieve segments corresponding to category number 'cat_id' from the list of 113 | # category predictions 'pred_lst'. 
Return a list of segment tuples 114 | # """ 115 | # int_seg_dist = 20 # 2 for 8*i frames 116 | # seg_len_th = 3 - nth_val 117 | # segments = [] 118 | # beg , end = -1, -1 119 | # seg_flag = False 120 | # for i,pr in enumerate(pred_lst): 121 | # if pr==cat_id and not seg_flag: 122 | # beg = i 123 | # seg_flag = True 124 | # elif pr!=cat_id and seg_flag: 125 | # end = i 126 | # segments.append((beg, end)) 127 | # seg_flag = False 128 | # beg, end = -1, -1 129 | # if seg_flag: 130 | # segments.append((beg, i+1)) 131 | # 132 | # seg_flag = True 133 | # merged_segments, new_segments = [], [] 134 | # if len(segments)==0: 135 | # return [] 136 | # (bPrev, ePrev) = segments[0] 137 | # # Merge 'close' segments based on int_seg_dist 138 | # for i,(bCurr,eCurr) in enumerate(segments): 139 | # if i==0: 140 | # continue 141 | # if (bCurr-ePrev)>int_seg_dist : 142 | # merged_segments.append((bPrev, ePrev)) 143 | # bPrev = bCurr 144 | # ePrev = eCurr 145 | # merged_segments.append((bPrev, ePrev)) 146 | # # Create a dict of segment lengths for each tuple in segments 147 | # seg_lens = {} 148 | # for idx,seg in enumerate(merged_segments): 149 | # seg_lens[idx] = seg[1] - seg[0] 150 | # # get segment idxs in decreasing order of lens (sort dict values and get keys) 151 | # decr_seg_lens = sorted(seg_lens, key=seg_lens.get, reverse=True) 152 | # # For very small length videos 153 | # if len(pred_lst) < 3 or len(decr_seg_lens) < 3: 154 | # seg_len_th = 0 155 | # 156 | # for idx in decr_seg_lens: 157 | # if seg_lens[idx] < seg_len_th: 158 | # break 159 | # new_segments.append(merged_segments[idx]) 160 | # 161 | # return new_segments 162 | 163 | def get_segments_for_cat(pred_lst, cat_id): 164 | """Retrieve segments corresponding to category number 'cat_id' from the list of 165 | category predictions 'pred_lst'. 
Return a list of segment tuples 166 | """ 167 | int_seg_dist = 60 # 2 for 8*i frames 168 | segments = [] 169 | beg , end = -1, -1 170 | seg_flag = False 171 | for i,pr in enumerate(pred_lst): 172 | if pr==cat_id and not seg_flag: 173 | beg = i 174 | seg_flag = True 175 | elif pr!=cat_id and seg_flag: 176 | end = i 177 | segments.append((beg, end)) 178 | seg_flag = False 179 | beg, end = -1, -1 180 | if seg_flag: 181 | segments.append((beg, i+1)) 182 | 183 | seg_flag = True 184 | new_segments = [] 185 | if len(segments)==0: 186 | return [] 187 | (bPrev, ePrev) = segments[0] 188 | for i,(bCurr,eCurr) in enumerate(segments): 189 | if i==0: 190 | continue 191 | if (bCurr-ePrev)<=int_seg_dist : 192 | ePrev = eCurr 193 | else: 194 | new_segments.append((bPrev, ePrev)) 195 | bPrev = bCurr 196 | ePrev = eCurr 197 | new_segments.append((bPrev, ePrev)) 198 | 199 | return new_segments 200 | -------------------------------------------------------------------------------- /Evaluation/optical_flow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Feb 19 03:51:29 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Extract the Optical Flow data from action videos 9 | 10 | """ 11 | import cv2 12 | import numpy as np 13 | import os 14 | import json 15 | import lmdb 16 | import caffe 17 | import pandas as pd 18 | from matplotlib import pyplot as plt 19 | from joblib import Parallel, delayed 20 | 21 | 22 | # Input: 23 | # srcVideoFolder: where the action videos are located (for train/val/test set) 24 | # Output: Create optical flow visualization data, transformed to HSV space 25 | # ToDo: write the feature onto a file and convert to lmdb. 26 | def construct_datasets(srcVideoFolder, lmdb_folder, pathPrefix, \ 27 | samples_files, category_names): 28 | 29 | DIFF_FRAMES = [1] 30 | print("No of samples_files = "+str(len(samples_files))) # =no_of_categories 31 | lmdb_name = os.path.join(lmdb_folder, "val_OF_lmdb") 32 | if not os.path.exists(os.path.dirname(lmdb_name)): 33 | os.makedirs(os.path.dirname(lmdb_name)) 34 | 35 | # form a pandas dataframe with video_id 36 | video_id, pos, labels = [], [], [] 37 | for idx,f in enumerate(samples_files): 38 | if category_names[idx] in f: 39 | with open(os.path.join(pathPrefix, f), "r") as fobj: 40 | pos_samples = json.load(fobj) 41 | for v_id, pos_list in pos_samples.iteritems(): 42 | pos.extend(pos_list) 43 | video_id.extend(np.repeat(v_id, len(pos_list)).tolist()) 44 | labels.extend(np.repeat(idx, len(pos_list)).tolist()) 45 | samples_df = pd.DataFrame({'video_id': video_id, 46 | 'position': pos, 47 | 'label': labels}) 48 | print "No of samples for all the categories = {} " .format(samples_df.shape[0]) 49 | 50 | # Shuffle the dataframe in-place 51 | samples_df = samples_df.sample(frac=1).reset_index(drop=True) 52 | # write dataframe to disk (csv) 53 | samples_df.to_csv(os.path.join(lmdb_folder, "samples_val.csv"), index=False) 54 | 55 | # Create lmdb 56 | (H, W, C) = (120, 160, 3) 57 | N = samples_df.shape[0] # no of rows (=no of visualizations = 5k) 58 | # twice the size of total number of OF visualizations 59 | map_size = int(N*H*W*C*3) # approx 429 GB 60 | #map_size = int(N*720*1280*C*2) # approx 429 GB 61 | 62 | env = lmdb.open(lmdb_name, map_size=map_size) 63 | 64 | i = 0 # LMDB index variable 65 | # iterate over the rows of the pandas dataframe 66 | end_samples = samples_df.shape[0] 67 | r = (end_samples - i)/200 68 | print "r = %d " %r 69 | 
########################################################################### 70 | nCat = 4*len(category_names) # = 200 71 | nCat_samples = (end_samples - i)/nCat # = N = 1000 72 | lmdb_id = 0 73 | 74 | # Parallelizing the lmdb creation process 75 | for i in range(nCat_samples): 76 | 77 | result = Parallel(n_jobs=4)(delayed(get_optical_flow_vid) \ 78 | (os.path.join(srcVideoFolder, 'v_'+samples_df['video_id'][i*nCat+j]+'.mp4'), \ 79 | samples_df['position'][i*nCat+j], \ 80 | DIFF_FRAMES, H, W) \ 81 | for j in range(nCat)) 82 | 83 | with env.begin(write = True) as txn: 84 | for l in range(len(result)): 85 | row_no = (i*nCat)+l 86 | pos = samples_df['position'][row_no] 87 | video_id = samples_df['video_id'][row_no] 88 | lab = samples_df['label'][row_no] 89 | print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 90 | 91 | for img in result[l]: 92 | img = np.rollaxis(img, 2) # C, H, W 93 | datum = caffe.proto.caffe_pb2.Datum() 94 | datum.channels = img.shape[0] 95 | datum.height = img.shape[1] 96 | datum.width = img.shape[2] 97 | datum.data = img.tobytes() 98 | datum.label = lab 99 | str_id = '{:08}'.format(lmdb_id) 100 | # The encode is only essential in Python 3 101 | txn.put(str_id.encode('ascii'), datum.SerializeToString()) 102 | lmdb_id += 1 103 | print "Write No : %d" %(i+1) 104 | ########################################################################### 105 | # for commit_no in range(r): 106 | # with env.begin(write=True) as txn: 107 | # for idx in range(200): # samples_df.iterrows(): 108 | # row_no = (200*commit_no)+idx 109 | # assert i==row_no 110 | # pos = samples_df['position'][row_no] 111 | # video_id = samples_df['video_id'][row_no] 112 | # lab = samples_df['label'][row_no] 113 | # print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 114 | # imgs = [] 115 | # vpath = os.path.join(srcVideoFolder, 'v_'+video_id+'.mp4') 116 | # imgs.extend(get_optical_flow_vid(vpath, pos, DIFF_FRAMES, H, W)) 117 | # # returned frames are HxWxC (120x160x3) in a list 118 | # 119 | # for img in imgs: 120 | # # rollaxis if needed 121 | # img = np.rollaxis(img, 2) # C, H, W 122 | # datum = caffe.proto.caffe_pb2.Datum() 123 | # datum.channels = img.shape[0] 124 | # datum.height = img.shape[1] 125 | # datum.width = img.shape[2] 126 | # datum.data = img.tobytes() 127 | # datum.label = lab 128 | # str_id = '{:08}'.format(i) 129 | # # The encode is only essential in Python 3 130 | # txn.put(str_id.encode('ascii'), datum.SerializeToString()) 131 | # i = i+1 132 | 133 | print "LMDB Created Successfully !!" 
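    # Minimal, commented-out sketch for spot-checking the new LMDB (keys follow
    # the '{:08}' format written above; caffe.io.datum_to_array is assumed to be
    # available from pycaffe):
    # with lmdb.open(lmdb_name, readonly=True).begin() as txn:
    #     datum = caffe.proto.caffe_pb2.Datum()
    #     datum.ParseFromString(txn.get('00000000'.encode('ascii')))
    #     arr = caffe.io.datum_to_array(datum)   # C x H x W, e.g. 3 x 120 x 160
    #     print "label = %d, shape = %s" % (datum.label, str(arr.shape))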
134 | return 135 | 136 | # from a srcVideo, get the optical flow data of ith and (i+x) frame 137 | # where x belongs to diff_frames 138 | def get_optical_flow_vid(srcVideo, position, diff_frames, height, width): 139 | res_flow_img = [] 140 | cap = cv2.VideoCapture(srcVideo) 141 | #fgbg = cv2.createBackgroundSubtractorMOG2() #bg subtractor 142 | if not cap.isOpened(): 143 | raise IOError("Capture object cannot be opened for "+srcVideo) 144 | #################################################### 145 | # for resizing the optical flow visualization 146 | resize_flag = True 147 | (h, w) = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),\ 148 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) 149 | if h==height and w==width: 150 | resize_flag = False 151 | 152 | #print "No of frames = {}", format(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 153 | 154 | for diff in diff_frames: 155 | #print "For diff = %d" %diff 156 | cap.set(cv2.CAP_PROP_POS_FRAMES, position) 157 | ret, frame = cap.read() 158 | # Sometimes the last few frames of a video are not read, then read the 159 | # last readable frame by moving backwards one frame at a time 160 | while not ret: 161 | #print "Frame not read ! Moving backwards in capture object !" 162 | #raise IOError("Frame not read :: "+srcVideo+" :: Position: "+str(position)) 163 | position -= 1 164 | cap.set(cv2.CAP_PROP_POS_FRAMES, position) 165 | ret, frame = cap.read() 166 | 167 | # curr_frame = frame.copy() 168 | # cap.set(cv2.CAP_PROP_POS_FRAMES, position+diff) 169 | # ret, next_frame = cap.read() 170 | # # If next frame is unavailable, then make cf as nf and read previous frame in cf 171 | # if not ret: 172 | # #print "Cannot read next frame... Reading previous frame instead." 173 | # cap.set(cv2.CAP_PROP_POS_FRAMES, position-diff) 174 | # next_frame = curr_frame.copy() 175 | # ret, curr_frame = cap.read() 176 | # if not ret: 177 | # raise IOError("Cannot read previous frame also.") 178 | # 179 | # curr_frame = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY) 180 | # next_frame = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY) 181 | # # Compute the optical flow 182 | # flow = cv2.calcOpticalFlowFarneback(curr_frame, next_frame, None, 0.5, 1, 12, 3, 5, 1.2, 0) 183 | # #vis_vectors = draw_flow(curr_frame, flow, 8) 184 | # vis_bgr = draw_flow_bgr(flow, frame) 185 | 186 | if resize_flag: 187 | # scaling image. 
Mostly it scales down to 120x160 (hxw) INTER_LINEAR default 188 | frame = cv2.resize(frame, (width, height) ) 189 | 190 | res_flow_img.append(frame) 191 | #cv2.imshow("Curr Frame", curr_frame) 192 | #cv2.imshow("Next Frame", next_frame) 193 | #cv2.imshow("Flow Vecs", vis_vectors) 194 | #cv2.imshow("Flow BGR", vis_bgr) 195 | #waitTillEscPressed() 196 | 197 | #res_mean = [] 198 | #res_mean.append(np.average(res_flow_img, axis=0).astype(np.uint8)) 199 | 200 | cap.release() 201 | #cv2.destroyAllWindows() 202 | return res_flow_img 203 | 204 | 205 | # draw the OF field on image, with grids, decrease step for finer grid 206 | def draw_flow(img, flow, step=16): 207 | h, w = img.shape[:2] 208 | y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1) 209 | fx, fy = flow[y,x].T 210 | lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2) 211 | lines = np.int32(lines + 0.5) 212 | vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 213 | cv2.polylines(vis, lines, 0, (0, 255, 0)) 214 | for (x1, y1), (x2, y2) in lines: 215 | cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1) 216 | return vis 217 | 218 | def draw_flow_bgr(flow, sample_frame): 219 | hsv = np.zeros_like(sample_frame) 220 | #print "hsv_shape : "+str(hsv.shape) 221 | hsv[...,1] = 255 222 | mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1]) 223 | 224 | hsv[...,0] = ang*180/np.pi/2 225 | hsv[...,2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX) 226 | bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) 227 | return bgr 228 | 229 | 230 | def waitTillEscPressed(): 231 | while(True): 232 | if cv2.waitKey(10)==27: 233 | print("Esc Pressed") 234 | return 235 | 236 | 237 | if __name__=="__main__": 238 | # the dataset folder contains 6 folders boxing, running etc containing videos for each 239 | # It also contains 00sequences.txt where meta info is given 240 | dataset = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos" 241 | 242 | srcVideo = os.path.join(dataset, 'v_2GEZgHcA7zU.mp4') 243 | 244 | # img = get_optical_flow_vid(srcVideo, 2984, [1,2,3], 120, 160) 245 | # for i,im in enumerate(img): 246 | # print "Flow image no : %d" %(i+1) 247 | # cv2.imshow("Frame", im) 248 | # waitTillEscPressed() 249 | lmdb_folder = "/home/hadoop/VisionWorkspace/ActivityNet" 250 | p = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/samples_test_5000" 251 | sampls = os.listdir(p) 252 | construct_datasets(dataset, lmdb_folder, p, sampls, ['Applying sunscreen']) 253 | #cv2.destroyAllWindows() 254 | 255 | ########################################################### 256 | # Training the caffe model 257 | #proc = subprocess.Popen(["/home/hadoop/caffe/build/tools/caffe","train","--solver=optical_flow_lenet_solver.prototxt"],stderr=subprocess.PIPE) 258 | #res = proc.communicate()[1] 259 | 260 | #caffe.set_mode_gpu() 261 | #solver = caffe.get_solver("config.prototxt") 262 | #solver.solve() 263 | 264 | #print res 265 | ########################################################### 266 | # Applying the model 267 | 268 | #net = caffe.Net("demoDeploy.prototxt", "./opt_flow_quick_iter_20000.caffemodel", caffe.TEST) 269 | #print(get_data_for_id_from_lmdb("/home/lnmiit/caffe/examples/optical_flow/val_opt_flow_lmdb/", "00000209")) 270 | #l, f = get_data_for_id_from_lmdb("/home/lnmiit/caffe/examples/optical_flow/val_opt_flow_lmdb/", "00000209") 271 | 272 | ########################################################### 273 | ## Check Background Subtraction on sample videos (Visualize) 274 | # srcVideo = 
"/home/hadoop/VisionWorkspace/KTH_OpticalFlow/dataset/kth_actions_test/person03_walking_d1_uncomp.avi" 275 | # cap = cv2.VideoCapture(srcVideo) 276 | # fgbg = cv2.createBackgroundSubtractorMOG2() 277 | # while(cap.isOpened()): 278 | # ret, frame = cap.read() 279 | # fgmask = fgbg.apply(frame) 280 | # cv2.imshow('frame',fgmask) 281 | # print np.sum(fgmask) 282 | # waitTillEscPressed() 283 | # #k = cv2.waitKey(30) & 0xff 284 | # #if k == 27: 285 | # # break 286 | # cap.release() 287 | # cv2.destroyAllWindows() 288 | 289 | 290 | 291 | -------------------------------------------------------------------------------- /Evaluation/taxonomy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jun 29 04:24:44 2017 5 | @author: Arpan 6 | Description: Taxonomy generation 7 | 8 | """ 9 | import json 10 | import numpy as np 11 | import utils 12 | import collections 13 | 14 | 15 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 16 | JSONFILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 17 | SUBSET = 'validation' 18 | 19 | def get_parentnode(taxonomy, nodeName): 20 | """ 21 | Retrieve the parentId of a node given its label 22 | """ 23 | for entry in taxonomy: 24 | if entry['nodeName'] == nodeName: 25 | return entry['parentId'] 26 | print "Node Name {} is invalid !" .format(nodeName) 27 | return -1 28 | 29 | #def trace_path_to_root(taxonomy, label): 30 | 31 | def get_nodeName(taxonomy, nodeId): 32 | """ 33 | Retrieve nodeId given a nodeName 34 | """ 35 | 36 | def get_nodeId(taxonomy, nodeName): 37 | """ 38 | Retrieve nodeID from the given nodeName 39 | """ 40 | for entry in taxonomy: 41 | if nodeName == entry['nodeName']: 42 | return entry['nodeId'] 43 | print "Node Name {} is invalid !" .format(nodeName) 44 | return -1 45 | 46 | def nAIntersectB(database, taxonomy, train_vids_all): 47 | n = 0 48 | 49 | def findDiscripancies(taxonomy): 50 | """ 51 | Found nodeId 269 and 270 have same names 'Health-related self care' 52 | """ 53 | i = 0 54 | for entry in taxonomy: 55 | if entry['parentName'] != None: 56 | print entry['nodeName'] 57 | if entry['nodeName'].lower() == entry['parentName'].lower(): 58 | i += 1 59 | print "No of same nodes = {} " .format(i) 60 | 61 | def get_no_of_annotations(database, label, train_vids_all): 62 | """ 63 | Iterate over the training videos and count the no of egs belonging to class i 64 | """ 65 | count = 0 66 | for vid in train_vids_all: 67 | for ann in database[vid]['annotations']: 68 | if ann['label'] == label: 69 | count += 1 70 | return count 71 | 72 | 73 | 74 | def display_all_paths(taxonomy): 75 | """ 76 | Iterate over all the entries of the taxonomy dict and for each display the 77 | path from that node to the root node. 
78 | """ 79 | for i,entry in enumerate(taxonomy): 80 | print "For nodeId : {} :: NodeName : {} " .format(entry['nodeId'], entry['nodeName']) 81 | parentId = entry['parentId'] 82 | parentName = entry['parentName'] 83 | while parentId != None: 84 | print "ParentId : {} :: ParentName : {}" .format(parentId, parentName) 85 | # Search for nodeId == parentId 86 | for temp in taxonomy: 87 | if temp['nodeId'] == parentId: 88 | parentId = temp['parentId'] 89 | parentName = temp['parentName'] 90 | break 91 | if i == 5: 92 | break 93 | 94 | 95 | if __name__ == '__main__': 96 | 97 | # Read the database, version and taxonomy from JSON file 98 | with open("data/activity_net.v1-3.min.json", "r") as fobj: 99 | data = json.load(fobj) 100 | 101 | database = data["database"] 102 | taxonomy = data["taxonomy"] 103 | version = data["version"] 104 | 105 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, JSONFILE) 106 | 107 | print "No of non-existing videos: %d" % len(non_existing_videos) 108 | 109 | train_vids_all = [] 110 | [train_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 111 | 112 | # Find list of available training videos 113 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 114 | 115 | ########################################################################### 116 | # Get categories information from the database (Train+Validation sets) 117 | category = [] 118 | for x in database: 119 | cc = [] 120 | for l in database[x]["annotations"]: 121 | cc.append(l["label"]) 122 | category.extend(list(set(cc))) 123 | category_count = collections.Counter(category) 124 | 125 | category_names = sorted(category_count.keys()) 126 | print "Total No of classes: %d" % len(category_names) 127 | 128 | #print category_names 129 | ########################################################################### 130 | 131 | display_all_paths(taxonomy) 132 | #findDiscripancies(taxonomy) 133 | 134 | for cat in category_names: 135 | ncat = get_no_of_annotations(database, cat, train_vids_all) 136 | print "category {} :: |vids| {}" .format(cat, ncat) 137 | -------------------------------------------------------------------------------- /Evaluation/testing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 9 17:23:43 2017 5 | 6 | @author: Arpan 7 | 8 | Description: ActivityNet -- Testing and submission file generation 9 | """ 10 | 11 | import collections 12 | import commands 13 | import json 14 | import glob 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | import os 18 | from utils import get_video_number_of_frames 19 | from skimage.transform import resize 20 | import cv2 21 | import random 22 | 23 | # Server Params 24 | # VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 25 | VIDEO_PATH = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos" 26 | SUBSET = 'validation' 27 | 28 | 29 | ########################################################################### 30 | 31 | def get_sample_frame_from_video(videoid, duration, start_time, end_time, \ 32 | video_path=VIDEO_PATH): 33 | filename = glob.glob(os.path.join(video_path, "v_%s*" % videoid))[0] 34 | nr_frames = get_video_number_of_frames(filename) 35 | fps = (nr_frames*1.0)/duration 36 | start_frame, end_frame = int(start_time*fps), int(end_time*fps) 37 | frame_idx = random.choice(range(start_frame, end_frame)) 38 | cap = cv2.VideoCapture(filename) 39 | keepdoing, cnt = 
True, 1 40 | while keepdoing: 41 | ret, img = cap.read() 42 | if cnt==frame_idx: 43 | break 44 | assert ret==True, "Ended video and frame not selected." 45 | cnt+=1 46 | return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 47 | 48 | ########################################################################### 49 | 50 | def get_random_video_from_activity(database, activity, subset="validation"): 51 | videos = [] 52 | for x in database: 53 | if database[x]["subset"] != subset: continue 54 | xx = random.choice(database[x]["annotations"]) 55 | if xx["label"]==activity: 56 | yy = {"videoid": x, "duration": database[x]["duration"], 57 | "start_time": xx["segment"][0], "end_time": xx["segment"][1]} 58 | videos.append(yy) 59 | return random.choice(videos) 60 | 61 | ########################################################################### 62 | 63 | def get_video_prediction(vid, category_names, model): 64 | # Read the video frames and predict categories with scores 65 | predictions_lst = [] 66 | no_of_preds = np.random.randint(1,4) 67 | for i in range(no_of_preds): 68 | score = float(np.random.rand(1)) 69 | label_idx = np.random.randint(200) 70 | label = category_names[label_idx] 71 | pred_dict = {'score': score, 'label':label} 72 | predictions_lst.append(pred_dict) 73 | return predictions_lst 74 | 75 | 76 | if __name__=='__main__': 77 | with open("data/activity_net.v1-3.min.json", "r") as fobj: 78 | data = json.load(fobj) 79 | 80 | database = data["database"] 81 | taxonomy = data["taxonomy"] 82 | version = data["version"] 83 | 84 | ########################################################################### 85 | # Release Summary 86 | all_node_ids = [x["nodeId"] for x in taxonomy] 87 | print len(all_node_ids) 88 | leaf_node_ids = [] 89 | for x in all_node_ids: 90 | is_parent = False 91 | # iterate through the parentIds and if the nodeID is a parentId then 92 | # it is not a leaf node else it is a leaf node 93 | for query_node in taxonomy: 94 | if query_node["parentId"]==x: 95 | is_parent = True 96 | break 97 | if not is_parent: leaf_node_ids.append(x) 98 | 99 | leaf_nodes = [x for x in taxonomy if x["nodeId"] in leaf_node_ids] 100 | 101 | vsize = commands.getoutput("du %s -lhs" % VIDEO_PATH).split("/")[0] 102 | 103 | total_duration = sum([database[x]['duration'] for x in database])/3600.0 104 | 105 | print "ActivityNet %s" % version 106 | print "Total number of videos: %d" % len(database) 107 | print "Total number of nodes in taxonomy: %d" % len(taxonomy) 108 | print "Total number of leaf nodes: %d" % len(leaf_nodes) 109 | print "Total size of downloaded videos: %s" % vsize 110 | print "Total hours of video: %0.1f" % total_duration 111 | 112 | ########################################################################### 113 | # Get categories information from the database (Train+Validation sets) 114 | category = [] 115 | for x in database: 116 | cc = [] 117 | for l in database[x]["annotations"]: 118 | cc.append(l["label"]) 119 | category.extend(list(set(cc))) 120 | category_count = collections.Counter(category) 121 | 122 | category_names = sorted(category_count.keys()) 123 | print "Total No of classes: %d" % len(category_names) 124 | #print category_names 125 | 126 | ########################################################################### 127 | # Iterate over the validation/test set video files and obtain 128 | # the prediction for each file 129 | subset_video_ids = [] 130 | ext_data_dict = {'used': False, 'details': \ 131 | 'Describe the external data over here. 
If necessary for each prediction'} 132 | 133 | out_dict = {'version':version} 134 | 135 | [subset_video_ids.append(x) for x in database if database[x]['subset']==SUBSET] 136 | results_dict = {} 137 | for v_id in subset_video_ids: 138 | results_dict[v_id] = get_video_prediction(v_id, category_names, "") 139 | 140 | out_dict['results'] = results_dict 141 | out_dict['external_data'] = ext_data_dict 142 | 143 | json_filename = 'submission_'+SUBSET+'.json' 144 | with open(json_filename, 'w') as fp: 145 | json.dump(out_dict, fp) 146 | 147 | 148 | # write the out_dict to a JSON file 149 | ########################################################################### 150 | 151 | # plt.figure(num=None, figsize=(18, 8), dpi=100) 152 | # xx = np.array(category_count.keys()) 153 | # yy = np.array([category_count[x] for x in category_count]) 154 | # xx_idx = yy.argsort()[::-1] 155 | # plt.bar(range(len(xx)), yy[xx_idx], color=(240.0/255.0,28/255.0,1/255.0)) 156 | # plt.ylabel("Number of videos per activity ") 157 | # plt.xticks(range(len(xx)), xx[xx_idx], rotation="vertical", size="small") 158 | # plt.title("ActivityNet VERSION 1.2 - Untrimmed Video Classification") 159 | # plt.show() 160 | 161 | ########################################################################### 162 | 163 | # read a model 164 | -------------------------------------------------------------------------------- /Evaluation/training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 9 17:21:26 2017 5 | 6 | @author: Arpan 7 | 8 | Description: ActivityNet -- Training 9 | """ 10 | import json 11 | import os 12 | import utils 13 | import collections 14 | import training_model_svm as tm1 15 | 16 | 17 | # Server Params 18 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 19 | #JSONFILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 20 | #LMDB_FOLDER = "/home/arpan/DATA_Drive/ActivityNet" 21 | 22 | # Local Params 23 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 24 | JSONFILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 25 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/new2_lmdb" 26 | SUBSET = 'training' 27 | ############################################################################### 28 | 29 | # Train on HOG descriptors 30 | # Iterate over the catogories and for each category train an SVM model 31 | def sample_activity_frames(database, meta_info, category_names, N): 32 | """ Function samples N frame positions from the annotated video segments 33 | of each activity category. For 200 categories, 200 files will be created. 34 | Each file will have a 35 | Input:dataframe cell value using column name and row no 36 | database: dictionary from activity_net.v1-3.min.json 37 | meta_info: dictionary of meta_information for training videos 38 | {'3aQnQEL3USQ':{u'total_frames': 6238, 39 | u'dimensions': [360, 480], u'fps': 29.5} ....} 40 | category_names: list of category_names, (sorted) 41 | Output: Write json files of the form 42 | {"vMYPNyBR3d0": [327, 327, 337, 345, 346, 359],...} 43 | Each file has N positions of activities, key is video-id and positions sampled 44 | from that video 45 | """ 46 | print "Called train_m1 !!!" 
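    # Illustrative sanity check once the per-category files are written
    # (assuming N=4000 and the 'Applying sunscreen' category used elsewhere
    # in this project):
    # with open("samples_4000/Applying sunscreen.json", "r") as fp:
    #     d = json.load(fp)
    # print "videos: %d, sampled positions: %d" % (len(d), sum(len(v) for v in d.values()))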
47 | video_ids = meta_info.keys() 48 | #N = 5000 # No of samples of each class to be picked from activity seq 49 | dest_folder = "samples_"+str(N) 50 | if not os.path.exists(dest_folder): 51 | os.makedirs(dest_folder) 52 | # get list of training videos which belong to category 53 | for cat in category_names: 54 | print "Iterate for category %s" %cat 55 | video_ids_for_cat = utils.get_videos_for_category(database, \ 56 | video_ids,\ 57 | cat) 58 | # Retrieve samples from positive example videos. 59 | # Get dict of videos_ids and segments of videos which have action 60 | # corresponding to a category. 61 | #print "Getting video segment information for %s videos..." %cat 62 | # train_segments dict: 63 | train_segments = tm1.get_training_segments(database, video_ids_for_cat, cat) 64 | #print "Getting random frames from +ve example videos..." 65 | #print train_segments 66 | tr_samples = tm1.get_sample_frames(train_segments, meta_info, N) 67 | with open(os.path.join(dest_folder, cat+".json"), "w") as fp: 68 | json.dump(tr_samples, fp) 69 | 70 | # select 640x480 resolution and resize accordingly 71 | #break 72 | # retrieve samples from negative example videos 73 | 74 | 75 | ############################################################################### 76 | 77 | def create_training_lmdb(srcSamplesMetaFiles, category_names): 78 | """ Loop over all the existing training videos 79 | category_names are sorted list of categories, where its index 80 | represents the category no. 81 | Path for json files of category videos and sample frame info: samples_5000 82 | This function assumes that you have already called train_m1 and the 83 | json files for each category are present in the path specified 84 | Steps: 85 | 1. Extract the optical flow visualizations from the training set for each 86 | category. 87 | 2. Convert into lmdb database 88 | 3. Train a CNN on the lmdb database 89 | 4. Save the trained model to disk 90 | Input: meta_info: same as in the function above 91 | """ 92 | 93 | samples_files = [s+".json" for s in category_names] 94 | 95 | assert len(samples_files)==len(category_names) 96 | # check order of categories names matches with samples_files 97 | for idx,f in enumerate(samples_files): 98 | if not (category_names[idx] in f): 99 | print f 100 | print samples_files 101 | raise IOError("Order of categories does not match order of sample files.") 102 | 103 | import optical_flow as of 104 | of.construct_datasets(VIDEOPATH, LMDB_FOLDER, srcSamplesMetaFiles, \ 105 | samples_files, category_names) 106 | 107 | return 108 | 109 | 110 | ############################################################################### 111 | 112 | def train_m3(database, train_video_ids, category_names): 113 | # Loop over all the existing training videos 114 | # category_names are sorted list of categories, where its index 115 | # represents the category no. 
116 | 117 | for idx in train_video_ids: 118 | # for each video call a method to train an SVM 119 | tm1.train_svm(os.path.join(VIDEOPATH, "v_"+idx+".mp4"), \ 120 | database[idx]['annotations'], 10, category_names) 121 | # break used to execute for only one video 122 | break 123 | 124 | return 125 | 126 | ############################################################################### 127 | 128 | if __name__=='__main__': 129 | # Read the database, version and taxonomy from JSON file 130 | with open("data/activity_net.v1-3.min.json", "r") as fobj: 131 | data = json.load(fobj) 132 | 133 | database = data["database"] 134 | taxonomy = data["taxonomy"] 135 | version = data["version"] 136 | 137 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, JSONFILE) 138 | 139 | print "No of non-existing videos: %d" % len(non_existing_videos) 140 | 141 | train_vids_all = [] 142 | [train_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 143 | 144 | # Find list of available training videos 145 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 146 | 147 | ########################################################################### 148 | # Get categories information from the database (Train+Validation sets) 149 | category = [] 150 | for x in database: 151 | cc = [] 152 | for l in database[x]["annotations"]: 153 | cc.append(l["label"]) 154 | category.extend(list(set(cc))) 155 | category_count = collections.Counter(category) 156 | 157 | category_names = sorted(category_count.keys()) 158 | print "Total No of classes: %d" % len(category_names) 159 | 160 | #print category_names 161 | 162 | ########################################################################### 163 | # We use the meta-information such as FPS, totalFrames and dimensions 164 | # in order to obtain a lower and upper bound for the frame sampling 165 | # To write meta-information to a json file. Uncomment following 3 lines 166 | # to generate the json file. 167 | meta_info = tm1.get_meta_info(VIDEOPATH, train_existing_vids) 168 | with open("training_data_meta_info.json", "w") as fp: 169 | json.dump(meta_info, fp) 170 | 171 | # Read the training videos meta_information from file. 172 | #with open("val_data_meta_info.json", "r") as fobj: 173 | # meta_info = json.load(fobj) 174 | 175 | ########################################################################### 176 | 177 | # Train models 178 | 179 | n = 4000 # no of samples to extract for each category of training videos 180 | #sample_activity_frames(database, meta_info, category_names, N=n) 181 | 182 | # Method 1: Train a series of SVMs on the training set videos 183 | 184 | # Uncomment below 3 lines for viewing frames selected 185 | # with open("samples_"+str(n)+"/Applying sunscreen.json") as fp: 186 | # samples_d = json.load(fp) 187 | # tm1.display_sample_frames(samples_d, VIDEOPATH) 188 | 189 | # Method 2: Train a CNN from scratch on the consecutive frame OF 190 | # visualization images. 191 | #create_training_lmdb("samples_"+str(n), category_names) 192 | print "LMDB Created !!" 
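    # Minimal sketch of steps 3-4 of create_training_lmdb (train the CNN on the
    # new LMDB and let the solver snapshot the weights); the solver file name is
    # hypothetical, must point at LMDB_FOLDER, and caffe must be imported first:
    # caffe.set_mode_gpu()
    # solver = caffe.get_solver("optical_flow_solver.prototxt")
    # solver.solve()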
193 | 194 | # Method 3: Use existing or third-party pre-trained models 195 | # Features: C3D , MBH (Improved Dense Traj) , ImageNetShuffle 196 | 197 | # Extract 198 | 199 | # FineTune models 200 | 201 | # Save the models to files -------------------------------------------------------------------------------- /Evaluation/training_model_hog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 16 22:56:22 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Use frame HOG features 9 | """ 10 | 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cPickle 19 | from sklearn import svm 20 | from sklearn.ensemble import RandomForestClassifier 21 | from joblib import Parallel, delayed 22 | import lmdb 23 | import caffe 24 | import cv2 25 | 26 | 27 | # Temporal Proposals : Pretrained 28 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 29 | #ANNOTATION_FILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 30 | #PROPOSALS_FILENAME = '/home/arpan/DATA_Drive/ActivityNet/extra_features/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 31 | #SHUFFLE = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 32 | #MBH = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_features.h5" 33 | #MBH_IDS = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_quids.txt" 34 | #C3D = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 35 | #C3D_PCA = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/PCA_activitynet_v1-3.hdf5" 36 | #SHUFFLE_IDS = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 37 | #SUBSET = 'validation' 38 | 39 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 40 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 41 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 42 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 43 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 44 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 45 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 46 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 47 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 48 | HOGFILE = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/hog.xml" 49 | SUBSET = 'validation' 50 | 51 | def construct_dataset(meta_info, samples_csv, category_names, prefix): 52 | 53 | lmdb_name = os.path.join(LMDB_FOLDER, prefix+"_hog_lmdb") 54 | if not os.path.exists(os.path.dirname(lmdb_name)): 55 | os.makedirs(os.path.dirname(lmdb_name)) 56 | 57 | samples_df = pd.read_csv(samples_csv) 58 | print "Creating HOG features..." 
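    # Where the HOG vector size used below comes from, assuming the hog.xml
    # values correspond to winSize 160x120, blockSize 16x16, blockStride 8x8,
    # cellSize 8x8 and 9 orientation bins:
    #   block positions   = ((160-16)/8 + 1) * ((120-16)/8 + 1) = 19 * 14 = 266
    #   descriptor length = 266 blocks * (2*2) cells/block * 9 bins = 9576 floats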
59 | 60 | # Create lmdb 61 | (H, W, C) = (1, 1, 2000) 62 | N = samples_df.shape[0] # no of rows (=no of visualizations = 5k) 63 | # twice the size of total number of OF visualizations 64 | map_size = int(N*H*W*C*3*15) # approx 429 GB 65 | #map_size = int(N*720*1280*C*2) # approx 429 GB 66 | 67 | env = lmdb.open(lmdb_name, map_size=map_size) 68 | i = 0 # LMDB index variable 69 | # iterate over the rows of the pandas dataframe 70 | end_samples = samples_df.shape[0] 71 | r = (end_samples - i)/200 72 | print "No of samples per class = %d " %r 73 | ########################################################################### 74 | nCat = 4*len(category_names) # = 800 per batch 75 | nCat_samples = (end_samples - i)/nCat # = N = 1000 76 | lmdb_id = 0 77 | 78 | # HOG returns a 9576 sized vector 79 | # Parallelizing the lmdb creation process 80 | for i in range(nCat_samples): 81 | 82 | result = Parallel(n_jobs=4)(delayed(get_hog_feature) \ 83 | (samples_df['video_id'][i*nCat+j], \ 84 | samples_df['position'][i*nCat+j]) 85 | for j in range(nCat)) 86 | 87 | with env.begin(write = True) as txn: 88 | for l,vec in enumerate(result): 89 | row_no = (i*nCat)+l 90 | pos = samples_df['position'][row_no] 91 | video_id = samples_df['video_id'][row_no] 92 | lab = samples_df['label'][row_no] 93 | print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 94 | 95 | #img = np.rollaxis(img, 2) # C, H, W 96 | datum = caffe.proto.caffe_pb2.Datum() 97 | # since it is a vector, it only has 1st dimension 98 | #print "vec shape : {}" .format(vec.shape) 99 | datum.channels = vec.shape[0] 100 | datum.height = 1 101 | datum.width = 1 102 | #datum.data = img.tobytes() 103 | datum.float_data.extend(vec.astype(float).flat) 104 | datum.label = lab 105 | str_id = '{:08}'.format(lmdb_id) 106 | # The encode is only essential in Python 3 107 | txn.put(str_id.encode('ascii'), datum.SerializeToString()) 108 | lmdb_id += 1 109 | print "Write No : %d" %(i+1) 110 | print "LMDB construction successful !" 111 | return 112 | 113 | def get_hog_feature(vid, pos): 114 | ''' 115 | Read the frame at 'pos' of video and find the hog feature for the frame 116 | ''' 117 | height, width = 120, 160 118 | cap = cv2.VideoCapture(os.path.join(VIDEOPATH, 'v_'+vid+'.mp4')) 119 | if not cap.isOpened(): 120 | raise IOError('Capture object not opened !') 121 | hog = cv2.HOGDescriptor("hog.xml") 122 | cap.set(cv2.CAP_PROP_POS_FRAMES, pos) 123 | ret, frame = cap.read() 124 | while not ret: 125 | print "Frame not read. Move backwards." 126 | pos -= 1 127 | cap.set(cv2.CAP_PROP_POS_FRAMES, pos) 128 | ret, frame = cap.read() 129 | 130 | frame = cv2.resize(frame, (width, height)) 131 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 132 | #cv2.imshow("frame", frame) 133 | #waitTillEscPressed() 134 | hist = hog.compute(frame) 135 | cols = hist.shape[0] 136 | #print "HOG Shape : {}" .format(hist.shape) 137 | #print "Reshaped : {}" .format(hist.reshape((cols)).shape) 138 | hist = hist.reshape((cols)) 139 | cap.release() 140 | #cv2.destroyAllWindows() 141 | return hist 142 | 143 | def waitTillEscPressed(): 144 | while(True): 145 | # For moving forward 146 | if cv2.waitKey(10)==27: 147 | print("Esc Pressed. 
Move Forward without labeling.") 148 | return 1 149 | 150 | 151 | def predict_on_glFeat(X_val, nFeatures, database, category_names, val_existing_vids, \ 152 | destPath, nEstimators): 153 | 154 | # Create a dataframe with rows as egs and cols as class 1 prob values 155 | threshold = 0.5 156 | X = X_val.loc[:,range(nFeatures)] 157 | y_prob = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 158 | columns=category_names, index=X_val.index) 159 | for cat in category_names: 160 | # load the model 161 | f_name = os.path.join(destPath+"_"+str(nEstimators),\ 162 | destPath+"_"+str(nEstimators)+"_"+cat+".pkl") 163 | with open(f_name, "rb") as fid: 164 | rf_model = cPickle.load(fid) 165 | 166 | # Assign positive class probabilities 167 | y_prob[cat] = rf_model.predict_proba(X)[:,1] 168 | print "No. of examples above threshold for class {} : {}" \ 169 | .format(cat, sum(y_prob[cat]>threshold)) 170 | 171 | # Top 5 predictions 172 | pred = {} 173 | #y_prob.apply(np.argmax, axis=1) 174 | for vid in list(X.index): 175 | #print "ID : %s " %vid 176 | # select top 3 prediction values and their labels and save in dict 177 | top_n = y_prob.loc[vid,:].sort_values(ascending=False)[:3] 178 | labels = top_n.index.tolist() 179 | scores = top_n.values.tolist() 180 | pred[vid] = [] 181 | for idx,score in enumerate(scores): 182 | pred[vid].append({'score': score, 'label':labels[idx]}) 183 | 184 | return pred, y_prob 185 | 186 | 187 | if __name__=='__main__': 188 | 189 | # Read the database, version and taxonomy from JSON file 190 | with open(ANNOTATION_FILE, "r") as fobj: 191 | data = json.load(fobj) 192 | 193 | database = data["database"] 194 | taxonomy = data["taxonomy"] 195 | version = data["version"] 196 | 197 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, ANNOTATION_FILE) 198 | 199 | print "No of non-existing videos: %d" % len(non_existing_videos) 200 | 201 | train_vids_all = [] 202 | [train_vids_all.append(x) for x in database if database[x]['subset']=='training'] 203 | # Find list of available training videos 204 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 205 | 206 | val_vids_all = [] 207 | [val_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 208 | # Find list of available training videos 209 | val_existing_vids = list(set(val_vids_all) - set(non_existing_videos)) 210 | 211 | ########################################################################### 212 | # Get categories information from the database (Train+Validation sets) 213 | category = [] 214 | for x in database: 215 | cc = [] 216 | for l in database[x]["annotations"]: 217 | cc.append(l["label"]) 218 | category.extend(list(set(cc))) 219 | category_count = collections.Counter(category) 220 | 221 | category_names = sorted(category_count.keys()) 222 | print "Total No of classes: %d" % len(category_names) 223 | 224 | #print category_names 225 | ########################################################################### 226 | # MBH and ImageNetShuffle Features in training_model_m2.py 227 | ########################################################################### 228 | # Create HOG feature dataset 229 | 230 | # Read the meta_info and sample_positions files 231 | samples_csv = "tr_samples_4k.csv" 232 | samples_val_csv = "val_samples_1k.csv" 233 | with open("training_data_meta_info.json", "r") as fobj: 234 | meta_info = json.load(fobj) 235 | construct_dataset(meta_info, samples_csv, category_names, "test_train") 236 | 237 | with open("val_data_meta_info.json", "r") as fobj: 238 | 
val_meta_info = json.load(fobj) 239 | construct_dataset(val_meta_info, samples_val_csv, category_names, "test_val") 240 | 241 | # train a model without convolution layers, only fc layers should be there 242 | 243 | 244 | ########################################################################### 245 | # Consider Taxonomy of the classes 246 | # Temporal Proposals 247 | 248 | ########################################################################### 249 | 250 | # out_dict = {'version':version} 251 | # subset_video_ids = [] 252 | # ext_data_dict = {'used': True, 'details': \ 253 | # 'C3D features.'} 254 | # 255 | # out_dict['results'] = pred 256 | # out_dict['external_data'] = ext_data_dict 257 | # 258 | # json_filename = 'submission_t3_'+SUBSET+'.json' 259 | # with open(json_filename, 'w') as fp: 260 | # json.dump(out_dict, fp) 261 | ########################################################################### 262 | -------------------------------------------------------------------------------- /Evaluation/training_model_m3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 14 20:53:14 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Training Model : Method 2 9 | Using CNNs for training. Pretrained models 10 | """ 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cPickle 19 | from sklearn import svm 20 | from sklearn.ensemble import RandomForestClassifier 21 | from joblib import Parallel, delayed 22 | 23 | 24 | # Temporal Proposals : Pretrained 25 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 26 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 27 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 28 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 29 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 30 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 31 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 32 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 33 | SUBSET = 'validation' 34 | 35 | 36 | def train_model_linSVM(X, y): 37 | # select the parameters, generate probabilities etc 38 | model = svm.LinearSVC() 39 | return model.fit(X, y) 40 | 41 | 42 | def train_on_shuffle(database, category_names, train_vids_all, destPath="shuffle_RF"): 43 | 44 | # ImageNet shuffle features: 45 | # 19994 x 1024 features 46 | fobj = h5py.File(SHUFFLE, 'r') 47 | 48 | # shape is 19994 x 1024 49 | print "Shape : {}" .format(fobj['features'].shape) 50 | 51 | # As the videos are sorted, the index created will be the video_no 52 | # corresponding to the video row in h5 database. 
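# The quids file is read below with pandas (usecols=[2]): that third column
# holds entries of the form <prefix>_<video_id>.<ext>, so the code strips the
# part up to the first '_' and the trailing extension to recover the bare
# video id, which is then used as the row index of the feature DataFrame.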
53 | shuffle_ids = pd.read_csv(SHUFFLE_IDS, header='infer', \ 54 | names = ['id'], usecols = [2]) 55 | 56 | sh_id = [s.split('_', 1)[-1] for s in shuffle_ids['id']] 57 | sh_id = [s.rsplit('.',1)[0] for s in sh_id] 58 | # Copy all the values to numpy array var 59 | X_all = fobj['features'][:] 60 | 61 | # join features with video_ids 62 | X_all = pd.DataFrame(X_all, index=sh_id) 63 | 64 | # subset rows for training and validation 65 | X_train = X_all.loc[train_vids_all] 66 | #X_val = X_all[X_all['id'].isin(val_vids_appended)] 67 | del X_all 68 | fobj.close() 69 | print "X_train = {} " .format(X_train.shape) 70 | y_train = pd.DataFrame(np.zeros((len(X_train), len(category_names))),\ 71 | columns=category_names, index=X_train.index) 72 | 73 | # Join the columns for each category 74 | X_train = pd.concat([X_train, y_train], axis = 1) 75 | 76 | #print X_train.head() 77 | # Iterate over the videos of X_train and X_val and set labels 78 | for vid in train_vids_all: 79 | for annotation in database[vid]['annotations']: 80 | X_train.at[vid, annotation['label']] = 1 81 | 82 | print "Labels set !" 83 | #print X_train.head() 84 | # Iterate over the categories and for each category, prepare the dataset 85 | for cat in category_names: 86 | # for a cat, find the video IDs which have labels 87 | pos_samples = X_train[X_train[cat]==1] 88 | pos_samples = pos_samples.loc[:, range(1024)+[cat]] 89 | # sample negative rows equal to the no of pos examples 90 | neg_samples = X_train[X_train[cat]==0].sample(n=len(pos_samples), \ 91 | random_state = 321) 92 | neg_samples = neg_samples.loc[:, range(1024)+[cat]] 93 | 94 | # join pos and negative samples and shuffle 95 | X = pd.concat([pos_samples, neg_samples]) 96 | 97 | X = X.sample(frac=1, random_state=231) # shuffle 98 | y = np.array(X[cat]) 99 | X = X.loc[:,range(1024)] 100 | 101 | rf_model = train_model_rf(X, y, estimators=20, seed=123) 102 | 103 | if not os.path.exists(destPath): 104 | os.makedirs(destPath) 105 | f_name = os.path.join(destPath, destPath+"_"+cat+".pkl") 106 | with open(f_name, "wb") as fid: 107 | cPickle.dump(rf_model, fid) 108 | print "Model saved for category : %s " %cat 109 | 110 | print "Models Trained and saved to files." 
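# Illustrative helper (a sketch, not used elsewhere in this file): the
# per-class pickles written above all follow <destPath>/<destPath>_<cat>.pkl,
# so they can be reloaded once into a dict and reused across predictions:
# def load_category_models(destPath, category_names):
#     models = {}
#     for cat in category_names:
#         f_name = os.path.join(destPath, destPath + "_" + cat + ".pkl")
#         with open(f_name, "rb") as fid:
#             models[cat] = cPickle.load(fid)
#     return models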
111 | # this returns a list of 10 SVM 112 | #result = Parallel(n_jobs=3)(delayed(train_model_rf)(X, y, seed) for seed in range(10)) 113 | 114 | 115 | def train_model_rf(X, y, estimators, seed): 116 | # select the parameters, generate probabilities etc 117 | clf = RandomForestClassifier(n_estimators = estimators, random_state=seed) 118 | clf = clf.fit(X, y) 119 | return clf 120 | 121 | def predict_on_shuffle(database, category_names, val_existing_vids, destPath="RF"): 122 | # ImageNet shuffle features: 123 | # 19994 x 1024 features 124 | fobj = h5py.File(SHUFFLE, 'r') 125 | # shape is 19994 x 1024 126 | print "Shape : {}" .format(fobj['features'].shape) 127 | 128 | shuffle_ids = pd.read_csv(SHUFFLE_IDS, header='infer', \ 129 | names = ['id'], usecols = [2]) 130 | 131 | sh_id = [s.split('_', 1)[-1] for s in shuffle_ids['id']] 132 | sh_id = [s.rsplit('.',1)[0] for s in sh_id] 133 | # Copy all the values to numpy array var 134 | X_all = fobj['features'][:] 135 | 136 | # join features with video_ids 137 | X_all = pd.DataFrame(X_all, index=sh_id) 138 | 139 | # subset rows for validation set 140 | X_val = X_all.loc[val_existing_vids] 141 | #X_val = X_all[X_all['id'].isin(val_vids_appended)] 142 | del X_all 143 | fobj.close() 144 | print "X_val = {} " .format( X_val.shape) 145 | y_val = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 146 | columns=category_names, index=X_val.index) 147 | 148 | # Join the columns for each category 149 | X_val = pd.concat([X_val, y_val], axis = 1) 150 | 151 | for vid in val_existing_vids: 152 | for annotation in database[vid]['annotations']: 153 | X_val.at[vid, annotation['label']] = 1 154 | 155 | print "Labels set !" 156 | # Create a dataframe with rows are classes and cols are 0 class and 1 class 157 | # Probability values 158 | #prob = pd.DataFrame 159 | X = X_val.loc[:,range(1024)] 160 | y_prob = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 161 | columns=category_names, index=X_val.index) 162 | for cat in category_names: 163 | # load the model 164 | f_name = os.path.join(destPath, destPath+"_"+cat+".pkl") 165 | with open(f_name, "rb") as fid: 166 | rf_model = cPickle.load(fid) 167 | 168 | # Assign positive class probabilities 169 | y_prob[cat] = rf_model.predict_proba(X)[:,1] 170 | 171 | print "Probabilities for class {} : {}" .format(cat,y_prob[cat]) 172 | 173 | # Top 5 predictions 174 | threshold = 0.5 175 | pred = {} 176 | #y_prob.apply(np.argmax, axis=1) 177 | for vid in list(X.index): 178 | #print "ID : %s " %vid 179 | # select top 5 prediction values and their labels and save in dict 180 | top_n = y_prob.loc[vid,:].sort_values(ascending=False)[:3] 181 | labels = top_n.index.tolist() 182 | scores = top_n.values.tolist() 183 | pred[vid] = [] 184 | for idx,score in enumerate(scores): 185 | pred[vid].append({'score': score, 'label':labels[idx]}) 186 | 187 | return pred 188 | 189 | 190 | # for testing the functions 191 | if __name__=='__main__': 192 | 193 | # Read the database, version and taxonomy from JSON file 194 | with open(ANNOTATION_FILE, "r") as fobj: 195 | data = json.load(fobj) 196 | 197 | database = data["database"] 198 | taxonomy = data["taxonomy"] 199 | version = data["version"] 200 | 201 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, ANNOTATION_FILE) 202 | 203 | print "No of non-existing videos: %d" % len(non_existing_videos) 204 | 205 | train_vids_all = [] 206 | [train_vids_all.append(x) for x in database if database[x]['subset']=='training'] 207 | # Find list of available training videos 208 | 
train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 209 | 210 | val_vids_all = [] 211 | [val_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 212 | # Find list of available training videos 213 | val_existing_vids = list(set(val_vids_all) - set(non_existing_videos)) 214 | 215 | ########################################################################### 216 | # Get categories information from the database (Train+Validation sets) 217 | category = [] 218 | for x in database: 219 | cc = [] 220 | for l in database[x]["annotations"]: 221 | cc.append(l["label"]) 222 | category.extend(list(set(cc))) 223 | category_count = collections.Counter(category) 224 | 225 | category_names = sorted(category_count.keys()) 226 | print "Total No of classes: %d" % len(category_names) 227 | 228 | #print category_names 229 | ########################################################################### 230 | 231 | 232 | 233 | # Temporal Proposals 234 | # Optimized for high recall 235 | # 19994 x Mi proposals (For each video a number of proposals 236 | # each with a score in decreasing order) 237 | 238 | # MBH 239 | # 19994 x 65536 features 240 | # 241 | 242 | # ImageNet Shuffle Features 243 | #train_on_shuffle(database, category_names, train_vids_all, "RF") 244 | 245 | pred = predict_on_shuffle(database, category_names, val_existing_vids, "RF") 246 | out_dict = {'version':version} 247 | subset_video_ids = [] 248 | ext_data_dict = {'used': False, 'details': \ 249 | 'Describe the external data over here. If necessary for each prediction'} 250 | 251 | out_dict['results'] = pred 252 | out_dict['external_data'] = ext_data_dict 253 | 254 | json_filename = 'submission_t3_'+SUBSET+'.json' 255 | with open(json_filename, 'w') as fp: 256 | json.dump(out_dict, fp) 257 | # Step 1: Form the datasets 258 | # To train 200 SVMs, each for an activity class. 
259 | # Use One Vs All SVM ( for not used LinearSVC, which is a multi-class classifier ) 260 | 261 | 262 | 263 | 264 | # training videos_info is in meta_info 265 | # check whether a particular video is -------------------------------------------------------------------------------- /Evaluation/training_model_m4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 16 22:56:22 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Use C3D features 9 | """ 10 | 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cPickle 19 | from sklearn import svm 20 | from sklearn.ensemble import RandomForestClassifier 21 | from joblib import Parallel, delayed 22 | import lmdb 23 | import caffe 24 | 25 | 26 | # Temporal Proposals : Pretrained 27 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 28 | #ANNOTATION_FILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 29 | #PROPOSALS_FILENAME = '/home/arpan/DATA_Drive/ActivityNet/extra_features/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 30 | #SHUFFLE = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 31 | #MBH = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_features.h5" 32 | #MBH_IDS = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_quids.txt" 33 | #C3D = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 34 | #C3D_PCA = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/PCA_activitynet_v1-3.hdf5" 35 | #SHUFFLE_IDS = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 36 | #SUBSET = 'validation' 37 | 38 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 39 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 40 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 41 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 42 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 43 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 44 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 45 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 46 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 47 | SUBSET = 'validation' 48 | 49 | def construct_dataset(meta_info, samples_csv, category_names, prefix): 50 | 51 | lmdb_name = os.path.join(LMDB_FOLDER, prefix+"_c3d_lmdb") 52 | if not os.path.exists(os.path.dirname(lmdb_name)): 53 | os.makedirs(os.path.dirname(lmdb_name)) 54 | 55 | samples_df = pd.read_csv(samples_csv) 56 | print "Loading C3D features..." 
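# The C3D HDF5 file opened below is expected to expose one group per video,
# keyed 'v_<video_id>', each holding a 'c3d_features' matrix with one 500-d
# row for roughly every 8th frame (see get_c3d_feature further down); the PCA
# file is opened alongside it here but only closed at the end.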
57 | fc3d = h5py.File(C3D, 'r') 58 | fpca = h5py.File(C3D_PCA, 'r') 59 | 60 | # Create lmdb 61 | (H, W, C) = (1, 1, 500) 62 | N = samples_df.shape[0] # no of rows (=no of visualizations = 5k) 63 | # twice the size of total number of OF visualizations 64 | map_size = int(N*H*W*C*3*15) # approx 429 GB 65 | #map_size = int(N*720*1280*C*2) # approx 429 GB 66 | 67 | env = lmdb.open(lmdb_name, map_size=map_size) 68 | 69 | i = 0 # LMDB index variable 70 | # iterate over the rows of the pandas dataframe 71 | end_samples = samples_df.shape[0] 72 | r = (end_samples - i)/200 73 | print "No of samples per class = %d " %r 74 | ########################################################################### 75 | nCat = 4*len(category_names) # = 200 76 | nCat_samples = (end_samples - i)/nCat # = N = 1000 77 | lmdb_id = 0 78 | 79 | # Parallelizing the lmdb creation process 80 | for i in range(nCat_samples): 81 | 82 | result = Parallel(n_jobs=1)(delayed(get_c3d_feature) \ 83 | (fc3d, 'v_'+samples_df['video_id'][i*nCat+j], \ 84 | samples_df['position'][i*nCat+j], \ 85 | meta_info[samples_df['video_id'][i*nCat+j]]['fps']) \ 86 | for j in range(nCat)) 87 | 88 | with env.begin(write = True) as txn: 89 | for l in range(len(result)): 90 | row_no = (i*nCat)+l 91 | pos = samples_df['position'][row_no] 92 | video_id = samples_df['video_id'][row_no] 93 | lab = samples_df['label'][row_no] 94 | print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 95 | 96 | for vec in result[l]: 97 | #img = np.rollaxis(img, 2) # C, H, W 98 | datum = caffe.proto.caffe_pb2.Datum() 99 | # since it is a vector, it only has 1st dimension 100 | datum.channels = vec.shape[0] 101 | datum.height = 1 102 | datum.width = 1 103 | #datum.data = img.tobytes() 104 | datum.float_data.extend(vec.astype(float).flat) 105 | datum.label = lab 106 | str_id = '{:08}'.format(lmdb_id) 107 | # The encode is only essential in Python 3 108 | txn.put(str_id.encode('ascii'), datum.SerializeToString()) 109 | lmdb_id += 1 110 | print "Write No : %d" %(i+1) 111 | print "LMDB construction successful !" 112 | fc3d.close() 113 | fpca.close() 114 | return 115 | 116 | def get_c3d_feature(fc3d, vid, pos, vfps): 117 | ''' 118 | Read the feature vector that is near the pos of video 119 | c3d features are taken for every 8th frame 120 | ''' 121 | vec = [] 122 | #print "vid : {} :: pos : {} :: vfps : {}" .format(vid, pos, vfps) 123 | #print "Shape : {}" .format(fc3d[vid]['c3d_features'].shape) 124 | row = int(pos/8) 125 | while not row < fc3d[vid]['c3d_features'].shape[0]: 126 | #print "Decrement by 1" 127 | row -= 1 128 | vec.append(fc3d[vid]['c3d_features'][row,:]) 129 | return vec 130 | 131 | 132 | def partition_dataset(feature, train_vids_all, val_existing_vids): 133 | if feature == "C3D": 134 | print "Loading C3D features..." 
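# NOTE: this C3D branch is still incomplete -- the statements below refer to
# fobj and ids, which are never defined here.  One possible way (an assumption,
# not what this file does) to get a fixed-length per-video descriptor would be
# to mean-pool the per-segment rows, e.g.:
#   rows = [np.mean(fc3d['v_' + v]['c3d_features'][:], axis=0) for v in train_vids_all]
#   X_train = pd.DataFrame(rows, index=train_vids_all)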
135 | fc3d = h5py.File(C3D, 'r') 136 | fpca = h5py.File(C3D_PCA, 'r') 137 | else: 138 | raise IOError("Invalid first argument: "+feature) 139 | 140 | for vid in fobj.keys(): 141 | fc3d[vid]['c3d_features'][:] 142 | # Too large, need >10GB memory, for MBH 143 | X_all = fobj['features'][:] 144 | X_all = pd.DataFrame(X_all , index=ids) 145 | X_train = X_all.loc[train_vids_all] 146 | X_val = X_all.loc[val_existing_vids] 147 | del X_all 148 | fobj.close() 149 | print "X_train = {} " .format(X_train.shape) 150 | nFeat = X_train.shape[1] 151 | y_train = pd.DataFrame(np.zeros((len(X_train), len(category_names))),\ 152 | columns=category_names, index=X_train.index) 153 | y_val = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 154 | columns=category_names, index=X_val.index) 155 | 156 | # Join the columns for each category 157 | X_train = pd.concat([X_train, y_train], axis = 1) 158 | X_val = pd.concat([X_val, y_val], axis = 1) 159 | #print X_train.head() 160 | # Iterate over the videos of X_train and X_val and set labels 161 | for vid in train_vids_all: 162 | for annotation in database[vid]['annotations']: 163 | X_train.at[vid, annotation['label']] = 1 164 | 165 | print "Labels set for Training Set !" 166 | 167 | for vid in val_existing_vids: 168 | for annotation in database[vid]['annotations']: 169 | X_val.at[vid, annotation['label']] = 1 170 | 171 | print "Labels set for Validation Test !" 172 | 173 | return X_train, X_val, nFeat 174 | 175 | 176 | def train_on_glFeat(X_train, nFeatures, database, category_names, train_vids_all, \ 177 | destPath, seed, nEstimators): 178 | """Function to read the MBH features and train a classifier for each class. 179 | Input: 180 | feature: "MBH" for training on MBH features and "SHUFFLE" for training on shuffle 181 | database: read from JSON file 182 | category_names: sorted list of class names 183 | train_vids_all: list of video ids in the training set 184 | nEstimators: no of trees for Random Forest 185 | """ 186 | #print X_train.head() 187 | # Iterate over the categories and for each category, prepare the dataset 188 | for cat in category_names: 189 | # for a cat, find the video IDs which have labels 190 | pos_samples = X_train[X_train[cat]==1] 191 | pos_samples = pos_samples.loc[:, range(nFeatures)+[cat]] 192 | # sample negative rows equal to the no of pos examples 193 | neg_samples = X_train[X_train[cat]==0].sample(n=len(pos_samples), \ 194 | random_state = 321) 195 | neg_samples = neg_samples.loc[:, range(nFeatures)+[cat]] 196 | 197 | # join pos and negative samples 198 | X = pd.concat([pos_samples, neg_samples]) 199 | 200 | X = X.sample(frac=1, random_state=231) # shuffle 201 | y = np.array(X[cat]) 202 | X = X.loc[:,range(nFeatures)] 203 | 204 | rf_model = train_model_rf(X, y, estimators = nEstimators, seed=seed) 205 | 206 | if not os.path.exists(destPath+"_"+str(nEstimators)): 207 | os.makedirs(destPath+"_"+str(nEstimators)) 208 | f_name = os.path.join(destPath+"_"+str(nEstimators), \ 209 | destPath+"_"+str(nEstimators)+"_"+cat+".pkl") 210 | with open(f_name, "wb") as fid: 211 | cPickle.dump(rf_model, fid) 212 | print "Model saved for category : %s " %cat 213 | 214 | print "Models Trained and saved to files." 
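# Sketch of how these helpers appear meant to be chained (partition_dataset
# above would need finishing first; "c3d_RF" is an illustrative prefix):
# X_train, X_val, nFeat = partition_dataset("C3D", train_existing_vids, val_existing_vids)
# train_on_glFeat(X_train, nFeat, database, category_names, train_existing_vids, "c3d_RF", 123, 20)
# pred, y_prob = predict_on_glFeat(X_val, nFeat, database, category_names, val_existing_vids, "c3d_RF", 20)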
215 | # this returns a list of 10 SVM 216 | #result = Parallel(n_jobs=3)(delayed(train_model_rf)(X, y, seed) for seed in range(10)) 217 | 218 | def train_model_rf(X, y, estimators, seed): 219 | # select the parameters, generate probabilities etc 220 | clf = RandomForestClassifier(n_estimators = estimators, random_state=seed) 221 | clf = clf.fit(X, y) 222 | return clf 223 | 224 | def predict_on_glFeat(X_val, nFeatures, database, category_names, val_existing_vids, \ 225 | destPath, nEstimators): 226 | 227 | # Create a dataframe with rows as egs and cols as class 1 prob values 228 | threshold = 0.5 229 | X = X_val.loc[:,range(nFeatures)] 230 | y_prob = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 231 | columns=category_names, index=X_val.index) 232 | for cat in category_names: 233 | # load the model 234 | f_name = os.path.join(destPath+"_"+str(nEstimators),\ 235 | destPath+"_"+str(nEstimators)+"_"+cat+".pkl") 236 | with open(f_name, "rb") as fid: 237 | rf_model = cPickle.load(fid) 238 | 239 | # Assign positive class probabilities 240 | y_prob[cat] = rf_model.predict_proba(X)[:,1] 241 | print "No. of examples above threshold for class {} : {}" \ 242 | .format(cat, sum(y_prob[cat]>threshold)) 243 | 244 | # Top 5 predictions 245 | pred = {} 246 | #y_prob.apply(np.argmax, axis=1) 247 | for vid in list(X.index): 248 | #print "ID : %s " %vid 249 | # select top 3 prediction values and their labels and save in dict 250 | top_n = y_prob.loc[vid,:].sort_values(ascending=False)[:3] 251 | labels = top_n.index.tolist() 252 | scores = top_n.values.tolist() 253 | pred[vid] = [] 254 | for idx,score in enumerate(scores): 255 | pred[vid].append({'score': score, 'label':labels[idx]}) 256 | 257 | return pred, y_prob 258 | 259 | 260 | def train_on_C3D(database, category_names, train_vids_all): 261 | """Function to read the C3D features and train a model on them 262 | """ 263 | 264 | 265 | if __name__=='__main__': 266 | 267 | # Read the database, version and taxonomy from JSON file 268 | with open(ANNOTATION_FILE, "r") as fobj: 269 | data = json.load(fobj) 270 | 271 | database = data["database"] 272 | taxonomy = data["taxonomy"] 273 | version = data["version"] 274 | 275 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, ANNOTATION_FILE) 276 | 277 | print "No of non-existing videos: %d" % len(non_existing_videos) 278 | 279 | train_vids_all = [] 280 | [train_vids_all.append(x) for x in database if database[x]['subset']=='training'] 281 | # Find list of available training videos 282 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 283 | 284 | val_vids_all = [] 285 | [val_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 286 | # Find list of available training videos 287 | val_existing_vids = list(set(val_vids_all) - set(non_existing_videos)) 288 | 289 | ########################################################################### 290 | # Get categories information from the database (Train+Validation sets) 291 | category = [] 292 | for x in database: 293 | cc = [] 294 | for l in database[x]["annotations"]: 295 | cc.append(l["label"]) 296 | category.extend(list(set(cc))) 297 | category_count = collections.Counter(category) 298 | 299 | category_names = sorted(category_count.keys()) 300 | print "Total No of classes: %d" % len(category_names) 301 | 302 | #print category_names 303 | ########################################################################### 304 | # MBH and ImageNetShuffle Features in training_model_m2.py 305 | 
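###########################################################################
# Sketch: once the construct_dataset calls in the C3D section below have
# written the LMDBs, one record can be sanity-checked roughly like this
# (lmdb and caffe are already imported above):
# env = lmdb.open(os.path.join(LMDB_FOLDER, "train_c3d_lmdb"), readonly=True)
# with env.begin() as txn:
#     cur = txn.cursor()
#     cur.first()
#     datum = caffe.proto.caffe_pb2.Datum()
#     datum.ParseFromString(cur.value())
#     print "label = %d, channels = %d" % (datum.label, datum.channels)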
########################################################################### 306 | # C3D features 307 | 308 | # Read the meta_info and sample_positions files 309 | samples_csv = "tr_samples_10k.csv" 310 | samples_val_csv = "val_samples_2500.csv" 311 | with open("training_data_meta_info.json", "r") as fobj: 312 | meta_info = json.load(fobj) 313 | construct_dataset(meta_info, samples_csv, category_names, "train") 314 | 315 | with open("val_data_meta_info.json", "r") as fobj: 316 | val_meta_info = json.load(fobj) 317 | construct_dataset(val_meta_info, samples_val_csv, category_names, "val") 318 | 319 | # train a model without convolution layers, only fc layers should be there 320 | 321 | 322 | ########################################################################### 323 | # Consider Taxonomy of the classes 324 | # Temporal Proposals 325 | 326 | ########################################################################### 327 | 328 | # out_dict = {'version':version} 329 | # subset_video_ids = [] 330 | # ext_data_dict = {'used': True, 'details': \ 331 | # 'C3D features.'} 332 | # 333 | # out_dict['results'] = pred 334 | # out_dict['external_data'] = ext_data_dict 335 | # 336 | # json_filename = 'submission_t3_'+SUBSET+'.json' 337 | # with open(json_filename, 'w') as fp: 338 | # json.dump(out_dict, fp) 339 | 340 | 341 | 342 | -------------------------------------------------------------------------------- /Evaluation/training_model_svm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 11 20:53:14 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Training Models 9 | 10 | """ 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import cv2 16 | 17 | def get_hog(srcVideo, start, end): 18 | return 19 | 20 | # To train a single SVM which identifies one class, get +ve samples frames 21 | # and get same amount of -ve sample frames 22 | 23 | def get_meta_info(video_path, existing_vids): 24 | """Add meta information of existing training videos to a dictionary and 25 | write the dictionary to a file. 26 | 27 | Input: existing_vids: Videos Ids of the mp4 files. 28 | Note that only the training video Ids should be sent here 29 | Return: dictionary containing the video_ids as keys and corresponding 30 | meta-info 31 | """ 32 | meta_dict = {} 33 | # loop over the VideoIDs and get the meta information for each file 34 | print "Getting video meta-information..." 35 | for v in existing_vids: 36 | filePath = os.path.join(video_path, "v_"+v+".mp4") 37 | cap = cv2.VideoCapture(filePath) 38 | if not cap.isOpened(): 39 | raise IOError("Capture object not opened ! Abort !") 40 | break 41 | fps = cap.get(cv2.CAP_PROP_FPS) 42 | # dimensions = (Ht, Wd) 43 | dimensions = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), \ 44 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) 45 | no_of_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 46 | meta_dict[v] = {"fps":fps, "dimensions":dimensions, \ 47 | "total_frames": no_of_frames} 48 | cap.release() 49 | 50 | return meta_dict 51 | 52 | def get_training_segments(database, video_ids_for_cat, category): 53 | """ 54 | Get training segments from the videos and form a dictionary 55 | Note: It applies for +ve examples as of now. 
56 | """ 57 | segments_dict = {} 58 | start, stop = 0, 0 59 | for v in video_ids_for_cat: 60 | # list of annotations on video 61 | annotations = database[v]["annotations"] 62 | for ann in annotations: 63 | if ann["label"] == category: 64 | start, stop = ann["segment"] 65 | if v in segments_dict: 66 | segments_dict[v].append({"start": start, "stop": stop}) 67 | else: 68 | segments_dict[v] = [{"start": start, "stop": stop}] 69 | # for a dictionary of segments, with key as video id and 70 | # values as the list of start and stop times of +ve examples 71 | return segments_dict 72 | 73 | 74 | def get_sample_frames(seg, meta_info, N): 75 | """ Get N sample frames from the defined video segments of the given 76 | video_ids. 77 | Input: 78 | seg: (Dictionary) Training segments for positive example videos for single 79 | category. 80 | {"FKQIdqjY9nI": [{'start': 12.73, 'stop': 22.23} ... ]} 81 | meta_info: dict for meta_info of all existing training videos 82 | {"FKQIdqjY9nI": {'total_frames': 1056, 83 | 'dimensions': (720, 1280), 'fps': 30.0} ...} 84 | N : Total number of samples to be extracted 85 | Output: 86 | pos_samples: {"FKQIdqjY9nI": [ 234, 543], ...} 87 | """ 88 | # Get total number of frames in all the segments across all videos 89 | # Get N samples from total number of frames 90 | # Map the generated integers backwards to the frame numbers of video segments 91 | # Get the video_id, frame number that needs to be sampled 92 | total_frames = 0 93 | # Iterate over all the segments of the videos containing actions 94 | video_ids = sorted(seg.keys()) 95 | for v_id in video_ids: 96 | for segment in seg[v_id]: 97 | frames_in_seg = int((segment["stop"] - segment["start"])*meta_info[v_id]["fps"]) 98 | total_frames += frames_in_seg 99 | 100 | print "Total frames in all segments = %d " % total_frames 101 | # Randomly (uniform) sample N values from 0 to total_frames-1 102 | # Backwards mapping 103 | import random 104 | random.seed(231) 105 | samp = sorted(random.sample(range(1, total_frames), N), reverse=True) 106 | #print "Samples list !! 
" 107 | #print samp 108 | pos_samples = {} 109 | frame_ptr_lower = 0 110 | for v_id in video_ids: 111 | for segment in seg[v_id]: 112 | frames_in_seg = int((segment["stop"]-segment["start"])*meta_info[v_id]["fps"]) 113 | #print "v_id %s || Frames in seg : %d || lower : %d" %(v_id, frames_in_seg, frame_ptr_lower) 114 | while len(samp)!=0 and (frame_ptr_lower<=samp[-1] \ 115 | and samp[-1]<=(frame_ptr_lower+frames_in_seg)): 116 | samp_no = samp.pop() 117 | # Pop until the popped item is not in range 118 | # Get no of frames in video segment using video's FPS 119 | # calculate position (Frame number) in the video and write to dict 120 | pos = int(segment["start"]*meta_info[v_id]["fps"])+(samp_no-frame_ptr_lower) 121 | #print "lower : %d || samp_no : %d || pos : %d " %(frame_ptr_lower, samp_no, pos) 122 | if v_id in pos_samples: 123 | pos_samples[v_id].append(pos) 124 | else: 125 | pos_samples[v_id] = [pos] 126 | frame_ptr_lower += frames_in_seg 127 | 128 | #print "Samples information written to dictionary with size: %d" %len(pos_samples) 129 | return pos_samples 130 | 131 | def display_sample_frames(samples_dict, srcFolder): 132 | """ 133 | Display the frames from the samples dictionaries of the categories 134 | Input: 135 | samples_dict: {"FKQIdqjY9nI": [ 234, 543], ...} 136 | srcFolder : path containing the videos 137 | """ 138 | # Loop over the videos and display the frames 139 | 140 | for v_id in samples_dict: 141 | cap = cv2.VideoCapture(os.path.join(srcFolder, "v_"+v_id+".mp4")) 142 | if not cap.isOpened(): 143 | raise IOError("Capture object not opened !") 144 | pos_lst = samples_dict[v_id] 145 | for pos in pos_lst: 146 | cap.set(cv2.CAP_PROP_POS_FRAMES, pos) 147 | ret, frame = cap.read() 148 | cv2.imshow("Frame", frame) 149 | waitTillEscPressed() 150 | cap.release() 151 | cv2.destroyAllWindows() 152 | return 153 | 154 | 155 | def get_negative_frames(seg, meta_info, N, category): 156 | """ 157 | Get N samples from videos that do not belong to the segments mentioned in 158 | seg and are not of 'category'. 159 | """ 160 | return 161 | 162 | def train_svm(srcVideo, annotations, incr_rate, category_names): 163 | cap = cv2.VideoCapture(srcVideo) 164 | 165 | if not cap.isOpened(): 166 | raise IOError("Video cannot be opened !") 167 | 168 | dimensions = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) 169 | fps = cap.get(cv2.CAP_PROP_FPS) 170 | print "Dimensions : %s " % str(dimensions) 171 | print "Frame Rate : %f " % fps 172 | # Loop over the annotation dictionaries 173 | for ann in annotations: 174 | start_time, stop_time = ann['segment'] 175 | start = int(start_time*fps) 176 | stop = int(stop_time*fps) 177 | label = ann['label'] 178 | print "Action Label : %s" %label 179 | while cap.isOpened() and start2.4.0 will work. For 2.X version you may need to edit a few lines) 24 | 25 | 4. 
GPU card + CUDA Tools 26 | 27 | -------------------------------------------------------------------------------- /caffe_models/c3d_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "C3DNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: false 12 | mean_file: "mean_c3d_4k.binaryproto" 13 | #scale: 0.00390625 14 | } 15 | data_param { 16 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/train_c3d_lmdb" 17 | batch_size: 64 18 | backend: LMDB 19 | } 20 | } 21 | layer { 22 | name: "data" 23 | type: "Data" 24 | top: "data" 25 | top: "label" 26 | include { 27 | phase: TEST 28 | } 29 | transform_param { 30 | mirror: false 31 | mean_file: "mean_c3d_4k.binaryproto" 32 | } 33 | data_param { 34 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/val_c3d_lmdb" 35 | batch_size: 50 36 | backend: LMDB 37 | } 38 | } 39 | layer { 40 | name: "fc1" 41 | type: "InnerProduct" 42 | bottom: "data" 43 | top: "fc1" 44 | param { 45 | lr_mult: 1 46 | } 47 | param { 48 | lr_mult: 2 49 | decay_mult: 0 50 | } 51 | inner_product_param { 52 | num_output: 1024 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "relu1" 63 | type: "ReLU" 64 | bottom: "fc1" 65 | top: "fc1" 66 | } 67 | layer { 68 | name: "drop1" 69 | type: "Dropout" 70 | bottom: "fc1" 71 | top: "fc1" 72 | dropout_param { 73 | dropout_ratio: 0.5 74 | } 75 | } 76 | layer { 77 | name: "fc2" 78 | type: "InnerProduct" 79 | bottom: "fc1" 80 | top: "fc2" 81 | param { 82 | lr_mult: 1 83 | } 84 | param { 85 | lr_mult: 2 86 | } 87 | inner_product_param { 88 | num_output: 1024 89 | weight_filler { 90 | type: "xavier" 91 | } 92 | bias_filler { 93 | type: "constant" 94 | } 95 | } 96 | } 97 | layer { 98 | name: "relu2" 99 | type: "ReLU" 100 | bottom: "fc2" 101 | top: "fc2" 102 | } 103 | layer { 104 | name: "drop2" 105 | type: "Dropout" 106 | bottom: "fc2" 107 | top: "fc2" 108 | dropout_param { 109 | dropout_ratio: 0.5 110 | } 111 | } 112 | layer { 113 | name: "fc3" 114 | type: "InnerProduct" 115 | bottom: "fc2" 116 | top: "fc3" 117 | param { 118 | lr_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | } 123 | inner_product_param { 124 | num_output: 200 125 | weight_filler { 126 | type: "xavier" 127 | } 128 | bias_filler { 129 | type: "constant" 130 | } 131 | } 132 | } 133 | layer { 134 | name: "accuracy" 135 | type: "Accuracy" 136 | bottom: "fc3" 137 | bottom: "label" 138 | top: "accuracy" 139 | include { 140 | phase: TEST 141 | } 142 | } 143 | layer { 144 | name: "loss" 145 | type: "SoftmaxWithLoss" 146 | bottom: "fc3" 147 | bottom: "label" 148 | top: "loss" 149 | } 150 | -------------------------------------------------------------------------------- /caffe_models/c3d_fc_net_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/c3d_fc_net.prototxt" 2 | test_iter: 10000 3 | test_interval: 10000 4 | base_lr: 0.01 5 | #base_lr: 0.0001 6 | momentum: 0.9 7 | #momentum2: 0.999 8 | #lr_policy: "fixed" 9 | lr_policy: "step" 10 | gamma: 0.1 11 | stepsize: 100000 # To change 12 | display: 500 13 | max_iter: 400000 14 | weight_decay: 0.0005 15 | snapshot: 50000 # To change 16 | snapshot_prefix: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_fc_net_snap" 17 
| #type: "Adam" 18 | solver_mode: GPU 19 | -------------------------------------------------------------------------------- /caffe_models/deploy_OF_alexnet_mirror.prototxt: -------------------------------------------------------------------------------- 1 | name: "OptFlowAlexNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 3 dim: 120 dim: 160 } } 7 | 8 | #transform_param { 9 | # scale: 0.00390625 10 | #} 11 | } 12 | layer { 13 | name: "conv1" 14 | type: "Convolution" 15 | bottom: "data" 16 | top: "conv1" 17 | param { 18 | lr_mult: 1 19 | } 20 | param { 21 | lr_mult: 2 22 | } 23 | convolution_param { 24 | num_output: 64 25 | kernel_size: 7 26 | stride: 1 27 | weight_filler { 28 | type: "xavier" 29 | #std: 0.01 30 | } 31 | bias_filler { 32 | type: "constant" 33 | #value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | name: "relu1" 39 | type: "ReLU" 40 | bottom: "conv1" 41 | top: "conv1" 42 | } 43 | #layer { 44 | # name: "norm1" 45 | # type: "LRN" 46 | # bottom: "conv1" 47 | # top: "norm1" 48 | # lrn_param { 49 | # local_size: 5 50 | # alpha: 0.0001 51 | # beta: 0.75 52 | # } 53 | #} 54 | layer { 55 | name: "pool1" 56 | type: "Pooling" 57 | bottom: "conv1" 58 | top: "pool1" 59 | pooling_param { 60 | pool: MAX 61 | kernel_size: 2 62 | stride: 2 63 | } 64 | } 65 | layer { 66 | name: "conv2" 67 | type: "Convolution" 68 | bottom: "pool1" 69 | top: "conv2" 70 | param { 71 | lr_mult: 1 72 | } 73 | param { 74 | lr_mult: 2 75 | } 76 | convolution_param { 77 | num_output: 128 78 | #pad: 2 79 | kernel_size: 3 80 | stride: 2 81 | #group: 2 82 | weight_filler { 83 | type: "xavier" 84 | #std: 0.01 85 | } 86 | bias_filler { 87 | type: "constant" 88 | #value: 0.1 89 | } 90 | } 91 | } 92 | layer { 93 | name: "relu2" 94 | type: "ReLU" 95 | bottom: "conv2" 96 | top: "conv2" 97 | } 98 | #layer { 99 | # name: "norm2" 100 | # type: "LRN" 101 | # bottom: "conv2" 102 | # top: "norm2" 103 | # lrn_param { 104 | # local_size: 5 105 | # alpha: 0.0001 106 | # beta: 0.75 107 | # } 108 | #} 109 | layer { 110 | name: "pool2" 111 | type: "Pooling" 112 | bottom: "conv2" 113 | top: "pool2" 114 | pooling_param { 115 | pool: MAX 116 | kernel_size: 2 117 | stride: 2 118 | } 119 | } 120 | layer { 121 | name: "conv3" 122 | type: "Convolution" 123 | bottom: "pool2" 124 | top: "conv3" 125 | param { 126 | lr_mult: 1 127 | #decay_mult: 1 128 | } 129 | param { 130 | lr_mult: 2 131 | #decay_mult: 0 132 | } 133 | convolution_param { 134 | num_output: 192 135 | #pad: 1 136 | kernel_size: 3 137 | stride: 1 138 | weight_filler { 139 | type: "xavier" 140 | #std: 0.01 141 | } 142 | bias_filler { 143 | type: "constant" 144 | #value: 0 145 | } 146 | } 147 | } 148 | layer { 149 | name: "relu3" 150 | type: "ReLU" 151 | bottom: "conv3" 152 | top: "conv3" 153 | } 154 | layer { 155 | name: "conv4" 156 | type: "Convolution" 157 | bottom: "conv3" 158 | top: "conv4" 159 | param { 160 | lr_mult: 1 161 | #decay_mult: 1 162 | } 163 | param { 164 | lr_mult: 2 165 | #decay_mult: 0 166 | } 167 | convolution_param { 168 | num_output: 128 169 | #pad: 1 170 | kernel_size: 3 171 | stride: 1 172 | #group: 2 173 | weight_filler { 174 | type: "xavier" 175 | #std: 0.01 176 | } 177 | bias_filler { 178 | type: "constant" 179 | #value: 0.1 180 | } 181 | } 182 | } 183 | layer { 184 | name: "relu4" 185 | type: "ReLU" 186 | bottom: "conv4" 187 | top: "conv4" 188 | } 189 | layer { 190 | name: "conv5" 191 | type: "Convolution" 192 | bottom: "conv4" 193 | top: "conv5" 194 | param { 195 | lr_mult: 1 196 | #decay_mult: 1 197 
| } 198 | param { 199 | lr_mult: 2 200 | #decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 128 204 | #pad: 1 205 | kernel_size: 3 206 | stride: 1 207 | #group: 2 208 | weight_filler { 209 | type: "xavier" 210 | #std: 0.01 211 | } 212 | bias_filler { 213 | type: "constant" 214 | #value: 0.1 215 | } 216 | } 217 | } 218 | layer { 219 | name: "relu5" 220 | type: "ReLU" 221 | bottom: "conv5" 222 | top: "conv5" 223 | } 224 | layer { 225 | name: "pool5" 226 | type: "Pooling" 227 | bottom: "conv5" 228 | top: "pool5" 229 | pooling_param { 230 | pool: MAX 231 | kernel_size: 3 232 | stride: 1 233 | } 234 | } 235 | layer { 236 | name: "fc6" 237 | type: "InnerProduct" 238 | bottom: "pool5" 239 | top: "fc6" 240 | param { 241 | lr_mult: 1 242 | #decay_mult: 1 243 | } 244 | param { 245 | lr_mult: 2 246 | #decay_mult: 0 247 | } 248 | inner_product_param { 249 | num_output: 128 250 | weight_filler { 251 | type: "xavier" 252 | #std: 0.005 253 | } 254 | bias_filler { 255 | type: "constant" 256 | #value: 0.1 257 | } 258 | } 259 | } 260 | layer { 261 | name: "relu6" 262 | type: "ReLU" 263 | bottom: "fc6" 264 | top: "fc6" 265 | } 266 | layer { 267 | name: "drop6" 268 | type: "Dropout" 269 | bottom: "fc6" 270 | top: "fc6" 271 | dropout_param { 272 | dropout_ratio: 0.5 273 | } 274 | } 275 | layer { 276 | name: "fc7" 277 | type: "InnerProduct" 278 | bottom: "fc6" 279 | top: "fc7" 280 | param { 281 | lr_mult: 1 282 | #decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | #decay_mult: 0 287 | } 288 | inner_product_param { 289 | num_output: 128 290 | weight_filler { 291 | type: "xavier" 292 | #std: 0.005 293 | } 294 | bias_filler { 295 | type: "constant" 296 | #value: 0.1 297 | } 298 | } 299 | } 300 | layer { 301 | name: "relu7" 302 | type: "ReLU" 303 | bottom: "fc7" 304 | top: "fc7" 305 | } 306 | layer { 307 | name: "drop7" 308 | type: "Dropout" 309 | bottom: "fc7" 310 | top: "fc7" 311 | dropout_param { 312 | dropout_ratio: 0.5 313 | } 314 | } 315 | layer { 316 | name: "fc8" 317 | type: "InnerProduct" 318 | bottom: "fc7" 319 | top: "fc8" 320 | param { 321 | lr_mult: 1 322 | #decay_mult: 1 323 | } 324 | param { 325 | lr_mult: 2 326 | #decay_mult: 0 327 | } 328 | inner_product_param { 329 | num_output: 6 330 | weight_filler { 331 | type: "xavier" 332 | #std: 0.01 333 | } 334 | bias_filler { 335 | type: "constant" 336 | #value: 0 337 | } 338 | } 339 | } 340 | layer { 341 | name: "prob" 342 | type: "Softmax" 343 | bottom: "fc8" 344 | top: "prob" 345 | } 346 | -------------------------------------------------------------------------------- /caffe_models/deploy_c3d_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "C3DNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 500 dim: 1 dim: 1 } } 7 | } 8 | layer { 9 | name: "fc1" 10 | type: "InnerProduct" 11 | bottom: "data" 12 | top: "fc1" 13 | param { 14 | lr_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | decay_mult: 0 19 | } 20 | inner_product_param { 21 | num_output: 1024 22 | weight_filler { 23 | type: "xavier" 24 | } 25 | bias_filler { 26 | type: "constant" 27 | } 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "fc1" 34 | top: "fc1" 35 | } 36 | layer { 37 | name: "drop1" 38 | type: "Dropout" 39 | bottom: "fc1" 40 | top: "fc1" 41 | dropout_param { 42 | dropout_ratio: 0.5 43 | } 44 | } 45 | layer { 46 | name: "fc2" 47 | type: "InnerProduct" 48 | bottom: "fc1" 49 | top: "fc2" 50 | param { 51 | lr_mult: 1 52 | } 
53 | param { 54 | lr_mult: 2 55 | } 56 | inner_product_param { 57 | num_output: 1024 58 | weight_filler { 59 | type: "xavier" 60 | } 61 | bias_filler { 62 | type: "constant" 63 | } 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "fc2" 70 | top: "fc2" 71 | } 72 | layer { 73 | name: "drop2" 74 | type: "Dropout" 75 | bottom: "fc2" 76 | top: "fc2" 77 | dropout_param { 78 | dropout_ratio: 0.5 79 | } 80 | } 81 | layer { 82 | name: "fc3" 83 | type: "InnerProduct" 84 | bottom: "fc2" 85 | top: "fc3" 86 | param { 87 | lr_mult: 1 88 | } 89 | param { 90 | lr_mult: 2 91 | } 92 | inner_product_param { 93 | num_output: 200 94 | weight_filler { 95 | type: "xavier" 96 | } 97 | bias_filler { 98 | type: "constant" 99 | } 100 | } 101 | } 102 | layer { 103 | name: "prob" 104 | type: "Softmax" 105 | bottom: "fc3" 106 | top: "prob" 107 | } 108 | -------------------------------------------------------------------------------- /caffe_models/deploy_hog_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "HOGNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 9576 dim: 1 dim: 1 } } 7 | } 8 | layer { 9 | name: "fc1" 10 | type: "InnerProduct" 11 | bottom: "data" 12 | top: "fc1" 13 | param { 14 | lr_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | decay_mult: 0 19 | } 20 | inner_product_param { 21 | num_output: 1024 22 | weight_filler { 23 | type: "xavier" 24 | } 25 | bias_filler { 26 | type: "constant" 27 | } 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "fc1" 34 | top: "fc1" 35 | } 36 | layer { 37 | name: "drop1" 38 | type: "Dropout" 39 | bottom: "fc1" 40 | top: "fc1" 41 | dropout_param { 42 | dropout_ratio: 0.5 43 | } 44 | } 45 | layer { 46 | name: "fc2" 47 | type: "InnerProduct" 48 | bottom: "fc1" 49 | top: "fc2" 50 | param { 51 | lr_mult: 1 52 | } 53 | param { 54 | lr_mult: 2 55 | } 56 | inner_product_param { 57 | num_output: 1024 58 | weight_filler { 59 | type: "xavier" 60 | } 61 | bias_filler { 62 | type: "constant" 63 | } 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "fc2" 70 | top: "fc2" 71 | } 72 | layer { 73 | name: "drop2" 74 | type: "Dropout" 75 | bottom: "fc2" 76 | top: "fc2" 77 | dropout_param { 78 | dropout_ratio: 0.5 79 | } 80 | } 81 | layer { 82 | name: "fc3" 83 | type: "InnerProduct" 84 | bottom: "fc2" 85 | top: "fc3" 86 | param { 87 | lr_mult: 1 88 | } 89 | param { 90 | lr_mult: 2 91 | } 92 | inner_product_param { 93 | num_output: 200 94 | weight_filler { 95 | type: "xavier" 96 | } 97 | bias_filler { 98 | type: "constant" 99 | } 100 | } 101 | } 102 | layer { 103 | name: "prob" 104 | type: "Softmax" 105 | bottom: "fc3" 106 | top: "prob" 107 | } 108 | -------------------------------------------------------------------------------- /caffe_models/hog_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "HOGNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: false 12 | mean_file: "mean_hog_4k.binaryproto" 13 | #scale: 0.00390625 14 | } 15 | data_param { 16 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/train_hog_lmdb" 17 | batch_size: 64 18 | backend: LMDB 19 | } 20 | } 21 | layer { 22 | name: "data" 23 | type: "Data" 24 | top: "data" 25 | top: "label" 26 | include { 27 | phase: TEST 28 | } 29 | transform_param { 30 | mirror: false 31 
| mean_file: "mean_hog_4k.binaryproto" 32 | } 33 | data_param { 34 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/val_hog_lmdb" 35 | batch_size: 50 36 | backend: LMDB 37 | } 38 | } 39 | layer { 40 | name: "fc1" 41 | type: "InnerProduct" 42 | bottom: "data" 43 | top: "fc1" 44 | param { 45 | lr_mult: 1 46 | } 47 | param { 48 | lr_mult: 2 49 | decay_mult: 0 50 | } 51 | inner_product_param { 52 | num_output: 4096 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "relu1" 63 | type: "ReLU" 64 | bottom: "fc1" 65 | top: "fc1" 66 | } 67 | layer { 68 | name: "drop1" 69 | type: "Dropout" 70 | bottom: "fc1" 71 | top: "fc1" 72 | dropout_param { 73 | dropout_ratio: 0.5 74 | } 75 | } 76 | layer { 77 | name: "fc2" 78 | type: "InnerProduct" 79 | bottom: "fc1" 80 | top: "fc2" 81 | param { 82 | lr_mult: 1 83 | } 84 | param { 85 | lr_mult: 2 86 | } 87 | inner_product_param { 88 | num_output: 4096 89 | weight_filler { 90 | type: "xavier" 91 | } 92 | bias_filler { 93 | type: "constant" 94 | } 95 | } 96 | } 97 | layer { 98 | name: "relu2" 99 | type: "ReLU" 100 | bottom: "fc2" 101 | top: "fc2" 102 | } 103 | layer { 104 | name: "drop2" 105 | type: "Dropout" 106 | bottom: "fc2" 107 | top: "fc2" 108 | dropout_param { 109 | dropout_ratio: 0.5 110 | } 111 | } 112 | layer { 113 | name: "fc3" 114 | type: "InnerProduct" 115 | bottom: "fc2" 116 | top: "fc3" 117 | param { 118 | lr_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | } 123 | inner_product_param { 124 | num_output: 200 125 | weight_filler { 126 | type: "xavier" 127 | } 128 | bias_filler { 129 | type: "constant" 130 | } 131 | } 132 | } 133 | layer { 134 | name: "accuracy" 135 | type: "Accuracy" 136 | bottom: "fc3" 137 | bottom: "label" 138 | top: "accuracy" 139 | include { 140 | phase: TEST 141 | } 142 | } 143 | layer { 144 | name: "loss" 145 | type: "SoftmaxWithLoss" 146 | bottom: "fc3" 147 | bottom: "label" 148 | top: "loss" 149 | } 150 | -------------------------------------------------------------------------------- /caffe_models/hog_fc_net_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/hog_fc_net.prototxt" 2 | test_iter: 4000 3 | test_interval: 10000 4 | base_lr: 0.01 5 | #base_lr: 0.0001 6 | momentum: 0.9 7 | #momentum2: 0.999 8 | #lr_policy: "fixed" 9 | lr_policy: "step" 10 | gamma: 0.1 11 | stepsize: 100000 # To change 12 | display: 500 13 | max_iter: 400000 14 | weight_decay: 0.0005 15 | snapshot: 50000 # To change 16 | snapshot_prefix: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/hog_fc_net_snap" 17 | #type: "Adam" 18 | solver_mode: GPU 19 | -------------------------------------------------------------------------------- /caffe_models/mean_c3d.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_c3d.binaryproto -------------------------------------------------------------------------------- /caffe_models/mean_c3d_10k.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_c3d_10k.binaryproto -------------------------------------------------------------------------------- 
/caffe_models/mean_c3d_4k.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_c3d_4k.binaryproto -------------------------------------------------------------------------------- /caffe_models/mean_hog_4k.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_hog_4k.binaryproto -------------------------------------------------------------------------------- /caffe_models/optical_flow_alexnet_mirror.prototxt: -------------------------------------------------------------------------------- 1 | name: "OptFlowAlexNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: true 12 | #crop_size: 227 13 | mean_file: "mean_image.binaryproto" 14 | #scale: 0.00390625 15 | } 16 | data_param { 17 | source: "/home/arpan/VisionWorkspace/ActivityNet/train_OF_lmdb" 18 | batch_size: 64 19 | backend: LMDB 20 | } 21 | } 22 | layer { 23 | name: "data" 24 | type: "Data" 25 | top: "data" 26 | top: "label" 27 | include { 28 | phase: TEST 29 | } 30 | transform_param { 31 | mirror: false 32 | #crop_size: 227 33 | mean_file: "mean_image.binaryproto" 34 | #scale: 0.00390625 35 | } 36 | data_param { 37 | source: "/home/arpan/VisionWorkspace/ActivityNet/val_OF_lmdb" 38 | batch_size: 50 39 | backend: LMDB 40 | } 41 | } 42 | layer { 43 | name: "conv1" 44 | type: "Convolution" 45 | bottom: "data" 46 | top: "conv1" 47 | param { 48 | lr_mult: 1 49 | decay_mult: 1 50 | } 51 | param { 52 | lr_mult: 2 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | kernel_size: 7 58 | stride: 1 59 | weight_filler { 60 | type: "gaussian" 61 | std: 0.01 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "relu1" 71 | type: "ReLU" 72 | bottom: "conv1" 73 | top: "conv1" 74 | } 75 | #layer { 76 | # name: "norm1" 77 | # type: "LRN" 78 | # bottom: "conv1" 79 | # top: "norm1" 80 | # lrn_param { 81 | # local_size: 5 82 | # alpha: 0.0001 83 | # beta: 0.75 84 | # } 85 | #} 86 | layer { 87 | name: "pool1" 88 | type: "Pooling" 89 | bottom: "conv1" 90 | top: "pool1" 91 | pooling_param { 92 | pool: MAX 93 | kernel_size: 2 94 | stride: 2 95 | } 96 | } 97 | layer { 98 | name: "conv2" 99 | type: "Convolution" 100 | bottom: "pool1" 101 | top: "conv2" 102 | param { 103 | lr_mult: 1 104 | decay_mult: 1 105 | } 106 | param { 107 | lr_mult: 2 108 | decay_mult: 0 109 | } 110 | convolution_param { 111 | num_output: 128 112 | #pad: 2 113 | kernel_size: 3 114 | stride: 2 115 | #group: 2 116 | weight_filler { 117 | #type: "xavier" 118 | #std: 0.01 119 | type: "gaussian" 120 | std: 0.01 121 | } 122 | bias_filler { 123 | type: "constant" 124 | value: 1 125 | } 126 | } 127 | } 128 | layer { 129 | name: "relu2" 130 | type: "ReLU" 131 | bottom: "conv2" 132 | top: "conv2" 133 | } 134 | #layer { 135 | # name: "norm2" 136 | # type: "LRN" 137 | # bottom: "conv2" 138 | # top: "norm2" 139 | # lrn_param { 140 | # local_size: 5 141 | # alpha: 0.0001 142 | # beta: 0.75 143 | # } 144 | #} 145 | layer { 146 | name: "pool2" 147 | type: "Pooling" 148 | bottom: "conv2" 149 | top: "pool2" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 2 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: 
"conv3" 158 | type: "Convolution" 159 | bottom: "pool2" 160 | top: "conv3" 161 | param { 162 | lr_mult: 1 163 | decay_mult: 1 164 | } 165 | param { 166 | lr_mult: 2 167 | decay_mult: 0 168 | } 169 | convolution_param { 170 | num_output: 192 171 | #pad: 1 172 | kernel_size: 3 173 | stride: 1 174 | weight_filler { 175 | #type: "xavier" 176 | #std: 0.01 177 | type: "gaussian" 178 | std: 0.01 179 | } 180 | bias_filler { 181 | type: "constant" 182 | value: 0 183 | } 184 | } 185 | } 186 | layer { 187 | name: "relu3" 188 | type: "ReLU" 189 | bottom: "conv3" 190 | top: "conv3" 191 | } 192 | layer { 193 | name: "conv4" 194 | type: "Convolution" 195 | bottom: "conv3" 196 | top: "conv4" 197 | param { 198 | lr_mult: 1 199 | decay_mult: 1 200 | } 201 | param { 202 | lr_mult: 2 203 | decay_mult: 0 204 | } 205 | convolution_param { 206 | num_output: 128 207 | #pad: 1 208 | kernel_size: 3 209 | stride: 1 210 | #group: 2 211 | weight_filler { 212 | #type: "xavier" 213 | type: "gaussian" 214 | std: 0.01 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 1 219 | } 220 | } 221 | } 222 | layer { 223 | name: "relu4" 224 | type: "ReLU" 225 | bottom: "conv4" 226 | top: "conv4" 227 | } 228 | layer { 229 | name: "conv5" 230 | type: "Convolution" 231 | bottom: "conv4" 232 | top: "conv5" 233 | param { 234 | lr_mult: 1 235 | decay_mult: 1 236 | } 237 | param { 238 | lr_mult: 2 239 | decay_mult: 0 240 | } 241 | convolution_param { 242 | num_output: 128 243 | #pad: 1 244 | kernel_size: 3 245 | stride: 1 246 | #group: 2 247 | weight_filler { 248 | #type: "xavier" 249 | type: "gaussian" 250 | std: 0.01 251 | } 252 | bias_filler { 253 | type: "constant" 254 | value: 1 255 | } 256 | } 257 | } 258 | layer { 259 | name: "relu5" 260 | type: "ReLU" 261 | bottom: "conv5" 262 | top: "conv5" 263 | } 264 | layer { 265 | name: "pool5" 266 | type: "Pooling" 267 | bottom: "conv5" 268 | top: "pool5" 269 | pooling_param { 270 | pool: MAX 271 | kernel_size: 3 272 | stride: 1 273 | } 274 | } 275 | layer { 276 | name: "fc6" 277 | type: "InnerProduct" 278 | bottom: "pool5" 279 | top: "fc6" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | inner_product_param { 289 | num_output: 512 290 | weight_filler { 291 | #type: "xavier" 292 | type: "gaussian" 293 | std: 0.005 294 | } 295 | bias_filler { 296 | type: "constant" 297 | value: 1 298 | } 299 | } 300 | } 301 | layer { 302 | name: "relu6" 303 | type: "ReLU" 304 | bottom: "fc6" 305 | top: "fc6" 306 | } 307 | layer { 308 | name: "drop6" 309 | type: "Dropout" 310 | bottom: "fc6" 311 | top: "fc6" 312 | dropout_param { 313 | dropout_ratio: 0.5 314 | } 315 | } 316 | layer { 317 | name: "fc7" 318 | type: "InnerProduct" 319 | bottom: "fc6" 320 | top: "fc7" 321 | param { 322 | lr_mult: 1 323 | decay_mult: 1 324 | } 325 | param { 326 | lr_mult: 2 327 | decay_mult: 0 328 | } 329 | inner_product_param { 330 | num_output: 512 331 | weight_filler { 332 | type: "gaussian" 333 | std: 0.005 334 | } 335 | bias_filler { 336 | type: "constant" 337 | value: 1 338 | } 339 | } 340 | } 341 | layer { 342 | name: "relu7" 343 | type: "ReLU" 344 | bottom: "fc7" 345 | top: "fc7" 346 | } 347 | layer { 348 | name: "drop7" 349 | type: "Dropout" 350 | bottom: "fc7" 351 | top: "fc7" 352 | dropout_param { 353 | dropout_ratio: 0.5 354 | } 355 | } 356 | layer { 357 | name: "fc8" 358 | type: "InnerProduct" 359 | bottom: "fc7" 360 | top: "fc8" 361 | param { 362 | lr_mult: 1 363 | decay_mult: 1 364 | } 365 | param { 366 | lr_mult: 2 367 | 
decay_mult: 0 368 | } 369 | inner_product_param { 370 | num_output: 200 371 | weight_filler { 372 | type: "gaussian" 373 | std: 0.01 374 | } 375 | bias_filler { 376 | type: "constant" 377 | value: 0 378 | } 379 | } 380 | } 381 | layer { 382 | name: "accuracy" 383 | type: "Accuracy" 384 | bottom: "fc8" 385 | bottom: "label" 386 | top: "accuracy" 387 | include { 388 | phase: TEST 389 | } 390 | } 391 | layer { 392 | name: "loss" 393 | type: "SoftmaxWithLoss" 394 | bottom: "fc8" 395 | bottom: "label" 396 | top: "loss" 397 | } 398 | -------------------------------------------------------------------------------- /caffe_models/optical_flow_alexnet_mirror_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "/home/arpan/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/optical_flow_alexnet_mirror.prototxt" 2 | test_iter: 2000 3 | test_interval: 1000 4 | base_lr: 0.01 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 25000 # To change 8 | display: 100 9 | max_iter: 100000 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: 25000 # To change 13 | snapshot_prefix: "/home/arpan/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/OF_alexnet_mirror_snap" 14 | solver_mode: GPU 15 | -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.caffemodel -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.solverstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.solverstate -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.solverstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.solverstate -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.caffemodel -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.solverstate: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.solverstate --------------------------------------------------------------------------------
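
Usage note (not part of the repository): the files listed above fit together as mean blobs (`.binaryproto`), the OptFlowAlexNet training net and its solver, and C3D snapshot weights/solver states. The sketch below shows one plausible way to consume them with pycaffe. It is a minimal sketch, not the authors' code: the repository-relative paths are taken from the tree above, but the commented-out snapshot iteration number is illustrative only, and the `np.squeeze` handling of the mean blob is an assumption about how the mean was saved.

```python
import numpy as np
import caffe

# The solver above sets solver_mode: GPU; switch to caffe.set_mode_cpu() if needed.
caffe.set_mode_gpu()

# --- Read a stored mean blob (.binaryproto) into a numpy array -------------
blob = caffe.proto.caffe_pb2.BlobProto()
with open('caffe_models/mean_c3d_4k.binaryproto', 'rb') as f:
    blob.ParseFromString(f.read())
# Shape follows whatever was written when the mean was computed; squeeze is an
# assumption to drop singleton axes.
mean = np.squeeze(caffe.io.blobproto_to_array(blob))

# --- Resume (or start) training the optical-flow AlexNet -------------------
# Note: the net and solver prototxts above hard-code absolute paths
# (/home/arpan/VisionWorkspace/...); point them at your own checkout and
# LMDBs before running.
solver = caffe.get_solver('caffe_models/optical_flow_alexnet_mirror_solver.prototxt')
# To resume from an existing snapshot, pass a .solverstate produced under the
# snapshot_prefix configured in the solver (the iteration number here is a
# hypothetical example, not a file that exists in this repo):
# solver.restore('caffe_models/snapshots/OF_alexnet_mirror_snap_iter_25000.solverstate')
solver.solve()
```

The same training run can also be launched with the stock Caffe CLI, e.g. `caffe train --solver=caffe_models/optical_flow_alexnet_mirror_solver.prototxt --gpu 0`, again after fixing the absolute paths inside the prototxts.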