├── .gitignore ├── Crawler ├── Kinetics │ ├── LICENSE │ ├── README.md │ ├── data │ │ ├── kinetics_train.csv │ │ └── kinetics_val.csv │ ├── download.py │ ├── environment.yml │ └── process_download_report.py ├── README ├── activity_net.v1-3.min.json ├── command_list.txt ├── fetch_activitynet_videos.sh └── run_crosscheck.py ├── Evaluation ├── README.md ├── check_lmdb.py ├── eval_classification.py ├── eval_detection.py ├── eval_kinetics.py ├── eval_proposal.py ├── frame_prediction.py ├── frame_prediction_BG.py ├── get_classification_performance.py ├── get_detection_performance.py ├── get_kinetics_performance.py ├── get_proposal_performance.py ├── hog.xml ├── localization.py ├── optical_flow.py ├── taxonomy.py ├── test_data_meta_info.json ├── testing.py ├── training.py ├── training_data_meta_info.json ├── training_model_hog.py ├── training_model_m2.py ├── training_model_m3.py ├── training_model_m4.py ├── training_model_svm.py ├── utils.py ├── val_data_meta_info.json └── vid_probs.csv ├── LICENSE ├── Notebooks ├── ActivityNet-Release1.2-Classification.ipynb ├── ActivityNet-Release1.2-Detection.ipynb ├── ActivityNet-Release1.3.Proposals.ipynb └── ActivityNet-Temporal-Proposals.ipynb ├── README.md └── caffe_models ├── c3d_fc_net.prototxt ├── c3d_fc_net_solver.prototxt ├── deploy_OF_alexnet_mirror.prototxt ├── deploy_c3d_fc_net.prototxt ├── deploy_hog_fc_net.prototxt ├── frames_alexMir_step_80k.log ├── hog_fc_net.prototxt ├── hog_fc_net_solver.prototxt ├── mean_c3d.binaryproto ├── mean_c3d_10k.binaryproto ├── mean_c3d_4k.binaryproto ├── mean_hog_4k.binaryproto ├── optical_flow_alexnet_mirror.prototxt ├── optical_flow_alexnet_mirror_solver.prototxt └── snapshots ├── c3d_10k_2500_adam_e4 ├── c3d_fc_net_snap_iter_400000.caffemodel ├── c3d_fc_net_snap_iter_400000.solverstate └── c3d_train_10k_adam_1e-4.log ├── c3d_4k_1k ├── c3d_fc_net_snap_iter_400000.caffemodel ├── c3d_fc_net_snap_iter_400000.solverstate └── c3d_train.log ├── c3d_4k_1k_adam_e4 ├── c3d_fc_net_snap_iter_200000.caffemodel ├── c3d_fc_net_snap_iter_200000.solverstate └── c3d_train_adam_1e-4.log └── c3d_train_adam_1e-3.log /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.mp4 3 | *.jpeg 4 | *.pkl 5 | trained_models/ 6 | Crawler/videos/ 7 | Evaluation/samples_*/ 8 | Evaluation/data/ 9 | Evaluation/dumps/ 10 | Evaluation/submission_* 11 | Evaluation/sub_t* 12 | Evaluation/mbh_subs/ 13 | Evaluation/RF/ 14 | Evaluation/samples_*/ 15 | Evaluation/val_samples_* 16 | Evaluation/tr_samples_* 17 | my_subs/ 18 | caffe_models/snapshots/hog_4k_1k/ 19 | -------------------------------------------------------------------------------- /Crawler/Kinetics/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Fabian Caba H. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Crawler/Kinetics/README.md: -------------------------------------------------------------------------------- 1 | # Kinetics - Downloader 2 | 3 | ## Usage 4 | First, clone this repository and make sure that all the submodules are also cloned properly. 5 | ``` 6 | git clone https://github.com/activitynet/ActivityNet.git 7 | cd ActivityNet/Crawler/Kinetics 8 | ``` 9 | 10 | Next, setup your environment 11 | ``` 12 | conda env create -f environment.yml 13 | source activate kinetics 14 | pip install --upgrade youtube-dl 15 | ``` 16 | 17 | Finally, download a dataset split by calling: 18 | ``` 19 | mkdir ; python download.py {dataset_split}.csv 20 | ``` 21 | -------------------------------------------------------------------------------- /Crawler/Kinetics/download.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import fnmatch 3 | import glob 4 | import json 5 | import os 6 | import shutil 7 | import subprocess 8 | import uuid 9 | 10 | from joblib import delayed 11 | from joblib import Parallel 12 | import pandas as pd 13 | 14 | 15 | def create_video_folders(dataset, output_dir, tmp_dir): 16 | """Creates a directory for each label name in the dataset.""" 17 | if not os.path.exists(output_dir): 18 | os.makedirs(output_dir) 19 | if not os.path.exists(tmp_dir): 20 | os.makedirs(tmp_dir) 21 | 22 | label_to_dir = {} 23 | for label_name in dataset['label-name'].unique(): 24 | this_dir = os.path.join(output_dir, label_name) 25 | if not os.path.exists(this_dir): 26 | os.makedirs(this_dir) 27 | label_to_dir[label_name] = this_dir 28 | return label_to_dir 29 | 30 | 31 | def construct_video_filename(row, label_to_dir, trim_format='%06d'): 32 | """Given a dataset row, this function constructs the 33 | output filename for a given video. 34 | """ 35 | basename = '%s_%s_%s.mp4' % (row['video-id'], 36 | trim_format % row['start-time'], 37 | trim_format % row['end-time']) 38 | output_filename = os.path.join(label_to_dir[row['label-name']], basename) 39 | return output_filename 40 | 41 | 42 | def download_clip(video_identifier, output_filename, 43 | start_time, end_time, 44 | tmp_dir='/tmp/kinetics', 45 | num_attempts=5, 46 | url_base='https://www.youtube.com/watch?v='): 47 | """Download a video from youtube if exists and is not blocked. 48 | 49 | arguments: 50 | --------- 51 | video_identifier: str 52 | Unique YouTube video identifier (11 characters) 53 | output_filename: str 54 | File path where the video will be stored. 55 | start_time: float 56 | Indicates the begining time in seconds from where the video 57 | will be trimmed. 58 | end_time: float 59 | Indicates the ending time in seconds of the trimmed video. 60 | """ 61 | # Defensive argument checking. 
62 | assert isinstance(video_identifier, str), 'video_identifier must be string' 63 | assert isinstance(output_filename, str), 'output_filename must be string' 64 | assert len(video_identifier) == 11, 'video_identifier must have length 11' 65 | 66 | status = False 67 | # Construct command line for getting the direct video link. 68 | tmp_filename = os.path.join(tmp_dir, 69 | '%s.%%(ext)s' % uuid.uuid4()) 70 | command = ['youtube-dl', 71 | '--quiet', '--no-warnings', 72 | '-f', 'mp4', 73 | '-o', '"%s"' % tmp_filename, 74 | '"%s"' % (url_base + video_identifier)] 75 | command = ' '.join(command) 76 | attempts = 0 77 | while True: 78 | try: 79 | output = subprocess.check_output(command, shell=True, 80 | stderr=subprocess.STDOUT) 81 | except subprocess.CalledProcessError as err: 82 | attempts += 1 83 | if attempts == num_attempts: 84 | return status, err.output 85 | else: 86 | break 87 | 88 | tmp_filename = glob.glob('%s*' % tmp_filename.split('.')[0])[0] 89 | # Construct command to trim the videos (ffmpeg required). 90 | command = ['ffmpeg', 91 | '-i', '"%s"' % tmp_filename, 92 | '-ss', str(start_time), 93 | '-t', str(end_time - start_time), 94 | '-c:v', 'libx264', '-c:a', 'copy', 95 | '-threads', '1', 96 | '-loglevel', 'panic', 97 | '"%s"' % output_filename] 98 | command = ' '.join(command) 99 | try: 100 | output = subprocess.check_output(command, shell=True, 101 | stderr=subprocess.STDOUT) 102 | except subprocess.CalledProcessError as err: 103 | return status, err.output 104 | 105 | # Check if the video was successfully saved. 106 | status = os.path.exists(output_filename) 107 | os.remove(tmp_filename) 108 | return status, 'Downloaded' 109 | 110 | 111 | def download_clip_wrapper(row, label_to_dir, trim_format, tmp_dir): 112 | """Wrapper for parallel processing purposes.""" 113 | output_filename = construct_video_filename(row, label_to_dir, 114 | trim_format) 115 | clip_id = os.path.basename(output_filename).split('.mp4')[0] 116 | if os.path.exists(output_filename): 117 | status = tuple([clip_id, True, 'Exists']) 118 | return status 119 | 120 | downloaded, log = download_clip(row['video-id'], output_filename, 121 | row['start-time'], row['end-time'], 122 | tmp_dir=tmp_dir) 123 | status = tuple([clip_id, downloaded, log]) 124 | return status 125 | 126 | 127 | def parse_kinetics_annotations(input_csv): 128 | """Returns a parsed DataFrame. 129 | 130 | arguments: 131 | --------- 132 | input_csv: str 133 | Path to CSV file containing the following columns: 134 | 'YouTube Identifier,Start time,End time,Class label' 135 | 136 | returns: 137 | ------- 138 | dataset: DataFrame 139 | Pandas with the following columns: 140 | 'video-id', 'start-time', 'end-time', 'label-name' 141 | """ 142 | df = pd.read_csv(input_csv) 143 | df.rename(columns={'youtube_id': 'video-id', 144 | 'time_start': 'start-time', 145 | 'time_end': 'end-time', 146 | 'label': 'label-name', 147 | 'is_cc': 'is-cc'}, inplace=True) 148 | return df 149 | 150 | def main(input_csv, output_dir, 151 | trim_format='%06d', num_jobs=24, tmp_dir='/tmp/kinetics'): 152 | 153 | # Reading and parsing Kinetics. 154 | dataset = parse_kinetics_annotations(input_csv) 155 | 156 | # Creates folders where videos will be saved later. 157 | label_to_dir = create_video_folders(dataset, output_dir, tmp_dir) 158 | 159 | # Download all clips. 
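# (Note on the dispatch below: with num_jobs == 1 the clips are fetched serially;
# otherwise joblib.Parallel runs download_clip_wrapper on num_jobs workers, one CSV
# row per call. Clips already present on disk are reported as 'Exists' and skipped.)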
160 | if num_jobs==1: 161 | status_lst = [] 162 | for i, row in dataset.iterrows(): 163 | status_lst.append(download_clip_wrapper(row, label_to_dir, 164 | trim_format, tmp_dir)) 165 | else: 166 | status_lst = Parallel(n_jobs=num_jobs)(delayed(download_clip_wrapper)( 167 | row, label_to_dir, 168 | trim_format, tmp_dir) for i, row in dataset.iterrows()) 169 | 170 | # Clean tmp dir. 171 | shutil.rmtree(tmp_dir) 172 | 173 | # Save download report. 174 | with open('download_report.json', 'w') as fobj: 175 | fobj.write(json.dumps(status_lst)) 176 | 177 | 178 | if __name__ == '__main__': 179 | description = 'Helper script for downloading and trimming kinetics videos.' 180 | p = argparse.ArgumentParser(description=description) 181 | p.add_argument('input_csv', type=str, 182 | help=('CSV file containing the following format: ' 183 | 'YouTube Identifier,Start time,End time,Class label')) 184 | p.add_argument('output_dir', type=str, 185 | help='Output directory where videos will be saved.') 186 | p.add_argument('-f', '--trim-format', type=str, default='%06d', 187 | help=('This will be the format for the ' 188 | 'filename of trimmed videos: ' 189 | 'videoid_%0xd(start_time)_%0xd(end_time).mp4')) 190 | p.add_argument('-n', '--num-jobs', type=int, default=24) 191 | p.add_argument('-t', '--tmp-dir', type=str, default='/tmp/kinetics') 192 | main(**vars(p.parse_args())) 193 | -------------------------------------------------------------------------------- /Crawler/Kinetics/environment.yml: -------------------------------------------------------------------------------- 1 | name: kinetics 2 | channels: !!python/tuple 3 | - !!python/unicode 4 | 'defaults' 5 | dependencies: 6 | - joblib=0.9.4=py27_0 7 | - menpo::ffmpeg=3.1.3=0 8 | - mkl=2017.0.1=0 9 | - numpy=1.12.1=py27_0 10 | - openssl=1.0.2k=1 11 | - pandas=0.19.2=np112py27_1 12 | - pip=9.0.1=py27_1 13 | - python=2.7.13=0 14 | - python-dateutil=2.6.0=py27_0 15 | - pytz=2017.2=py27_0 16 | - readline=6.2=2 17 | - setuptools=27.2.0=py27_0 18 | - six=1.10.0=py27_0 19 | - sqlite=3.13.0=0 20 | - tk=8.5.18=0 21 | - wheel=0.29.0=py27_0 22 | - zlib=1.2.8=3 23 | - pip: 24 | - decorator==4.0.11 25 | - olefile==0.44 26 | - youtube-dl==2017.6.5 27 | prefix: /home/cabaf/.conda/envs/kinetics 28 | 29 | -------------------------------------------------------------------------------- /Crawler/Kinetics/process_download_report.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import json 3 | import argparse 4 | 5 | 6 | status_and_reason_to_message_dict = { 7 | ('Downloaded', ''): ['Downloaded', 8 | 'Exists'], 9 | 10 | ('Network',''): ['unable to download video data', 11 | 'The read operation timed out', 12 | 'Did not get any data blocks', 13 | 'giving up after 10 retries', 14 | 'Network is unreachable', 15 | 'content too short'], 16 | 17 | ('Unavailable','User-Removed'): ['This video is no longer available because the uploader has closed their YouTube account.', 18 | 'account associated with this video has been terminated', 19 | 'This video has been removed by the user.', 20 | 'This video is not available.', 21 | 'This video does not exist.'], 22 | 23 | ('Unavailable','Copyright'): ['multiple third-party notifications of copyright infringement.', 24 | 'This video is no longer available due to a copyright claim', 25 | 'blocked it on copyright grounds', 26 | 'a duplicate of a previously uploaded video'], 27 | 28 | ('Unavailable','Country-Block'): ['The uploader has not made this video available in your country.', 29 
| 'who has blocked it in your country on copyright grounds.'], 30 | 31 | ('Unavailable','Spam'): ['policy on spam, deceptive practices, and scams.'], 32 | ('Unavailable','Nudity'): ['policy on nudity or sexual content.'], 33 | ('Unavailable','Sign-In'): ['Please sign in to view this video.'], 34 | ('Unavailable','Private'): ['This video is private.'], 35 | ('Unavailable','Guidelines'): ['Community Guidelines.'], 36 | ('Unavailable','Harassment and Bullying'): ['policy on harassment and bullying.'], 37 | ('Unavailable','Service-Terms'): ['Terms of Service.'], 38 | ('Unavailable','Harmful'): ['policy on harmful or dangerous content'], 39 | } 40 | 41 | def get_status_and_reason(msg): 42 | for s_r, lst in status_and_reason_to_message_dict.iteritems(): 43 | if any([x in msg for x in lst]): 44 | return s_r 45 | 46 | print(": error message is not matched with a status and a reason. message:", msg) 47 | 48 | return ('Other', msg) 49 | 50 | def process_download_report(report): 51 | output = [] 52 | for r in report: 53 | name, b, msg = r[0], r[1], r[2] 54 | output += [(name, get_status_and_reason(msg))] 55 | return output 56 | 57 | 58 | def wrapper_process_download_reports(json_files): 59 | all_ouputs = [] 60 | for f in json_files: 61 | with open(f, 'r') as fobj: 62 | report = json.load(fobj) 63 | all_ouputs += process_download_report(report) 64 | return all_ouputs 65 | 66 | def main(input_csv, input_json, output_file, trim_format='%06d', num_input=1): 67 | json_files = [] 68 | if num_input <= 1: 69 | json_files += [input_json] 70 | else: 71 | for i in range(num_input): 72 | json_files +=[input_json + ("-%02d" % (i+1))] 73 | 74 | all_ouputs = wrapper_process_download_reports(json_files) 75 | all_ouputs = dict(all_ouputs) 76 | 77 | dataset = pd.read_csv(input_csv) 78 | 79 | status_lst = [] 80 | reason_lst = [] 81 | for indx, row in dataset.iterrows(): 82 | name = '%s_%s_%s' % (row['youtube_id'], 83 | trim_format % row['time_start'], 84 | trim_format % row['time_end']) 85 | 86 | s, r = all_ouputs[name] 87 | status_lst += [s] 88 | reason_lst += [r] 89 | if indx % 10000 == 0: 90 | print(indx) 91 | print("Done!!") 92 | dataset["status"] = status_lst 93 | dataset["reason"] = reason_lst 94 | 95 | dataset.to_csv(output_file, index=False) 96 | 97 | if __name__ == '__main__': 98 | description = 'Helper script for processing the reports from downloading and trimming kinetics videos.' 99 | p = argparse.ArgumentParser(description=description) 100 | p.add_argument('input_csv', type=str, 101 | help=('CSV file containing the following format: ' 102 | 'label, youtube_id, time_start, time_end, split, is_cc')) 103 | p.add_argument('input_json', type=str, 104 | help=('base name for download report json files'), 105 | default='download_report.json') 106 | p.add_argument('output_file', type=str, 107 | help='Output csv file with statuses and reasons.') 108 | p.add_argument('-f', '--trim-format', type=str, default='%06d', 109 | help=('This will be the format for the ' 110 | 'filename of trimmed videos: ' 111 | 'videoid_%0xd(start_time)_%0xd(end_time).mp4')) 112 | p.add_argument('-n', '--num_input', 113 | help=('number of input json files with the same base name input_json.'), 114 | type=int, default=1) 115 | main(**vars(p.parse_args())) 116 | -------------------------------------------------------------------------------- /Crawler/README: -------------------------------------------------------------------------------- 1 | ActivityNet Tools 2 | ================= 3 | 4 | Requirements 5 | ------------ 6 | 1. 
youtube-dl (https://github.com/rg3/youtube-dl/) 7 | 8 | Fetch ActivityNet 9 | ----------------- 10 | To download all the ActivityNet videos run the following command line: 11 | $ mkdir $VIDEO_PATH 12 | $ chmod +x fetch_activitynet_videos.sh 13 | $ ./fetch_activitynet_videos.sh $VIDEO_PATH activity_net.v1-X.json 14 | 15 | Where $VIDEO_PATH is the path where the videos will be located. If you already 16 | have a subset of the videos, input that directory. 17 | -------------------------------------------------------------------------------- /Crawler/fetch_activitynet_videos.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VIDEOPATH=$1 4 | JSON_FILE=$2 5 | TEMP_FILE="command_list.txt" 6 | 7 | if [ -d $VIDEOPATH ]; then 8 | python run_crosscheck.py $VIDEOPATH $JSON_FILE $TEMP_FILE 9 | if [ -f $TEMP_FILE ]; then 10 | bash $TEMP_FILE 11 | else 12 | echo "File $TEMP_FILE does not exist." 13 | fi 14 | else 15 | echo "Directory does not exist." 16 | exit 0 17 | fi 18 | 19 | #rm $TEMP_FILE 20 | echo "Have a good day!" 21 | -------------------------------------------------------------------------------- /Crawler/run_crosscheck.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | import glob 3 | import json 4 | import os 5 | 6 | def crosscheck_videos(video_path, ann_file): 7 | # Get existing videos 8 | existing_vids = glob.glob("%s/*.mp4" % video_path) 9 | for idx, vid in enumerate(existing_vids): 10 | basename = os.path.basename(vid).split(".mp4")[0] 11 | if len(basename) == 13: 12 | existing_vids[idx] = basename[2:] 13 | elif len(basename) == 11: 14 | existing_vids[idx] = basename 15 | else: 16 | raise RuntimeError("Unknown filename format: %s", vid) 17 | # Read and get video IDs from annotation file 18 | with open(ann_file, "r") as fobj: 19 | anet_v_1_0 = json.load(fobj) 20 | all_vids = anet_v_1_0["database"].keys() 21 | non_existing_videos = [] 22 | for vid in all_vids: 23 | if vid in existing_vids: 24 | continue 25 | else: 26 | non_existing_videos.append(vid) 27 | return non_existing_videos 28 | 29 | def main(video_path, ann_file, output_filename): 30 | non_existing_videos = crosscheck_videos(video_path, ann_file) 31 | print "No of non-existing videos = {}" .format(len(non_existing_videos)) 32 | filename = os.path.join(video_path, "v_%s.mp4") 33 | cmd_base = "youtube-dl -f best -f mp4 " 34 | cmd_base += '"https://www.youtube.com/watch?v=%s" ' 35 | cmd_base += '-o "%s"' % filename 36 | with open(output_filename, "w") as fobj: 37 | for vid in non_existing_videos: 38 | cmd = cmd_base % (vid, vid) 39 | fobj.write("%s\n" % cmd) 40 | 41 | if __name__ == "__main__": 42 | parser = ArgumentParser(description="Script to double check video content.") 43 | parser.add_argument("video_path", help="Where are the videos located? (Full path)") 44 | parser.add_argument("ann_file", help="Where is the annotation file?") 45 | parser.add_argument("output_filename", help="Output script location.") 46 | args = vars(parser.parse_args()) 47 | main(**args) 48 | -------------------------------------------------------------------------------- /Evaluation/README.md: -------------------------------------------------------------------------------- 1 | # ActivityNet Large Scale Activity Recognition Challenge - Evaluation Toolkit 2 | This file is taken as is from the source ActivityNet repository.
3 | This is the documentation of the ActivityNet Large Scale Activity Recognition 4 | Challenge Evaluation Toolkit. It includes APIs to evaluate the performance of a method in the two different tasks in the challenge: *untrimmed video classification* and *activity detection*. For more information about the challenge competitions, please read the [guidelines](http://activity-net.org/challenges/2016/guidelines.html). 5 | 6 | ## Dependencies 7 | The Evaluation Toolkit is written purely in Python (>=2.7) and requires the 8 | following third-party libraries: 9 | * [Numpy](http://www.numpy.org/) 10 | * [Pandas](http://pandas.pydata.org/) 11 | 12 | ## Getting started 13 | We include sample prediction files in the `data` folder to show how to evaluate your prediction results. Please follow these steps to obtain the performance evaluation on the provided sample files: 14 | * Clone this repository with `git clone`. 15 | * To evaluate classification performance call: `python get_classification_performance.py data/activity_net.v1-3.min.json sample_classification_prediction.json` 16 | * To evaluate detection performance call: `python get_detection_performance.py data/activity_net.v1-3.min.json sample_detection_prediction.json` 17 | 18 | ## Contributions and Troubleshooting 19 | Contributions are welcome; please keep your pull request simple so we can get back to you quickly. If you find a bug, please open a new issue and describe the problem. 20 | -------------------------------------------------------------------------------- /Evaluation/check_lmdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Wed Jun 21 22:20:12 2017 5 | 6 | @author: hadoop 7 | Description: Read LMDB 8 | 9 | """ 10 | import lmdb 11 | import numpy as np 12 | import caffe 13 | import cv2 14 | 15 | #LMDB_PATH = "/home/hadoop/VisionWorkspace/KTH_OpticalFlow/dataset/kth_actions_train/LMDB/OF_lmdb" 16 | LMDB_PATH = '/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/train_hog_lmdb' 17 | 18 | def waitTillEscPressed(): 19 | while(True): 20 | if cv2.waitKey(10)==27: 21 | print("Esc Pressed") 22 | return 23 | 24 | if __name__ == '__main__': 25 | 26 | env = lmdb.open(LMDB_PATH, readonly=True) 27 | print env.stat() 28 | i,j = 0, 0 29 | with env.begin() as txn: 30 | cursor = txn.cursor() 31 | datum = caffe.proto.caffe_pb2.Datum() 32 | for k,v in cursor: 33 | datum.ParseFromString(v) 34 | lab = datum.label 35 | #print "Shape : {}" .format(datum.width) 36 | #flat_x = np.fromstring(datum.data, dtype=np.uint8) 37 | flat_x = np.array(datum.float_data) 38 | x = flat_x.reshape(datum.channels, datum.height, datum.width) 39 | y = datum.label 40 | #print "sum(x) = {} " .format(np.sum(x)) 41 | #print "y = %d " %y 42 | j += 1 43 | if np.sum(x) == 0: 44 | print j 45 | print "class %d " %y 46 | i += 1 47 | #raw_datum = txn.get(b'00000000') 48 | 49 | print 'No of 0s are %d ' %i 50 | # 51 | 52 | 53 | #label = datum.label 54 | # data = caffe.io.datum_to_array(datum) 55 | # for l, d in zip(label, data): 56 | # print l, d 57 | 58 | # Iterate over the LMDB values 59 | 60 | #with env.begin() as txn: 61 | # cursor = txn.cursor() 62 | # datum = caffe.proto.caffe_pb2.Datum() 63 | # for key, value in cursor: 64 | # datum.ParseFromString(value) 65 | # label = datum.label 66 | # flat_x = np.fromstring(datum.data, dtype=np.uint8) 67 | # x = flat_x.reshape(datum.channels, datum.height, datum.width) 68 | # img = convert_to_bgr(x) 69 | # cv2.imshow("BGR_OF",
img) 70 | # print "Label = "+str(label) 71 | # keyPressed = waitTillEscPressed() 72 | # if keyPressed==0: # write to file 73 | # cv2.imwrite(os.path.join(curr_path,key+"_"+str(label)+".jpg"),img) 74 | #if key == '00000099': 75 | # print(key, value) 76 | -------------------------------------------------------------------------------- /Evaluation/eval_classification.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | 10 | class ANETclassification(object): 11 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 12 | PREDICTION_FIELDS = ['results', 'version', 'external_data'] 13 | 14 | def __init__(self, ground_truth_filename=None, prediction_filename=None, 15 | ground_truth_fields=GROUND_TRUTH_FIELDS, 16 | prediction_fields=PREDICTION_FIELDS, 17 | subset='validation', verbose=False, top_k=3, 18 | check_status=True): 19 | if not ground_truth_filename: 20 | raise IOError('Please input a valid ground truth file.') 21 | if not prediction_filename: 22 | raise IOError('Please input a valid prediction file.') 23 | self.subset = subset 24 | self.verbose = verbose 25 | self.gt_fields = ground_truth_fields 26 | self.pred_fields = prediction_fields 27 | self.top_k = top_k 28 | self.ap = None 29 | self.hit_at_k = None 30 | self.check_status = check_status 31 | # Retrieve blocked videos from server. 32 | if self.check_status: 33 | self.blocked_videos = get_blocked_videos() 34 | else: 35 | self.blocked_videos = list() 36 | # Import ground truth and predictions. 37 | self.ground_truth, self.activity_index = self._import_ground_truth( 38 | ground_truth_filename) 39 | self.prediction = self._import_prediction(prediction_filename) 40 | 41 | if self.verbose: 42 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 43 | nr_gt = len(self.ground_truth) 44 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 45 | nr_pred = len(self.prediction) 46 | print '\tNumber of predictions: {}'.format(nr_pred) 47 | 48 | def _import_ground_truth(self, ground_truth_filename): 49 | """Reads ground truth file, checks if it is well formatted, and returns 50 | the ground truth instances and the activity classes. 51 | 52 | Parameters 53 | ---------- 54 | ground_truth_filename : str 55 | Full path to the ground truth json file. 56 | 57 | Outputs 58 | ------- 59 | ground_truth : df 60 | Data frame containing the ground truth instances. 61 | activity_index : dict 62 | Dictionary containing class index. 
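Note: the JSON is expected to expose the fields listed in GROUND_TRUTH_FIELDS
('database', 'taxonomy', 'version'); each entry of 'database' provides a 'subset'
tag and a list of 'annotations' whose 'label' values define the activity classes
read here.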
63 | """ 64 | with open(ground_truth_filename, 'r') as fobj: 65 | data = json.load(fobj) 66 | # Checking format 67 | if not all([field in data.keys() for field in self.gt_fields]): 68 | raise IOError('Please input a valid ground truth file.') 69 | 70 | # Initialize data frame 71 | activity_index, cidx = {}, 0 72 | video_lst, label_lst = [], [] 73 | for videoid, v in data['database'].iteritems(): 74 | if self.subset != v['subset']: 75 | continue 76 | if videoid in self.blocked_videos: 77 | continue 78 | for ann in v['annotations']: 79 | if ann['label'] not in activity_index: 80 | activity_index[ann['label']] = cidx 81 | cidx += 1 82 | video_lst.append(videoid) 83 | label_lst.append(activity_index[ann['label']]) 84 | ground_truth = pd.DataFrame({'video-id': video_lst, 85 | 'label': label_lst}) 86 | ground_truth = ground_truth.drop_duplicates().reset_index(drop=True) 87 | return ground_truth, activity_index 88 | 89 | def _import_prediction(self, prediction_filename): 90 | """Reads prediction file, checks if it is well formatted, and returns 91 | the prediction instances. 92 | 93 | Parameters 94 | ---------- 95 | prediction_filename : str 96 | Full path to the prediction json file. 97 | 98 | Outputs 99 | ------- 100 | prediction : df 101 | Data frame containing the prediction instances. 102 | """ 103 | with open(prediction_filename, 'r') as fobj: 104 | data = json.load(fobj) 105 | # Checking format... 106 | if not all([field in data.keys() for field in self.pred_fields]): 107 | raise IOError('Please input a valid prediction file.') 108 | 109 | # Initialize data frame 110 | video_lst, label_lst, score_lst = [], [], [] 111 | for videoid, v in data['results'].iteritems(): 112 | if videoid in self.blocked_videos: 113 | continue 114 | for result in v: 115 | label = self.activity_index[result['label']] 116 | video_lst.append(videoid) 117 | label_lst.append(label) 118 | score_lst.append(result['score']) 119 | prediction = pd.DataFrame({'video-id': video_lst, 120 | 'label': label_lst, 121 | 'score': score_lst}) 122 | return prediction 123 | 124 | def wrapper_compute_average_precision(self): 125 | """Computes average precision for each class in the subset. 126 | """ 127 | ap = np.zeros(len(self.activity_index.items())) 128 | for activity, cidx in self.activity_index.iteritems(): 129 | gt_idx = self.ground_truth['label'] == cidx 130 | pred_idx = self.prediction['label'] == cidx 131 | ap[cidx] = compute_average_precision_classification( 132 | self.ground_truth.loc[gt_idx].reset_index(drop=True), 133 | self.prediction.loc[pred_idx].reset_index(drop=True)) 134 | return ap 135 | 136 | def evaluate(self): 137 | """Evaluates a prediction file. For the detection task we measure the 138 | interpolated mean average precision to measure the performance of a 139 | method. 
140 | """ 141 | ap = self.wrapper_compute_average_precision() 142 | hit_at_k = compute_video_hit_at_k(self.ground_truth, 143 | self.prediction, top_k=self.top_k) 144 | avg_hit_at_k = compute_video_hit_at_k( 145 | self.ground_truth, self.prediction, top_k=self.top_k, avg=True) 146 | if self.verbose: 147 | print ('[RESULTS] Performance on ActivityNet untrimmed video ' 148 | 'classification task.') 149 | print '\tMean Average Precision: {}'.format(ap.mean()) 150 | print '\tHit@{}: {}'.format(self.top_k, hit_at_k) 151 | print '\tAvg Hit@{}: {}'.format(self.top_k, avg_hit_at_k) 152 | self.ap = ap 153 | self.hit_at_k = hit_at_k 154 | self.avg_hit_at_k = avg_hit_at_k 155 | 156 | ################################################################################ 157 | # Metrics 158 | ################################################################################ 159 | 160 | def compute_average_precision_classification(ground_truth, prediction): 161 | """Compute average precision (classification task) between ground truth and 162 | predictions data frames. If multiple predictions occurs for the same 163 | predicted segment, only the one with highest score is matched as 164 | true positive. This code is greatly inspired by Pascal VOC devkit. 165 | 166 | Parameters 167 | ---------- 168 | ground_truth : df 169 | Data frame containing the ground truth instances. 170 | Required fields: ['video-id'] 171 | prediction : df 172 | Data frame containing the prediction instances. 173 | Required fields: ['video-id, 'score'] 174 | 175 | Outputs 176 | ------- 177 | ap : float 178 | Average precision score. 179 | """ 180 | npos = float(len(ground_truth)) 181 | lock_gt = np.ones(len(ground_truth)) * -1 182 | # Sort predictions by decreasing score order. 183 | sort_idx = prediction['score'].values.argsort()[::-1] 184 | prediction = prediction.loc[sort_idx].reset_index(drop=True) 185 | 186 | # Initialize true positive and false positive vectors. 187 | tp = np.zeros(len(prediction)) 188 | fp = np.zeros(len(prediction)) 189 | 190 | # Assigning true positive to truly grount truth instances. 191 | for idx in range(len(prediction)): 192 | this_pred = prediction.loc[idx] 193 | gt_idx = ground_truth['video-id'] == this_pred['video-id'] 194 | # Check if there is at least one ground truth in the video associated. 195 | if not gt_idx.any(): 196 | fp[idx] = 1 197 | continue 198 | this_gt = ground_truth.loc[gt_idx].reset_index() 199 | if lock_gt[this_gt['index']] >= 0: 200 | fp[idx] = 1 201 | else: 202 | tp[idx] = 1 203 | lock_gt[this_gt['index']] = idx 204 | 205 | # Computing prec-rec 206 | tp = np.cumsum(tp).astype(np.float) 207 | fp = np.cumsum(fp).astype(np.float) 208 | rec = tp / npos 209 | prec = tp / (tp + fp) 210 | return interpolated_prec_rec(prec, rec) 211 | 212 | def compute_video_hit_at_k(ground_truth, prediction, top_k=3, avg=False): 213 | """Compute accuracy at k prediction between ground truth and 214 | predictions data frames. This code is greatly inspired by evaluation 215 | performed in Karpathy et al. CVPR14. 216 | 217 | Parameters 218 | ---------- 219 | ground_truth : df 220 | Data frame containing the ground truth instances. 221 | Required fields: ['video-id', 'label'] 222 | prediction : df 223 | Data frame containing the prediction instances. 224 | Required fields: ['video-id, 'label', 'score'] 225 | 226 | Outputs 227 | ------- 228 | acc : float 229 | Top k accuracy score. 
230 | """ 231 | video_ids = np.unique(ground_truth['video-id'].values) 232 | avg_hits_per_vid = np.zeros(video_ids.size) 233 | for i, vid in enumerate(video_ids): 234 | pred_idx = prediction['video-id'] == vid 235 | if not pred_idx.any(): 236 | continue 237 | this_pred = prediction.loc[pred_idx].reset_index(drop=True) 238 | # Get top K predictions sorted by decreasing score. 239 | sort_idx = this_pred['score'].values.argsort()[::-1][:top_k] 240 | this_pred = this_pred.loc[sort_idx].reset_index(drop=True) 241 | # Get labels and compare against ground truth. 242 | pred_label = this_pred['label'].tolist() 243 | gt_idx = ground_truth['video-id'] == vid 244 | gt_label = ground_truth.loc[gt_idx]['label'].tolist() 245 | avg_hits_per_vid[i] = np.mean([1 if this_label in pred_label else 0 246 | for this_label in gt_label]) 247 | if not avg: 248 | avg_hits_per_vid[i] = np.ceil(avg_hits_per_vid[i]) 249 | return float(avg_hits_per_vid.mean()) 250 | -------------------------------------------------------------------------------- /Evaluation/eval_detection.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | from utils import segment_iou 10 | 11 | class ANETdetection(object): 12 | 13 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 14 | PREDICTION_FIELDS = ['results', 'version', 'external_data'] 15 | 16 | def __init__(self, ground_truth_filename=None, prediction_filename=None, 17 | ground_truth_fields=GROUND_TRUTH_FIELDS, 18 | prediction_fields=PREDICTION_FIELDS, 19 | tiou_thresholds=np.linspace(0.5, 0.95, 10), 20 | subset='validation', verbose=False, 21 | check_status=True): 22 | if not ground_truth_filename: 23 | raise IOError('Please input a valid ground truth file.') 24 | if not prediction_filename: 25 | raise IOError('Please input a valid prediction file.') 26 | self.subset = subset 27 | self.tiou_thresholds = tiou_thresholds 28 | self.verbose = verbose 29 | self.gt_fields = ground_truth_fields 30 | self.pred_fields = prediction_fields 31 | self.ap = None 32 | self.check_status = check_status 33 | # Retrieve blocked videos from server. 34 | if self.check_status: 35 | self.blocked_videos = get_blocked_videos() 36 | else: 37 | self.blocked_videos = list() 38 | # Import ground truth and predictions. 39 | self.ground_truth, self.activity_index = self._import_ground_truth( 40 | ground_truth_filename) 41 | self.prediction = self._import_prediction(prediction_filename) 42 | 43 | if self.verbose: 44 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 45 | nr_gt = len(self.ground_truth) 46 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 47 | nr_pred = len(self.prediction) 48 | print '\tNumber of predictions: {}'.format(nr_pred) 49 | print '\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds) 50 | 51 | def _import_ground_truth(self, ground_truth_filename): 52 | """Reads ground truth file, checks if it is well formatted, and returns 53 | the ground truth instances and the activity classes. 54 | 55 | Parameters 56 | ---------- 57 | ground_truth_filename : str 58 | Full path to the ground truth json file. 59 | 60 | Outputs 61 | ------- 62 | ground_truth : df 63 | Data frame containing the ground truth instances. 64 | activity_index : dict 65 | Dictionary containing class index. 
66 | """ 67 | with open(ground_truth_filename, 'r') as fobj: 68 | data = json.load(fobj) 69 | # Checking format 70 | if not all([field in data.keys() for field in self.gt_fields]): 71 | raise IOError('Please input a valid ground truth file.') 72 | 73 | # Read ground truth data. 74 | activity_index, cidx = {}, 0 75 | video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], [] 76 | for videoid, v in data['database'].iteritems(): 77 | if self.subset != v['subset']: 78 | continue 79 | if videoid in self.blocked_videos: 80 | continue 81 | for ann in v['annotations']: 82 | if ann['label'] not in activity_index: 83 | activity_index[ann['label']] = cidx 84 | cidx += 1 85 | video_lst.append(videoid) 86 | t_start_lst.append(ann['segment'][0]) 87 | t_end_lst.append(ann['segment'][1]) 88 | label_lst.append(activity_index[ann['label']]) 89 | 90 | ground_truth = pd.DataFrame({'video-id': video_lst, 91 | 't-start': t_start_lst, 92 | 't-end': t_end_lst, 93 | 'label': label_lst}) 94 | return ground_truth, activity_index 95 | 96 | def _import_prediction(self, prediction_filename): 97 | """Reads prediction file, checks if it is well formatted, and returns 98 | the prediction instances. 99 | 100 | Parameters 101 | ---------- 102 | prediction_filename : str 103 | Full path to the prediction json file. 104 | 105 | Outputs 106 | ------- 107 | prediction : df 108 | Data frame containing the prediction instances. 109 | """ 110 | with open(prediction_filename, 'r') as fobj: 111 | data = json.load(fobj) 112 | # Checking format... 113 | if not all([field in data.keys() for field in self.pred_fields]): 114 | raise IOError('Please input a valid prediction file.') 115 | 116 | # Read predicitons. 117 | video_lst, t_start_lst, t_end_lst = [], [], [] 118 | label_lst, score_lst = [], [] 119 | for videoid, v in data['results'].iteritems(): 120 | if videoid in self.blocked_videos: 121 | continue 122 | for result in v: 123 | label = self.activity_index[result['label']] 124 | video_lst.append(videoid) 125 | t_start_lst.append(result['segment'][0]) 126 | t_end_lst.append(result['segment'][1]) 127 | label_lst.append(label) 128 | score_lst.append(result['score']) 129 | prediction = pd.DataFrame({'video-id': video_lst, 130 | 't-start': t_start_lst, 131 | 't-end': t_end_lst, 132 | 'label': label_lst, 133 | 'score': score_lst}) 134 | return prediction 135 | 136 | def wrapper_compute_average_precision(self): 137 | """Computes average precision for each class in the subset. 138 | """ 139 | ap = np.zeros((len(self.tiou_thresholds), len(self.activity_index.items()))) 140 | for activity, cidx in self.activity_index.iteritems(): 141 | gt_idx = self.ground_truth['label'] == cidx 142 | pred_idx = self.prediction['label'] == cidx 143 | ap[:,cidx] = compute_average_precision_detection( 144 | self.ground_truth.loc[gt_idx].reset_index(drop=True), 145 | self.prediction.loc[pred_idx].reset_index(drop=True), 146 | tiou_thresholds=self.tiou_thresholds) 147 | return ap 148 | 149 | def evaluate(self): 150 | """Evaluates a prediction file. For the detection task we measure the 151 | interpolated mean average precision to measure the performance of a 152 | method. 153 | """ 154 | self.ap = self.wrapper_compute_average_precision() 155 | self.mAP = self.ap.mean(axis=1) 156 | if self.verbose: 157 | print '[RESULTS] Performance on ActivityNet detection task.' 
158 | print '\tAverage-mAP: {}'.format(self.mAP.mean()) 159 | 160 | def compute_average_precision_detection(ground_truth, prediction, tiou_thresholds=np.linspace(0.5, 0.95, 10)): 161 | """Compute average precision (detection task) between ground truth and 162 | predictions data frames. If multiple predictions occurs for the same 163 | predicted segment, only the one with highest score is matches as 164 | true positive. This code is greatly inspired by Pascal VOC devkit. 165 | 166 | Parameters 167 | ---------- 168 | ground_truth : df 169 | Data frame containing the ground truth instances. 170 | Required fields: ['video-id', 't-start', 't-end'] 171 | prediction : df 172 | Data frame containing the prediction instances. 173 | Required fields: ['video-id, 't-start', 't-end', 'score'] 174 | tiou_thresholds : 1darray, optional 175 | Temporal intersection over union threshold. 176 | 177 | Outputs 178 | ------- 179 | ap : float 180 | Average precision score. 181 | """ 182 | npos = float(len(ground_truth)) 183 | lock_gt = np.ones((len(tiou_thresholds),len(ground_truth))) * -1 184 | # Sort predictions by decreasing score order. 185 | sort_idx = prediction['score'].values.argsort()[::-1] 186 | prediction = prediction.loc[sort_idx].reset_index(drop=True) 187 | 188 | # Initialize true positive and false positive vectors. 189 | tp = np.zeros((len(tiou_thresholds), len(prediction))) 190 | fp = np.zeros((len(tiou_thresholds), len(prediction))) 191 | 192 | # Adaptation to query faster 193 | ground_truth_gbvn = ground_truth.groupby('video-id') 194 | 195 | # Assigning true positive to truly grount truth instances. 196 | for idx, this_pred in prediction.iterrows(): 197 | 198 | try: 199 | # Check if there is at least one ground truth in the video associated. 200 | ground_truth_videoid = ground_truth_gbvn.get_group(this_pred['video-id']) 201 | except Exception as e: 202 | fp[:, idx] = 1 203 | continue 204 | 205 | this_gt = ground_truth_videoid.reset_index() 206 | tiou_arr = segment_iou(this_pred[['t-start', 't-end']].values, 207 | this_gt[['t-start', 't-end']].values) 208 | # We would like to retrieve the predictions with highest tiou score. 209 | tiou_sorted_idx = tiou_arr.argsort()[::-1] 210 | for tidx, tiou_thr in enumerate(tiou_thresholds): 211 | for jdx in tiou_sorted_idx: 212 | if tiou_arr[jdx] < tiou_thr: 213 | fp[tidx, idx] = 1 214 | break 215 | if lock_gt[tidx, this_gt.loc[jdx]['index']] >= 0: 216 | continue 217 | # Assign as true positive after the filters above. 
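# (lock_gt[tidx, gt_index] records the index of the prediction that claimed this
# ground-truth segment, so each segment is matched at most once per tIoU threshold;
# later predictions that only overlap already-claimed segments fall through the loop
# and are marked as false positives below.)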
218 | tp[tidx, idx] = 1 219 | lock_gt[tidx, this_gt.loc[jdx]['index']] = idx 220 | break 221 | 222 | if fp[tidx, idx] == 0 and tp[tidx, idx] == 0: 223 | fp[tidx, idx] = 1 224 | 225 | ap = np.zeros(len(tiou_thresholds)) 226 | 227 | for tidx in range(len(tiou_thresholds)): 228 | # Computing prec-rec 229 | this_tp = np.cumsum(tp[tidx,:]).astype(np.float) 230 | this_fp = np.cumsum(fp[tidx,:]).astype(np.float) 231 | rec = this_tp / npos 232 | prec = this_tp / (this_tp + this_fp) 233 | ap[tidx] = interpolated_prec_rec(prec, rec) 234 | 235 | return ap 236 | -------------------------------------------------------------------------------- /Evaluation/eval_kinetics.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | 10 | class ANETclassification(object): 11 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 12 | PREDICTION_FIELDS = ['results', 'version', 'external_data'] 13 | 14 | def __init__(self, ground_truth_filename=None, prediction_filename=None, 15 | ground_truth_fields=GROUND_TRUTH_FIELDS, 16 | prediction_fields=PREDICTION_FIELDS, 17 | subset='validation', verbose=False, top_k=3, 18 | check_status=True): 19 | if not ground_truth_filename: 20 | raise IOError('Please input a valid ground truth file.') 21 | if not prediction_filename: 22 | raise IOError('Please input a valid prediction file.') 23 | self.subset = subset 24 | self.verbose = verbose 25 | self.gt_fields = ground_truth_fields 26 | self.pred_fields = prediction_fields 27 | self.top_k = top_k 28 | self.ap = None 29 | self.hit_at_k = None 30 | self.check_status = check_status 31 | # Retrieve blocked videos from server. 32 | if self.check_status: 33 | self.blocked_videos = get_blocked_videos() 34 | else: 35 | self.blocked_videos = list() 36 | # Import ground truth and predictions. 37 | self.ground_truth, self.activity_index = self._import_ground_truth( 38 | ground_truth_filename) 39 | self.prediction = self._import_prediction(prediction_filename) 40 | 41 | if self.verbose: 42 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 43 | nr_gt = len(self.ground_truth) 44 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 45 | nr_pred = len(self.prediction) 46 | print '\tNumber of predictions: {}'.format(nr_pred) 47 | 48 | def _import_ground_truth(self, ground_truth_filename): 49 | """Reads ground truth file, checks if it is well formatted, and returns 50 | the ground truth instances and the activity classes. 51 | 52 | Parameters 53 | ---------- 54 | ground_truth_filename : str 55 | Full path to the ground truth json file. 56 | 57 | Outputs 58 | ------- 59 | ground_truth : df 60 | Data frame containing the ground truth instances. 61 | activity_index : dict 62 | Dictionary containing class index. 
63 | """ 64 | with open(ground_truth_filename, 'r') as fobj: 65 | data = json.load(fobj) 66 | # Checking format 67 | if not all([field in data.keys() for field in self.gt_fields]): 68 | raise IOError('Please input a valid ground truth file.') 69 | 70 | # Initialize data frame 71 | activity_index, cidx = {}, 0 72 | video_lst, label_lst = [], [] 73 | for videoid, v in data['database'].iteritems(): 74 | if self.subset != v['subset']: 75 | continue 76 | if videoid in self.blocked_videos: 77 | continue 78 | for ann in v['annotations']: 79 | if ann['label'] not in activity_index: 80 | activity_index[ann['label']] = cidx 81 | cidx += 1 82 | video_lst.append(videoid) 83 | label_lst.append(activity_index[ann['label']]) 84 | ground_truth = pd.DataFrame({'video-id': video_lst, 85 | 'label': label_lst}) 86 | ground_truth = ground_truth.drop_duplicates().reset_index(drop=True) 87 | return ground_truth, activity_index 88 | 89 | def _import_prediction(self, prediction_filename): 90 | """Reads prediction file, checks if it is well formatted, and returns 91 | the prediction instances. 92 | 93 | Parameters 94 | ---------- 95 | prediction_filename : str 96 | Full path to the prediction json file. 97 | 98 | Outputs 99 | ------- 100 | prediction : df 101 | Data frame containing the prediction instances. 102 | """ 103 | with open(prediction_filename, 'r') as fobj: 104 | data = json.load(fobj) 105 | # Checking format... 106 | if not all([field in data.keys() for field in self.pred_fields]): 107 | raise IOError('Please input a valid prediction file.') 108 | 109 | # Initialize data frame 110 | video_lst, label_lst, score_lst = [], [], [] 111 | for videoid, v in data['results'].iteritems(): 112 | if videoid in self.blocked_videos: 113 | continue 114 | for result in v: 115 | label = self.activity_index[result['label']] 116 | video_lst.append(videoid) 117 | label_lst.append(label) 118 | score_lst.append(result['score']) 119 | prediction = pd.DataFrame({'video-id': video_lst, 120 | 'label': label_lst, 121 | 'score': score_lst}) 122 | return prediction 123 | 124 | def wrapper_compute_average_precision(self): 125 | """Computes average precision for each class in the subset. 126 | """ 127 | ap = np.zeros(len(self.activity_index.items())) 128 | for activity, cidx in self.activity_index.iteritems(): 129 | gt_idx = self.ground_truth['label'] == cidx 130 | pred_idx = self.prediction['label'] == cidx 131 | ap[cidx] = compute_average_precision_classification( 132 | self.ground_truth.loc[gt_idx].reset_index(drop=True), 133 | self.prediction.loc[pred_idx].reset_index(drop=True)) 134 | return ap 135 | 136 | def evaluate(self): 137 | """Evaluates a prediction file. For the detection task we measure the 138 | interpolated mean average precision to measure the performance of a 139 | method. 
140 | """ 141 | ap = self.wrapper_compute_average_precision() 142 | hit_at_k = compute_video_hit_at_k(self.ground_truth, 143 | self.prediction, top_k=self.top_k) 144 | avg_hit_at_k = compute_video_hit_at_k( 145 | self.ground_truth, self.prediction, top_k=self.top_k, avg=True) 146 | if self.verbose: 147 | print ('[RESULTS] Performance on ActivityNet untrimmed video ' 148 | 'classification task.') 149 | print '\tMean Average Precision: {}'.format(ap.mean()) 150 | print '\tError@{}: {}'.format(self.top_k, 1.0 - hit_at_k) 151 | #print '\tAvg Hit@{}: {}'.format(self.top_k, avg_hit_at_k) 152 | self.ap = ap 153 | self.hit_at_k = hit_at_k 154 | self.avg_hit_at_k = avg_hit_at_k 155 | 156 | ################################################################################ 157 | # Metrics 158 | ################################################################################ 159 | 160 | def compute_average_precision_classification(ground_truth, prediction): 161 | """Compute average precision (classification task) between ground truth and 162 | predictions data frames. If multiple predictions occurs for the same 163 | predicted segment, only the one with highest score is matched as 164 | true positive. This code is greatly inspired by Pascal VOC devkit. 165 | 166 | Parameters 167 | ---------- 168 | ground_truth : df 169 | Data frame containing the ground truth instances. 170 | Required fields: ['video-id'] 171 | prediction : df 172 | Data frame containing the prediction instances. 173 | Required fields: ['video-id, 'score'] 174 | 175 | Outputs 176 | ------- 177 | ap : float 178 | Average precision score. 179 | """ 180 | npos = float(len(ground_truth)) 181 | lock_gt = np.ones(len(ground_truth)) * -1 182 | # Sort predictions by decreasing score order. 183 | sort_idx = prediction['score'].values.argsort()[::-1] 184 | prediction = prediction.loc[sort_idx].reset_index(drop=True) 185 | 186 | # Initialize true positive and false positive vectors. 187 | tp = np.zeros(len(prediction)) 188 | fp = np.zeros(len(prediction)) 189 | 190 | # Assigning true positive to truly grount truth instances. 191 | for idx in range(len(prediction)): 192 | this_pred = prediction.loc[idx] 193 | gt_idx = ground_truth['video-id'] == this_pred['video-id'] 194 | # Check if there is at least one ground truth in the video associated. 195 | if not gt_idx.any(): 196 | fp[idx] = 1 197 | continue 198 | this_gt = ground_truth.loc[gt_idx].reset_index() 199 | if lock_gt[this_gt['index']] >= 0: 200 | fp[idx] = 1 201 | else: 202 | tp[idx] = 1 203 | lock_gt[this_gt['index']] = idx 204 | 205 | # Computing prec-rec 206 | tp = np.cumsum(tp).astype(np.float) 207 | fp = np.cumsum(fp).astype(np.float) 208 | rec = tp / npos 209 | prec = tp / (tp + fp) 210 | return interpolated_prec_rec(prec, rec) 211 | 212 | def compute_video_hit_at_k(ground_truth, prediction, top_k=3, avg=False): 213 | """Compute accuracy at k prediction between ground truth and 214 | predictions data frames. This code is greatly inspired by evaluation 215 | performed in Karpathy et al. CVPR14. 216 | 217 | Parameters 218 | ---------- 219 | ground_truth : df 220 | Data frame containing the ground truth instances. 221 | Required fields: ['video-id', 'label'] 222 | prediction : df 223 | Data frame containing the prediction instances. 224 | Required fields: ['video-id, 'label', 'score'] 225 | 226 | Outputs 227 | ------- 228 | acc : float 229 | Top k accuracy score. 
230 | """ 231 | video_ids = np.unique(ground_truth['video-id'].values) 232 | avg_hits_per_vid = np.zeros(video_ids.size) 233 | for i, vid in enumerate(video_ids): 234 | pred_idx = prediction['video-id'] == vid 235 | if not pred_idx.any(): 236 | continue 237 | this_pred = prediction.loc[pred_idx].reset_index(drop=True) 238 | # Get top K predictions sorted by decreasing score. 239 | sort_idx = this_pred['score'].values.argsort()[::-1][:top_k] 240 | this_pred = this_pred.loc[sort_idx].reset_index(drop=True) 241 | # Get labels and compare against ground truth. 242 | pred_label = this_pred['label'].tolist() 243 | gt_idx = ground_truth['video-id'] == vid 244 | gt_label = ground_truth.loc[gt_idx]['label'].tolist() 245 | avg_hits_per_vid[i] = np.mean([1 if this_label in pred_label else 0 246 | for this_label in gt_label]) 247 | if not avg: 248 | avg_hits_per_vid[i] = np.ceil(avg_hits_per_vid[i]) 249 | return float(avg_hits_per_vid.mean()) 250 | -------------------------------------------------------------------------------- /Evaluation/eval_proposal.py: -------------------------------------------------------------------------------- 1 | import json 2 | import urllib2 3 | 4 | import numpy as np 5 | import pandas as pd 6 | 7 | from utils import get_blocked_videos 8 | from utils import interpolated_prec_rec 9 | from utils import segment_iou 10 | from utils import wrapper_segment_iou 11 | 12 | class ANETproposal(object): 13 | 14 | GROUND_TRUTH_FIELDS = ['database', 'taxonomy', 'version'] 15 | PROPOSAL_FIELDS = ['results', 'version', 'external_data'] 16 | 17 | def __init__(self, ground_truth_filename=None, proposal_filename=None, 18 | ground_truth_fields=GROUND_TRUTH_FIELDS, 19 | proposal_fields=PROPOSAL_FIELDS, 20 | tiou_thresholds=np.linspace(0.5, 0.95, 10), 21 | max_avg_nr_proposals=None, 22 | subset='validation', verbose=False, 23 | check_status=True): 24 | if not ground_truth_filename: 25 | raise IOError('Please input a valid ground truth file.') 26 | if not proposal_filename: 27 | raise IOError('Please input a valid proposal file.') 28 | self.subset = subset 29 | self.tiou_thresholds = tiou_thresholds 30 | self.max_avg_nr_proposals = max_avg_nr_proposals 31 | self.verbose = verbose 32 | self.gt_fields = ground_truth_fields 33 | self.pred_fields = proposal_fields 34 | self.recall = None 35 | self.avg_recall = None 36 | self.proposals_per_video = None 37 | self.check_status = check_status 38 | # Retrieve blocked videos from server. 39 | if self.check_status: 40 | self.blocked_videos = get_blocked_videos() 41 | else: 42 | self.blocked_videos = list() 43 | # Import ground truth and proposals. 44 | self.ground_truth, self.activity_index = self._import_ground_truth( 45 | ground_truth_filename) 46 | self.proposal = self._import_proposal(proposal_filename) 47 | 48 | if self.verbose: 49 | print '[INIT] Loaded annotations from {} subset.'.format(subset) 50 | nr_gt = len(self.ground_truth) 51 | print '\tNumber of ground truth instances: {}'.format(nr_gt) 52 | nr_pred = len(self.proposal) 53 | print '\tNumber of proposals: {}'.format(nr_pred) 54 | print '\tFixed threshold for tiou score: {}'.format(self.tiou_thresholds) 55 | 56 | def _import_ground_truth(self, ground_truth_filename): 57 | """Reads ground truth file, checks if it is well formatted, and returns 58 | the ground truth instances and the activity classes. 59 | 60 | Parameters 61 | ---------- 62 | ground_truth_filename : str 63 | Full path to the ground truth json file. 
64 | 65 | Outputs 66 | ------- 67 | ground_truth : df 68 | Data frame containing the ground truth instances. 69 | activity_index : dict 70 | Dictionary containing class index. 71 | """ 72 | with open(ground_truth_filename, 'r') as fobj: 73 | data = json.load(fobj) 74 | # Checking format 75 | if not all([field in data.keys() for field in self.gt_fields]): 76 | raise IOError('Please input a valid ground truth file.') 77 | 78 | # Read ground truth data. 79 | activity_index, cidx = {}, 0 80 | video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], [] 81 | for videoid, v in data['database'].iteritems(): 82 | if self.subset != v['subset']: 83 | continue 84 | if videoid in self.blocked_videos: 85 | continue 86 | for ann in v['annotations']: 87 | if ann['label'] not in activity_index: 88 | activity_index[ann['label']] = cidx 89 | cidx += 1 90 | video_lst.append(videoid) 91 | t_start_lst.append(ann['segment'][0]) 92 | t_end_lst.append(ann['segment'][1]) 93 | label_lst.append(activity_index[ann['label']]) 94 | 95 | ground_truth = pd.DataFrame({'video-id': video_lst, 96 | 't-start': t_start_lst, 97 | 't-end': t_end_lst, 98 | 'label': label_lst}) 99 | return ground_truth, activity_index 100 | 101 | def _import_proposal(self, proposal_filename): 102 | """Reads proposal file, checks if it is well formatted, and returns 103 | the proposal instances. 104 | 105 | Parameters 106 | ---------- 107 | proposal_filename : str 108 | Full path to the proposal json file. 109 | 110 | Outputs 111 | ------- 112 | proposal : df 113 | Data frame containing the proposal instances. 114 | """ 115 | with open(proposal_filename, 'r') as fobj: 116 | data = json.load(fobj) 117 | # Checking format... 118 | if not all([field in data.keys() for field in self.pred_fields]): 119 | raise IOError('Please input a valid proposal file.') 120 | 121 | # Read predictions. 122 | video_lst, t_start_lst, t_end_lst = [], [], [] 123 | score_lst = [] 124 | for videoid, v in data['results'].iteritems(): 125 | if videoid in self.blocked_videos: 126 | continue 127 | for result in v: 128 | video_lst.append(videoid) 129 | t_start_lst.append(result['segment'][0]) 130 | t_end_lst.append(result['segment'][1]) 131 | score_lst.append(result['score']) 132 | proposal = pd.DataFrame({'video-id': video_lst, 133 | 't-start': t_start_lst, 134 | 't-end': t_end_lst, 135 | 'score': score_lst}) 136 | return proposal 137 | 138 | def evaluate(self): 139 | """Evaluates a proposal file. To measure the performance of a 140 | method for the proposal task, we computes the area under the 141 | average recall vs average number of proposals per video curve. 142 | """ 143 | recall, avg_recall, proposals_per_video = average_recall_vs_avg_nr_proposals( 144 | self.ground_truth, self.proposal, 145 | max_avg_nr_proposals=self.max_avg_nr_proposals, 146 | tiou_thresholds=self.tiou_thresholds) 147 | print "Average Recall: {} " .format(avg_recall) 148 | area_under_curve = np.trapz(avg_recall, proposals_per_video) 149 | 150 | if self.verbose: 151 | print '[RESULTS] Performance on ActivityNet proposal task.' 152 | print '\tArea Under the AR vs AN curve: {}%'.format(100.*float(area_under_curve)/proposals_per_video[-1]) 153 | 154 | self.recall = recall 155 | self.avg_recall = avg_recall 156 | self.proposals_per_video = proposals_per_video 157 | 158 | def average_recall_vs_avg_nr_proposals(ground_truth, proposals, 159 | max_avg_nr_proposals=None, 160 | tiou_thresholds=np.linspace(0.5, 0.95, 10)): 161 | """ Computes the average recall given an average number 162 | of proposals per video. 
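This is the curve that ANETproposal.evaluate() integrates (np.trapz of the average
recall over the average number of proposals per video) to report the area under
the AR-AN curve.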
163 | 164 | Parameters 165 | ---------- 166 | ground_truth : df 167 | Data frame containing the ground truth instances. 168 | Required fields: ['video-id', 't-start', 't-end'] 169 | proposal : df 170 | Data frame containing the proposal instances. 171 | Required fields: ['video-id, 't-start', 't-end', 'score'] 172 | tiou_thresholds : 1darray, optional 173 | array with tiou thresholds. 174 | 175 | Outputs 176 | ------- 177 | recall : 2darray 178 | recall[i,j] is recall at ith tiou threshold at the jth average number of average number of proposals per video. 179 | average_recall : 1darray 180 | recall averaged over a list of tiou threshold. This is equivalent to recall.mean(axis=0). 181 | proposals_per_video : 1darray 182 | average number of proposals per video. 183 | """ 184 | 185 | # Get list of videos. 186 | video_lst = ground_truth['video-id'].unique() 187 | 188 | if not max_avg_nr_proposals: 189 | max_avg_nr_proposals = float(proposals.shape[0])/video_lst.shape[0] 190 | 191 | ratio = max_avg_nr_proposals*float(video_lst.shape[0])/proposals.shape[0] 192 | 193 | # Adaptation to query faster 194 | ground_truth_gbvn = ground_truth.groupby('video-id') 195 | proposals_gbvn = proposals.groupby('video-id') 196 | 197 | # For each video, computes tiou scores among the retrieved proposals. 198 | score_lst = [] 199 | total_nr_proposals = 0 200 | for videoid in video_lst: 201 | 202 | # Get proposals for this video. 203 | proposals_videoid = proposals_gbvn.get_group(videoid) 204 | this_video_proposals = proposals_videoid.loc[:, ['t-start', 't-end']].values 205 | 206 | # Sort proposals by score. 207 | sort_idx = proposals_videoid['score'].argsort()[::-1] 208 | this_video_proposals = this_video_proposals[sort_idx, :] 209 | 210 | # Get ground-truth instances associated to this video. 211 | ground_truth_videoid = ground_truth_gbvn.get_group(videoid) 212 | this_video_ground_truth = ground_truth_videoid.loc[:,['t-start', 't-end']].values 213 | 214 | if this_video_proposals.shape[0] == 0: 215 | n = this_video_ground_truth.shape[0] 216 | score_lst.append(np.zeros((n, 1))) 217 | continue 218 | 219 | if this_video_proposals.ndim != 2: 220 | this_video_proposals = np.expand_dims(this_video_proposals, axis=0) 221 | if this_video_ground_truth.ndim != 2: 222 | this_video_ground_truth = np.expand_dims(this_video_ground_truth, axis=0) 223 | 224 | nr_proposals = np.minimum(int(this_video_proposals.shape[0] * ratio), this_video_proposals.shape[0]) 225 | total_nr_proposals += nr_proposals 226 | this_video_proposals = this_video_proposals[:nr_proposals, :] 227 | 228 | # Compute tiou scores. 229 | tiou = wrapper_segment_iou(this_video_proposals, this_video_ground_truth) 230 | score_lst.append(tiou) 231 | 232 | # Given that the length of the videos is really varied, we 233 | # compute the number of proposals in terms of a ratio of the total 234 | # proposals retrieved, i.e. average recall at a percentage of proposals 235 | # retrieved per video. 236 | 237 | # Computes average recall. 238 | pcn_lst = np.arange(1, 101) / 100.0 *(max_avg_nr_proposals*float(video_lst.shape[0])/total_nr_proposals) 239 | matches = np.empty((video_lst.shape[0], pcn_lst.shape[0])) 240 | positives = np.empty(video_lst.shape[0]) 241 | recall = np.empty((tiou_thresholds.shape[0], pcn_lst.shape[0])) 242 | # Iterates over each tiou threshold. 243 | for ridx, tiou in enumerate(tiou_thresholds): 244 | 245 | # Inspect positives retrieved per video at different 246 | # number of proposals (percentage of the total retrieved). 
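        # Illustrative walk-through (assumed numbers, not from the dataset): if a
        # video has 2 ground-truth instances and 10 retrieved proposals, its entry
        # in score_lst is a 2x10 tiou matrix.  At threshold tiou=0.5,
        # true_positives_tiou flags the (instance, proposal) pairs with enough
        # overlap; for a percentage pcn=0.5 only the 5 highest-scoring proposals
        # are kept, and matches[i, j] counts how many of the 2 instances are hit
        # by at least one of them.  Summing over videos and dividing by the total
        # number of instances yields recall[ridx, :].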
247 | for i, score in enumerate(score_lst): 248 | # Total positives per video. 249 | positives[i] = score.shape[0] 250 | # Find proposals that satisfies minimum tiou threshold. 251 | true_positives_tiou = score >= tiou 252 | # Get number of proposals as a percentage of total retrieved. 253 | pcn_proposals = np.minimum((score.shape[1] * pcn_lst).astype(np.int), score.shape[1]) 254 | 255 | for j, nr_proposals in enumerate(pcn_proposals): 256 | # Compute the number of matches for each percentage of the proposals 257 | matches[i, j] = np.count_nonzero((true_positives_tiou[:, :nr_proposals]).sum(axis=1)) 258 | 259 | # Computes recall given the set of matches per video. 260 | recall[ridx, :] = matches.sum(axis=0) / positives.sum() 261 | 262 | # Recall is averaged. 263 | avg_recall = recall.mean(axis=0) 264 | 265 | # Get the average number of proposals per video. 266 | proposals_per_video = pcn_lst * (float(total_nr_proposals) / video_lst.shape[0]) 267 | 268 | return recall, avg_recall, proposals_per_video 269 | 270 | -------------------------------------------------------------------------------- /Evaluation/frame_prediction_BG.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Jul 3 14:49:28 2017 5 | 6 | @author: Arpan 7 | Description: Use c3d trained model for prediction. To be executed after 8 | training_model_m4.py 9 | """ 10 | 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cv2 19 | import caffe 20 | from joblib import Parallel, delayed 21 | 22 | 23 | # Temporal Proposals : Pretrained 24 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 25 | #ANNOTATION_FILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 26 | #PROPOSALS_FILENAME = '/home/arpan/DATA_Drive/ActivityNet/extra_features/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 27 | #SHUFFLE = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 28 | #MBH = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_features.h5" 29 | #MBH_IDS = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_quids.txt" 30 | #C3D = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D/sub_activitynet_v1-3.c3d.hdf5" 31 | #C3D_PCA = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D/PCA_activitynet_v1-3.hdf5" 32 | #SHUFFLE_IDS = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 33 | #MODEL = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/deploy_c3d_fc_net.prototxt" 34 | #PRETRAINED = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel" 35 | #MEANFILE = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 36 | #SUBSET = 'validation' 37 | 38 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 39 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 40 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 41 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle 
Features/ImageNetShuffle2016_features.h5' 42 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 43 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 44 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 45 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 46 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 47 | MODEL = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/deploy_c3d_fc_net.prototxt" 48 | PRETRAINED = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel" 49 | MEANFILE = "/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 50 | MEANFILE = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 51 | SUBSET = 'validation' 52 | 53 | 54 | def get_c3d_feature(fc3d, vid, pos, vfps): 55 | ''' 56 | Read the feature vector that is near the pos of video 57 | c3d features are taken for every 8th frame 58 | ''' 59 | row = int(pos/8) 60 | while not row <= fc3d[vid]['c3d_features'].shape[0]: 61 | print "Decrement by 1" 62 | row -= 1 63 | assert row <= fc3d[vid]['c3d_features'].shape[0] 64 | vec = fc3d[vid]['c3d_features'][row,:] 65 | return vec 66 | 67 | 68 | def get_predictions(net, test_vids, category_names): 69 | fc3d = h5py.File(C3D, 'r') 70 | fpca = h5py.File(C3D_PCA, 'r') 71 | train_mean = get_training_mean(MEANFILE) 72 | pred = {} 73 | c3d_lev2 = pd.DataFrame(np.zeros((len(test_vids), len(category_names))), \ 74 | index=test_vids, columns=category_names) 75 | bgThresh = 500000 76 | 77 | print "Calculate frames being ignored ..." 78 | result = Parallel(n_jobs=4)(delayed(get_rows_ignored) \ 79 | (test_vids[j], bgThresh, j) \ 80 | for j in range(len(test_vids))) 81 | 82 | for i, vid in enumerate(test_vids): 83 | print "{} --> For video : {}" .format(i, vid) 84 | vid_data = fc3d['v_'+vid]['c3d_features'][:] 85 | frms_ignored = result[i] 86 | # get the c3d features that need to be ignored. Note that c3d features 87 | # are sampled every 8 frames, therefore position is divided by 8 88 | rows_ignored = [int(r/8) for r in frms_ignored] 89 | print "Rows ignored : {}" .format(set(rows_ignored)) 90 | not_rows_ig = list(set(range(vid_data.shape[0])) - set(rows_ignored)) 91 | (rows, cols) = vid_data.shape 92 | # get predictions for each row of c3d feature 93 | vid_probs = pd.DataFrame(np.zeros((rows, len(category_names))), \ 94 | columns=category_names) 95 | #print frms_ignored 96 | for row in not_rows_ig: 97 | #print "Dims of vid_data[row,:] = {}" .format(vid_data[row,:].shape) 98 | #print "Values = {}" .format(vid_data[row,:]) 99 | f = vid_data[row,:].reshape(cols, 1, 1) 100 | # Subtract mean 101 | f = f - train_mean 102 | out = net.forward_all(data = np.asarray([f])) 103 | predicted_label = out['prob'][0].argmax(axis=0) 104 | #print "Predicted Label : {} :: Name : {}" .format(predicted_label, category_names[predicted_label]) 105 | #print "Rows :: " 106 | vid_probs.iloc[row,:] = out['prob'][0] 107 | #print vid_probs.iloc[row,:] 108 | # returns a list of dict like [{'score': score, 'label':labels[idx]}...] 
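        # Illustrative shape of the result (hypothetical scores; only
        # 'Applying sunscreen' is a label used elsewhere in this project):
        # pred[vid] -> [{'score': 41.2, 'label': 'Applying sunscreen'},
        #               {'score': 3.7, 'label': '<some other class>'},
        #               {'score': 1.9, 'label': '<a third class>'}]
        # vprobs    -> pandas Series of per-class probability sums over all c3d rows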
109 | pred[vid], vprobs = globalPrediction(vid, category_names, vid_probs) 110 | print pred[vid] 111 | c3d_lev2.loc[vid,:] = vprobs 112 | #break 113 | fc3d.close() 114 | fpca.close() 115 | return pred, c3d_lev2 116 | 117 | 118 | def globalPrediction(vid, category_names, vid_probs): 119 | """ 120 | Get a matrix of probabilities over the classes for the c3d features of 121 | a video. Generate the top 3 predictions from the prob matrix 122 | """ 123 | anno_list = [] 124 | # Idea 1 : To form the hist over the categories, each bin has sum of probs 125 | vprobs_sum = vid_probs.sum(axis=0) 126 | top_n = vprobs_sum.sort_values(ascending = False)[:3] 127 | labels = top_n.index.tolist() 128 | scores = top_n.values.tolist() 129 | for idx,score in enumerate(scores): 130 | anno_list.append({'score': score, 'label':labels[idx]}) 131 | 132 | 133 | # Idea 2 : Detect temporal continuity of category predicted. Longer the better 134 | 135 | # Idea 3 : Count the number of highest votes for top category. (Worse than 1) 136 | # If equal votes for >1 category then use Idea 1 137 | # finds the max val index among the columns for each row and the freq of the 138 | # occurrence of the column names (in decreasing order) 139 | # labels = vid_probs.idxmax(axis=1).value_counts()[:3].index.tolist() 140 | # scores = probs_sum[labels].tolist() 141 | # for idx,score in enumerate(scores): 142 | # anno_list.append({'score': score, 'label':labels[idx]}) 143 | 144 | return anno_list, vprobs_sum 145 | 146 | def get_rows_ignored(vid, bgThresh, v_no): 147 | """ 148 | Use background subtraction to decide which frames to ignore while prediction 149 | """ 150 | # process the video frame by frame 151 | print "For video : {} " .format(v_no) 152 | W, H = 160, 120 153 | vpath = os.path.join(VIDEOPATH, 'v_'+vid+'.mp4') 154 | cap = cv2.VideoCapture(vpath) 155 | if not cap.isOpened(): 156 | raise IOError("Capture object not opened !") 157 | #fps = cap.get(cv2.CAP_PROP_FPS) 158 | frms_ig = [] 159 | frms_msec = [] 160 | fgbg = cv2.createBackgroundSubtractorMOG2() #bg subtractor 161 | ret, prev_frame = cap.read() 162 | prev_frame = cv2.resize(prev_frame, (W, H) ) 163 | fgmask = fgbg.apply(prev_frame) 164 | # convert frame to GRAYSCALE 165 | prev_frame = cv2.cvtColor(prev_frame, cv2.COLOR_BGR2GRAY) 166 | # iterate over the frames 167 | count = 0 168 | while cap.isOpened(): 169 | ret, frame = cap.read() 170 | if not ret: 171 | break 172 | frame = cv2.resize(frame, (W, H)) 173 | curr_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 174 | # To find the background mask and skip the frame if foreground is absent 175 | fgmask = fgbg.apply(frame) 176 | if np.sum(fgmask) 2 | 3 | 4 | 5 | 160 120 6 | 7 | 16 16 8 | 9 | 8 8 10 | 11 | 8 8 12 | 9 13 | 1 14 | 4. 
15 | 0 16 | 2.0000000000000001e-01 17 | 0 18 | 64 19 | 0 20 | 21 | -------------------------------------------------------------------------------- /Evaluation/localization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jul 13 14:13:27 2017 5 | 6 | @author: Arpan 7 | Description : For Localization Task 8 | """ 9 | import json 10 | import os 11 | import utils 12 | import numpy as np 13 | import h5py 14 | import pandas as pd 15 | import collections 16 | import cPickle 17 | import caffe 18 | from joblib import Parallel, delayed 19 | 20 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 21 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 22 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 23 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 24 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 25 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 26 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 27 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 28 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 29 | MODEL = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/deploy_c3d_fc_net.prototxt" 30 | PRETRAINED = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel" 31 | MEANFILE = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/mean_c3d_4k.binaryproto" 32 | SUBSET = 'validation' 33 | 34 | #def get_temporalProps(net, test_vids, meta_info, category_names, n): 35 | # fc3d = h5py.File(C3D, 'r') 36 | # fpca = h5py.File(C3D_PCA, 'r') 37 | # train_mean = get_training_mean(MEANFILE) 38 | # pred = {} 39 | # c3d_lev2 = pd.DataFrame(np.zeros((len(test_vids), len(category_names))), \ 40 | # index=test_vids, columns=category_names) 41 | # for i, vid in enumerate(test_vids): 42 | # print "{} --> For video : {}" .format(i, vid) 43 | # (rows, cols) = fc3d['v_'+vid]['c3d_features'].shape 44 | # vid_data = fc3d['v_'+vid]['c3d_features'][:] 45 | # # get predictions for each row of c3d feature of vid 46 | # vid_probs = pd.DataFrame(np.zeros((rows, len(category_names))), \ 47 | # columns=category_names) 48 | # predicted_labels = [] 49 | # for row in range(rows): 50 | # #print "Dims of vid_data[row,:] = {}" .format(vid_data[row,:].shape) 51 | # #print "Values = {}" .format(vid_data[row,:]) 52 | # f = vid_data[row,:].reshape(cols, 1, 1) 53 | # #print "Values = {}" .format(f) 54 | # f = f - train_mean 55 | # #pr = net.forward() 56 | # out = net.forward_all(data = np.asarray([f])) 57 | # predicted_labels.append(out['prob'][0].argmax(axis=0)) 58 | # #print "Predicted Label : {} :: Name : {}" .format(predicted_label, category_names[predicted_label]) 59 | # #print "Rows :: " 60 | # vid_probs.iloc[row,:] = out['prob'][0] 61 | # #print vid_probs.iloc[row,:] 62 | # # returns a list of dict like [{'score': score, 'label':labels[idx]}...] 
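# Timing note for the (c3d feature row -> seconds) conversion used by
# get_vidLocalization further below: c3d features are sampled every 8 frames,
# so segment boundaries are mapped as (row_index + 1) * 8 / fps.  For example
# (hypothetical row), with the fallback median fps of 29.970029970029969,
# row 37 maps to (37 + 1) * 8 / 29.97 ~= 10.1 seconds.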
63 | # 64 | # pred[vid], vprobs = get_vidProposal(vid, vid_probs, predicted_labels,\ 65 | # meta_info, category_names, n) 66 | # c3d_lev2.loc[vid,:] = vprobs 67 | # #break 68 | # fc3d.close() 69 | # fpca.close() 70 | # return pred, c3d_lev2 71 | 72 | def get_vidLocalization(vid, vid_probs, vid_preds, meta_info, category_names, n): 73 | """Get a matrix of probabilities over the classes for the c3d features of 74 | a video. Generate the top 3 predictions from the prob matrix 75 | vid_probs: matrix of probs t x C. where t is the no of c3d features per vid 76 | and C is the no of classes 77 | vid_preds: list of predictions for that video (vid). len(vid_preds) = t 78 | and predicted class at position i is category_names[vid_preds[i]] 79 | 80 | """ 81 | anno_list = [] 82 | #n = 1 # Taking top n categories 83 | vprobs_sum = vid_probs.sum(axis=0) 84 | top_n = vprobs_sum.sort_values(ascending = False)[:n] 85 | topn_labels = top_n.index.tolist() 86 | topn_idx = [category_names.index(l) for l in topn_labels] 87 | # Idea 2 : Detect temporal continuity of category predicted. Longer the better 88 | #print "Predictions list : {}" .format(vid_preds) 89 | # find the max number of occurences for any class 90 | #counter = collections.Counter(vid_preds) 91 | #top_n = counter.most_common(3) # get a list of tuples 92 | #fps = 27.673616877683916 # mean fps of all vids in training set 93 | fps = 29.970029970029969 # median 29.970029970029969 3018 times 94 | if vid in meta_info.keys(): 95 | fps = meta_info[vid]['fps'] 96 | for idx in range(n): 97 | # get list of tuples (beg_pos, end_pos) 98 | segments = get_segments_for_cat(vid_preds, topn_idx[idx]) 99 | ##### get time in sec from video info 100 | if len(segments)>0: 101 | for (beg,end) in segments: 102 | begtime = (beg+1)*8./fps 103 | endtime = (end+1)*8./fps 104 | # taking score as the temporal extent of the activity of interest 105 | anno_list.append({'label': topn_labels[idx] ,'score': end-beg,\ 106 | 'segment': [begtime, endtime]}) 107 | 108 | # Find the top predicted label 109 | return anno_list, vprobs_sum 110 | 111 | #def get_segments_for_cat(pred_lst, cat_id, nth_val): 112 | # """Retrieve segments corresponding to category number 'cat_id' from the list of 113 | # category predictions 'pred_lst'. 
Return a list of segment tuples 114 | # """ 115 | # int_seg_dist = 20 # 2 for 8*i frames 116 | # seg_len_th = 3 - nth_val 117 | # segments = [] 118 | # beg , end = -1, -1 119 | # seg_flag = False 120 | # for i,pr in enumerate(pred_lst): 121 | # if pr==cat_id and not seg_flag: 122 | # beg = i 123 | # seg_flag = True 124 | # elif pr!=cat_id and seg_flag: 125 | # end = i 126 | # segments.append((beg, end)) 127 | # seg_flag = False 128 | # beg, end = -1, -1 129 | # if seg_flag: 130 | # segments.append((beg, i+1)) 131 | # 132 | # seg_flag = True 133 | # merged_segments, new_segments = [], [] 134 | # if len(segments)==0: 135 | # return [] 136 | # (bPrev, ePrev) = segments[0] 137 | # # Merge 'close' segments based on int_seg_dist 138 | # for i,(bCurr,eCurr) in enumerate(segments): 139 | # if i==0: 140 | # continue 141 | # if (bCurr-ePrev)>int_seg_dist : 142 | # merged_segments.append((bPrev, ePrev)) 143 | # bPrev = bCurr 144 | # ePrev = eCurr 145 | # merged_segments.append((bPrev, ePrev)) 146 | # # Create a dict of segment lengths for each tuple in segments 147 | # seg_lens = {} 148 | # for idx,seg in enumerate(merged_segments): 149 | # seg_lens[idx] = seg[1] - seg[0] 150 | # # get segment idxs in decreasing order of lens (sort dict values and get keys) 151 | # decr_seg_lens = sorted(seg_lens, key=seg_lens.get, reverse=True) 152 | # # For very small length videos 153 | # if len(pred_lst) < 3 or len(decr_seg_lens) < 3: 154 | # seg_len_th = 0 155 | # 156 | # for idx in decr_seg_lens: 157 | # if seg_lens[idx] < seg_len_th: 158 | # break 159 | # new_segments.append(merged_segments[idx]) 160 | # 161 | # return new_segments 162 | 163 | def get_segments_for_cat(pred_lst, cat_id): 164 | """Retrieve segments corresponding to category number 'cat_id' from the list of 165 | category predictions 'pred_lst'. 
Return a list of segment tuples 166 | """ 167 | int_seg_dist = 60 # 2 for 8*i frames 168 | segments = [] 169 | beg , end = -1, -1 170 | seg_flag = False 171 | for i,pr in enumerate(pred_lst): 172 | if pr==cat_id and not seg_flag: 173 | beg = i 174 | seg_flag = True 175 | elif pr!=cat_id and seg_flag: 176 | end = i 177 | segments.append((beg, end)) 178 | seg_flag = False 179 | beg, end = -1, -1 180 | if seg_flag: 181 | segments.append((beg, i+1)) 182 | 183 | seg_flag = True 184 | new_segments = [] 185 | if len(segments)==0: 186 | return [] 187 | (bPrev, ePrev) = segments[0] 188 | for i,(bCurr,eCurr) in enumerate(segments): 189 | if i==0: 190 | continue 191 | if (bCurr-ePrev)<=int_seg_dist : 192 | ePrev = eCurr 193 | else: 194 | new_segments.append((bPrev, ePrev)) 195 | bPrev = bCurr 196 | ePrev = eCurr 197 | new_segments.append((bPrev, ePrev)) 198 | 199 | return new_segments 200 | -------------------------------------------------------------------------------- /Evaluation/optical_flow.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Feb 19 03:51:29 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Extract the Optical Flow data from action videos 9 | 10 | """ 11 | import cv2 12 | import numpy as np 13 | import os 14 | import json 15 | import lmdb 16 | import caffe 17 | import pandas as pd 18 | from matplotlib import pyplot as plt 19 | from joblib import Parallel, delayed 20 | 21 | 22 | # Input: 23 | # srcVideoFolder: where the action videos are located (for train/val/test set) 24 | # Output: Create optical flow visualization data, transformed to HSV space 25 | # ToDo: write the feature onto a file and convert to lmdb. 26 | def construct_datasets(srcVideoFolder, lmdb_folder, pathPrefix, \ 27 | samples_files, category_names): 28 | 29 | DIFF_FRAMES = [1] 30 | print("No of samples_files = "+str(len(samples_files))) # =no_of_categories 31 | lmdb_name = os.path.join(lmdb_folder, "val_OF_lmdb") 32 | if not os.path.exists(os.path.dirname(lmdb_name)): 33 | os.makedirs(os.path.dirname(lmdb_name)) 34 | 35 | # form a pandas dataframe with video_id 36 | video_id, pos, labels = [], [], [] 37 | for idx,f in enumerate(samples_files): 38 | if category_names[idx] in f: 39 | with open(os.path.join(pathPrefix, f), "r") as fobj: 40 | pos_samples = json.load(fobj) 41 | for v_id, pos_list in pos_samples.iteritems(): 42 | pos.extend(pos_list) 43 | video_id.extend(np.repeat(v_id, len(pos_list)).tolist()) 44 | labels.extend(np.repeat(idx, len(pos_list)).tolist()) 45 | samples_df = pd.DataFrame({'video_id': video_id, 46 | 'position': pos, 47 | 'label': labels}) 48 | print "No of samples for all the categories = {} " .format(samples_df.shape[0]) 49 | 50 | # Shuffle the dataframe in-place 51 | samples_df = samples_df.sample(frac=1).reset_index(drop=True) 52 | # write dataframe to disk (csv) 53 | samples_df.to_csv(os.path.join(lmdb_folder, "samples_val.csv"), index=False) 54 | 55 | # Create lmdb 56 | (H, W, C) = (120, 160, 3) 57 | N = samples_df.shape[0] # no of rows (=no of visualizations = 5k) 58 | # twice the size of total number of OF visualizations 59 | map_size = int(N*H*W*C*3) # approx 429 GB 60 | #map_size = int(N*720*1280*C*2) # approx 429 GB 61 | 62 | env = lmdb.open(lmdb_name, map_size=map_size) 63 | 64 | i = 0 # LMDB index variable 65 | # iterate over the rows of the pandas dataframe 66 | end_samples = samples_df.shape[0] 67 | r = (end_samples - i)/200 68 | print "r = %d " %r 69 | 
########################################################################### 70 | nCat = 4*len(category_names) # = 200 71 | nCat_samples = (end_samples - i)/nCat # = N = 1000 72 | lmdb_id = 0 73 | 74 | # Parallelizing the lmdb creation process 75 | for i in range(nCat_samples): 76 | 77 | result = Parallel(n_jobs=4)(delayed(get_optical_flow_vid) \ 78 | (os.path.join(srcVideoFolder, 'v_'+samples_df['video_id'][i*nCat+j]+'.mp4'), \ 79 | samples_df['position'][i*nCat+j], \ 80 | DIFF_FRAMES, H, W) \ 81 | for j in range(nCat)) 82 | 83 | with env.begin(write = True) as txn: 84 | for l in range(len(result)): 85 | row_no = (i*nCat)+l 86 | pos = samples_df['position'][row_no] 87 | video_id = samples_df['video_id'][row_no] 88 | lab = samples_df['label'][row_no] 89 | print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 90 | 91 | for img in result[l]: 92 | img = np.rollaxis(img, 2) # C, H, W 93 | datum = caffe.proto.caffe_pb2.Datum() 94 | datum.channels = img.shape[0] 95 | datum.height = img.shape[1] 96 | datum.width = img.shape[2] 97 | datum.data = img.tobytes() 98 | datum.label = lab 99 | str_id = '{:08}'.format(lmdb_id) 100 | # The encode is only essential in Python 3 101 | txn.put(str_id.encode('ascii'), datum.SerializeToString()) 102 | lmdb_id += 1 103 | print "Write No : %d" %(i+1) 104 | ########################################################################### 105 | # for commit_no in range(r): 106 | # with env.begin(write=True) as txn: 107 | # for idx in range(200): # samples_df.iterrows(): 108 | # row_no = (200*commit_no)+idx 109 | # assert i==row_no 110 | # pos = samples_df['position'][row_no] 111 | # video_id = samples_df['video_id'][row_no] 112 | # lab = samples_df['label'][row_no] 113 | # print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 114 | # imgs = [] 115 | # vpath = os.path.join(srcVideoFolder, 'v_'+video_id+'.mp4') 116 | # imgs.extend(get_optical_flow_vid(vpath, pos, DIFF_FRAMES, H, W)) 117 | # # returned frames are HxWxC (120x160x3) in a list 118 | # 119 | # for img in imgs: 120 | # # rollaxis if needed 121 | # img = np.rollaxis(img, 2) # C, H, W 122 | # datum = caffe.proto.caffe_pb2.Datum() 123 | # datum.channels = img.shape[0] 124 | # datum.height = img.shape[1] 125 | # datum.width = img.shape[2] 126 | # datum.data = img.tobytes() 127 | # datum.label = lab 128 | # str_id = '{:08}'.format(i) 129 | # # The encode is only essential in Python 3 130 | # txn.put(str_id.encode('ascii'), datum.SerializeToString()) 131 | # i = i+1 132 | 133 | print "LMDB Created Successfully !!" 
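    # Minimal, commented-out sketch for spot-checking the new LMDB (keys follow
    # the '{:08}' format written above; caffe.io.datum_to_array is assumed to be
    # available from pycaffe):
    # with lmdb.open(lmdb_name, readonly=True).begin() as txn:
    #     datum = caffe.proto.caffe_pb2.Datum()
    #     datum.ParseFromString(txn.get('00000000'.encode('ascii')))
    #     arr = caffe.io.datum_to_array(datum)   # C x H x W, e.g. 3 x 120 x 160
    #     print "label = %d, shape = %s" % (datum.label, str(arr.shape))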
134 | return 135 | 136 | # from a srcVideo, get the optical flow data of ith and (i+x) frame 137 | # where x belongs to diff_frames 138 | def get_optical_flow_vid(srcVideo, position, diff_frames, height, width): 139 | res_flow_img = [] 140 | cap = cv2.VideoCapture(srcVideo) 141 | #fgbg = cv2.createBackgroundSubtractorMOG2() #bg subtractor 142 | if not cap.isOpened(): 143 | raise IOError("Capture object cannot be opened for "+srcVideo) 144 | #################################################### 145 | # for resizing the optical flow visualization 146 | resize_flag = True 147 | (h, w) = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)),\ 148 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) 149 | if h==height and w==width: 150 | resize_flag = False 151 | 152 | #print "No of frames = {}", format(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 153 | 154 | for diff in diff_frames: 155 | #print "For diff = %d" %diff 156 | cap.set(cv2.CAP_PROP_POS_FRAMES, position) 157 | ret, frame = cap.read() 158 | # Sometimes the last few frames of a video are not read, then read the 159 | # last readable frame by moving backwards one frame at a time 160 | while not ret: 161 | #print "Frame not read ! Moving backwards in capture object !" 162 | #raise IOError("Frame not read :: "+srcVideo+" :: Position: "+str(position)) 163 | position -= 1 164 | cap.set(cv2.CAP_PROP_POS_FRAMES, position) 165 | ret, frame = cap.read() 166 | 167 | # curr_frame = frame.copy() 168 | # cap.set(cv2.CAP_PROP_POS_FRAMES, position+diff) 169 | # ret, next_frame = cap.read() 170 | # # If next frame is unavailable, then make cf as nf and read previous frame in cf 171 | # if not ret: 172 | # #print "Cannot read next frame... Reading previous frame instead." 173 | # cap.set(cv2.CAP_PROP_POS_FRAMES, position-diff) 174 | # next_frame = curr_frame.copy() 175 | # ret, curr_frame = cap.read() 176 | # if not ret: 177 | # raise IOError("Cannot read previous frame also.") 178 | # 179 | # curr_frame = cv2.cvtColor(curr_frame, cv2.COLOR_BGR2GRAY) 180 | # next_frame = cv2.cvtColor(next_frame, cv2.COLOR_BGR2GRAY) 181 | # # Compute the optical flow 182 | # flow = cv2.calcOpticalFlowFarneback(curr_frame, next_frame, None, 0.5, 1, 12, 3, 5, 1.2, 0) 183 | # #vis_vectors = draw_flow(curr_frame, flow, 8) 184 | # vis_bgr = draw_flow_bgr(flow, frame) 185 | 186 | if resize_flag: 187 | # scaling image. 
Mostly it scales down to 120x160 (hxw) INTER_LINEAR default 188 | frame = cv2.resize(frame, (width, height) ) 189 | 190 | res_flow_img.append(frame) 191 | #cv2.imshow("Curr Frame", curr_frame) 192 | #cv2.imshow("Next Frame", next_frame) 193 | #cv2.imshow("Flow Vecs", vis_vectors) 194 | #cv2.imshow("Flow BGR", vis_bgr) 195 | #waitTillEscPressed() 196 | 197 | #res_mean = [] 198 | #res_mean.append(np.average(res_flow_img, axis=0).astype(np.uint8)) 199 | 200 | cap.release() 201 | #cv2.destroyAllWindows() 202 | return res_flow_img 203 | 204 | 205 | # draw the OF field on image, with grids, decrease step for finer grid 206 | def draw_flow(img, flow, step=16): 207 | h, w = img.shape[:2] 208 | y, x = np.mgrid[step/2:h:step, step/2:w:step].reshape(2,-1) 209 | fx, fy = flow[y,x].T 210 | lines = np.vstack([x, y, x+fx, y+fy]).T.reshape(-1, 2, 2) 211 | lines = np.int32(lines + 0.5) 212 | vis = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) 213 | cv2.polylines(vis, lines, 0, (0, 255, 0)) 214 | for (x1, y1), (x2, y2) in lines: 215 | cv2.circle(vis, (x1, y1), 1, (0, 255, 0), -1) 216 | return vis 217 | 218 | def draw_flow_bgr(flow, sample_frame): 219 | hsv = np.zeros_like(sample_frame) 220 | #print "hsv_shape : "+str(hsv.shape) 221 | hsv[...,1] = 255 222 | mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1]) 223 | 224 | hsv[...,0] = ang*180/np.pi/2 225 | hsv[...,2] = cv2.normalize(mag, None, 0, 255, cv2.NORM_MINMAX) 226 | bgr = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR) 227 | return bgr 228 | 229 | 230 | def waitTillEscPressed(): 231 | while(True): 232 | if cv2.waitKey(10)==27: 233 | print("Esc Pressed") 234 | return 235 | 236 | 237 | if __name__=="__main__": 238 | # the dataset folder contains 6 folders boxing, running etc containing videos for each 239 | # It also contains 00sequences.txt where meta info is given 240 | dataset = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos" 241 | 242 | srcVideo = os.path.join(dataset, 'v_2GEZgHcA7zU.mp4') 243 | 244 | # img = get_optical_flow_vid(srcVideo, 2984, [1,2,3], 120, 160) 245 | # for i,im in enumerate(img): 246 | # print "Flow image no : %d" %(i+1) 247 | # cv2.imshow("Frame", im) 248 | # waitTillEscPressed() 249 | lmdb_folder = "/home/hadoop/VisionWorkspace/ActivityNet" 250 | p = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/samples_test_5000" 251 | sampls = os.listdir(p) 252 | construct_datasets(dataset, lmdb_folder, p, sampls, ['Applying sunscreen']) 253 | #cv2.destroyAllWindows() 254 | 255 | ########################################################### 256 | # Training the caffe model 257 | #proc = subprocess.Popen(["/home/hadoop/caffe/build/tools/caffe","train","--solver=optical_flow_lenet_solver.prototxt"],stderr=subprocess.PIPE) 258 | #res = proc.communicate()[1] 259 | 260 | #caffe.set_mode_gpu() 261 | #solver = caffe.get_solver("config.prototxt") 262 | #solver.solve() 263 | 264 | #print res 265 | ########################################################### 266 | # Applying the model 267 | 268 | #net = caffe.Net("demoDeploy.prototxt", "./opt_flow_quick_iter_20000.caffemodel", caffe.TEST) 269 | #print(get_data_for_id_from_lmdb("/home/lnmiit/caffe/examples/optical_flow/val_opt_flow_lmdb/", "00000209")) 270 | #l, f = get_data_for_id_from_lmdb("/home/lnmiit/caffe/examples/optical_flow/val_opt_flow_lmdb/", "00000209") 271 | 272 | ########################################################### 273 | ## Check Background Subtraction on sample videos (Visualize) 274 | # srcVideo = 
"/home/hadoop/VisionWorkspace/KTH_OpticalFlow/dataset/kth_actions_test/person03_walking_d1_uncomp.avi" 275 | # cap = cv2.VideoCapture(srcVideo) 276 | # fgbg = cv2.createBackgroundSubtractorMOG2() 277 | # while(cap.isOpened()): 278 | # ret, frame = cap.read() 279 | # fgmask = fgbg.apply(frame) 280 | # cv2.imshow('frame',fgmask) 281 | # print np.sum(fgmask) 282 | # waitTillEscPressed() 283 | # #k = cv2.waitKey(30) & 0xff 284 | # #if k == 27: 285 | # # break 286 | # cap.release() 287 | # cv2.destroyAllWindows() 288 | 289 | 290 | 291 | -------------------------------------------------------------------------------- /Evaluation/taxonomy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Thu Jun 29 04:24:44 2017 5 | @author: Arpan 6 | Description: Taxonomy generation 7 | 8 | """ 9 | import json 10 | import numpy as np 11 | import utils 12 | import collections 13 | 14 | 15 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 16 | JSONFILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 17 | SUBSET = 'validation' 18 | 19 | def get_parentnode(taxonomy, nodeName): 20 | """ 21 | Retrieve the parentId of a node given its label 22 | """ 23 | for entry in taxonomy: 24 | if entry['nodeName'] == nodeName: 25 | return entry['parentId'] 26 | print "Node Name {} is invalid !" .format(nodeName) 27 | return -1 28 | 29 | #def trace_path_to_root(taxonomy, label): 30 | 31 | def get_nodeName(taxonomy, nodeId): 32 | """ 33 | Retrieve nodeId given a nodeName 34 | """ 35 | 36 | def get_nodeId(taxonomy, nodeName): 37 | """ 38 | Retrieve nodeID from the given nodeName 39 | """ 40 | for entry in taxonomy: 41 | if nodeName == entry['nodeName']: 42 | return entry['nodeId'] 43 | print "Node Name {} is invalid !" .format(nodeName) 44 | return -1 45 | 46 | def nAIntersectB(database, taxonomy, train_vids_all): 47 | n = 0 48 | 49 | def findDiscripancies(taxonomy): 50 | """ 51 | Found nodeId 269 and 270 have same names 'Health-related self care' 52 | """ 53 | i = 0 54 | for entry in taxonomy: 55 | if entry['parentName'] != None: 56 | print entry['nodeName'] 57 | if entry['nodeName'].lower() == entry['parentName'].lower(): 58 | i += 1 59 | print "No of same nodes = {} " .format(i) 60 | 61 | def get_no_of_annotations(database, label, train_vids_all): 62 | """ 63 | Iterate over the training videos and count the no of egs belonging to class i 64 | """ 65 | count = 0 66 | for vid in train_vids_all: 67 | for ann in database[vid]['annotations']: 68 | if ann['label'] == label: 69 | count += 1 70 | return count 71 | 72 | 73 | 74 | def display_all_paths(taxonomy): 75 | """ 76 | Iterate over all the entries of the taxonomy dict and for each display the 77 | path from that node to the root node. 
78 | """ 79 | for i,entry in enumerate(taxonomy): 80 | print "For nodeId : {} :: NodeName : {} " .format(entry['nodeId'], entry['nodeName']) 81 | parentId = entry['parentId'] 82 | parentName = entry['parentName'] 83 | while parentId != None: 84 | print "ParentId : {} :: ParentName : {}" .format(parentId, parentName) 85 | # Search for nodeId == parentId 86 | for temp in taxonomy: 87 | if temp['nodeId'] == parentId: 88 | parentId = temp['parentId'] 89 | parentName = temp['parentName'] 90 | break 91 | if i == 5: 92 | break 93 | 94 | 95 | if __name__ == '__main__': 96 | 97 | # Read the database, version and taxonomy from JSON file 98 | with open("data/activity_net.v1-3.min.json", "r") as fobj: 99 | data = json.load(fobj) 100 | 101 | database = data["database"] 102 | taxonomy = data["taxonomy"] 103 | version = data["version"] 104 | 105 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, JSONFILE) 106 | 107 | print "No of non-existing videos: %d" % len(non_existing_videos) 108 | 109 | train_vids_all = [] 110 | [train_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 111 | 112 | # Find list of available training videos 113 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 114 | 115 | ########################################################################### 116 | # Get categories information from the database (Train+Validation sets) 117 | category = [] 118 | for x in database: 119 | cc = [] 120 | for l in database[x]["annotations"]: 121 | cc.append(l["label"]) 122 | category.extend(list(set(cc))) 123 | category_count = collections.Counter(category) 124 | 125 | category_names = sorted(category_count.keys()) 126 | print "Total No of classes: %d" % len(category_names) 127 | 128 | #print category_names 129 | ########################################################################### 130 | 131 | display_all_paths(taxonomy) 132 | #findDiscripancies(taxonomy) 133 | 134 | for cat in category_names: 135 | ncat = get_no_of_annotations(database, cat, train_vids_all) 136 | print "category {} :: |vids| {}" .format(cat, ncat) 137 | -------------------------------------------------------------------------------- /Evaluation/testing.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 9 17:23:43 2017 5 | 6 | @author: Arpan 7 | 8 | Description: ActivityNet -- Testing and submission file generation 9 | """ 10 | 11 | import collections 12 | import commands 13 | import json 14 | import glob 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | import os 18 | from utils import get_video_number_of_frames 19 | from skimage.transform import resize 20 | import cv2 21 | import random 22 | 23 | # Server Params 24 | # VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 25 | VIDEO_PATH = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos" 26 | SUBSET = 'validation' 27 | 28 | 29 | ########################################################################### 30 | 31 | def get_sample_frame_from_video(videoid, duration, start_time, end_time, \ 32 | video_path=VIDEO_PATH): 33 | filename = glob.glob(os.path.join(video_path, "v_%s*" % videoid))[0] 34 | nr_frames = get_video_number_of_frames(filename) 35 | fps = (nr_frames*1.0)/duration 36 | start_frame, end_frame = int(start_time*fps), int(end_time*fps) 37 | frame_idx = random.choice(range(start_frame, end_frame)) 38 | cap = cv2.VideoCapture(filename) 39 | keepdoing, cnt = 
True, 1 40 | while keepdoing: 41 | ret, img = cap.read() 42 | if cnt==frame_idx: 43 | break 44 | assert ret==True, "Ended video and frame not selected." 45 | cnt+=1 46 | return cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 47 | 48 | ########################################################################### 49 | 50 | def get_random_video_from_activity(database, activity, subset="validation"): 51 | videos = [] 52 | for x in database: 53 | if database[x]["subset"] != subset: continue 54 | xx = random.choice(database[x]["annotations"]) 55 | if xx["label"]==activity: 56 | yy = {"videoid": x, "duration": database[x]["duration"], 57 | "start_time": xx["segment"][0], "end_time": xx["segment"][1]} 58 | videos.append(yy) 59 | return random.choice(videos) 60 | 61 | ########################################################################### 62 | 63 | def get_video_prediction(vid, category_names, model): 64 | # Read the video frames and predict categories with scores 65 | predictions_lst = [] 66 | no_of_preds = np.random.randint(1,4) 67 | for i in range(no_of_preds): 68 | score = float(np.random.rand(1)) 69 | label_idx = np.random.randint(200) 70 | label = category_names[label_idx] 71 | pred_dict = {'score': score, 'label':label} 72 | predictions_lst.append(pred_dict) 73 | return predictions_lst 74 | 75 | 76 | if __name__=='__main__': 77 | with open("data/activity_net.v1-3.min.json", "r") as fobj: 78 | data = json.load(fobj) 79 | 80 | database = data["database"] 81 | taxonomy = data["taxonomy"] 82 | version = data["version"] 83 | 84 | ########################################################################### 85 | # Release Summary 86 | all_node_ids = [x["nodeId"] for x in taxonomy] 87 | print len(all_node_ids) 88 | leaf_node_ids = [] 89 | for x in all_node_ids: 90 | is_parent = False 91 | # iterate through the parentIds and if the nodeID is a parentId then 92 | # it is not a leaf node else it is a leaf node 93 | for query_node in taxonomy: 94 | if query_node["parentId"]==x: 95 | is_parent = True 96 | break 97 | if not is_parent: leaf_node_ids.append(x) 98 | 99 | leaf_nodes = [x for x in taxonomy if x["nodeId"] in leaf_node_ids] 100 | 101 | vsize = commands.getoutput("du %s -lhs" % VIDEO_PATH).split("/")[0] 102 | 103 | total_duration = sum([database[x]['duration'] for x in database])/3600.0 104 | 105 | print "ActivityNet %s" % version 106 | print "Total number of videos: %d" % len(database) 107 | print "Total number of nodes in taxonomy: %d" % len(taxonomy) 108 | print "Total number of leaf nodes: %d" % len(leaf_nodes) 109 | print "Total size of downloaded videos: %s" % vsize 110 | print "Total hours of video: %0.1f" % total_duration 111 | 112 | ########################################################################### 113 | # Get categories information from the database (Train+Validation sets) 114 | category = [] 115 | for x in database: 116 | cc = [] 117 | for l in database[x]["annotations"]: 118 | cc.append(l["label"]) 119 | category.extend(list(set(cc))) 120 | category_count = collections.Counter(category) 121 | 122 | category_names = sorted(category_count.keys()) 123 | print "Total No of classes: %d" % len(category_names) 124 | #print category_names 125 | 126 | ########################################################################### 127 | # Iterate over the validation/test set video files and obtain 128 | # the prediction for each file 129 | subset_video_ids = [] 130 | ext_data_dict = {'used': False, 'details': \ 131 | 'Describe the external data over here. 
If necessary for each prediction'} 132 | 133 | out_dict = {'version':version} 134 | 135 | [subset_video_ids.append(x) for x in database if database[x]['subset']==SUBSET] 136 | results_dict = {} 137 | for v_id in subset_video_ids: 138 | results_dict[v_id] = get_video_prediction(v_id, category_names, "") 139 | 140 | out_dict['results'] = results_dict 141 | out_dict['external_data'] = ext_data_dict 142 | 143 | json_filename = 'submission_'+SUBSET+'.json' 144 | with open(json_filename, 'w') as fp: 145 | json.dump(out_dict, fp) 146 | 147 | 148 | # write the out_dict to a JSON file 149 | ########################################################################### 150 | 151 | # plt.figure(num=None, figsize=(18, 8), dpi=100) 152 | # xx = np.array(category_count.keys()) 153 | # yy = np.array([category_count[x] for x in category_count]) 154 | # xx_idx = yy.argsort()[::-1] 155 | # plt.bar(range(len(xx)), yy[xx_idx], color=(240.0/255.0,28/255.0,1/255.0)) 156 | # plt.ylabel("Number of videos per activity ") 157 | # plt.xticks(range(len(xx)), xx[xx_idx], rotation="vertical", size="small") 158 | # plt.title("ActivityNet VERSION 1.2 - Untrimmed Video Classification") 159 | # plt.show() 160 | 161 | ########################################################################### 162 | 163 | # read a model 164 | -------------------------------------------------------------------------------- /Evaluation/training.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 9 17:21:26 2017 5 | 6 | @author: Arpan 7 | 8 | Description: ActivityNet -- Training 9 | """ 10 | import json 11 | import os 12 | import utils 13 | import collections 14 | import training_model_svm as tm1 15 | 16 | 17 | # Server Params 18 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 19 | #JSONFILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 20 | #LMDB_FOLDER = "/home/arpan/DATA_Drive/ActivityNet" 21 | 22 | # Local Params 23 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 24 | JSONFILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 25 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/new2_lmdb" 26 | SUBSET = 'training' 27 | ############################################################################### 28 | 29 | # Train on HOG descriptors 30 | # Iterate over the catogories and for each category train an SVM model 31 | def sample_activity_frames(database, meta_info, category_names, N): 32 | """ Function samples N frame positions from the annotated video segments 33 | of each activity category. For 200 categories, 200 files will be created. 34 | Each file will have a 35 | Input:dataframe cell value using column name and row no 36 | database: dictionary from activity_net.v1-3.min.json 37 | meta_info: dictionary of meta_information for training videos 38 | {'3aQnQEL3USQ':{u'total_frames': 6238, 39 | u'dimensions': [360, 480], u'fps': 29.5} ....} 40 | category_names: list of category_names, (sorted) 41 | Output: Write json files of the form 42 | {"vMYPNyBR3d0": [327, 327, 337, 345, 346, 359],...} 43 | Each file has N positions of activities, key is video-id and positions sampled 44 | from that video 45 | """ 46 | print "Called train_m1 !!!" 
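    # Illustrative sanity check once the per-category files are written
    # (assuming N=4000 and the 'Applying sunscreen' category used elsewhere
    # in this project):
    # with open("samples_4000/Applying sunscreen.json", "r") as fp:
    #     d = json.load(fp)
    # print "videos: %d, sampled positions: %d" % (len(d), sum(len(v) for v in d.values()))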
47 | video_ids = meta_info.keys() 48 | #N = 5000 # No of samples of each class to be picked from activity seq 49 | dest_folder = "samples_"+str(N) 50 | if not os.path.exists(dest_folder): 51 | os.makedirs(dest_folder) 52 | # get list of training videos which belong to category 53 | for cat in category_names: 54 | print "Iterate for category %s" %cat 55 | video_ids_for_cat = utils.get_videos_for_category(database, \ 56 | video_ids,\ 57 | cat) 58 | # Retrieve samples from positive example videos. 59 | # Get dict of videos_ids and segments of videos which have action 60 | # corresponding to a category. 61 | #print "Getting video segment information for %s videos..." %cat 62 | # train_segments dict: 63 | train_segments = tm1.get_training_segments(database, video_ids_for_cat, cat) 64 | #print "Getting random frames from +ve example videos..." 65 | #print train_segments 66 | tr_samples = tm1.get_sample_frames(train_segments, meta_info, N) 67 | with open(os.path.join(dest_folder, cat+".json"), "w") as fp: 68 | json.dump(tr_samples, fp) 69 | 70 | # select 640x480 resolution and resize accordingly 71 | #break 72 | # retrieve samples from negative example videos 73 | 74 | 75 | ############################################################################### 76 | 77 | def create_training_lmdb(srcSamplesMetaFiles, category_names): 78 | """ Loop over all the existing training videos 79 | category_names are sorted list of categories, where its index 80 | represents the category no. 81 | Path for json files of category videos and sample frame info: samples_5000 82 | This function assumes that you have already called train_m1 and the 83 | json files for each category are present in the path specified 84 | Steps: 85 | 1. Extract the optical flow visualizations from the training set for each 86 | category. 87 | 2. Convert into lmdb database 88 | 3. Train a CNN on the lmdb database 89 | 4. Save the trained model to disk 90 | Input: meta_info: same as in the function above 91 | """ 92 | 93 | samples_files = [s+".json" for s in category_names] 94 | 95 | assert len(samples_files)==len(category_names) 96 | # check order of categories names matches with samples_files 97 | for idx,f in enumerate(samples_files): 98 | if not (category_names[idx] in f): 99 | print f 100 | print samples_files 101 | raise IOError("Order of categories does not match order of sample files.") 102 | 103 | import optical_flow as of 104 | of.construct_datasets(VIDEOPATH, LMDB_FOLDER, srcSamplesMetaFiles, \ 105 | samples_files, category_names) 106 | 107 | return 108 | 109 | 110 | ############################################################################### 111 | 112 | def train_m3(database, train_video_ids, category_names): 113 | # Loop over all the existing training videos 114 | # category_names are sorted list of categories, where its index 115 | # represents the category no. 
116 | 117 | for idx in train_video_ids: 118 | # for each video call a method to train an SVM 119 | tm1.train_svm(os.path.join(VIDEOPATH, "v_"+idx+".mp4"), \ 120 | database[idx]['annotations'], 10, category_names) 121 | # break used to execute for only one video 122 | break 123 | 124 | return 125 | 126 | ############################################################################### 127 | 128 | if __name__=='__main__': 129 | # Read the database, version and taxonomy from JSON file 130 | with open("data/activity_net.v1-3.min.json", "r") as fobj: 131 | data = json.load(fobj) 132 | 133 | database = data["database"] 134 | taxonomy = data["taxonomy"] 135 | version = data["version"] 136 | 137 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, JSONFILE) 138 | 139 | print "No of non-existing videos: %d" % len(non_existing_videos) 140 | 141 | train_vids_all = [] 142 | [train_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 143 | 144 | # Find list of available training videos 145 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 146 | 147 | ########################################################################### 148 | # Get categories information from the database (Train+Validation sets) 149 | category = [] 150 | for x in database: 151 | cc = [] 152 | for l in database[x]["annotations"]: 153 | cc.append(l["label"]) 154 | category.extend(list(set(cc))) 155 | category_count = collections.Counter(category) 156 | 157 | category_names = sorted(category_count.keys()) 158 | print "Total No of classes: %d" % len(category_names) 159 | 160 | #print category_names 161 | 162 | ########################################################################### 163 | # We use the meta-information such as FPS, totalFrames and dimensions 164 | # in order to obtain a lower and upper bound for the frame sampling 165 | # To write meta-information to a json file. Uncomment following 3 lines 166 | # to generate the json file. 167 | meta_info = tm1.get_meta_info(VIDEOPATH, train_existing_vids) 168 | with open("training_data_meta_info.json", "w") as fp: 169 | json.dump(meta_info, fp) 170 | 171 | # Read the training videos meta_information from file. 172 | #with open("val_data_meta_info.json", "r") as fobj: 173 | # meta_info = json.load(fobj) 174 | 175 | ########################################################################### 176 | 177 | # Train models 178 | 179 | n = 4000 # no of samples to extract for each category of training videos 180 | #sample_activity_frames(database, meta_info, category_names, N=n) 181 | 182 | # Method 1: Train a series of SVMs on the training set videos 183 | 184 | # Uncomment below 3 lines for viewing frames selected 185 | # with open("samples_"+str(n)+"/Applying sunscreen.json") as fp: 186 | # samples_d = json.load(fp) 187 | # tm1.display_sample_frames(samples_d, VIDEOPATH) 188 | 189 | # Method 2: Train a CNN from scratch on the consecutive frame OF 190 | # visualization images. 191 | #create_training_lmdb("samples_"+str(n), category_names) 192 | print "LMDB Created !!" 
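    # Minimal sketch of steps 3-4 of create_training_lmdb (train the CNN on the
    # new LMDB and let the solver snapshot the weights); the solver file name is
    # hypothetical, must point at LMDB_FOLDER, and caffe must be imported first:
    # caffe.set_mode_gpu()
    # solver = caffe.get_solver("optical_flow_solver.prototxt")
    # solver.solve()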
193 | 194 | # Method 3: Use existing or third-party pre-trained models 195 | # Features: C3D , MBH (Improved Dense Traj) , ImageNetShuffle 196 | 197 | # Extract 198 | 199 | # FineTune models 200 | 201 | # Save the models to files -------------------------------------------------------------------------------- /Evaluation/training_model_hog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 16 22:56:22 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Use frame HOG features 9 | """ 10 | 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cPickle 19 | from sklearn import svm 20 | from sklearn.ensemble import RandomForestClassifier 21 | from joblib import Parallel, delayed 22 | import lmdb 23 | import caffe 24 | import cv2 25 | 26 | 27 | # Temporal Proposals : Pretrained 28 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 29 | #ANNOTATION_FILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 30 | #PROPOSALS_FILENAME = '/home/arpan/DATA_Drive/ActivityNet/extra_features/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 31 | #SHUFFLE = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 32 | #MBH = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_features.h5" 33 | #MBH_IDS = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_quids.txt" 34 | #C3D = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 35 | #C3D_PCA = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/PCA_activitynet_v1-3.hdf5" 36 | #SHUFFLE_IDS = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 37 | #SUBSET = 'validation' 38 | 39 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 40 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 41 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 42 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 43 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 44 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 45 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 46 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 47 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 48 | HOGFILE = "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/hog.xml" 49 | SUBSET = 'validation' 50 | 51 | def construct_dataset(meta_info, samples_csv, category_names, prefix): 52 | 53 | lmdb_name = os.path.join(LMDB_FOLDER, prefix+"_hog_lmdb") 54 | if not os.path.exists(os.path.dirname(lmdb_name)): 55 | os.makedirs(os.path.dirname(lmdb_name)) 56 | 57 | samples_df = pd.read_csv(samples_csv) 58 | print "Creating HOG features..." 
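    # Where the HOG vector size used below comes from, assuming the hog.xml
    # values correspond to winSize 160x120, blockSize 16x16, blockStride 8x8,
    # cellSize 8x8 and 9 orientation bins:
    #   block positions   = ((160-16)/8 + 1) * ((120-16)/8 + 1) = 19 * 14 = 266
    #   descriptor length = 266 blocks * (2*2) cells/block * 9 bins = 9576 floats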
59 | 60 | # Create lmdb 61 | (H, W, C) = (1, 1, 2000) 62 | N = samples_df.shape[0] # no of rows (=no of visualizations = 5k) 63 | # twice the size of total number of OF visualizations 64 | map_size = int(N*H*W*C*3*15) # approx 429 GB 65 | #map_size = int(N*720*1280*C*2) # approx 429 GB 66 | 67 | env = lmdb.open(lmdb_name, map_size=map_size) 68 | i = 0 # LMDB index variable 69 | # iterate over the rows of the pandas dataframe 70 | end_samples = samples_df.shape[0] 71 | r = (end_samples - i)/200 72 | print "No of samples per class = %d " %r 73 | ########################################################################### 74 | nCat = 4*len(category_names) # = 800 per batch 75 | nCat_samples = (end_samples - i)/nCat # = N = 1000 76 | lmdb_id = 0 77 | 78 | # HOG returns a 9576 sized vector 79 | # Parallelizing the lmdb creation process 80 | for i in range(nCat_samples): 81 | 82 | result = Parallel(n_jobs=4)(delayed(get_hog_feature) \ 83 | (samples_df['video_id'][i*nCat+j], \ 84 | samples_df['position'][i*nCat+j]) 85 | for j in range(nCat)) 86 | 87 | with env.begin(write = True) as txn: 88 | for l,vec in enumerate(result): 89 | row_no = (i*nCat)+l 90 | pos = samples_df['position'][row_no] 91 | video_id = samples_df['video_id'][row_no] 92 | lab = samples_df['label'][row_no] 93 | print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 94 | 95 | #img = np.rollaxis(img, 2) # C, H, W 96 | datum = caffe.proto.caffe_pb2.Datum() 97 | # since it is a vector, it only has 1st dimension 98 | #print "vec shape : {}" .format(vec.shape) 99 | datum.channels = vec.shape[0] 100 | datum.height = 1 101 | datum.width = 1 102 | #datum.data = img.tobytes() 103 | datum.float_data.extend(vec.astype(float).flat) 104 | datum.label = lab 105 | str_id = '{:08}'.format(lmdb_id) 106 | # The encode is only essential in Python 3 107 | txn.put(str_id.encode('ascii'), datum.SerializeToString()) 108 | lmdb_id += 1 109 | print "Write No : %d" %(i+1) 110 | print "LMDB construction successful !" 111 | return 112 | 113 | def get_hog_feature(vid, pos): 114 | ''' 115 | Read the frame at 'pos' of video and find the hog feature for the frame 116 | ''' 117 | height, width = 120, 160 118 | cap = cv2.VideoCapture(os.path.join(VIDEOPATH, 'v_'+vid+'.mp4')) 119 | if not cap.isOpened(): 120 | raise IOError('Capture object not opened !') 121 | hog = cv2.HOGDescriptor("hog.xml") 122 | cap.set(cv2.CAP_PROP_POS_FRAMES, pos) 123 | ret, frame = cap.read() 124 | while not ret: 125 | print "Frame not read. Move backwards." 126 | pos -= 1 127 | cap.set(cv2.CAP_PROP_POS_FRAMES, pos) 128 | ret, frame = cap.read() 129 | 130 | frame = cv2.resize(frame, (width, height)) 131 | frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 132 | #cv2.imshow("frame", frame) 133 | #waitTillEscPressed() 134 | hist = hog.compute(frame) 135 | cols = hist.shape[0] 136 | #print "HOG Shape : {}" .format(hist.shape) 137 | #print "Reshaped : {}" .format(hist.reshape((cols)).shape) 138 | hist = hist.reshape((cols)) 139 | cap.release() 140 | #cv2.destroyAllWindows() 141 | return hist 142 | 143 | def waitTillEscPressed(): 144 | while(True): 145 | # For moving forward 146 | if cv2.waitKey(10)==27: 147 | print("Esc Pressed. 
Move Forward without labeling.") 148 | return 1 149 | 150 | 151 | def predict_on_glFeat(X_val, nFeatures, database, category_names, val_existing_vids, \ 152 | destPath, nEstimators): 153 | 154 | # Create a dataframe with rows as egs and cols as class 1 prob values 155 | threshold = 0.5 156 | X = X_val.loc[:,range(nFeatures)] 157 | y_prob = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 158 | columns=category_names, index=X_val.index) 159 | for cat in category_names: 160 | # load the model 161 | f_name = os.path.join(destPath+"_"+str(nEstimators),\ 162 | destPath+"_"+str(nEstimators)+"_"+cat+".pkl") 163 | with open(f_name, "rb") as fid: 164 | rf_model = cPickle.load(fid) 165 | 166 | # Assign positive class probabilities 167 | y_prob[cat] = rf_model.predict_proba(X)[:,1] 168 | print "No. of examples above threshold for class {} : {}" \ 169 | .format(cat, sum(y_prob[cat]>threshold)) 170 | 171 | # Top 5 predictions 172 | pred = {} 173 | #y_prob.apply(np.argmax, axis=1) 174 | for vid in list(X.index): 175 | #print "ID : %s " %vid 176 | # select top 3 prediction values and their labels and save in dict 177 | top_n = y_prob.loc[vid,:].sort_values(ascending=False)[:3] 178 | labels = top_n.index.tolist() 179 | scores = top_n.values.tolist() 180 | pred[vid] = [] 181 | for idx,score in enumerate(scores): 182 | pred[vid].append({'score': score, 'label':labels[idx]}) 183 | 184 | return pred, y_prob 185 | 186 | 187 | if __name__=='__main__': 188 | 189 | # Read the database, version and taxonomy from JSON file 190 | with open(ANNOTATION_FILE, "r") as fobj: 191 | data = json.load(fobj) 192 | 193 | database = data["database"] 194 | taxonomy = data["taxonomy"] 195 | version = data["version"] 196 | 197 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, ANNOTATION_FILE) 198 | 199 | print "No of non-existing videos: %d" % len(non_existing_videos) 200 | 201 | train_vids_all = [] 202 | [train_vids_all.append(x) for x in database if database[x]['subset']=='training'] 203 | # Find list of available training videos 204 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 205 | 206 | val_vids_all = [] 207 | [val_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 208 | # Find list of available training videos 209 | val_existing_vids = list(set(val_vids_all) - set(non_existing_videos)) 210 | 211 | ########################################################################### 212 | # Get categories information from the database (Train+Validation sets) 213 | category = [] 214 | for x in database: 215 | cc = [] 216 | for l in database[x]["annotations"]: 217 | cc.append(l["label"]) 218 | category.extend(list(set(cc))) 219 | category_count = collections.Counter(category) 220 | 221 | category_names = sorted(category_count.keys()) 222 | print "Total No of classes: %d" % len(category_names) 223 | 224 | #print category_names 225 | ########################################################################### 226 | # MBH and ImageNetShuffle Features in training_model_m2.py 227 | ########################################################################### 228 | # Create HOG feature dataset 229 | 230 | # Read the meta_info and sample_positions files 231 | samples_csv = "tr_samples_4k.csv" 232 | samples_val_csv = "val_samples_1k.csv" 233 | with open("training_data_meta_info.json", "r") as fobj: 234 | meta_info = json.load(fobj) 235 | construct_dataset(meta_info, samples_csv, category_names, "test_train") 236 | 237 | with open("val_data_meta_info.json", "r") as fobj: 238 | 
val_meta_info = json.load(fobj) 239 | construct_dataset(val_meta_info, samples_val_csv, category_names, "test_val") 240 | 241 | # train a model without convolution layers, only fc layers should be there 242 | 243 | 244 | ########################################################################### 245 | # Consider Taxonomy of the classes 246 | # Temporal Proposals 247 | 248 | ########################################################################### 249 | 250 | # out_dict = {'version':version} 251 | # subset_video_ids = [] 252 | # ext_data_dict = {'used': True, 'details': \ 253 | # 'C3D features.'} 254 | # 255 | # out_dict['results'] = pred 256 | # out_dict['external_data'] = ext_data_dict 257 | # 258 | # json_filename = 'submission_t3_'+SUBSET+'.json' 259 | # with open(json_filename, 'w') as fp: 260 | # json.dump(out_dict, fp) 261 | ########################################################################### 262 | -------------------------------------------------------------------------------- /Evaluation/training_model_m3.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 14 20:53:14 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Training Model : Method 2 9 | Using CNNs for training. Pretrained models 10 | """ 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cPickle 19 | from sklearn import svm 20 | from sklearn.ensemble import RandomForestClassifier 21 | from joblib import Parallel, delayed 22 | 23 | 24 | # Temporal Proposals : Pretrained 25 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 26 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 27 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 28 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 29 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 30 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 31 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 32 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 33 | SUBSET = 'validation' 34 | 35 | 36 | def train_model_linSVM(X, y): 37 | # select the parameters, generate probabilities etc 38 | model = svm.LinearSVC() 39 | return model.fit(X, y) 40 | 41 | 42 | def train_on_shuffle(database, category_names, train_vids_all, destPath="shuffle_RF"): 43 | 44 | # ImageNet shuffle features: 45 | # 19994 x 1024 features 46 | fobj = h5py.File(SHUFFLE, 'r') 47 | 48 | # shape is 19994 x 1024 49 | print "Shape : {}" .format(fobj['features'].shape) 50 | 51 | # As the videos are sorted, the index created will be the video_no 52 | # corresponding to the video row in h5 database. 
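# The quids file is read below with pandas (usecols=[2]): that third column
# holds entries of the form <prefix>_<video_id>.<ext>, so the code strips the
# part up to the first '_' and the trailing extension to recover the bare
# video id, which is then used as the row index of the feature DataFrame.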
53 | shuffle_ids = pd.read_csv(SHUFFLE_IDS, header='infer', \ 54 | names = ['id'], usecols = [2]) 55 | 56 | sh_id = [s.split('_', 1)[-1] for s in shuffle_ids['id']] 57 | sh_id = [s.rsplit('.',1)[0] for s in sh_id] 58 | # Copy all the values to numpy array var 59 | X_all = fobj['features'][:] 60 | 61 | # join features with video_ids 62 | X_all = pd.DataFrame(X_all, index=sh_id) 63 | 64 | # subset rows for training and validation 65 | X_train = X_all.loc[train_vids_all] 66 | #X_val = X_all[X_all['id'].isin(val_vids_appended)] 67 | del X_all 68 | fobj.close() 69 | print "X_train = {} " .format(X_train.shape) 70 | y_train = pd.DataFrame(np.zeros((len(X_train), len(category_names))),\ 71 | columns=category_names, index=X_train.index) 72 | 73 | # Join the columns for each category 74 | X_train = pd.concat([X_train, y_train], axis = 1) 75 | 76 | #print X_train.head() 77 | # Iterate over the videos of X_train and X_val and set labels 78 | for vid in train_vids_all: 79 | for annotation in database[vid]['annotations']: 80 | X_train.at[vid, annotation['label']] = 1 81 | 82 | print "Labels set !" 83 | #print X_train.head() 84 | # Iterate over the categories and for each category, prepare the dataset 85 | for cat in category_names: 86 | # for a cat, find the video IDs which have labels 87 | pos_samples = X_train[X_train[cat]==1] 88 | pos_samples = pos_samples.loc[:, range(1024)+[cat]] 89 | # sample negative rows equal to the no of pos examples 90 | neg_samples = X_train[X_train[cat]==0].sample(n=len(pos_samples), \ 91 | random_state = 321) 92 | neg_samples = neg_samples.loc[:, range(1024)+[cat]] 93 | 94 | # join pos and negative samples and shuffle 95 | X = pd.concat([pos_samples, neg_samples]) 96 | 97 | X = X.sample(frac=1, random_state=231) # shuffle 98 | y = np.array(X[cat]) 99 | X = X.loc[:,range(1024)] 100 | 101 | rf_model = train_model_rf(X, y, estimators=20, seed=123) 102 | 103 | if not os.path.exists(destPath): 104 | os.makedirs(destPath) 105 | f_name = os.path.join(destPath, destPath+"_"+cat+".pkl") 106 | with open(f_name, "wb") as fid: 107 | cPickle.dump(rf_model, fid) 108 | print "Model saved for category : %s " %cat 109 | 110 | print "Models Trained and saved to files." 
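# Illustrative helper (a sketch, not used elsewhere in this file): the
# per-class pickles written above all follow <destPath>/<destPath>_<cat>.pkl,
# so they can be reloaded once into a dict and reused across predictions:
# def load_category_models(destPath, category_names):
#     models = {}
#     for cat in category_names:
#         f_name = os.path.join(destPath, destPath + "_" + cat + ".pkl")
#         with open(f_name, "rb") as fid:
#             models[cat] = cPickle.load(fid)
#     return models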
111 | # this returns a list of 10 SVM 112 | #result = Parallel(n_jobs=3)(delayed(train_model_rf)(X, y, seed) for seed in range(10)) 113 | 114 | 115 | def train_model_rf(X, y, estimators, seed): 116 | # select the parameters, generate probabilities etc 117 | clf = RandomForestClassifier(n_estimators = estimators, random_state=seed) 118 | clf = clf.fit(X, y) 119 | return clf 120 | 121 | def predict_on_shuffle(database, category_names, val_existing_vids, destPath="RF"): 122 | # ImageNet shuffle features: 123 | # 19994 x 1024 features 124 | fobj = h5py.File(SHUFFLE, 'r') 125 | # shape is 19994 x 1024 126 | print "Shape : {}" .format(fobj['features'].shape) 127 | 128 | shuffle_ids = pd.read_csv(SHUFFLE_IDS, header='infer', \ 129 | names = ['id'], usecols = [2]) 130 | 131 | sh_id = [s.split('_', 1)[-1] for s in shuffle_ids['id']] 132 | sh_id = [s.rsplit('.',1)[0] for s in sh_id] 133 | # Copy all the values to numpy array var 134 | X_all = fobj['features'][:] 135 | 136 | # join features with video_ids 137 | X_all = pd.DataFrame(X_all, index=sh_id) 138 | 139 | # subset rows for validation set 140 | X_val = X_all.loc[val_existing_vids] 141 | #X_val = X_all[X_all['id'].isin(val_vids_appended)] 142 | del X_all 143 | fobj.close() 144 | print "X_val = {} " .format( X_val.shape) 145 | y_val = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 146 | columns=category_names, index=X_val.index) 147 | 148 | # Join the columns for each category 149 | X_val = pd.concat([X_val, y_val], axis = 1) 150 | 151 | for vid in val_existing_vids: 152 | for annotation in database[vid]['annotations']: 153 | X_val.at[vid, annotation['label']] = 1 154 | 155 | print "Labels set !" 156 | # Create a dataframe with rows are classes and cols are 0 class and 1 class 157 | # Probability values 158 | #prob = pd.DataFrame 159 | X = X_val.loc[:,range(1024)] 160 | y_prob = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 161 | columns=category_names, index=X_val.index) 162 | for cat in category_names: 163 | # load the model 164 | f_name = os.path.join(destPath, destPath+"_"+cat+".pkl") 165 | with open(f_name, "rb") as fid: 166 | rf_model = cPickle.load(fid) 167 | 168 | # Assign positive class probabilities 169 | y_prob[cat] = rf_model.predict_proba(X)[:,1] 170 | 171 | print "Probabilities for class {} : {}" .format(cat,y_prob[cat]) 172 | 173 | # Top 5 predictions 174 | threshold = 0.5 175 | pred = {} 176 | #y_prob.apply(np.argmax, axis=1) 177 | for vid in list(X.index): 178 | #print "ID : %s " %vid 179 | # select top 5 prediction values and their labels and save in dict 180 | top_n = y_prob.loc[vid,:].sort_values(ascending=False)[:3] 181 | labels = top_n.index.tolist() 182 | scores = top_n.values.tolist() 183 | pred[vid] = [] 184 | for idx,score in enumerate(scores): 185 | pred[vid].append({'score': score, 'label':labels[idx]}) 186 | 187 | return pred 188 | 189 | 190 | # for testing the functions 191 | if __name__=='__main__': 192 | 193 | # Read the database, version and taxonomy from JSON file 194 | with open(ANNOTATION_FILE, "r") as fobj: 195 | data = json.load(fobj) 196 | 197 | database = data["database"] 198 | taxonomy = data["taxonomy"] 199 | version = data["version"] 200 | 201 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, ANNOTATION_FILE) 202 | 203 | print "No of non-existing videos: %d" % len(non_existing_videos) 204 | 205 | train_vids_all = [] 206 | [train_vids_all.append(x) for x in database if database[x]['subset']=='training'] 207 | # Find list of available training videos 208 | 
train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 209 | 210 | val_vids_all = [] 211 | [val_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 212 | # Find list of available training videos 213 | val_existing_vids = list(set(val_vids_all) - set(non_existing_videos)) 214 | 215 | ########################################################################### 216 | # Get categories information from the database (Train+Validation sets) 217 | category = [] 218 | for x in database: 219 | cc = [] 220 | for l in database[x]["annotations"]: 221 | cc.append(l["label"]) 222 | category.extend(list(set(cc))) 223 | category_count = collections.Counter(category) 224 | 225 | category_names = sorted(category_count.keys()) 226 | print "Total No of classes: %d" % len(category_names) 227 | 228 | #print category_names 229 | ########################################################################### 230 | 231 | 232 | 233 | # Temporal Proposals 234 | # Optimized for high recall 235 | # 19994 x Mi proposals (For each video a number of proposals 236 | # each with a score in decreasing order) 237 | 238 | # MBH 239 | # 19994 x 65536 features 240 | # 241 | 242 | # ImageNet Shuffle Features 243 | #train_on_shuffle(database, category_names, train_vids_all, "RF") 244 | 245 | pred = predict_on_shuffle(database, category_names, val_existing_vids, "RF") 246 | out_dict = {'version':version} 247 | subset_video_ids = [] 248 | ext_data_dict = {'used': False, 'details': \ 249 | 'Describe the external data over here. If necessary for each prediction'} 250 | 251 | out_dict['results'] = pred 252 | out_dict['external_data'] = ext_data_dict 253 | 254 | json_filename = 'submission_t3_'+SUBSET+'.json' 255 | with open(json_filename, 'w') as fp: 256 | json.dump(out_dict, fp) 257 | # Step 1: Form the datasets 258 | # To train 200 SVMs, each for an activity class. 
259 | # Use One Vs All SVM ( for not used LinearSVC, which is a multi-class classifier ) 260 | 261 | 262 | 263 | 264 | # training videos_info is in meta_info 265 | # check whether a particular video is -------------------------------------------------------------------------------- /Evaluation/training_model_m4.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Fri Jun 16 22:56:22 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Use C3D features 9 | """ 10 | 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import h5py 16 | import pandas as pd 17 | import collections 18 | import cPickle 19 | from sklearn import svm 20 | from sklearn.ensemble import RandomForestClassifier 21 | from joblib import Parallel, delayed 22 | import lmdb 23 | import caffe 24 | 25 | 26 | # Temporal Proposals : Pretrained 27 | #VIDEOPATH = '/home/arpan/DATA_Drive/ActivityNet/videos' 28 | #ANNOTATION_FILE = '/home/arpan/DATA_Drive/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 29 | #PROPOSALS_FILENAME = '/home/arpan/DATA_Drive/ActivityNet/extra_features/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 30 | #SHUFFLE = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 31 | #MBH = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_features.h5" 32 | #MBH_IDS = "/home/arpan/VisionWorkspace/ActivityNet/MBH Features/MBH_Videos_quids.txt" 33 | #C3D = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 34 | #C3D_PCA = "/home/arpan/DATA_Drive/ActivityNet/extra_features/C3D Features/PCA_activitynet_v1-3.hdf5" 35 | #SHUFFLE_IDS = '/home/arpan/DATA_Drive/ActivityNet/extra_features/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 36 | #SUBSET = 'validation' 37 | 38 | VIDEOPATH = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Crawler/videos' 39 | ANNOTATION_FILE = '/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/Evaluation/data/activity_net.v1-3.min.json' 40 | PROPOSALS_FILENAME = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/Temporal Activity Proposals/activitynet_v1-3_proposals.hdf5' 41 | SHUFFLE = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_features.h5' 42 | MBH = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/MBH Features/MBH_Videos_features.h5" 43 | C3D = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/sub_activitynet_v1-3.c3d.hdf5" 44 | C3D_PCA = "/home/hadoop/VisionWorkspace/ActivityNet/Downloads/C3D Features/PCA_activitynet_v1-3.hdf5" 45 | SHUFFLE_IDS = '/home/hadoop/VisionWorkspace/ActivityNet/Downloads/ImageNet Shuffle Features/ImageNetShuffle2016_quids.txt' 46 | LMDB_FOLDER = "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb" 47 | SUBSET = 'validation' 48 | 49 | def construct_dataset(meta_info, samples_csv, category_names, prefix): 50 | 51 | lmdb_name = os.path.join(LMDB_FOLDER, prefix+"_c3d_lmdb") 52 | if not os.path.exists(os.path.dirname(lmdb_name)): 53 | os.makedirs(os.path.dirname(lmdb_name)) 54 | 55 | samples_df = pd.read_csv(samples_csv) 56 | print "Loading C3D features..." 
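# The C3D HDF5 file opened below is expected to expose one group per video,
# keyed 'v_<video_id>', each holding a 'c3d_features' matrix with one 500-d
# row for roughly every 8th frame (see get_c3d_feature further down); the PCA
# file is opened alongside it here but only closed at the end.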
57 | fc3d = h5py.File(C3D, 'r') 58 | fpca = h5py.File(C3D_PCA, 'r') 59 | 60 | # Create lmdb 61 | (H, W, C) = (1, 1, 500) 62 | N = samples_df.shape[0] # no of rows (=no of visualizations = 5k) 63 | # twice the size of total number of OF visualizations 64 | map_size = int(N*H*W*C*3*15) # approx 429 GB 65 | #map_size = int(N*720*1280*C*2) # approx 429 GB 66 | 67 | env = lmdb.open(lmdb_name, map_size=map_size) 68 | 69 | i = 0 # LMDB index variable 70 | # iterate over the rows of the pandas dataframe 71 | end_samples = samples_df.shape[0] 72 | r = (end_samples - i)/200 73 | print "No of samples per class = %d " %r 74 | ########################################################################### 75 | nCat = 4*len(category_names) # = 200 76 | nCat_samples = (end_samples - i)/nCat # = N = 1000 77 | lmdb_id = 0 78 | 79 | # Parallelizing the lmdb creation process 80 | for i in range(nCat_samples): 81 | 82 | result = Parallel(n_jobs=1)(delayed(get_c3d_feature) \ 83 | (fc3d, 'v_'+samples_df['video_id'][i*nCat+j], \ 84 | samples_df['position'][i*nCat+j], \ 85 | meta_info[samples_df['video_id'][i*nCat+j]]['fps']) \ 86 | for j in range(nCat)) 87 | 88 | with env.begin(write = True) as txn: 89 | for l in range(len(result)): 90 | row_no = (i*nCat)+l 91 | pos = samples_df['position'][row_no] 92 | video_id = samples_df['video_id'][row_no] 93 | lab = samples_df['label'][row_no] 94 | print "idx : "+str(row_no)+" :: 'position' : "+str(pos) 95 | 96 | for vec in result[l]: 97 | #img = np.rollaxis(img, 2) # C, H, W 98 | datum = caffe.proto.caffe_pb2.Datum() 99 | # since it is a vector, it only has 1st dimension 100 | datum.channels = vec.shape[0] 101 | datum.height = 1 102 | datum.width = 1 103 | #datum.data = img.tobytes() 104 | datum.float_data.extend(vec.astype(float).flat) 105 | datum.label = lab 106 | str_id = '{:08}'.format(lmdb_id) 107 | # The encode is only essential in Python 3 108 | txn.put(str_id.encode('ascii'), datum.SerializeToString()) 109 | lmdb_id += 1 110 | print "Write No : %d" %(i+1) 111 | print "LMDB construction successful !" 112 | fc3d.close() 113 | fpca.close() 114 | return 115 | 116 | def get_c3d_feature(fc3d, vid, pos, vfps): 117 | ''' 118 | Read the feature vector that is near the pos of video 119 | c3d features are taken for every 8th frame 120 | ''' 121 | vec = [] 122 | #print "vid : {} :: pos : {} :: vfps : {}" .format(vid, pos, vfps) 123 | #print "Shape : {}" .format(fc3d[vid]['c3d_features'].shape) 124 | row = int(pos/8) 125 | while not row < fc3d[vid]['c3d_features'].shape[0]: 126 | #print "Decrement by 1" 127 | row -= 1 128 | vec.append(fc3d[vid]['c3d_features'][row,:]) 129 | return vec 130 | 131 | 132 | def partition_dataset(feature, train_vids_all, val_existing_vids): 133 | if feature == "C3D": 134 | print "Loading C3D features..." 
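# NOTE: this C3D branch is still incomplete -- the statements below refer to
# fobj and ids, which are never defined here.  One possible way (an assumption,
# not what this file does) to get a fixed-length per-video descriptor would be
# to mean-pool the per-segment rows, e.g.:
#   rows = [np.mean(fc3d['v_' + v]['c3d_features'][:], axis=0) for v in train_vids_all]
#   X_train = pd.DataFrame(rows, index=train_vids_all)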
135 | fc3d = h5py.File(C3D, 'r') 136 | fpca = h5py.File(C3D_PCA, 'r') 137 | else: 138 | raise IOError("Invalid first argument: "+feature) 139 | 140 | for vid in fobj.keys(): 141 | fc3d[vid]['c3d_features'][:] 142 | # Too large, need >10GB memory, for MBH 143 | X_all = fobj['features'][:] 144 | X_all = pd.DataFrame(X_all , index=ids) 145 | X_train = X_all.loc[train_vids_all] 146 | X_val = X_all.loc[val_existing_vids] 147 | del X_all 148 | fobj.close() 149 | print "X_train = {} " .format(X_train.shape) 150 | nFeat = X_train.shape[1] 151 | y_train = pd.DataFrame(np.zeros((len(X_train), len(category_names))),\ 152 | columns=category_names, index=X_train.index) 153 | y_val = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 154 | columns=category_names, index=X_val.index) 155 | 156 | # Join the columns for each category 157 | X_train = pd.concat([X_train, y_train], axis = 1) 158 | X_val = pd.concat([X_val, y_val], axis = 1) 159 | #print X_train.head() 160 | # Iterate over the videos of X_train and X_val and set labels 161 | for vid in train_vids_all: 162 | for annotation in database[vid]['annotations']: 163 | X_train.at[vid, annotation['label']] = 1 164 | 165 | print "Labels set for Training Set !" 166 | 167 | for vid in val_existing_vids: 168 | for annotation in database[vid]['annotations']: 169 | X_val.at[vid, annotation['label']] = 1 170 | 171 | print "Labels set for Validation Test !" 172 | 173 | return X_train, X_val, nFeat 174 | 175 | 176 | def train_on_glFeat(X_train, nFeatures, database, category_names, train_vids_all, \ 177 | destPath, seed, nEstimators): 178 | """Function to read the MBH features and train a classifier for each class. 179 | Input: 180 | feature: "MBH" for training on MBH features and "SHUFFLE" for training on shuffle 181 | database: read from JSON file 182 | category_names: sorted list of class names 183 | train_vids_all: list of video ids in the training set 184 | nEstimators: no of trees for Random Forest 185 | """ 186 | #print X_train.head() 187 | # Iterate over the categories and for each category, prepare the dataset 188 | for cat in category_names: 189 | # for a cat, find the video IDs which have labels 190 | pos_samples = X_train[X_train[cat]==1] 191 | pos_samples = pos_samples.loc[:, range(nFeatures)+[cat]] 192 | # sample negative rows equal to the no of pos examples 193 | neg_samples = X_train[X_train[cat]==0].sample(n=len(pos_samples), \ 194 | random_state = 321) 195 | neg_samples = neg_samples.loc[:, range(nFeatures)+[cat]] 196 | 197 | # join pos and negative samples 198 | X = pd.concat([pos_samples, neg_samples]) 199 | 200 | X = X.sample(frac=1, random_state=231) # shuffle 201 | y = np.array(X[cat]) 202 | X = X.loc[:,range(nFeatures)] 203 | 204 | rf_model = train_model_rf(X, y, estimators = nEstimators, seed=seed) 205 | 206 | if not os.path.exists(destPath+"_"+str(nEstimators)): 207 | os.makedirs(destPath+"_"+str(nEstimators)) 208 | f_name = os.path.join(destPath+"_"+str(nEstimators), \ 209 | destPath+"_"+str(nEstimators)+"_"+cat+".pkl") 210 | with open(f_name, "wb") as fid: 211 | cPickle.dump(rf_model, fid) 212 | print "Model saved for category : %s " %cat 213 | 214 | print "Models Trained and saved to files." 
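# Sketch of how these helpers appear meant to be chained (partition_dataset
# above would need finishing first; "c3d_RF" is an illustrative prefix):
# X_train, X_val, nFeat = partition_dataset("C3D", train_existing_vids, val_existing_vids)
# train_on_glFeat(X_train, nFeat, database, category_names, train_existing_vids, "c3d_RF", 123, 20)
# pred, y_prob = predict_on_glFeat(X_val, nFeat, database, category_names, val_existing_vids, "c3d_RF", 20)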
215 | # this returns a list of 10 SVM 216 | #result = Parallel(n_jobs=3)(delayed(train_model_rf)(X, y, seed) for seed in range(10)) 217 | 218 | def train_model_rf(X, y, estimators, seed): 219 | # select the parameters, generate probabilities etc 220 | clf = RandomForestClassifier(n_estimators = estimators, random_state=seed) 221 | clf = clf.fit(X, y) 222 | return clf 223 | 224 | def predict_on_glFeat(X_val, nFeatures, database, category_names, val_existing_vids, \ 225 | destPath, nEstimators): 226 | 227 | # Create a dataframe with rows as egs and cols as class 1 prob values 228 | threshold = 0.5 229 | X = X_val.loc[:,range(nFeatures)] 230 | y_prob = pd.DataFrame(np.zeros((len(X_val), len(category_names))), \ 231 | columns=category_names, index=X_val.index) 232 | for cat in category_names: 233 | # load the model 234 | f_name = os.path.join(destPath+"_"+str(nEstimators),\ 235 | destPath+"_"+str(nEstimators)+"_"+cat+".pkl") 236 | with open(f_name, "rb") as fid: 237 | rf_model = cPickle.load(fid) 238 | 239 | # Assign positive class probabilities 240 | y_prob[cat] = rf_model.predict_proba(X)[:,1] 241 | print "No. of examples above threshold for class {} : {}" \ 242 | .format(cat, sum(y_prob[cat]>threshold)) 243 | 244 | # Top 5 predictions 245 | pred = {} 246 | #y_prob.apply(np.argmax, axis=1) 247 | for vid in list(X.index): 248 | #print "ID : %s " %vid 249 | # select top 3 prediction values and their labels and save in dict 250 | top_n = y_prob.loc[vid,:].sort_values(ascending=False)[:3] 251 | labels = top_n.index.tolist() 252 | scores = top_n.values.tolist() 253 | pred[vid] = [] 254 | for idx,score in enumerate(scores): 255 | pred[vid].append({'score': score, 'label':labels[idx]}) 256 | 257 | return pred, y_prob 258 | 259 | 260 | def train_on_C3D(database, category_names, train_vids_all): 261 | """Function to read the C3D features and train a model on them 262 | """ 263 | 264 | 265 | if __name__=='__main__': 266 | 267 | # Read the database, version and taxonomy from JSON file 268 | with open(ANNOTATION_FILE, "r") as fobj: 269 | data = json.load(fobj) 270 | 271 | database = data["database"] 272 | taxonomy = data["taxonomy"] 273 | version = data["version"] 274 | 275 | non_existing_videos = utils.crosscheck_videos(VIDEOPATH, ANNOTATION_FILE) 276 | 277 | print "No of non-existing videos: %d" % len(non_existing_videos) 278 | 279 | train_vids_all = [] 280 | [train_vids_all.append(x) for x in database if database[x]['subset']=='training'] 281 | # Find list of available training videos 282 | train_existing_vids = list(set(train_vids_all) - set(non_existing_videos)) 283 | 284 | val_vids_all = [] 285 | [val_vids_all.append(x) for x in database if database[x]['subset']==SUBSET] 286 | # Find list of available training videos 287 | val_existing_vids = list(set(val_vids_all) - set(non_existing_videos)) 288 | 289 | ########################################################################### 290 | # Get categories information from the database (Train+Validation sets) 291 | category = [] 292 | for x in database: 293 | cc = [] 294 | for l in database[x]["annotations"]: 295 | cc.append(l["label"]) 296 | category.extend(list(set(cc))) 297 | category_count = collections.Counter(category) 298 | 299 | category_names = sorted(category_count.keys()) 300 | print "Total No of classes: %d" % len(category_names) 301 | 302 | #print category_names 303 | ########################################################################### 304 | # MBH and ImageNetShuffle Features in training_model_m2.py 305 | 
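###########################################################################
# Sketch: once the construct_dataset calls in the C3D section below have
# written the LMDBs, one record can be sanity-checked roughly like this
# (lmdb and caffe are already imported above):
# env = lmdb.open(os.path.join(LMDB_FOLDER, "train_c3d_lmdb"), readonly=True)
# with env.begin() as txn:
#     cur = txn.cursor()
#     cur.first()
#     datum = caffe.proto.caffe_pb2.Datum()
#     datum.ParseFromString(cur.value())
#     print "label = %d, channels = %d" % (datum.label, datum.channels)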
########################################################################### 306 | # C3D features 307 | 308 | # Read the meta_info and sample_positions files 309 | samples_csv = "tr_samples_10k.csv" 310 | samples_val_csv = "val_samples_2500.csv" 311 | with open("training_data_meta_info.json", "r") as fobj: 312 | meta_info = json.load(fobj) 313 | construct_dataset(meta_info, samples_csv, category_names, "train") 314 | 315 | with open("val_data_meta_info.json", "r") as fobj: 316 | val_meta_info = json.load(fobj) 317 | construct_dataset(val_meta_info, samples_val_csv, category_names, "val") 318 | 319 | # train a model without convolution layers, only fc layers should be there 320 | 321 | 322 | ########################################################################### 323 | # Consider Taxonomy of the classes 324 | # Temporal Proposals 325 | 326 | ########################################################################### 327 | 328 | # out_dict = {'version':version} 329 | # subset_video_ids = [] 330 | # ext_data_dict = {'used': True, 'details': \ 331 | # 'C3D features.'} 332 | # 333 | # out_dict['results'] = pred 334 | # out_dict['external_data'] = ext_data_dict 335 | # 336 | # json_filename = 'submission_t3_'+SUBSET+'.json' 337 | # with open(json_filename, 'w') as fp: 338 | # json.dump(out_dict, fp) 339 | 340 | 341 | 342 | -------------------------------------------------------------------------------- /Evaluation/training_model_svm.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python2 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sun Jun 11 20:53:14 2017 5 | 6 | @author: Arpan 7 | 8 | Description: Training Models 9 | 10 | """ 11 | import json 12 | import os 13 | import utils 14 | import numpy as np 15 | import cv2 16 | 17 | def get_hog(srcVideo, start, end): 18 | return 19 | 20 | # To train a single SVM which identifies one class, get +ve samples frames 21 | # and get same amount of -ve sample frames 22 | 23 | def get_meta_info(video_path, existing_vids): 24 | """Add meta information of existing training videos to a dictionary and 25 | write the dictionary to a file. 26 | 27 | Input: existing_vids: Videos Ids of the mp4 files. 28 | Note that only the training video Ids should be sent here 29 | Return: dictionary containing the video_ids as keys and corresponding 30 | meta-info 31 | """ 32 | meta_dict = {} 33 | # loop over the VideoIDs and get the meta information for each file 34 | print "Getting video meta-information..." 35 | for v in existing_vids: 36 | filePath = os.path.join(video_path, "v_"+v+".mp4") 37 | cap = cv2.VideoCapture(filePath) 38 | if not cap.isOpened(): 39 | raise IOError("Capture object not opened ! Abort !") 40 | break 41 | fps = cap.get(cv2.CAP_PROP_FPS) 42 | # dimensions = (Ht, Wd) 43 | dimensions = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), \ 44 | int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) 45 | no_of_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 46 | meta_dict[v] = {"fps":fps, "dimensions":dimensions, \ 47 | "total_frames": no_of_frames} 48 | cap.release() 49 | 50 | return meta_dict 51 | 52 | def get_training_segments(database, video_ids_for_cat, category): 53 | """ 54 | Get training segments from the videos and form a dictionary 55 | Note: It applies for +ve examples as of now. 
56 | """ 57 | segments_dict = {} 58 | start, stop = 0, 0 59 | for v in video_ids_for_cat: 60 | # list of annotations on video 61 | annotations = database[v]["annotations"] 62 | for ann in annotations: 63 | if ann["label"] == category: 64 | start, stop = ann["segment"] 65 | if v in segments_dict: 66 | segments_dict[v].append({"start": start, "stop": stop}) 67 | else: 68 | segments_dict[v] = [{"start": start, "stop": stop}] 69 | # for a dictionary of segments, with key as video id and 70 | # values as the list of start and stop times of +ve examples 71 | return segments_dict 72 | 73 | 74 | def get_sample_frames(seg, meta_info, N): 75 | """ Get N sample frames from the defined video segments of the given 76 | video_ids. 77 | Input: 78 | seg: (Dictionary) Training segments for positive example videos for single 79 | category. 80 | {"FKQIdqjY9nI": [{'start': 12.73, 'stop': 22.23} ... ]} 81 | meta_info: dict for meta_info of all existing training videos 82 | {"FKQIdqjY9nI": {'total_frames': 1056, 83 | 'dimensions': (720, 1280), 'fps': 30.0} ...} 84 | N : Total number of samples to be extracted 85 | Output: 86 | pos_samples: {"FKQIdqjY9nI": [ 234, 543], ...} 87 | """ 88 | # Get total number of frames in all the segments across all videos 89 | # Get N samples from total number of frames 90 | # Map the generated integers backwards to the frame numbers of video segments 91 | # Get the video_id, frame number that needs to be sampled 92 | total_frames = 0 93 | # Iterate over all the segments of the videos containing actions 94 | video_ids = sorted(seg.keys()) 95 | for v_id in video_ids: 96 | for segment in seg[v_id]: 97 | frames_in_seg = int((segment["stop"] - segment["start"])*meta_info[v_id]["fps"]) 98 | total_frames += frames_in_seg 99 | 100 | print "Total frames in all segments = %d " % total_frames 101 | # Randomly (uniform) sample N values from 0 to total_frames-1 102 | # Backwards mapping 103 | import random 104 | random.seed(231) 105 | samp = sorted(random.sample(range(1, total_frames), N), reverse=True) 106 | #print "Samples list !! 
" 107 | #print samp 108 | pos_samples = {} 109 | frame_ptr_lower = 0 110 | for v_id in video_ids: 111 | for segment in seg[v_id]: 112 | frames_in_seg = int((segment["stop"]-segment["start"])*meta_info[v_id]["fps"]) 113 | #print "v_id %s || Frames in seg : %d || lower : %d" %(v_id, frames_in_seg, frame_ptr_lower) 114 | while len(samp)!=0 and (frame_ptr_lower<=samp[-1] \ 115 | and samp[-1]<=(frame_ptr_lower+frames_in_seg)): 116 | samp_no = samp.pop() 117 | # Pop until the popped item is not in range 118 | # Get no of frames in video segment using video's FPS 119 | # calculate position (Frame number) in the video and write to dict 120 | pos = int(segment["start"]*meta_info[v_id]["fps"])+(samp_no-frame_ptr_lower) 121 | #print "lower : %d || samp_no : %d || pos : %d " %(frame_ptr_lower, samp_no, pos) 122 | if v_id in pos_samples: 123 | pos_samples[v_id].append(pos) 124 | else: 125 | pos_samples[v_id] = [pos] 126 | frame_ptr_lower += frames_in_seg 127 | 128 | #print "Samples information written to dictionary with size: %d" %len(pos_samples) 129 | return pos_samples 130 | 131 | def display_sample_frames(samples_dict, srcFolder): 132 | """ 133 | Display the frames from the samples dictionaries of the categories 134 | Input: 135 | samples_dict: {"FKQIdqjY9nI": [ 234, 543], ...} 136 | srcFolder : path containing the videos 137 | """ 138 | # Loop over the videos and display the frames 139 | 140 | for v_id in samples_dict: 141 | cap = cv2.VideoCapture(os.path.join(srcFolder, "v_"+v_id+".mp4")) 142 | if not cap.isOpened(): 143 | raise IOError("Capture object not opened !") 144 | pos_lst = samples_dict[v_id] 145 | for pos in pos_lst: 146 | cap.set(cv2.CAP_PROP_POS_FRAMES, pos) 147 | ret, frame = cap.read() 148 | cv2.imshow("Frame", frame) 149 | waitTillEscPressed() 150 | cap.release() 151 | cv2.destroyAllWindows() 152 | return 153 | 154 | 155 | def get_negative_frames(seg, meta_info, N, category): 156 | """ 157 | Get N samples from videos that do not belong to the segments mentioned in 158 | seg and are not of 'category'. 159 | """ 160 | return 161 | 162 | def train_svm(srcVideo, annotations, incr_rate, category_names): 163 | cap = cv2.VideoCapture(srcVideo) 164 | 165 | if not cap.isOpened(): 166 | raise IOError("Video cannot be opened !") 167 | 168 | dimensions = (int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)), int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))) 169 | fps = cap.get(cv2.CAP_PROP_FPS) 170 | print "Dimensions : %s " % str(dimensions) 171 | print "Frame Rate : %f " % fps 172 | # Loop over the annotation dictionaries 173 | for ann in annotations: 174 | start_time, stop_time = ann['segment'] 175 | start = int(start_time*fps) 176 | stop = int(stop_time*fps) 177 | label = ann['label'] 178 | print "Action Label : %s" %label 179 | while cap.isOpened() and start2.4.0 will work. For 2.X version you may need to edit a few lines) 24 | 25 | 4. 
GPU card + CUDA Tools 26 | 27 | -------------------------------------------------------------------------------- /caffe_models/c3d_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "C3DNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: false 12 | mean_file: "mean_c3d_4k.binaryproto" 13 | #scale: 0.00390625 14 | } 15 | data_param { 16 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/train_c3d_lmdb" 17 | batch_size: 64 18 | backend: LMDB 19 | } 20 | } 21 | layer { 22 | name: "data" 23 | type: "Data" 24 | top: "data" 25 | top: "label" 26 | include { 27 | phase: TEST 28 | } 29 | transform_param { 30 | mirror: false 31 | mean_file: "mean_c3d_4k.binaryproto" 32 | } 33 | data_param { 34 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/val_c3d_lmdb" 35 | batch_size: 50 36 | backend: LMDB 37 | } 38 | } 39 | layer { 40 | name: "fc1" 41 | type: "InnerProduct" 42 | bottom: "data" 43 | top: "fc1" 44 | param { 45 | lr_mult: 1 46 | } 47 | param { 48 | lr_mult: 2 49 | decay_mult: 0 50 | } 51 | inner_product_param { 52 | num_output: 1024 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "relu1" 63 | type: "ReLU" 64 | bottom: "fc1" 65 | top: "fc1" 66 | } 67 | layer { 68 | name: "drop1" 69 | type: "Dropout" 70 | bottom: "fc1" 71 | top: "fc1" 72 | dropout_param { 73 | dropout_ratio: 0.5 74 | } 75 | } 76 | layer { 77 | name: "fc2" 78 | type: "InnerProduct" 79 | bottom: "fc1" 80 | top: "fc2" 81 | param { 82 | lr_mult: 1 83 | } 84 | param { 85 | lr_mult: 2 86 | } 87 | inner_product_param { 88 | num_output: 1024 89 | weight_filler { 90 | type: "xavier" 91 | } 92 | bias_filler { 93 | type: "constant" 94 | } 95 | } 96 | } 97 | layer { 98 | name: "relu2" 99 | type: "ReLU" 100 | bottom: "fc2" 101 | top: "fc2" 102 | } 103 | layer { 104 | name: "drop2" 105 | type: "Dropout" 106 | bottom: "fc2" 107 | top: "fc2" 108 | dropout_param { 109 | dropout_ratio: 0.5 110 | } 111 | } 112 | layer { 113 | name: "fc3" 114 | type: "InnerProduct" 115 | bottom: "fc2" 116 | top: "fc3" 117 | param { 118 | lr_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | } 123 | inner_product_param { 124 | num_output: 200 125 | weight_filler { 126 | type: "xavier" 127 | } 128 | bias_filler { 129 | type: "constant" 130 | } 131 | } 132 | } 133 | layer { 134 | name: "accuracy" 135 | type: "Accuracy" 136 | bottom: "fc3" 137 | bottom: "label" 138 | top: "accuracy" 139 | include { 140 | phase: TEST 141 | } 142 | } 143 | layer { 144 | name: "loss" 145 | type: "SoftmaxWithLoss" 146 | bottom: "fc3" 147 | bottom: "label" 148 | top: "loss" 149 | } 150 | -------------------------------------------------------------------------------- /caffe_models/c3d_fc_net_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/c3d_fc_net.prototxt" 2 | test_iter: 10000 3 | test_interval: 10000 4 | base_lr: 0.01 5 | #base_lr: 0.0001 6 | momentum: 0.9 7 | #momentum2: 0.999 8 | #lr_policy: "fixed" 9 | lr_policy: "step" 10 | gamma: 0.1 11 | stepsize: 100000 # To change 12 | display: 500 13 | max_iter: 400000 14 | weight_decay: 0.0005 15 | snapshot: 50000 # To change 16 | snapshot_prefix: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/c3d_fc_net_snap" 17 
| #type: "Adam" 18 | solver_mode: GPU 19 | -------------------------------------------------------------------------------- /caffe_models/deploy_OF_alexnet_mirror.prototxt: -------------------------------------------------------------------------------- 1 | name: "OptFlowAlexNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 3 dim: 120 dim: 160 } } 7 | 8 | #transform_param { 9 | # scale: 0.00390625 10 | #} 11 | } 12 | layer { 13 | name: "conv1" 14 | type: "Convolution" 15 | bottom: "data" 16 | top: "conv1" 17 | param { 18 | lr_mult: 1 19 | } 20 | param { 21 | lr_mult: 2 22 | } 23 | convolution_param { 24 | num_output: 64 25 | kernel_size: 7 26 | stride: 1 27 | weight_filler { 28 | type: "xavier" 29 | #std: 0.01 30 | } 31 | bias_filler { 32 | type: "constant" 33 | #value: 0 34 | } 35 | } 36 | } 37 | layer { 38 | name: "relu1" 39 | type: "ReLU" 40 | bottom: "conv1" 41 | top: "conv1" 42 | } 43 | #layer { 44 | # name: "norm1" 45 | # type: "LRN" 46 | # bottom: "conv1" 47 | # top: "norm1" 48 | # lrn_param { 49 | # local_size: 5 50 | # alpha: 0.0001 51 | # beta: 0.75 52 | # } 53 | #} 54 | layer { 55 | name: "pool1" 56 | type: "Pooling" 57 | bottom: "conv1" 58 | top: "pool1" 59 | pooling_param { 60 | pool: MAX 61 | kernel_size: 2 62 | stride: 2 63 | } 64 | } 65 | layer { 66 | name: "conv2" 67 | type: "Convolution" 68 | bottom: "pool1" 69 | top: "conv2" 70 | param { 71 | lr_mult: 1 72 | } 73 | param { 74 | lr_mult: 2 75 | } 76 | convolution_param { 77 | num_output: 128 78 | #pad: 2 79 | kernel_size: 3 80 | stride: 2 81 | #group: 2 82 | weight_filler { 83 | type: "xavier" 84 | #std: 0.01 85 | } 86 | bias_filler { 87 | type: "constant" 88 | #value: 0.1 89 | } 90 | } 91 | } 92 | layer { 93 | name: "relu2" 94 | type: "ReLU" 95 | bottom: "conv2" 96 | top: "conv2" 97 | } 98 | #layer { 99 | # name: "norm2" 100 | # type: "LRN" 101 | # bottom: "conv2" 102 | # top: "norm2" 103 | # lrn_param { 104 | # local_size: 5 105 | # alpha: 0.0001 106 | # beta: 0.75 107 | # } 108 | #} 109 | layer { 110 | name: "pool2" 111 | type: "Pooling" 112 | bottom: "conv2" 113 | top: "pool2" 114 | pooling_param { 115 | pool: MAX 116 | kernel_size: 2 117 | stride: 2 118 | } 119 | } 120 | layer { 121 | name: "conv3" 122 | type: "Convolution" 123 | bottom: "pool2" 124 | top: "conv3" 125 | param { 126 | lr_mult: 1 127 | #decay_mult: 1 128 | } 129 | param { 130 | lr_mult: 2 131 | #decay_mult: 0 132 | } 133 | convolution_param { 134 | num_output: 192 135 | #pad: 1 136 | kernel_size: 3 137 | stride: 1 138 | weight_filler { 139 | type: "xavier" 140 | #std: 0.01 141 | } 142 | bias_filler { 143 | type: "constant" 144 | #value: 0 145 | } 146 | } 147 | } 148 | layer { 149 | name: "relu3" 150 | type: "ReLU" 151 | bottom: "conv3" 152 | top: "conv3" 153 | } 154 | layer { 155 | name: "conv4" 156 | type: "Convolution" 157 | bottom: "conv3" 158 | top: "conv4" 159 | param { 160 | lr_mult: 1 161 | #decay_mult: 1 162 | } 163 | param { 164 | lr_mult: 2 165 | #decay_mult: 0 166 | } 167 | convolution_param { 168 | num_output: 128 169 | #pad: 1 170 | kernel_size: 3 171 | stride: 1 172 | #group: 2 173 | weight_filler { 174 | type: "xavier" 175 | #std: 0.01 176 | } 177 | bias_filler { 178 | type: "constant" 179 | #value: 0.1 180 | } 181 | } 182 | } 183 | layer { 184 | name: "relu4" 185 | type: "ReLU" 186 | bottom: "conv4" 187 | top: "conv4" 188 | } 189 | layer { 190 | name: "conv5" 191 | type: "Convolution" 192 | bottom: "conv4" 193 | top: "conv5" 194 | param { 195 | lr_mult: 1 196 | #decay_mult: 1 197 
| } 198 | param { 199 | lr_mult: 2 200 | #decay_mult: 0 201 | } 202 | convolution_param { 203 | num_output: 128 204 | #pad: 1 205 | kernel_size: 3 206 | stride: 1 207 | #group: 2 208 | weight_filler { 209 | type: "xavier" 210 | #std: 0.01 211 | } 212 | bias_filler { 213 | type: "constant" 214 | #value: 0.1 215 | } 216 | } 217 | } 218 | layer { 219 | name: "relu5" 220 | type: "ReLU" 221 | bottom: "conv5" 222 | top: "conv5" 223 | } 224 | layer { 225 | name: "pool5" 226 | type: "Pooling" 227 | bottom: "conv5" 228 | top: "pool5" 229 | pooling_param { 230 | pool: MAX 231 | kernel_size: 3 232 | stride: 1 233 | } 234 | } 235 | layer { 236 | name: "fc6" 237 | type: "InnerProduct" 238 | bottom: "pool5" 239 | top: "fc6" 240 | param { 241 | lr_mult: 1 242 | #decay_mult: 1 243 | } 244 | param { 245 | lr_mult: 2 246 | #decay_mult: 0 247 | } 248 | inner_product_param { 249 | num_output: 128 250 | weight_filler { 251 | type: "xavier" 252 | #std: 0.005 253 | } 254 | bias_filler { 255 | type: "constant" 256 | #value: 0.1 257 | } 258 | } 259 | } 260 | layer { 261 | name: "relu6" 262 | type: "ReLU" 263 | bottom: "fc6" 264 | top: "fc6" 265 | } 266 | layer { 267 | name: "drop6" 268 | type: "Dropout" 269 | bottom: "fc6" 270 | top: "fc6" 271 | dropout_param { 272 | dropout_ratio: 0.5 273 | } 274 | } 275 | layer { 276 | name: "fc7" 277 | type: "InnerProduct" 278 | bottom: "fc6" 279 | top: "fc7" 280 | param { 281 | lr_mult: 1 282 | #decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | #decay_mult: 0 287 | } 288 | inner_product_param { 289 | num_output: 128 290 | weight_filler { 291 | type: "xavier" 292 | #std: 0.005 293 | } 294 | bias_filler { 295 | type: "constant" 296 | #value: 0.1 297 | } 298 | } 299 | } 300 | layer { 301 | name: "relu7" 302 | type: "ReLU" 303 | bottom: "fc7" 304 | top: "fc7" 305 | } 306 | layer { 307 | name: "drop7" 308 | type: "Dropout" 309 | bottom: "fc7" 310 | top: "fc7" 311 | dropout_param { 312 | dropout_ratio: 0.5 313 | } 314 | } 315 | layer { 316 | name: "fc8" 317 | type: "InnerProduct" 318 | bottom: "fc7" 319 | top: "fc8" 320 | param { 321 | lr_mult: 1 322 | #decay_mult: 1 323 | } 324 | param { 325 | lr_mult: 2 326 | #decay_mult: 0 327 | } 328 | inner_product_param { 329 | num_output: 6 330 | weight_filler { 331 | type: "xavier" 332 | #std: 0.01 333 | } 334 | bias_filler { 335 | type: "constant" 336 | #value: 0 337 | } 338 | } 339 | } 340 | layer { 341 | name: "prob" 342 | type: "Softmax" 343 | bottom: "fc8" 344 | top: "prob" 345 | } 346 | -------------------------------------------------------------------------------- /caffe_models/deploy_c3d_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "C3DNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 500 dim: 1 dim: 1 } } 7 | } 8 | layer { 9 | name: "fc1" 10 | type: "InnerProduct" 11 | bottom: "data" 12 | top: "fc1" 13 | param { 14 | lr_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | decay_mult: 0 19 | } 20 | inner_product_param { 21 | num_output: 1024 22 | weight_filler { 23 | type: "xavier" 24 | } 25 | bias_filler { 26 | type: "constant" 27 | } 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "fc1" 34 | top: "fc1" 35 | } 36 | layer { 37 | name: "drop1" 38 | type: "Dropout" 39 | bottom: "fc1" 40 | top: "fc1" 41 | dropout_param { 42 | dropout_ratio: 0.5 43 | } 44 | } 45 | layer { 46 | name: "fc2" 47 | type: "InnerProduct" 48 | bottom: "fc1" 49 | top: "fc2" 50 | param { 51 | lr_mult: 1 52 | } 
53 | param { 54 | lr_mult: 2 55 | } 56 | inner_product_param { 57 | num_output: 1024 58 | weight_filler { 59 | type: "xavier" 60 | } 61 | bias_filler { 62 | type: "constant" 63 | } 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "fc2" 70 | top: "fc2" 71 | } 72 | layer { 73 | name: "drop2" 74 | type: "Dropout" 75 | bottom: "fc2" 76 | top: "fc2" 77 | dropout_param { 78 | dropout_ratio: 0.5 79 | } 80 | } 81 | layer { 82 | name: "fc3" 83 | type: "InnerProduct" 84 | bottom: "fc2" 85 | top: "fc3" 86 | param { 87 | lr_mult: 1 88 | } 89 | param { 90 | lr_mult: 2 91 | } 92 | inner_product_param { 93 | num_output: 200 94 | weight_filler { 95 | type: "xavier" 96 | } 97 | bias_filler { 98 | type: "constant" 99 | } 100 | } 101 | } 102 | layer { 103 | name: "prob" 104 | type: "Softmax" 105 | bottom: "fc3" 106 | top: "prob" 107 | } 108 | -------------------------------------------------------------------------------- /caffe_models/deploy_hog_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "HOGNet" 2 | layer { 3 | name: "data" 4 | type: "Input" 5 | top: "data" 6 | input_param { shape: { dim: 1 dim: 9576 dim: 1 dim: 1 } } 7 | } 8 | layer { 9 | name: "fc1" 10 | type: "InnerProduct" 11 | bottom: "data" 12 | top: "fc1" 13 | param { 14 | lr_mult: 1 15 | } 16 | param { 17 | lr_mult: 2 18 | decay_mult: 0 19 | } 20 | inner_product_param { 21 | num_output: 1024 22 | weight_filler { 23 | type: "xavier" 24 | } 25 | bias_filler { 26 | type: "constant" 27 | } 28 | } 29 | } 30 | layer { 31 | name: "relu1" 32 | type: "ReLU" 33 | bottom: "fc1" 34 | top: "fc1" 35 | } 36 | layer { 37 | name: "drop1" 38 | type: "Dropout" 39 | bottom: "fc1" 40 | top: "fc1" 41 | dropout_param { 42 | dropout_ratio: 0.5 43 | } 44 | } 45 | layer { 46 | name: "fc2" 47 | type: "InnerProduct" 48 | bottom: "fc1" 49 | top: "fc2" 50 | param { 51 | lr_mult: 1 52 | } 53 | param { 54 | lr_mult: 2 55 | } 56 | inner_product_param { 57 | num_output: 1024 58 | weight_filler { 59 | type: "xavier" 60 | } 61 | bias_filler { 62 | type: "constant" 63 | } 64 | } 65 | } 66 | layer { 67 | name: "relu2" 68 | type: "ReLU" 69 | bottom: "fc2" 70 | top: "fc2" 71 | } 72 | layer { 73 | name: "drop2" 74 | type: "Dropout" 75 | bottom: "fc2" 76 | top: "fc2" 77 | dropout_param { 78 | dropout_ratio: 0.5 79 | } 80 | } 81 | layer { 82 | name: "fc3" 83 | type: "InnerProduct" 84 | bottom: "fc2" 85 | top: "fc3" 86 | param { 87 | lr_mult: 1 88 | } 89 | param { 90 | lr_mult: 2 91 | } 92 | inner_product_param { 93 | num_output: 200 94 | weight_filler { 95 | type: "xavier" 96 | } 97 | bias_filler { 98 | type: "constant" 99 | } 100 | } 101 | } 102 | layer { 103 | name: "prob" 104 | type: "Softmax" 105 | bottom: "fc3" 106 | top: "prob" 107 | } 108 | -------------------------------------------------------------------------------- /caffe_models/hog_fc_net.prototxt: -------------------------------------------------------------------------------- 1 | name: "HOGNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: false 12 | mean_file: "mean_hog_4k.binaryproto" 13 | #scale: 0.00390625 14 | } 15 | data_param { 16 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/train_hog_lmdb" 17 | batch_size: 64 18 | backend: LMDB 19 | } 20 | } 21 | layer { 22 | name: "data" 23 | type: "Data" 24 | top: "data" 25 | top: "label" 26 | include { 27 | phase: TEST 28 | } 29 | transform_param { 30 | mirror: false 31 
| mean_file: "mean_hog_4k.binaryproto" 32 | } 33 | data_param { 34 | source: "/home/hadoop/VisionWorkspace/ActivityNet/new_lmdb/val_hog_lmdb" 35 | batch_size: 50 36 | backend: LMDB 37 | } 38 | } 39 | layer { 40 | name: "fc1" 41 | type: "InnerProduct" 42 | bottom: "data" 43 | top: "fc1" 44 | param { 45 | lr_mult: 1 46 | } 47 | param { 48 | lr_mult: 2 49 | decay_mult: 0 50 | } 51 | inner_product_param { 52 | num_output: 4096 53 | weight_filler { 54 | type: "xavier" 55 | } 56 | bias_filler { 57 | type: "constant" 58 | } 59 | } 60 | } 61 | layer { 62 | name: "relu1" 63 | type: "ReLU" 64 | bottom: "fc1" 65 | top: "fc1" 66 | } 67 | layer { 68 | name: "drop1" 69 | type: "Dropout" 70 | bottom: "fc1" 71 | top: "fc1" 72 | dropout_param { 73 | dropout_ratio: 0.5 74 | } 75 | } 76 | layer { 77 | name: "fc2" 78 | type: "InnerProduct" 79 | bottom: "fc1" 80 | top: "fc2" 81 | param { 82 | lr_mult: 1 83 | } 84 | param { 85 | lr_mult: 2 86 | } 87 | inner_product_param { 88 | num_output: 4096 89 | weight_filler { 90 | type: "xavier" 91 | } 92 | bias_filler { 93 | type: "constant" 94 | } 95 | } 96 | } 97 | layer { 98 | name: "relu2" 99 | type: "ReLU" 100 | bottom: "fc2" 101 | top: "fc2" 102 | } 103 | layer { 104 | name: "drop2" 105 | type: "Dropout" 106 | bottom: "fc2" 107 | top: "fc2" 108 | dropout_param { 109 | dropout_ratio: 0.5 110 | } 111 | } 112 | layer { 113 | name: "fc3" 114 | type: "InnerProduct" 115 | bottom: "fc2" 116 | top: "fc3" 117 | param { 118 | lr_mult: 1 119 | } 120 | param { 121 | lr_mult: 2 122 | } 123 | inner_product_param { 124 | num_output: 200 125 | weight_filler { 126 | type: "xavier" 127 | } 128 | bias_filler { 129 | type: "constant" 130 | } 131 | } 132 | } 133 | layer { 134 | name: "accuracy" 135 | type: "Accuracy" 136 | bottom: "fc3" 137 | bottom: "label" 138 | top: "accuracy" 139 | include { 140 | phase: TEST 141 | } 142 | } 143 | layer { 144 | name: "loss" 145 | type: "SoftmaxWithLoss" 146 | bottom: "fc3" 147 | bottom: "label" 148 | top: "loss" 149 | } 150 | -------------------------------------------------------------------------------- /caffe_models/hog_fc_net_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/hog_fc_net.prototxt" 2 | test_iter: 4000 3 | test_interval: 10000 4 | base_lr: 0.01 5 | #base_lr: 0.0001 6 | momentum: 0.9 7 | #momentum2: 0.999 8 | #lr_policy: "fixed" 9 | lr_policy: "step" 10 | gamma: 0.1 11 | stepsize: 100000 # To change 12 | display: 500 13 | max_iter: 400000 14 | weight_decay: 0.0005 15 | snapshot: 50000 # To change 16 | snapshot_prefix: "/home/hadoop/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/hog_fc_net_snap" 17 | #type: "Adam" 18 | solver_mode: GPU 19 | -------------------------------------------------------------------------------- /caffe_models/mean_c3d.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_c3d.binaryproto -------------------------------------------------------------------------------- /caffe_models/mean_c3d_10k.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_c3d_10k.binaryproto -------------------------------------------------------------------------------- 
/caffe_models/mean_c3d_4k.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_c3d_4k.binaryproto -------------------------------------------------------------------------------- /caffe_models/mean_hog_4k.binaryproto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/mean_hog_4k.binaryproto -------------------------------------------------------------------------------- /caffe_models/optical_flow_alexnet_mirror.prototxt: -------------------------------------------------------------------------------- 1 | name: "OptFlowAlexNet" 2 | layer { 3 | name: "data" 4 | type: "Data" 5 | top: "data" 6 | top: "label" 7 | include { 8 | phase: TRAIN 9 | } 10 | transform_param { 11 | mirror: true 12 | #crop_size: 227 13 | mean_file: "mean_image.binaryproto" 14 | #scale: 0.00390625 15 | } 16 | data_param { 17 | source: "/home/arpan/VisionWorkspace/ActivityNet/train_OF_lmdb" 18 | batch_size: 64 19 | backend: LMDB 20 | } 21 | } 22 | layer { 23 | name: "data" 24 | type: "Data" 25 | top: "data" 26 | top: "label" 27 | include { 28 | phase: TEST 29 | } 30 | transform_param { 31 | mirror: false 32 | #crop_size: 227 33 | mean_file: "mean_image.binaryproto" 34 | #scale: 0.00390625 35 | } 36 | data_param { 37 | source: "/home/arpan/VisionWorkspace/ActivityNet/val_OF_lmdb" 38 | batch_size: 50 39 | backend: LMDB 40 | } 41 | } 42 | layer { 43 | name: "conv1" 44 | type: "Convolution" 45 | bottom: "data" 46 | top: "conv1" 47 | param { 48 | lr_mult: 1 49 | decay_mult: 1 50 | } 51 | param { 52 | lr_mult: 2 53 | decay_mult: 0 54 | } 55 | convolution_param { 56 | num_output: 64 57 | kernel_size: 7 58 | stride: 1 59 | weight_filler { 60 | type: "gaussian" 61 | std: 0.01 62 | } 63 | bias_filler { 64 | type: "constant" 65 | value: 0 66 | } 67 | } 68 | } 69 | layer { 70 | name: "relu1" 71 | type: "ReLU" 72 | bottom: "conv1" 73 | top: "conv1" 74 | } 75 | #layer { 76 | # name: "norm1" 77 | # type: "LRN" 78 | # bottom: "conv1" 79 | # top: "norm1" 80 | # lrn_param { 81 | # local_size: 5 82 | # alpha: 0.0001 83 | # beta: 0.75 84 | # } 85 | #} 86 | layer { 87 | name: "pool1" 88 | type: "Pooling" 89 | bottom: "conv1" 90 | top: "pool1" 91 | pooling_param { 92 | pool: MAX 93 | kernel_size: 2 94 | stride: 2 95 | } 96 | } 97 | layer { 98 | name: "conv2" 99 | type: "Convolution" 100 | bottom: "pool1" 101 | top: "conv2" 102 | param { 103 | lr_mult: 1 104 | decay_mult: 1 105 | } 106 | param { 107 | lr_mult: 2 108 | decay_mult: 0 109 | } 110 | convolution_param { 111 | num_output: 128 112 | #pad: 2 113 | kernel_size: 3 114 | stride: 2 115 | #group: 2 116 | weight_filler { 117 | #type: "xavier" 118 | #std: 0.01 119 | type: "gaussian" 120 | std: 0.01 121 | } 122 | bias_filler { 123 | type: "constant" 124 | value: 1 125 | } 126 | } 127 | } 128 | layer { 129 | name: "relu2" 130 | type: "ReLU" 131 | bottom: "conv2" 132 | top: "conv2" 133 | } 134 | #layer { 135 | # name: "norm2" 136 | # type: "LRN" 137 | # bottom: "conv2" 138 | # top: "norm2" 139 | # lrn_param { 140 | # local_size: 5 141 | # alpha: 0.0001 142 | # beta: 0.75 143 | # } 144 | #} 145 | layer { 146 | name: "pool2" 147 | type: "Pooling" 148 | bottom: "conv2" 149 | top: "pool2" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 2 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: 
"conv3" 158 | type: "Convolution" 159 | bottom: "pool2" 160 | top: "conv3" 161 | param { 162 | lr_mult: 1 163 | decay_mult: 1 164 | } 165 | param { 166 | lr_mult: 2 167 | decay_mult: 0 168 | } 169 | convolution_param { 170 | num_output: 192 171 | #pad: 1 172 | kernel_size: 3 173 | stride: 1 174 | weight_filler { 175 | #type: "xavier" 176 | #std: 0.01 177 | type: "gaussian" 178 | std: 0.01 179 | } 180 | bias_filler { 181 | type: "constant" 182 | value: 0 183 | } 184 | } 185 | } 186 | layer { 187 | name: "relu3" 188 | type: "ReLU" 189 | bottom: "conv3" 190 | top: "conv3" 191 | } 192 | layer { 193 | name: "conv4" 194 | type: "Convolution" 195 | bottom: "conv3" 196 | top: "conv4" 197 | param { 198 | lr_mult: 1 199 | decay_mult: 1 200 | } 201 | param { 202 | lr_mult: 2 203 | decay_mult: 0 204 | } 205 | convolution_param { 206 | num_output: 128 207 | #pad: 1 208 | kernel_size: 3 209 | stride: 1 210 | #group: 2 211 | weight_filler { 212 | #type: "xavier" 213 | type: "gaussian" 214 | std: 0.01 215 | } 216 | bias_filler { 217 | type: "constant" 218 | value: 1 219 | } 220 | } 221 | } 222 | layer { 223 | name: "relu4" 224 | type: "ReLU" 225 | bottom: "conv4" 226 | top: "conv4" 227 | } 228 | layer { 229 | name: "conv5" 230 | type: "Convolution" 231 | bottom: "conv4" 232 | top: "conv5" 233 | param { 234 | lr_mult: 1 235 | decay_mult: 1 236 | } 237 | param { 238 | lr_mult: 2 239 | decay_mult: 0 240 | } 241 | convolution_param { 242 | num_output: 128 243 | #pad: 1 244 | kernel_size: 3 245 | stride: 1 246 | #group: 2 247 | weight_filler { 248 | #type: "xavier" 249 | type: "gaussian" 250 | std: 0.01 251 | } 252 | bias_filler { 253 | type: "constant" 254 | value: 1 255 | } 256 | } 257 | } 258 | layer { 259 | name: "relu5" 260 | type: "ReLU" 261 | bottom: "conv5" 262 | top: "conv5" 263 | } 264 | layer { 265 | name: "pool5" 266 | type: "Pooling" 267 | bottom: "conv5" 268 | top: "pool5" 269 | pooling_param { 270 | pool: MAX 271 | kernel_size: 3 272 | stride: 1 273 | } 274 | } 275 | layer { 276 | name: "fc6" 277 | type: "InnerProduct" 278 | bottom: "pool5" 279 | top: "fc6" 280 | param { 281 | lr_mult: 1 282 | decay_mult: 1 283 | } 284 | param { 285 | lr_mult: 2 286 | decay_mult: 0 287 | } 288 | inner_product_param { 289 | num_output: 512 290 | weight_filler { 291 | #type: "xavier" 292 | type: "gaussian" 293 | std: 0.005 294 | } 295 | bias_filler { 296 | type: "constant" 297 | value: 1 298 | } 299 | } 300 | } 301 | layer { 302 | name: "relu6" 303 | type: "ReLU" 304 | bottom: "fc6" 305 | top: "fc6" 306 | } 307 | layer { 308 | name: "drop6" 309 | type: "Dropout" 310 | bottom: "fc6" 311 | top: "fc6" 312 | dropout_param { 313 | dropout_ratio: 0.5 314 | } 315 | } 316 | layer { 317 | name: "fc7" 318 | type: "InnerProduct" 319 | bottom: "fc6" 320 | top: "fc7" 321 | param { 322 | lr_mult: 1 323 | decay_mult: 1 324 | } 325 | param { 326 | lr_mult: 2 327 | decay_mult: 0 328 | } 329 | inner_product_param { 330 | num_output: 512 331 | weight_filler { 332 | type: "gaussian" 333 | std: 0.005 334 | } 335 | bias_filler { 336 | type: "constant" 337 | value: 1 338 | } 339 | } 340 | } 341 | layer { 342 | name: "relu7" 343 | type: "ReLU" 344 | bottom: "fc7" 345 | top: "fc7" 346 | } 347 | layer { 348 | name: "drop7" 349 | type: "Dropout" 350 | bottom: "fc7" 351 | top: "fc7" 352 | dropout_param { 353 | dropout_ratio: 0.5 354 | } 355 | } 356 | layer { 357 | name: "fc8" 358 | type: "InnerProduct" 359 | bottom: "fc7" 360 | top: "fc8" 361 | param { 362 | lr_mult: 1 363 | decay_mult: 1 364 | } 365 | param { 366 | lr_mult: 2 367 | 
decay_mult: 0 368 | } 369 | inner_product_param { 370 | num_output: 200 371 | weight_filler { 372 | type: "gaussian" 373 | std: 0.01 374 | } 375 | bias_filler { 376 | type: "constant" 377 | value: 0 378 | } 379 | } 380 | } 381 | layer { 382 | name: "accuracy" 383 | type: "Accuracy" 384 | bottom: "fc8" 385 | bottom: "label" 386 | top: "accuracy" 387 | include { 388 | phase: TEST 389 | } 390 | } 391 | layer { 392 | name: "loss" 393 | type: "SoftmaxWithLoss" 394 | bottom: "fc8" 395 | bottom: "label" 396 | top: "loss" 397 | } 398 | -------------------------------------------------------------------------------- /caffe_models/optical_flow_alexnet_mirror_solver.prototxt: -------------------------------------------------------------------------------- 1 | net: "/home/arpan/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/optical_flow_alexnet_mirror.prototxt" 2 | test_iter: 2000 3 | test_interval: 1000 4 | base_lr: 0.01 5 | lr_policy: "step" 6 | gamma: 0.1 7 | stepsize: 25000 # To change 8 | display: 100 9 | max_iter: 100000 10 | momentum: 0.9 11 | weight_decay: 0.0005 12 | snapshot: 25000 # To change 13 | snapshot_prefix: "/home/arpan/VisionWorkspace/ActivityNet/ActivityNet-master/caffe_models/snapshots/OF_alexnet_mirror_snap" 14 | solver_mode: GPU 15 | -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.caffemodel -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.solverstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_10k_2500_adam_e4/c3d_fc_net_snap_iter_400000.solverstate -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.caffemodel -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.solverstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k/c3d_fc_net_snap_iter_400000.solverstate -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.caffemodel: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.caffemodel -------------------------------------------------------------------------------- /caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.solverstate: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/arpane4c5/ActivityNet/31a0972bb7461107e24d2be4fb76bf168382016f/caffe_models/snapshots/c3d_4k_1k_adam_e4/c3d_fc_net_snap_iter_200000.solverstate --------------------------------------------------------------------------------
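
Usage note (not part of the repository): the files listed above fit together as mean blobs (`.binaryproto`), the OptFlowAlexNet training net and its solver, and C3D snapshot weights/solver states. The sketch below shows one plausible way to consume them with pycaffe. It is a minimal sketch, not the authors' code: the repository-relative paths are taken from the tree above, but the commented-out snapshot iteration number is illustrative only, and the `np.squeeze` handling of the mean blob is an assumption about how the mean was saved.

```python
import numpy as np
import caffe

# The solver above sets solver_mode: GPU; switch to caffe.set_mode_cpu() if needed.
caffe.set_mode_gpu()

# --- Read a stored mean blob (.binaryproto) into a numpy array -------------
blob = caffe.proto.caffe_pb2.BlobProto()
with open('caffe_models/mean_c3d_4k.binaryproto', 'rb') as f:
    blob.ParseFromString(f.read())
# Shape follows whatever was written when the mean was computed; squeeze is an
# assumption to drop singleton axes.
mean = np.squeeze(caffe.io.blobproto_to_array(blob))

# --- Resume (or start) training the optical-flow AlexNet -------------------
# Note: the net and solver prototxts above hard-code absolute paths
# (/home/arpan/VisionWorkspace/...); point them at your own checkout and
# LMDBs before running.
solver = caffe.get_solver('caffe_models/optical_flow_alexnet_mirror_solver.prototxt')
# To resume from an existing snapshot, pass a .solverstate produced under the
# snapshot_prefix configured in the solver (the iteration number here is a
# hypothetical example, not a file that exists in this repo):
# solver.restore('caffe_models/snapshots/OF_alexnet_mirror_snap_iter_25000.solverstate')
solver.solve()
```

The same training run can also be launched with the stock Caffe CLI, e.g. `caffe train --solver=caffe_models/optical_flow_alexnet_mirror_solver.prototxt --gpu 0`, again after fixing the absolute paths inside the prototxts.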