├── .gitignore ├── videofeatures ├── BaseFeatureExtractor.py ├── __init__.py ├── TwentyBNDatasetProvider.py ├── CNNFeatures.py ├── ActivityNetDatasetProvider.py ├── CVFeatures.py └── VideoFeatures.py ├── requirements.txt ├── config.ini ├── .travis.yml ├── setup.py ├── LICENSE ├── tests.py └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.pyc 3 | 4 | DataDumps -------------------------------------------------------------------------------- /videofeatures/BaseFeatureExtractor.py: -------------------------------------------------------------------------------- 1 | class BaseFeatures: 2 | 3 | def computeFeatures(self, frames_batch): 4 | raise NotImplementedError 5 | 6 | 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Keras==2.1.2 2 | pandas==0.21.0 3 | numpy==1.13.3 4 | gulpio==521.36 5 | opencv_python==3.4.0.12 6 | fishervector==0.1 7 | scikit_learn==0.19.1 8 | -------------------------------------------------------------------------------- /config.ini: -------------------------------------------------------------------------------- 1 | [GULP] 2 | train_data_gulp = /common/homes/students/ferreira/20bn-someting-gulp/train 3 | valid_data_gulp = /common/homes/students/ferreira/20bn-someting-gulp/valid 4 | test_data_gulp = /common/homes/students/ferreira/20bn-someting-gulp/test -------------------------------------------------------------------------------- /videofeatures/__init__.py: -------------------------------------------------------------------------------- 1 | from videofeatures.VideoFeatures import Pipeline 2 | from videofeatures.ActivityNetDatasetProvider import ActivityNetDataset 3 | from videofeatures.TwentyBNDatasetProvider import TwentyBNDataset 4 | from videofeatures.CNNFeatures import ResNetFeatures 5 | from videofeatures.CNNFeatures import VGGFeatures 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "3.4" 5 | - "3.5" 6 | - "3.5-dev" # 3.5 development branch 7 | - "3.6" 8 | - "3.6-dev" # 3.6 development branch 9 | 10 | # command to install dependencies 11 | install: 12 | - pip install -r requirements.txt 13 | 14 | # command to run tests 15 | script: 16 | - python tests.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name="videofeatures", 4 | version='0.1', 5 | description='Feature extraction from video or image (ResNet, VGG, SIFT, SURF) and training of a Fisher Vector GMM to compute (improved) ' 6 | 'Fisher Vectors', 7 | url='https://github.com/jonasrothfuss/python-image-video-features', 8 | author='Jonas Rothfuss, Fabio Ferreira', 9 | author_email='fabioferreira@mailbox.org', 10 | license='MIT', 11 | packages=['videofeatures'], 12 | test_suite='nose.collector', 13 | tests_require=['nose'], 14 | install_requires=[ 15 | 'numpy', 16 | 'scikit_learn' 17 | 'py-fisher-vector' 18 | 'keras' 19 | 'pandas' 20 | 'opencv_python' 21 | 'gulpio' 22 | ], 23 | zip_safe=False) -------------------------------------------------------------------------------- /videofeatures/TwentyBNDatasetProvider.py: -------------------------------------------------------------------------------- 1 | from gulpio.transforms import CenterCrop, ComposeVideo, Scale 2 | from gulpio.dataset import GulpVideoDataset 3 | from gulpio.loader import DataLoader 4 | 5 | 6 | class TwentyBNDataset: 7 | 8 | def __init__(self, train_dir, valid_dir, batch_size=20, n_frames=20): 9 | transforms = ComposeVideo([CenterCrop(128), Scale((224, 224))]) 10 | self.n_frames = n_frames 11 | 12 | self.train_dataset = GulpVideoDataset(train_dir, n_frames, 1, False, transform=transforms) 13 | self.train_loader = DataLoader(self.train_dataset, batch_size=batch_size, shuffle=False, num_workers=8, drop_last=True) 14 | 15 | self.val_dataset = GulpVideoDataset(valid_dir, n_frames, 1, False, transform=transforms) 16 | self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False, num_workers=8, drop_last=True) 17 | 18 | def getDataLoader(self, train=False): 19 | if train: 20 | return self.train_loader 21 | else: 22 | return self.val_loader 23 | 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jonas Rothfuss 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /tests.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | import unittest 3 | import numpy as np 4 | 5 | from videofeatures.TwentyBNDatasetProvider import TwentyBNDataset 6 | from videofeatures.CNNFeatures import ResNetFeatures 7 | from videofeatures.VideoFeatures import Pipeline 8 | 9 | 10 | class PipelineTwentyBNTest(unittest.TestCase): 11 | def setUp(self): 12 | self.config = configparser.ConfigParser() 13 | self.config.read('../config.ini') 14 | self.train_dir = self.config['GULP']['train_data_gulp'] 15 | self.valid_dir = self.config['GULP']['valid_data_gulp'] 16 | self.dataset = TwentyBNDataset(batch_size=20, train_dir=self.train_dir, valid_dir=self.valid_dir).getDataLoader( 17 | train=False) 18 | self.extractor = ResNetFeatures() 19 | self._base_dir = "../output" 20 | self.pipeline = Pipeline(dataset=self.dataset, extractor=self.extractor, dataset_name="twentybn", 21 | base_dir=self._base_dir) 22 | 23 | 24 | def test_00_feature_extraction_gulp(self): 25 | self.pipeline.extractFeatures() 26 | 27 | def test_01_load_features_and_train_gmm_gulp(self): 28 | features, labels = self.pipeline.loadFeatures() 29 | self.pipeline.trainFisherVectorGMM(features) 30 | 31 | def test_01_load_trained_gmm_gulp(self): 32 | self.pipeline.loadFisherVectorGMM() 33 | 34 | def test_02_compute_fv_gulp(self): 35 | features, labels = self.pipeline.loadFeatures() 36 | fv_gmm = self.pipeline.loadFisherVectorGMM() 37 | self.pipeline.computeFisherVectors(features=features, labels=labels, fv_gmm=fv_gmm) 38 | 39 | def test_03_load_fv_gulp(self): 40 | self.pipeline.loadFisherVectors() 41 | 42 | 43 | 44 | class PipelineTest(unittest.TestCase): 45 | def setUp(self): 46 | """ use a dummy Pipeline without setting up dataset and extractor""" 47 | self._base_dir = "../output" 48 | self.pipeline = Pipeline(dataset=None, extractor=None, dataset_name="nprandom", 49 | base_dir=self._base_dir) 50 | 51 | 52 | def test_00_load_features_and_train_gmm(self): 53 | features = np.random.normal(size=(50, 20, 80, 1)) 54 | self.pipeline.trainFisherVectorGMM(features) 55 | 56 | def test_01_compute_fv(self): 57 | features = np.random.normal(size=(50, 20, 80, 1)) 58 | labels = np.random.randint(1, 10, size=50) 59 | fv_gmm = self.pipeline.loadFisherVectorGMM() 60 | self.pipeline.computeFisherVectors(features=features, labels=labels, fv_gmm=fv_gmm) 61 | 62 | def test_02_load_fv(self): 63 | self.pipeline.loadFisherVectors() 64 | 65 | 66 | if __name__ == '__main__': 67 | test_classes_to_run = [PipelineTest, PipelineTwentyBNTest] 68 | 69 | loader = unittest.TestLoader() 70 | suites_list = [] 71 | for test_class in test_classes_to_run: 72 | suite = loader.loadTestsFromTestCase(test_class) 73 | suites_list.append(suite) 74 | 75 | big_suite = unittest.TestSuite(suites_list) 76 | 77 | runner = unittest.TextTestRunner() 78 | results = runner.run(big_suite) 79 | 80 | unittest.main() 81 | 82 | 83 | 84 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Downloads](http://pepy.tech/badge/videofeatures)](http://pepy.tech/count/videofeatures) 2 | 3 | # Description 4 | This package implements the feature extraction from video or image (currently supported: **ResNet**, **VGG**, **SIFT** and **SURF**) and based on these features, the training of a Fisher Vector GMM and computation of (improved) Fisher Vectors. A perhaps useful application of this implementation is to set-up a feature extractor baseline and compare one's approach to both FV and standard computer vision features. 5 | In particular, the package covers the following functionalities: 6 | 1) **extraction**, exporting and restoring **of several features from videos** or image data 7 | 2) **training a Fisher Vector GMM (FVGMM)** for a Fisher Vector encoding based on the features, then exporting the fitted FVGMM model parameters 8 | 3) **computation of (improved) Fisher Vectors** from the FVGMM and features 9 | 10 | For representation of datasets we use the convenient and straightforward GulpIO storage format. The above mentioned feature extractors are ready-to-use with this package and adding more extractors can be easily done. 11 | 12 | # Installation and import 13 | ``` 14 | $ pip install videofeatures 15 | ``` 16 | then import the package with 17 | ``` 18 | from videofeatures import Pipeline, ActivityNetDataset, ResNetFeatures 19 | ``` 20 | 21 | # First steps 22 | ### 1. Setting up the dataset 23 | It is very straightforward to bring your dataset into the right 'gulp' format. The GulpIO documentation gets you started quickly [1]. If you're using one of the prevalent datasets, e.g. ActivityNet, Kinetics or TwentyBN-something-something, it's even simpler to get you started - simply use the available adapter [2] to gulp your local files. 24 | 25 | ### 2. Initialization 26 | First, we instantiate both dataset (here ActivityNet) and extractor (ResNet) 27 | ``` 28 | activitynet = ActivityNetDataset(batch_size=20, train_dir=path_train, valid_dir=path_train).getDataLoader(train=True) 29 | resnet = ResNetFeatures() 30 | pipeline = Pipeline(dataset=activitynet, extractor=resnet, base_dir="./output") 31 | ``` 32 | 33 | ### 3. Feature extraction, GMM training and FV computation 34 | Having initialized the pipeline, we can do 35 | ``` 36 | features, labels = pipeline.extractFeatures() 37 | fisher_vector_gmm = pipeline.trainFisherVectorGMM(features) 38 | fisher_vectors, labels = pipeline.computeFisherVectors(features=features, labels=labels, fv_gmm=fisher_vector_gmm) 39 | ``` 40 | 41 | ### Full example 42 | A full example can be viewed in the following gist: 43 | https://gist.github.com/ferreirafabio/60323a87ba80c052ab272ff769149577 44 | 45 | # Available imports 46 | if you want to use 47 | * VGG, SIFT and SURF features: `from videofeatures import VGGFeatures, SIFTFeatures, SURFFeatures` 48 | * TwentyBNDataset: `from videofeatures import TwentyBNDataset` 49 | 50 | ## Contributors: 51 | * Jonas Rothfuss (https://github.com/jonasrothfuss/) 52 | * Fabio Ferreira (https://github.com/ferreirafabio/) 53 | 54 | ## References 55 | - [1] https://github.com/TwentyBN/GulpIO 56 | - [2] https://github.com/TwentyBN/GulpIO/blob/master/src/main/python/gulpio/adapters.py 57 | -------------------------------------------------------------------------------- /videofeatures/CNNFeatures.py: -------------------------------------------------------------------------------- 1 | import keras.applications.vgg16 as vgg16 2 | import keras.applications.resnet50 as resnet50 3 | from keras.models import Model 4 | from gulpio.loader import DataLoader 5 | import numpy as np 6 | import pandas as pd 7 | 8 | 9 | 10 | 11 | class CNNFeatures: 12 | 13 | def computeFeaturesForVideoDataset(self, dataloader, pickle_path=None): 14 | """ 15 | Computes Feature Vectors for the video dataset provided via a dataloader object 16 | :param dataloader: gulpIO Dataloader object which represents a dataset 17 | :param pickle_path: (optional) if provided the features are pickeled to the specified location 18 | :return: (features, labels) - features as ndarray of shape (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) and labels of videos 19 | """ 20 | assert isinstance(dataloader, DataLoader) 21 | 22 | feature_batch_list = [] 23 | labels = [] 24 | n_batches = len(dataloader) 25 | for i, (data_batch, label_batch) in enumerate(dataloader): 26 | assert data_batch.ndim == 5 27 | n_frames = data_batch.shape[1] 28 | 29 | frames_batch = data_batch.reshape( 30 | (data_batch.shape[0] * n_frames, data_batch.shape[2], data_batch.shape[3], data_batch.shape[4])) 31 | frames_batch = frames_batch.astype('float32') 32 | 33 | feature_batch = self.computeFeatures(frames_batch) 34 | assert feature_batch.ndim == 2 35 | feature_batch = feature_batch.reshape((data_batch.shape[0], data_batch.shape[1], feature_batch.shape[1])) 36 | 37 | feature_batch_list.append(feature_batch) 38 | labels.extend(label_batch) 39 | print("batch %i of %i" % (i, n_batches)) 40 | 41 | features = np.concatenate(feature_batch_list, axis=0) 42 | 43 | # reshape features to (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) 44 | features = features.reshape((features.shape[0], features.shape[1], 1, features.shape[2])) 45 | assert features.shape[0] == len(labels) and features.ndim == 4 46 | 47 | # store as pandas dataframe 48 | if pickle_path: 49 | df = pd.DataFrame(data={'labels': labels, 'features': np.vsplit(features, features.shape[0])}) 50 | print('Dumped feature dataframe to', pickle_path) 51 | df.to_pickle(pickle_path) 52 | 53 | return features, labels 54 | 55 | 56 | class VGGFeatures(CNNFeatures): 57 | 58 | def __init__(self, feature = 'fc1'): 59 | self.base_model = vgg16.VGG16() 60 | assert feature in ["fc1", "fc2"] 61 | self.model = Model(inputs=self.base_model.input, outputs=self.base_model.get_layer(feature).output) 62 | 63 | def computeFeatures(self, video): 64 | x = vgg16.preprocess_input(video) 65 | features = self.model.predict(x) 66 | return features 67 | 68 | 69 | class ResNetFeatures(CNNFeatures): 70 | def __init__(self): 71 | self.base_model = resnet50.ResNet50() 72 | self.model = Model(inputs=self.base_model.input, outputs=self.base_model.get_layer('avg_pool').output) 73 | 74 | def computeFeatures(self, video): 75 | x = resnet50.preprocess_input(video) 76 | features = self.model.predict(x) 77 | return features.reshape((-1, 2048)) 78 | 79 | -------------------------------------------------------------------------------- /videofeatures/ActivityNetDatasetProvider.py: -------------------------------------------------------------------------------- 1 | import os, random, csv 2 | from gulpio.adapters import AbstractDatasetAdapter, Custom20BNAdapterMixin 3 | from gulpio.transforms import CenterCrop, ComposeVideo, Scale 4 | from gulpio.dataset import GulpVideoDataset 5 | from gulpio.loader import DataLoader 6 | 7 | 8 | 9 | from gulpio.utils import ( 10 | resize_images, 11 | burst_video_into_frames, 12 | temp_dir_for_bursting, 13 | remove_entries_with_duplicate_ids, 14 | ) 15 | 16 | 17 | 18 | class ActivityNetDataset: 19 | 20 | def __init__(self, train_dir, valid_dir, batch_size=200, n_frames=20): 21 | 22 | transforms = ComposeVideo([CenterCrop(128), Scale((224, 224))]) 23 | self.n_frames = n_frames 24 | 25 | self.train_dataset = GulpVideoDataset(train_dir, n_frames, 1, False, transform=transforms) 26 | self.train_loader = DataLoader(self.train_dataset, batch_size=10, shuffle=False, num_workers=8, drop_last=True) 27 | 28 | self.val_dataset = GulpVideoDataset(valid_dir, n_frames, 1, False, transform=transforms) 29 | self.val_loader = DataLoader(self.val_dataset, batch_size=batch_size, shuffle=False, num_workers=8, drop_last=True) 30 | 31 | def getDataLoader(self, train=False): 32 | if train: 33 | return self.train_loader 34 | else: 35 | return self.val_loader 36 | 37 | 38 | class ActivityNetCsvAviAdapter(AbstractDatasetAdapter, 39 | Custom20BNAdapterMixin): 40 | """ Adapter for ActivityNet dataset specified by CSV file and avi videos. """ 41 | 42 | def __init__(self, csv_file, folder, output_folder, 43 | shuffle=False, frame_size=-1, frame_rate=12, 44 | shm_dir_path='/dev/shm', label_name='label', 45 | remove_duplicate_ids=False): 46 | self.data = self.read_csv(csv_file) 47 | self.label_name = label_name 48 | self.output_folder = output_folder 49 | self.labels2idx = self.create_label2idx_dict(self.label_name) 50 | self.folder = folder 51 | self.shuffle = bool(shuffle) 52 | self.frame_size = int(frame_size) 53 | self.frame_rate = int(frame_rate) 54 | self.shm_dir_path = shm_dir_path 55 | self.all_meta = self.get_meta() 56 | if remove_duplicate_ids: 57 | self.all_meta = remove_entries_with_duplicate_ids( 58 | self.output_folder, self.all_meta) 59 | if self.shuffle: 60 | random.shuffle(self.all_meta) 61 | 62 | def read_csv(self, csv_file): 63 | with open(csv_file, newline='\n') as f: 64 | content = csv.reader(f, delimiter=';') 65 | data = [] 66 | for row in content: 67 | if len(row) == 1: # For test case 68 | data.append({'id': row[0], 'label': "dummy"}) 69 | else: # For train and validation case 70 | data.append({'id': row[0], 'label': row[1], 'file_name': row[4]}) 71 | return data 72 | 73 | def get_meta(self): 74 | return [{'id': entry['id'], 75 | 'label': entry[self.label_name], 76 | 'file_name': entry['file_name'], 77 | 'idx': self.labels2idx[entry[self.label_name]]} 78 | for entry in self.data] 79 | 80 | def __len__(self): 81 | return len(self.all_meta) 82 | 83 | 84 | def iter_data(self, slice_element=None): 85 | slice_element = slice_element or slice(0, len(self)) 86 | for meta in self.all_meta[slice_element]: 87 | video_path = os.path.join(self.folder, str(meta['file_name'])) 88 | 89 | with temp_dir_for_bursting(self.shm_dir_path) as temp_burst_dir: 90 | frame_paths = burst_video_into_frames( 91 | video_path, temp_burst_dir, frame_rate=self.frame_rate) 92 | frames = list(resize_images(frame_paths, self.frame_size)) 93 | result = {'meta': meta, 94 | 'frames': frames, 95 | 'id': meta['id']} 96 | yield result 97 | else: 98 | self.write_label2idx_dict() 99 | -------------------------------------------------------------------------------- /videofeatures/CVFeatures.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | from FeatureExtractor.BaseFeatureExtractor import BaseFeatures 3 | from gulpio.loader import DataLoader 4 | import numpy as np 5 | import pandas as pd 6 | 7 | class CVFeatures(BaseFeatures): 8 | 9 | def computeFeaturesForVideoDataset(self, dataloader, pickle_path=None): 10 | """ 11 | Computes Feature Vectors for the video dataset provided via a dataloader object 12 | :param dataloader: gulpIO Dataloader object which represents a dataset 13 | :param pickle_path: (optional) if provided the features are pickeled to the specified location 14 | :return: (features, labels) - features as ndarray of shape (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) and labels of videos 15 | """ 16 | assert isinstance(dataloader, DataLoader) 17 | 18 | feature_batch_list = [] 19 | labels = [] 20 | n_batches = len(dataloader) 21 | for i, (data_batch, label_batch) in enumerate(dataloader): 22 | assert data_batch.ndim == 5 23 | n_frames = data_batch.shape[1] 24 | 25 | frames_batch = data_batch.reshape( 26 | (data_batch.shape[0] * n_frames, data_batch.shape[2], data_batch.shape[3], data_batch.shape[4])) 27 | frames_batch = frames_batch.astype('float32') 28 | 29 | feature_batch = self.computeFeatures(frames_batch) 30 | assert feature_batch.ndim == 2 31 | feature_batch = feature_batch.reshape((data_batch.shape[0], data_batch.shape[1], -1, feature_batch.shape[1])) 32 | 33 | feature_batch_list.append(feature_batch) 34 | labels.extend(label_batch) 35 | print("batch %i of %i" % (i, n_batches)) 36 | 37 | features = np.concatenate(feature_batch_list, axis=0) 38 | assert features.shape[0] == len(labels) and features.ndim == 4 39 | 40 | if pickle_path: 41 | df = pd.DataFrame(data={'labels': labels, 'features': np.vsplit(features, features.shape[0])}) 42 | print('Dumped feature dataframe to', pickle_path) 43 | df.to_pickle(pickle_path) 44 | 45 | return features, labels 46 | 47 | class SIFTFeatures(CVFeatures): 48 | 49 | def __init__(self, n_descriptors=5): 50 | self.n_descriptors = n_descriptors 51 | 52 | def computeFeatures(self, video): 53 | """ 54 | todo: improve documentation 55 | Computes SIFT features for a single video. 56 | :param video: a video of shape (n_frames, width, height, channel) 57 | :return: the features, shape () 58 | """ 59 | descriptor_array = [] 60 | for i in range(video.shape[0]): 61 | frame = cv2.cvtColor(video[i], cv2.COLOR_RGB2GRAY).astype('uint8') 62 | _, descriptors = cv2.xfeatures2d.SIFT_create(nfeatures=self.n_descriptors).detectAndCompute(frame, None) 63 | 64 | if descriptors is not None: 65 | if descriptors.shape[0] < self.n_descriptors: 66 | descriptors = np.concatenate([descriptors, np.zeros((self.n_descriptors - descriptors.shape[0], 128))], axis=0) 67 | else: 68 | descriptors = descriptors[:self.n_descriptors] 69 | else: 70 | descriptors = np.zeros((self.n_descriptors, 128)) 71 | 72 | assert descriptors.shape == (self.n_descriptors, 128) 73 | descriptor_array.append(descriptors) 74 | features = np.concatenate(descriptor_array, axis=0) 75 | return features 76 | 77 | class SURFFeatures(CVFeatures): 78 | # todo: documentation 79 | def __init__(self, n_descriptors=5): 80 | self.n_descriptors = n_descriptors 81 | 82 | def computeFeatures(self, video): 83 | descriptor_array = [] 84 | for i in range(video.shape[0]): 85 | frame = cv2.cvtColor(video[i], cv2.COLOR_RGB2GRAY).astype('uint8') 86 | _, descriptors = cv2.xfeatures2d.SURF_create().detectAndCompute(frame, None) 87 | 88 | # make sure that descriptors have shape (n_descriptor, 64) 89 | if descriptors is not None: 90 | if descriptors.shape[0] < self.n_descriptors: 91 | descriptors = np.concatenate([descriptors, np.zeros((self.n_descriptors - descriptors.shape[0], 64))], 92 | axis=0) 93 | else: 94 | descriptors = descriptors[:self.n_descriptors] 95 | else: 96 | descriptors = np.zeros((self.n_descriptors, 64)) 97 | 98 | assert descriptors.shape == (self.n_descriptors, 64) 99 | descriptor_array.append(descriptors) 100 | 101 | return np.concatenate(descriptor_array, axis=0) 102 | 103 | -------------------------------------------------------------------------------- /videofeatures/VideoFeatures.py: -------------------------------------------------------------------------------- 1 | from fishervector import FisherVectorGMM 2 | import pandas as pd 3 | import numpy as np 4 | import os, pickle 5 | import logging 6 | import gc 7 | import datetime as dt 8 | 9 | 10 | 11 | class Pipeline: 12 | def __init__(self, dataset, extractor, base_dir, dataset_name='VideoDataset'): 13 | """ 14 | A pipeline object initialized with a dataset and a feature extractor allows to 15 | 1) extract, store and load features (see repo description for currently supported features) 16 | 2) train a GMM for a Fisher Vector encoding based on the chosen features, store and load its model parameters after training 17 | 3) compute Fisher Vectors from the GMM, store and load them 18 | :param dataset: gulpIO DataLoader object which represents a dataset 19 | :param extractor: feature extractor object of type BaseFeatures (must implement computeFeatures function) 20 | :param base_dir: absolute or relative path to a directory where sub folders for logs and data dumps from the pipeline are created 21 | :param dataset_name: a string representing the name of the dataset used for logs and file name 22 | """ 23 | self.dataset = dataset 24 | self.dataset_name = dataset_name 25 | self.extractor = extractor 26 | self.base_dir = base_dir 27 | 28 | self.model_dumps_dir = create_dir(os.path.join(base_dir, "models")) 29 | self.feature_dumps_dir = create_dir(os.path.join(base_dir, "features")) 30 | self.fisher_vector_dumps_dir = create_dir(os.path.join(base_dir, "fishervectors")) 31 | self.log_dir = create_dir(os.path.join(base_dir, "logs")) 32 | self.results_dir = create_dir(os.path.join(base_dir, "results")) 33 | 34 | time_stamp = str(dt.datetime.now().strftime("%m-%d-%y_%H-%M")) 35 | self.logger = self.setup_logger(logfile_name = time_stamp + '_video_features.log') 36 | 37 | 38 | def extractFeatures(self, feature_dump_path=None): 39 | """ 40 | Extracts features from (gulped) dataset 41 | :param batch_size: number of videos per batch 42 | :param feature_dump_path: (optional) if set the features are stored as pandas df to the denoted location, else the df is dumped to the default path /DataDumps/Features 43 | :return: (features, labels) - features as ndarray of shape (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) and labels of videos 44 | """ 45 | if not feature_dump_path: 46 | feature_dump_path = self.getDumpFileName(type='features') 47 | 48 | self.logger.info('Started extracting {} features from {} dataset'.format(self.extractor.__class__.__name__, self.dataset_name)) 49 | 50 | features, labels = self.extractor.computeFeaturesForVideoDataset(self.dataset, pickle_path=feature_dump_path) 51 | 52 | self.logger.info('Finished extracting features from dataset. Features have shape {}.' 53 | ' Dumped features to {}'.format(np.shape(features), feature_dump_path)) 54 | 55 | return features, labels 56 | 57 | 58 | def loadFeatures(self, feature_df_path=None): 59 | """ 60 | loads features from pd dataframe and returns them as a matrix 61 | :param feature_df_path: path to pandas dataframe that holds features 62 | :return: (features, labels) - features as ndarray of shape (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) and labels (list) of videos 63 | """ 64 | 65 | if feature_df_path is None: 66 | feature_df_path = self.getDumpFileName('features') 67 | 68 | assert os.path.isfile(feature_df_path) 69 | feature_df = pd.read_pickle(feature_df_path) 70 | 71 | assert 'features' in feature_df and 'labels' in feature_df 72 | 73 | # stack video features to a 2d matrix 74 | features = np.concatenate(feature_df['features'], axis=0) 75 | 76 | labels = list(feature_df['labels']) 77 | 78 | if features.ndim == 3: # assume only one feature vector is given -> insert dimension 79 | features = features.reshape((features.shape[0], features.shape[1], 1, features.shape[2])) 80 | 81 | self.logger.info( 82 | 'Loaded {} features from {}. Features have shape {}'.format(self.extractor.__class__.__name__, feature_df_path, 83 | np.shape(features))) 84 | 85 | assert features.ndim == 4 and len(labels) == features.shape[0] 86 | return features, labels 87 | 88 | 89 | 90 | def getDumpFileName(self, type): 91 | assert type in ['features', 'model', 'fv_npy', 'results'] 92 | if type is 'features': 93 | return os.path.join(self.feature_dumps_dir, 'features_' + self.extractor.__class__.__name__ + '_' + self.dataset_name + '.pickle') 94 | elif type is 'model': 95 | return os.path.join(self.model_dumps_dir, 'gmm_' + self.extractor.__class__.__name__ + '_' + self.dataset_name + '.pickle') 96 | elif type is 'fv_npy': 97 | return os.path.join(self.fisher_vector_dumps_dir, 'fv_' + self.extractor.__class__.__name__ + '_' + self.dataset_name) 98 | elif type is 'results': 99 | return os.path.join(self.results_dir, 'results_{}.pickle'.format(self.dataset_name)) 100 | 101 | 102 | 103 | def trainFisherVectorGMM(self, features, by_bic=True, model_dump_path=None, n_kernels=50): 104 | """ 105 | Trains a GMM for Fisher Vectors on the given features. 106 | :param features: features as ndarray of shape (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) and labels (list) of videos 107 | :param by_bic: denotes whether the gmm fit is chosen based on the lowest BIC 108 | :param n_kernels: provide if not fitted by bic (bic-method chooses from a fixed set of n_kernels) 109 | :return: the fitted object of type FisherVectorGMM 110 | """ 111 | if model_dump_path is None: 112 | model_dump_path = self.getDumpFileName('model') 113 | 114 | self.logger.info('Started training FisherVector GMM') 115 | fv_gmm = FisherVectorGMM(n_kernels=n_kernels) 116 | if by_bic: 117 | fv_gmm.fit_by_bic(features, model_dump_path=model_dump_path) 118 | else: 119 | fv_gmm.fit(features, model_dump_path=model_dump_path) 120 | assert fv_gmm.fitted 121 | self.logger.info('Finished training FisherVector GMM. Dumped model to {}'.format(model_dump_path)) 122 | 123 | return fv_gmm 124 | 125 | 126 | def loadFisherVectorGMM(self, pickle_path=None): 127 | if pickle_path is None: 128 | pickle_path = self.getDumpFileName('model') 129 | assert os.path.isfile(pickle_path) 130 | with open(pickle_path, 'rb') as f: 131 | fv_gmm = pickle.load(f) 132 | assert isinstance(fv_gmm, FisherVectorGMM) 133 | 134 | self.logger.info('Loaded FisherVector GMM from {}'.format(pickle_path)) 135 | 136 | return fv_gmm 137 | 138 | 139 | def computeFisherVectors(self, features, labels, fv_gmm, normalized=True, mem_map_mode = False, dump_path=None, batch_size=100): 140 | """ 141 | Compute the (improved) Fisher Vectors of features with a fitted FisherVectorGMM. 142 | :param features: features as ndarray of shape (n_videos, n_frames, n_descriptors_per_image, n_dim_descriptor) 143 | :param labels: labels correspodning to features - array of shape (n_videos,) 144 | :param fv_gmm: fitted FisherVectorGMM instance 145 | :param normalized: boolean - if true: improved fisher vectors are computed 146 | :param mem_map_mode: set to True, if the fisher vectors should be computed in batches which are stored as numpy memory map on disk (recommended 147 | if the computation overflows the available RAM) 148 | :param dump_path: file path specifying the dump location for the computed vectors 149 | :param batch_size: batch size for computing the fisher vactors 150 | :return: (fv, labels) fisher vectors - ndarray of shape (n_videos, n_frames, 2*n_kernels, n_feature_dim) 151 | """ 152 | assert isinstance(fv_gmm, FisherVectorGMM) 153 | assert isinstance(features, np.ndarray) 154 | assert features.ndim == 4 155 | 156 | labels = np.asarray(labels) 157 | assert labels.ndim ==1 158 | assert features.shape[0] == labels.shape[0] 159 | 160 | self.logger.info('Started Computing Fisher Vectors') 161 | 162 | if dump_path is None: 163 | dump_path = self.getDumpFileName('fv_npy') 164 | 165 | """ compute Fisher vectors in batches and store as memory map """ 166 | if mem_map_mode: 167 | n_instances = features.shape[0] 168 | n_batches = n_instances // batch_size 169 | split_tuples = list(zip([i * batch_size for i in range(0, n_batches + 1)], [i*batch_size for i in range(1,n_batches+1)] + [n_instances])) 170 | 171 | fv_shape = (features.shape[0], features.shape[1], fv_gmm.n_kernels*2, features.shape[3]) 172 | fv = np.memmap(dump_path + '_memmap.npy', dtype='float32', mode='w+', shape=fv_shape) 173 | 174 | for i, (batch_start, batch_end) in enumerate(split_tuples): 175 | batch = features[batch_start:batch_end] 176 | if batch.shape[0] > 0: 177 | print('Compute FV batch {} of {}'.format(i+1, n_batches+1)) 178 | fv[batch_start:batch_end] = fv_gmm.predict(batch, normalized=normalized) 179 | # clean memory 180 | del batch 181 | gc.collect() 182 | 183 | np.save(dump_path + '.npy', fv) 184 | np.save(dump_path + '_labels.npy', labels) 185 | del fv 186 | return np.load(dump_path + '.npy', mmap_mode='r'), labels 187 | 188 | else: 189 | fv = fv_gmm.predict(features, normalized=normalized) 190 | np.save(dump_path + '.npy', fv) 191 | np.save(dump_path + '_labels.npy', labels) 192 | 193 | assert fv.shape[0] == labels.shape[0] 194 | 195 | self.logger.info('Finished computing Fisher Vectors. Dumped vectors to {}'.format(dump_path)) 196 | 197 | return fv, labels 198 | 199 | 200 | def loadFisherVectors(self, fisher_vector_path=None): 201 | """ 202 | Loads fisher vectors from pd dataframe and returns them as a matrix 203 | :param feature_df: pandas dataframe which holds features in a column 'features' 204 | :param feature_df_path: path to pandas dataframe that holds features 205 | :return: (fv, labels) - fv as ndarray of shape (n_videos, n_frames, 2*n_kernels, n_dim_descriptor) and labels (as array) of videos 206 | """ 207 | 208 | if fisher_vector_path is None: 209 | fisher_vector_path = self.getDumpFileName('fv_npy') 210 | assert os.path.isfile(fisher_vector_path + '_labels.npy') 211 | assert os.path.isfile(fisher_vector_path + '.npy') 212 | 213 | fv = np.load(fisher_vector_path + '.npy', mmap_mode='r') 214 | labels = np.load(fisher_vector_path + '_labels.npy') 215 | self.logger.info('Loaded Fisher Vectors from {} with shape {}'.format(fisher_vector_path + '.npy', fv.shape)) 216 | 217 | return fv, labels 218 | 219 | 220 | def setup_logger(self, logfile_name = '_video_features.log'): 221 | log_file_path = os.path.join(self.log_dir, logfile_name) 222 | logger = logging.getLogger('PipelineRunLogger') 223 | logger.setLevel(logging.INFO) 224 | fh = logging.FileHandler(log_file_path) 225 | fh.setLevel(logging.DEBUG) 226 | ch = logging.StreamHandler() 227 | ch.setLevel(logging.INFO) 228 | formatter = logging.Formatter('[%(asctime)s - %(levelname)s] %(message)s') 229 | fh.setFormatter(formatter) 230 | ch.setFormatter(formatter) 231 | logger.addHandler(fh) 232 | logger.addHandler(ch) 233 | 234 | return logger 235 | 236 | 237 | 238 | def create_dir(output_dir): 239 | assert(output_dir) 240 | if not os.path.isdir(output_dir): 241 | os.mkdir(output_dir) 242 | return output_dir 243 | 244 | --------------------------------------------------------------------------------