├── eval ├── __init__.py ├── classificationMAP.py ├── utils_eval.py ├── eval_anno_file_generation.py ├── detectionMAP.py └── eval_detection.py ├── utils ├── __init__.py └── wsad_utils.py ├── libMR ├── libmr.c ├── libmr.pxd ├── Makefile ├── compile.sh ├── setup.py ├── build_libmr_python.sh ├── estimate_wscores.py ├── test_libmr.py ├── weibull.h ├── MetaRecognition.h ├── COPYRIGHT_Libmr.txt ├── libmr.pyx ├── MetaRecognition.cpp └── malloc.h ├── thumos_splits ├── split_0 │ ├── Class_Unknown.txt │ └── Class_Known.txt ├── split_2 │ ├── Class_Unknown.txt │ └── Class_Known.txt └── split_1 │ ├── Class_Unknown.txt │ └── Class_Known.txt ├── scripts ├── test_split0.sh ├── test_split1.sh ├── test_split2.sh ├── train_split0.sh ├── train_split1.sh └── train_split2.sh ├── Dist.py ├── README.md ├── train.py ├── LICENSE ├── PL.py ├── base.py ├── main.py ├── options.py ├── test.py ├── proposal_methods.py ├── edl_loss.py └── wsad_dataset.py /eval/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /libMR/libmr.c: -------------------------------------------------------------------------------- 1 | #error Do not use this file, it is the result of a failed Cython compilation. 
2 | -------------------------------------------------------------------------------- /thumos_splits/split_0/Class_Unknown.txt: -------------------------------------------------------------------------------- 1 | CleanAndJerk 2 | CliffDiving 3 | Diving 4 | LongJump 5 | SoccerPenalty -------------------------------------------------------------------------------- /thumos_splits/split_2/Class_Unknown.txt: -------------------------------------------------------------------------------- 1 | CricketShot 2 | JavelinThrow 3 | LongJump 4 | PoleVault 5 | Shotput 6 | -------------------------------------------------------------------------------- /thumos_splits/split_1/Class_Unknown.txt: -------------------------------------------------------------------------------- 1 | CliffDiving 2 | CricketBowling 3 | Diving 4 | HammerThrow 5 | JavelinThrow 6 | -------------------------------------------------------------------------------- /libMR/libmr.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "MetaRecognition.h": 2 | cdef struct svm_node_libsvm: 3 | int index 4 | double value -------------------------------------------------------------------------------- /libMR/Makefile: -------------------------------------------------------------------------------- 1 | CXX= g++ 2 | SRC= MetaRecognition.cpp weibull.c 3 | 4 | libmr: $(SRC) weibull.h malloc.h MetaRecognition.h 5 | $(CXX) -o libmr $(SRC) -I. 
6 | 7 | clean: 8 | rm -f *~ *.o libmr -------------------------------------------------------------------------------- /thumos_splits/split_0/Class_Known.txt: -------------------------------------------------------------------------------- 1 | BaseballPitch 2 | BasketballDunk 3 | Billiards 4 | CricketBowling 5 | CricketShot 6 | FrisbeeCatch 7 | GolfSwing 8 | HammerThrow 9 | HighJump 10 | JavelinThrow 11 | PoleVault 12 | Shotput 13 | TennisSwing 14 | ThrowDiscus 15 | VolleyballSpiking -------------------------------------------------------------------------------- /thumos_splits/split_1/Class_Known.txt: -------------------------------------------------------------------------------- 1 | BaseballPitch 2 | BasketballDunk 3 | Billiards 4 | CleanAndJerk 5 | CricketShot 6 | FrisbeeCatch 7 | GolfSwing 8 | HighJump 9 | LongJump 10 | PoleVault 11 | Shotput 12 | SoccerPenalty 13 | TennisSwing 14 | ThrowDiscus 15 | VolleyballSpiking 16 | -------------------------------------------------------------------------------- /thumos_splits/split_2/Class_Known.txt: -------------------------------------------------------------------------------- 1 | BaseballPitch 2 | BasketballDunk 3 | Billiards 4 | CleanAndJerk 5 | CliffDiving 6 | CricketBowling 7 | Diving 8 | FrisbeeCatch 9 | GolfSwing 10 | HammerThrow 11 | HighJump 12 | SoccerPenalty 13 | TennisSwing 14 | ThrowDiscus 15 | VolleyballSpiking 16 | -------------------------------------------------------------------------------- /scripts/test_split0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd .. 
3 | CUDA_VISIBLE_DEVICES=0 \ 4 | python test.py \ 5 | --max_seqlen 500 \ 6 | --lr 0.00005 \ 7 | --k 7 \ 8 | --dataset_name Thumos14reduced \ 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \ 10 | --use_model CO2 \ 11 | --dataset SampleDataset \ 12 | --weight_decay 0.001 \ 13 | --AWM BWA_fusion_dropout_feat_v2 \ 14 | --seed 0 \ 15 | --test_ckpt ./ckpt/split0_ckpt.pkl \ 16 | --split_idx 0 \ 17 | --without_wandb \ 18 | --topk_test -------------------------------------------------------------------------------- /scripts/test_split1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd .. 3 | CUDA_VISIBLE_DEVICES=0 \ 4 | python test.py \ 5 | --max_seqlen 500 \ 6 | --lr 0.00005 \ 7 | --k 7 \ 8 | --dataset_name Thumos14reduced \ 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \ 10 | --use_model CO2 \ 11 | --dataset SampleDataset \ 12 | --weight_decay 0.001 \ 13 | --AWM BWA_fusion_dropout_feat_v2 \ 14 | --seed 0 \ 15 | --test_ckpt ./ckpt/split1_ckpt.pkl \ 16 | --split_idx 1 \ 17 | --without_wandb \ 18 | --topk_test -------------------------------------------------------------------------------- /scripts/test_split2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd .. 3 | CUDA_VISIBLE_DEVICES=0 \ 4 | python test.py \ 5 | --max_seqlen 500 \ 6 | --lr 0.00005 \ 7 | --k 7 \ 8 | --dataset_name Thumos14reduced \ 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \ 10 | --use_model CO2 \ 11 | --dataset SampleDataset \ 12 | --weight_decay 0.001 \ 13 | --AWM BWA_fusion_dropout_feat_v2 \ 14 | --seed 0 \ 15 | --test_ckpt ./ckpt/split2_ckpt.pkl \ 16 | --split_idx 2 \ 17 | --without_wandb \ 18 | --topk_test -------------------------------------------------------------------------------- /scripts/train_split0.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd .. 
3 | CUDA_VISIBLE_DEVICES=0 \ 4 | python main.py \ 5 | --max_seqlen 500 \ 6 | --lr 0.00005 \ 7 | --k 7 \ 8 | --dataset_name Thumos14reduced \ 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \ 10 | --use_model CO2 \ 11 | --dataset SampleDataset \ 12 | --weight_decay 0.001 \ 13 | --AWM BWA_fusion_dropout_feat_v2 \ 14 | --group_name CELL \ 15 | --model_name split0_ckpt \ 16 | --split_idx 0 \ 17 | --k_edl 7 \ 18 | --num_centers 2 \ 19 | --seed 0 \ 20 | --without_wandb -------------------------------------------------------------------------------- /scripts/train_split1.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd .. 3 | CUDA_VISIBLE_DEVICES=0 \ 4 | python main.py \ 5 | --max_seqlen 500 \ 6 | --lr 0.00005 \ 7 | --k 7 \ 8 | --dataset_name Thumos14reduced \ 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \ 10 | --use_model CO2 \ 11 | --dataset SampleDataset \ 12 | --weight_decay 0.001 \ 13 | --AWM BWA_fusion_dropout_feat_v2 \ 14 | --group_name CELL \ 15 | --model_name split1_ckpt \ 16 | --split_idx 1 \ 17 | --k_edl 7 \ 18 | --num_centers 2 \ 19 | --seed 0 \ 20 | --without_wandb -------------------------------------------------------------------------------- /scripts/train_split2.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd .. 
3 | CUDA_VISIBLE_DEVICES=0 \ 4 | python main.py \ 5 | --max_seqlen 500 \ 6 | --lr 0.00005 \ 7 | --k 7 \ 8 | --dataset_name Thumos14reduced \ 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \ 10 | --use_model CO2 \ 11 | --dataset SampleDataset \ 12 | --weight_decay 0.001 \ 13 | --AWM BWA_fusion_dropout_feat_v2 \ 14 | --group_name CELL \ 15 | --model_name split2_ckpt \ 16 | --split_idx 2 \ 17 | --k_edl 7 \ 18 | --num_centers 2 \ 19 | --seed 0 \ 20 | --without_wandb -------------------------------------------------------------------------------- /libMR/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "----- Removing previously compiled libmr.so -----\n" 4 | rm -r build 5 | rm *.model 6 | rm libmr.so 7 | rm *.dump 8 | rm ../libmr.so 9 | 10 | echo "----- Building and compiling libmr ------- \n" 11 | python setup.py build_ext -i 12 | # cp libmr.so ../ 13 | 14 | # echo "----- Completed Compiling libmr -------- \n" 15 | # echo "Now trying python -c \"import libmr\"" 16 | # python test_libmr.py 17 | # echo "----- Compiling Done. 
def getAP(conf, labels):
    """Return the average precision of one class.

    Samples are ranked by descending confidence; precision is evaluated at
    the rank of every positive sample and the sum is divided by the number
    of positives.

    Args:
        conf: 1-D array-like of confidence scores, one per sample.
        labels: 1-D array-like of ground-truth labels with the same length
            as ``conf``. Entries equal to 1 are treated as positives; the
            normaliser is ``np.sum(labels)``.

    Returns:
        The average precision as a NumPy floating scalar, or ``nan`` when
        ``labels`` sums to zero (AP is undefined without positives).

    Raises:
        AssertionError: If ``conf`` and ``labels`` differ in length.
    """
    # Accept plain Python sequences as well as ndarrays: negation and fancy
    # indexing below both require real arrays.
    conf = np.asarray(conf)
    labels = np.asarray(labels)
    assert len(conf) == len(labels)

    npos = np.sum(labels)
    if npos == 0:
        # Same value the 0/0 division used to produce, but without the
        # RuntimeWarning.
        return float('nan')

    order = np.argsort(-conf)
    is_pos = labels[order] == 1

    tp = np.cumsum(is_pos).astype('float32')
    fp = np.cumsum(~is_pos).astype('float32')
    # fp + tp equals the 1-based rank, so it is never zero.
    prec = tp / (fp + tp)

    return np.sum(is_pos.astype('float32') * prec) / npos
-------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from distutils.extension import Extension 3 | from Cython.Distutils import build_ext 4 | from Cython.Build import cythonize 5 | import sys 6 | import numpy 7 | #ext_modules = [Extension("libmr", ["libmr.pyx", "MetaRecognition.cpp"])] 8 | 9 | setup( 10 | ext_modules = cythonize(Extension('libmr', 11 | ["libmr.pyx", 12 | "MetaRecognition.cpp", 13 | "weibull.c" 14 | ], 15 | include_dirs = [".", numpy.get_include()], 16 | language="c++", 17 | )), 18 | data_files = [('.', ['MetaRecognition.h', 'weibull.h'])], 19 | 20 | ) 21 | -------------------------------------------------------------------------------- /libMR/build_libmr_python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This script creates a clean temporary environment Python, and then 4 | # builds LibMR's python bindings. 5 | 6 | if [ '!' -f setup.py ]; then 7 | echo Put this script into the same folder as setup.py 8 | exit 1 9 | fi 10 | 11 | echo Step 1: Download virtualenv 12 | wget -O virtualenv-1.9.1.tar.gz --no-check-certificate https://pypi.python.org/packages/source/v/virtualenv/virtualenv-1.9.1.tar.gz 13 | tar xvf virtualenv-1.9.1.tar.gz 14 | 15 | echo Step 2: Create virtualenv 16 | python virtualenv-1.9.1/virtualenv.py --system-site-packages venv 17 | 18 | echo Step 3: Entering virtualenv and installing dependencies 19 | source venv/bin/activate 20 | pip install cython==0.19.1 21 | 22 | echo Step 5: Build the extension 23 | rm -f python/libmr.cpp 24 | python setup.py build_ext -i 25 | 26 | deactivate 27 | 28 | echo The .so should be built in the current folder. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CVPR2023-OWTAL 2 | An implementation of a baseline method of OWTAL. 
def train(itr, dataset, args, model, optimizer, device):
    """Run a single optimisation step and return the total loss.

    Args:
        itr: Index of the current iteration; forwarded to the model and the
            criterion and used to throttle logging.
        dataset: Object exposing ``load_data(n_similar=...)`` that returns
            ``(features, labels, pairs_id)`` with NumPy ``features`` and
            ``labels``.
        args: Parsed options; ``num_similar`` and ``without_wandb`` are read
            here and the whole namespace is forwarded as ``opt``.
        model: Module providing ``forward`` and ``criterion``.
        optimizer: Optimiser stepping the model parameters.
        device: Device the batch is moved to.

    Returns:
        The total loss of this step as a NumPy array on the CPU.
    """
    model.train()
    features, labels, pairs_id = dataset.load_data(n_similar=args.num_similar)

    # Count, per sample, the positions along axis 1 whose feature vector has
    # at least one non-zero entry, then crop axis 1 to the longest sample
    # (presumably this strips zero padding -- confirm against load_data).
    seq_len = np.sum(np.max(np.abs(features), axis=2) > 0, axis=1)
    features = features[:, :np.max(seq_len), :]

    features = torch.from_numpy(features).float().to(device)
    labels = torch.from_numpy(labels).float().to(device)

    outputs = model(features, seq_len=seq_len, is_training=True, itr=itr, opt=args, labels=labels)
    total_loss, loss_dict = model.criterion(outputs, labels, seq_len=seq_len, device=device, opt=args, itr=itr,
                                            pairs_id=pairs_id, inputs=features)

    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

    # Log every 20th iteration (never iteration 0) when wandb is enabled.
    if not args.without_wandb and itr % 20 == 0 and itr != 0:
        # Imported lazily: this module never imported wandb, so the call
        # below used to raise NameError; keeping the import here also means
        # wandb is only required when logging is actually requested.
        import wandb

        wandb.log(loss_dict)

    return total_loss.detach().cpu().numpy()
sample IS an outlier" 21 | print "posscores: ", posscores 22 | print "test_distances: ", test_distances 23 | print "wscores: ", wscores 24 | 25 | if __name__ == "__main__": 26 | main() 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Mengyuan Chen 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
class PL(nn.CrossEntropyLoss):
    """Loss built on learnable positive and negative centers.

    Logits are the difference between the ``Dist`` response of the input to
    the positive centers and to the negative centers, divided by a
    temperature.
    """

    def __init__(self, args):
        """Read ``weight_pl``, ``temp``, ``n_known_class``, ``num_centers``
        and ``feature_size`` from ``args`` and build the center module."""
        super().__init__()
        self.args = args
        self.use_gpu = True
        self.weight_pl = float(args.weight_pl)
        self.temp = args.temp
        self.Dist = Dist(
            num_classes=args.n_known_class,
            num_centers=args.num_centers,
            feat_dim=args.feature_size,
        )
        # Aliases of the parameters owned by ``self.Dist``.
        self.pos_points = self.Dist.pos_centers
        self.neg_points = self.Dist.neg_centers

    def forward(self, x, labels=None):
        """Return ``(logits, loss)``; ``loss`` is ``0`` when ``labels`` is None.

        With labels, the exponentiated logits are summed over dim 0 twice,
        once masked by ``labels`` and once unmasked; the loss is the mean
        negative log of their ratio plus ``1e-3``.
        """
        pos_response = self.Dist(x, center=self.pos_points)
        neg_response = self.Dist(x, center=self.neg_points)
        logits = (pos_response - neg_response) / self.temp  # (batch_size, class_num)

        if labels is None:
            return logits, 0

        masked_sum = torch.exp(logits).mul(labels).sum(dim=0)
        total_sum = torch.exp(logits).sum(dim=0)
        ratio = masked_sum / total_sum
        loss = -torch.log(ratio + 1e-3).mean()

        return logits, loss
def main():
    """Smoke-test the libmr bindings.

    Fits an ``MR`` object on the module-level ``svm_data``, prints the
    w-scores of ``fit_data``, saves the model to ``meta_rec.model`` and
    pickles ``fit_data`` to ``data.dump``.
    """
    mr = libmr.MR()
    datasize = len(svm_data["scores"])
    mr.fit_svm(svm_data, datasize, 1, 1, 1, 10)
    print(fit_data)
    print(mr.w_score_vector(fit_data))
    mr.mr_save("meta_rec.model")
    datadump = {"data": fit_data}

    # pickle writes bytes: the file must be opened in binary mode on
    # Python 3 (text mode raises TypeError). The context manager also
    # closes the handle if dumping fails.
    with open("data.dump", "wb") as f:
        pickle.dump(datadump, f)
    print(dir(mr))
def interpolated_prec_rec(prec, rec):
    """Interpolated AP - VOCdevkit from VOC 2011.

    Pads the precision curve with 0 on both ends and the recall curve with
    0 and 1, turns precision into its running maximum taken from the right,
    and sums precision times recall increment wherever recall changes.
    """
    padded_prec = np.hstack(([0], prec, [0]))
    padded_rec = np.hstack(([0], rec, [1]))

    # Right-to-left running maximum of the padded precision.
    for pos in reversed(range(len(padded_prec) - 1)):
        if padded_prec[pos + 1] > padded_prec[pos]:
            padded_prec[pos] = padded_prec[pos + 1]

    # Indices (into the padded arrays) where recall differs from its
    # predecessor.
    change = np.flatnonzero(padded_rec[1:] != padded_rec[:-1]) + 1
    recall_step = padded_rec[change] - padded_rec[change - 1]
    return np.sum(recall_step * padded_prec[change])
def wrapper_segment_iou(target_segments, candidate_segments):
    """Compute the pairwise temporal IoU between two sets of segments.

    Parameters
    ----------
    target_segments : ndarray
        2-dim array in format [m x 2:=[init, end]]
    candidate_segments : ndarray
        2-dim array in format [n x 2:=[init, end]]

    Outputs
    -------
    tiou : ndarray
        2-dim array [n x m]; column ``i`` holds the IoU of every candidate
        with target ``i``.

    Raises
    ------
    ValueError
        If either argument is not 2-dimensional.
    """
    both_2d = candidate_segments.ndim == 2 and target_segments.ndim == 2
    if not both_2d:
        raise ValueError('Dimension of arguments is incorrect')

    num_candidates = candidate_segments.shape[0]
    num_targets = target_segments.shape[0]
    tiou = np.empty((num_candidates, num_targets))
    # One column per target segment, filled by the single-target helper.
    for col, target in enumerate(target_segments):
        tiou[:, col] = segment_iou(target, candidate_segments)

    return tiou
os.environ['PYTHONHASHSEED'] = str(seed) 22 | np.random.seed(seed) 23 | torch.manual_seed(seed) 24 | torch.cuda.manual_seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | torch.backends.cudnn.benchmark = False 27 | torch.backends.cudnn.deterministic = True 28 | 29 | 30 | import torch.optim as optim 31 | 32 | if __name__ == '__main__': 33 | args = options.parser.parse_args() 34 | 35 | seed = args.seed 36 | print('=============seed: {}, pid: {}============='.format(seed, os.getpid())) 37 | setup_seed(seed) 38 | device = torch.device("cuda") 39 | dataset = getattr(wsad_dataset, args.dataset)(args) 40 | if 'Thumos' in args.dataset_name: 41 | max_map = [0] * 9 42 | else: 43 | max_map = [0] * 10 44 | max_uct_rank_acc = 0 45 | ckpt_folder_path = args.path_dataset + '/aaai23osr/ckpt/' + args.group_name 46 | if not os.path.exists(ckpt_folder_path): 47 | os.makedirs(ckpt_folder_path) 48 | print(args) 49 | model = getattr(model, args.use_model)(dataset.feature_size, dataset.num_class, opt=args).to(device) 50 | 51 | if args.pretrained_ckpt is not None: 52 | model.load_state_dict(torch.load(args.pretrained_ckpt)) 53 | 54 | optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay) 55 | 56 | total_loss = 0 57 | lrs = [args.lr, args.lr / 5, args.lr / 5 / 5] 58 | print(model) 59 | for itr in tqdm(range(args.max_iter)): 60 | 61 | loss = train(itr, dataset, args, model, optimizer, device) 62 | total_loss += loss 63 | if itr % args.interval == 0 and not itr == 0: 64 | print('Iteration: %d, Loss: %.5f' % (itr, total_loss / args.interval)) 65 | total_loss = 0 66 | # torch.save(model.state_dict(), ckpt_folder_path + '/last_' + args.model_name + '.pkl') 67 | torch.save(model.state_dict(), ckpt_folder_path + '/last_' + args.model_name + '.pkl') 68 | 69 | iou, dmap, uct_rank_acc = test(itr, dataset, args, model, device) 70 | 71 | if 'Thumos' in args.dataset_name: 72 | map_update_cond = sum(dmap[:7]) > sum(max_map[:7]) 73 | else: 74 | map_update_cond = 
import argparse

# Command-line options shared by the training and testing entry points.
# Help strings use argparse's ``%(default)s`` placeholder so the documented
# default can never drift from the real one.
parser = argparse.ArgumentParser(description='CO2-NET')
parser.add_argument('--path_dataset', type=str, default='/data_SSD1/cmy/CO2-THUMOS-14', help='the path of data feature')
# '/data_SDD3/mmc_mychen/CO2-THUMOS-14'
parser.add_argument('--lr', type=float, default=0.00005, help='learning rate (default: %(default)s)')
parser.add_argument('--batch_size', type=int, default=10,
                    help='number of instances in a batch of data (default: %(default)s)')
parser.add_argument('--model_name', default='default', help='name to save model')
parser.add_argument('--group_name', default='default',
                    help='name of the checkpoint sub-folder the model is saved in')
parser.add_argument('--pretrained_ckpt', default=None, help='ckpt for pretrained model')
# type=int so that a value given on the command line is not left as a string.
parser.add_argument('--feature_size', type=int, default=2048, help='size of feature (default: %(default)s)')
parser.add_argument('--num_class', type=int, default=20, help='number of classes (default: %(default)s)')
parser.add_argument('--dataset_name', default='Thumos14reduced', help='dataset to train on (default: %(default)s)')
parser.add_argument('--max_seqlen', type=int, default=320,
                    help='maximum sequence length during training (default: %(default)s)')
parser.add_argument('--num_similar', default=3, type=int,
                    help='number of similar pairs in a batch of data (default: %(default)s)')
parser.add_argument('--seed', type=int, default=3552, help='random seed (default: %(default)s)')
parser.add_argument('--max_iter', type=int, default=5000, help='maximum iteration to train (default: %(default)s)')
parser.add_argument('--feature_type', type=str, default='I3D',
                    help='type of feature to be used I3D or UNT (default: %(default)s)')
parser.add_argument('--use_model', type=str, help='model used to train the network')
parser.add_argument('--interval', type=int, default=50, help='time interval of performing the test')
parser.add_argument('--similar_size', type=int, default=2)

parser.add_argument('--weight_decay', type=float, default=5e-4)
parser.add_argument('--dataset', type=str, default='SampleDataset')
parser.add_argument('--proposal_method', type=str, default='multiple_threshold_hamnet')

# for proposal generation
parser.add_argument('--scale', type=float, default=1)
parser.add_argument("--feature_fps", type=int, default=25)
parser.add_argument('--gamma-oic', type=float, default=0.2)

parser.add_argument('--k', type=float, default=7)
# for testing time usage
parser.add_argument("--topk2", type=float, default=10)
parser.add_argument("--topk", type=float, default=60)

parser.add_argument('--dropout_ratio', type=float, default=0.7)
parser.add_argument('--reduce_ratio', type=int, default=16)
# for pooling kernel size calculate
parser.add_argument('--t', type=int, default=5)

# -------------loss weight---------------
parser.add_argument("--alpha1", type=float, default=0.8)
parser.add_argument("--alpha2", type=float, default=0.8)
parser.add_argument("--alpha3", type=float, default=1)
parser.add_argument('--alpha4', type=float, default=1)

parser.add_argument("--AWM", type=str, default='BWA_fusion_dropout_feat_v2')

# --------------new arguments------------
parser.add_argument('--alpha_cls', type=float, default=1)
parser.add_argument("--n_known_class", type=int, default=15)
parser.add_argument("--without_wandb", action='store_true')
parser.add_argument("--split_idx", type=int, default=0)
parser.add_argument("--main_evaluate_indicator", type=str, default='map')
parser.add_argument('--k_edl', type=int, default=7)

# --------------arpl arguments------------
parser.add_argument('--temp', type=float, default=1.0)
parser.add_argument("--num_centers", type=int, default=2)
parser.add_argument('--weight_pl', type=float, default=0.1)

# --------------balance parameters--------
parser.add_argument('--alpha_ori_edl', type=float, default=1)
parser.add_argument('--alpha_cali_edl', type=float, default=1)
parser.add_argument('--alpha_pl', type=float, default=0.5)

parser.add_argument('--topk_test', action='store_true')

parser.add_argument('--test_ckpt', default=None, help='ckpt for testing')
np 6 | import pandas as pd 7 | 8 | def str2ind(categoryname, classlist): 9 | return [i for i in range(len(classlist)) if categoryname == classlist[i]][0] 10 | 11 | 12 | def strlist2indlist(strlist, classlist): 13 | return [str2ind(s, classlist) for s in strlist] 14 | 15 | def filter_segments(segment_predict, videonames, ambilist): 16 | ind = np.zeros(np.shape(segment_predict)[0]) 17 | for i in range(np.shape(segment_predict)[0]): 18 | vn = videonames[int(segment_predict[i, 0])] 19 | for a in ambilist: 20 | if a[0] == vn: 21 | gt = range( 22 | int(round(float(a[2]) * 25 / 16)), int(round(float(a[3]) * 25 / 16)) 23 | ) 24 | pd = range(int(segment_predict[i][1]), int(segment_predict[i][2])) 25 | IoU = float(len(set(gt).intersection(set(pd)))) / float( 26 | len(set(gt).union(set(pd))) 27 | ) 28 | if IoU > 0: 29 | ind[i] = 1 30 | s = [ 31 | segment_predict[i, :] 32 | for i in range(np.shape(segment_predict)[0]) 33 | if ind[i] == 0 34 | ] 35 | return np.array(s) 36 | 37 | def generate_single_ground_truth_file(annotation_path,args,subset,verbose,output_annotation_path): 38 | '''the content have to be stored: 39 | 1. idx_to_take 40 | 2. videoname 41 | 3. ambilist 42 | 4. ground_truth 43 | 5. 
activity_index 44 | ''' 45 | 46 | gtsegments = np.load(annotation_path + "/segments.npy", allow_pickle=True) 47 | gtlabels = np.load(annotation_path + "/labels.npy", allow_pickle=True) 48 | videoname = np.load(annotation_path + "/videoname.npy", allow_pickle=True) 49 | videoname = np.array([i.decode("utf8") for i in videoname]) 50 | gt_subset = np.load(annotation_path + "/subset.npy", allow_pickle=True) 51 | gt_subset = np.array([s.decode("utf-8") for s in gt_subset]) 52 | # classlist = np.load(annotation_path + "/classlist.npy", allow_pickle=True) 53 | # classlist = np.array([c.decode("utf-8") for c in classlist]) 54 | # classlist = np.load("./new_classlist.npy", allow_pickle=True) 55 | classlist = args.classlist 56 | duration = np.load(annotation_path + "/duration.npy", allow_pickle=True) 57 | ambilist = annotation_path + "/Ambiguous_test.txt" 58 | 59 | try: 60 | ambilist = list(open(ambilist, "r")) 61 | ambilist = [a.strip("\n").split(" ") for a in ambilist] 62 | except: 63 | ambilist = [] 64 | 65 | subset_ind = (subset == gt_subset) 66 | gtsegments = gtsegments[subset_ind] 67 | gtlabels = gtlabels[subset_ind] 68 | videoname = videoname[subset_ind] 69 | duration = duration[subset_ind] 70 | 71 | idx_to_take = [i for i, s in enumerate(gtsegments) 72 | if len(s) > 0] 73 | 74 | gtsegments = gtsegments[idx_to_take] 75 | gtlabels = gtlabels[idx_to_take] 76 | videoname = videoname[idx_to_take] 77 | 78 | 79 | 80 | # which categories have temporal labels ? 81 | templabelcategories = sorted(list(set([l for gtl in gtlabels for l in gtl]))) 82 | 83 | # # the number index for those categories. 
84 | templabelidx = [] 85 | for t in templabelcategories: 86 | templabelidx.append(str2ind(t, classlist)) 87 | 88 | 89 | video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], [] 90 | 91 | for i in range(len(gtsegments)): 92 | for j in range(len(gtsegments[i])): 93 | video_lst.append(str(videoname[i])) 94 | t_start_lst.append(round(gtsegments[i][j][0] * 25 / 16)) 95 | t_end_lst.append(round(gtsegments[i][j][1] * 25 / 16)) 96 | label_lst.append(str2ind(gtlabels[i][j], classlist)) 97 | ground_truth = pd.DataFrame( 98 | { 99 | "video-id": video_lst, 100 | "t-start": t_start_lst, 101 | "t-end": t_end_lst, 102 | "label": label_lst, 103 | } 104 | ) 105 | activity_index = {i: templabelidx[i] for i in range(len(templabelidx))} 106 | 107 | # to store all these things into a single pkl file 108 | stored_content={'idx_to_take':idx_to_take,'videoname':videoname, 109 | 'ambilist':ambilist,'ground_truth':ground_truth,'activity_index':activity_index} 110 | # store in the target path 111 | np.save(output_annotation_path,stored_content) 112 | 113 | 114 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from collections import defaultdict 4 | 5 | import numpy as np 6 | import pandas as pd 7 | import torch 8 | from prettytable import PrettyTable 9 | from torch.autograd import Variable 10 | 11 | import model 12 | import options 13 | import proposal_methods as PM 14 | import wsad_dataset 15 | from eval.eval_detection import ANETdetection 16 | 17 | torch.set_default_dtype(torch.float32) 18 | 19 | 20 | def _get_predictions_with_label(prediction_by_label, cidx): 21 | """Get all predicitons of the given label. Return empty DataFrame if there 22 | is no predcitions with the given label. 
23 | """ 24 | try: 25 | return prediction_by_label.get_group(cidx).reset_index(drop=True) 26 | except: 27 | print("Warning: No predictions of label '%s' were provdied." % cidx) 28 | return pd.DataFrame() 29 | 30 | def get_video_detections(args, tmp): 31 | proposal_list = [] 32 | for i in range(tmp.shape[0]): 33 | tmp_proposal = {} 34 | tmp_proposal['label'] = args.classlist[int(tmp.loc[i]['label'])] 35 | tmp_proposal['score'] = float(tmp.loc[i]['score']) 36 | tmp_proposal['segment'] = [float(tmp.loc[i]['t-start'] / 1.5626), float(tmp.loc[i]['t-end'] / 1.5626)] 37 | tmp_proposal['uncertainty'] = float(tmp.loc[i]['uct']) 38 | tmp_proposal['actionness'] = float(tmp.loc[i]['act']) 39 | proposal_list.append(tmp_proposal) 40 | return proposal_list 41 | 42 | 43 | @torch.no_grad() 44 | def test(itr, dataset, args, model, device): 45 | model.eval() 46 | done = False 47 | if args.topk_test: 48 | topk_proposals_list = [[], [], [], [], [], []] 49 | else: 50 | topk_proposals_list = [[], ] 51 | results = defaultdict(dict) 52 | 53 | train_uct_list = [] 54 | train_ori_uct_list = [] 55 | while not done: 56 | features, labels, vn, done = dataset.load_data_for_threshold() 57 | seq_len = [features.shape[0]] 58 | if seq_len == 0: 59 | continue 60 | features = torch.from_numpy(features).float().to(device).unsqueeze(0) 61 | with torch.no_grad(): 62 | outputs = model(Variable(features), is_training=True, seq_len=seq_len, itr=itr, opt=args, labels=None) 63 | this_uct = outputs['uct'][0].cpu().item() 64 | this_ori_uct = outputs['ori_uct'][0].cpu().item() 65 | train_uct_list.append(this_uct) 66 | train_ori_uct_list.append(this_ori_uct) 67 | train_uct_list = np.sort(np.array(train_uct_list), axis=0) 68 | train_ori_uct_list = np.sort(np.array(train_ori_uct_list), axis=0) 69 | thres = train_uct_list[int(0.95 * len(train_uct_list))] 70 | print(f"We select {thres:.4f} as the uncertainty threshold.") 71 | 72 | mu = train_ori_uct_list[int(0.5 * len(train_ori_uct_list))] 73 | mu_path = './temp/' + 
args.group_name + '/' + args.model_name 74 | if not os.path.exists(mu_path): 75 | os.makedirs(mu_path) 76 | np.save(os.path.join(mu_path, 'mu.npy'), mu) 77 | print(f"We select {mu:.4f} as the mean of the gaussian function.") 78 | 79 | n_correct = 0 80 | n_test_vid = 0 81 | test_uct_list = [] 82 | done = False 83 | 84 | result_dict = {} 85 | while not done: 86 | n_test_vid += 1 87 | features, labels, vn, done = dataset.load_data(is_training=False) 88 | seq_len = [features.shape[0]] 89 | if seq_len == 0: 90 | continue 91 | features = torch.from_numpy(features).float().to(device).unsqueeze(0) 92 | with torch.no_grad(): 93 | outputs = model(Variable(features), is_training=False, seq_len=seq_len, itr=itr, opt=args, labels=None) 94 | results[vn] = {'cas': outputs['cas'], 'attn': outputs['attn']} 95 | video_uct = outputs['uct'][0].cpu().item() 96 | prediction_list = getattr(PM, args.proposal_method)(vn, outputs, labels, args, thres) 97 | 98 | if video_uct <= thres and labels[:args.n_known_class].sum() > 0: 99 | n_correct += 1 100 | elif video_uct > thres and labels[args.n_known_class:].sum() > 0: 101 | n_correct += 1 102 | else: 103 | n_correct += 0 104 | 105 | test_uct_list.append(video_uct) 106 | for idx, prediction in enumerate(prediction_list): 107 | topk_proposals_list[idx].append(prediction) 108 | 109 | if not os.path.exists('temp'): 110 | os.mkdir('temp') 111 | np.save('temp/{}.npy'.format(args.model_name), results) 112 | 113 | if 'Thumos14' in args.dataset_name: 114 | iou = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9] 115 | dmap_detect = ANETdetection(dataset.path_to_annotations, iou, args=args, verbose=True) 116 | else: 117 | iou = [0.5, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95] 118 | dmap_detect = ANETdetection(dataset.path_to_annotations, iou, args=args, subset='validation', verbose=True) 119 | 120 | # video-id, t-start, t-end, label, score 121 | table = PrettyTable(['k', 'split', 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, '0.1-0.5', '0.3-0.7', '0.1-0.7']) 
122 | 123 | log_all_mAP = [] 124 | for idx, topk_proposals in enumerate(topk_proposals_list): 125 | proposals = pd.concat(topk_proposals).reset_index(drop=True) 126 | 127 | dmap_detect.prediction = proposals 128 | known_mAP, unknown_mAP = dmap_detect.evaluate() 129 | 130 | known_mAP *= 100 131 | unknown_mAP *= 100 132 | all_mAP = known_mAP * 0.5 + unknown_mAP * 0.5 133 | mAP_list = [known_mAP, unknown_mAP, all_mAP] 134 | for j, split in enumerate(['known', 'unknown', 'all']): 135 | # if j == 2: 136 | table.add_row([args.n_pred_list[idx], split] + list(np.around(mAP_list[j][:7], decimals=2)) + 137 | list(np.around([mAP_list[j][:5].mean(), mAP_list[j][2:7].mean(), mAP_list[j][:7].mean()], decimals=2)) 138 | ) 139 | 140 | if args.n_pred_list[idx] == 'unlimit': 141 | log_known_mAP, log_unknown_mAP, log_all_mAP = mAP_list 142 | 143 | np.set_printoptions(precision=2, suppress=True) 144 | print(table) 145 | uct_rank_acc = n_correct / n_test_vid * 100 146 | print(f'Accuracy of binary classification: {uct_rank_acc:.4f}%') 147 | 148 | return iou, log_all_mAP, uct_rank_acc 149 | 150 | 151 | if __name__ == '__main__': 152 | args = options.parser.parse_args() 153 | device = torch.device("cuda") 154 | dataset = getattr(wsad_dataset, args.dataset)(args) 155 | 156 | model = getattr(model, args.use_model)(dataset.feature_size, dataset.num_class, opt=args).to(device) 157 | model.load_state_dict(torch.load(args.test_ckpt)) 158 | iou, dmap, _ = test(-1, dataset, args, model, device) 159 | -------------------------------------------------------------------------------- /libMR/weibull.h: -------------------------------------------------------------------------------- 1 | /*! \file 2 | * weibull.h provides the headers for the core functionality for the internal computing weibull fittings, as well as CDF and INF given parameters 3 | * this file is not intended for direc usage... 
4 | * 5 | * 6 | * Author Brian Heflin bheflin at securics com 7 | * Author Walter Scheirer walter at securics com 8 | * Author Terry Boult tboult at securics com 9 | * 10 | * Copyright 2010, 2011, Securics Inc. 11 | * 12 | * @section LICENSE 13 | * See accompanying LICENSE agreement for full details on rights. 14 | * 15 | * Parts of this technology are subject to SBIR data rights and as described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 80918 16 | * 17 | *The Government's rights to use, modify, reproduce, release, perform, display, or disclose technical data or computer software marked with this legend are restricted during the period shown as provided in paragraph (b)(4) of the Rights in Non-commercial Technical Data and Computer Software-Small Business Innovative Research (SBIR) Program clause contained in the above identified contract. Expiration of SBIR Data Rights: Expires four years after completion of the above cited project work for this or any other follow-on SBIR contract, whichever is later. 18 | * 19 | * No restrictions on government use apply after the expiration date shown above. Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings. 20 | * 21 | * @section Summary Description 22 | * This file contains the "C" interface functions for very basic Weibull usage for Meta-Recognition. The weibull_fit and weibull_cdf are the primary functions to use. 23 | * 24 | * The code herein has a number of STRONG assumptions you must follow as we cannot test for all of them which is why we don't recommend use it directly 25 | * 1) All fitting and testing are presuming "larger is better", If you are fitting something where smaller is better you need to transform it. 
26 | * 2) All data is positive (okay we can and do test for that, but better to know up front what you are doing) 27 | * 3) There must be sufficient range in your data to actually fit the Weibull. If all the data is the same, or nearly the same, it may fail to converge and will report errors. 28 | * 4) For efficient fitting, we must satisfy a regularity condition (see N. M. Kiefer, Maximum likelihood estimation (MLE), http://instruct1.cit.cornell.edu/courses/econ620/reviewm5.pdf, 2007), and to do that the lower bound in Weibull data/fitting cannot be too small so we recommend you translated the data to be well away from zero (part of why we only fit on upper side and, in the MetaRecognition class we translate more than just the min..) 29 | * 30 | * 31 | */ 32 | 33 | #pragma once 34 | #ifndef WEIBULL_H 35 | #define WEIBULL_H 36 | 37 | #ifdef _WIN32 38 | #ifdef __cplusplus 39 | extern "C" { 40 | #endif 41 | _declspec(dllexport) double weibull_inv(double x, double scale, double shape); 42 | _declspec(dllexport) double weibull_cdf(double x, double scale, double shape); 43 | _declspec(dllexport) int weibull_fit(double* weibull_parms, double* wparm_confidenceintervals, double* inputData, double alpha, int size); 44 | _declspec(dllexport) void printWeibullBuildInfo(FILE *fh); 45 | #ifdef __cplusplus 46 | } 47 | #endif 48 | #else 49 | #ifdef __cplusplus 50 | extern "C" { 51 | #endif 52 | 53 | /** if WEIBULL_USE_ASSERTS is defined, the code will use asserts to ensure its requirements are true, otherwise it returns error codes. Default is not defined */ 54 | /** if WEIBULL_IGNORE_ERRORS is defined, the code will just presume things will work out and not waste time on testing for error. Default is not defined */ 55 | 56 | 57 | /*#define WEIBULL_USE_ASSERTS //!< \def define this to force asserts rather than error codes. 
*/ 58 | /*#define WEIBULL_IGNORE_ERRORS //!< \def define this to skip printing/return code for errors */ 59 | 60 | 61 | /** weibull_cdf computes the probability (given our assumptions) that the value x is an outlier ABOVE the fit distribution. If the distribution was non-match data, then it provides the probability that x is a match score. If data was match-data then it would be the probability of it being a larger non-match. 62 | computes @f[ 1-e^{-{\left(\frac{x}{scale}\right)}^{shape}} @f] 63 | 64 | @param x the location at which to compute the probability of being an outlier 65 | @param scale the scale parameter of the Weibull. This is the first element in weibull_parms (as computed by our wlbfit) 66 | @param shape the shape parameter of the Weibull. This is the second element in weibull_parms (as computed by our wlbfit) 67 | @return if in the range [0-1] it is the probability of X being an outlier. Any value < 0 is an error code. returns -1 for invalid scale <=0 , -2 for invalid shape <=0 68 | * 69 | */ 70 | double weibull_cdf(double x, double scale, double shape); 71 | 72 | 73 | /** weibull_inv computes the inverse weibull, i.e. returns the score S (given our assumptions) such that x=wlbcdf(s,scale,shape). Note it estimates from above, so if x=1.0 expect an answer of Inf (infinity). 74 | 75 | @param x the location at which you compute the inverse (must be in [0,1]) 76 | @param scale the scale parameter of the Weibull. This is the first element in weibull_parms (as computed by our wlbfit) 77 | @param shape the shape parameter of the Weibull. This is the second element in weibull_parms (as computed by our wlbfit) 78 | @return if X in the range [0-1], return S such that x=wlbcdf(s,scale,shape). The return value is in the range [0,Inf]. Any return value < 0 is an error code.
returns -1 for invalid scale <=0 , -2 for invalid shape <=0, -3 for X<0, -4 for x >1 79 | * 80 | */ 81 | double weibull_inv(double x, double scale, double shape); 82 | 83 | /** 84 | weibull_fit does a maximum likelihood fitting to estimate the shape and scale parameters of a Weibull probability distribution @f[ \frac{shape}{scale} {\left(\frac{x}{scale}\right)}^{shape-1} \cdot e^{-{\left(\frac{x}{scale}\right)}^{shape}} @f] 85 | 86 | @param weibull_parms is an array of 2 doubles, which must be preallocated. On successful completion it will have scale and shape respectively. 87 | @param wparm_confidenceintervals is an array of 4 doubles, which must be preallocated. On successful completion it will have confidence interval for shape in the first two items and the CI for scale in the second two items 88 | @param inputData is a pointer to the data to use for fitting the distribution. It must have at least size elements 89 | @param size is the size of the data to be used for fitting. 90 | @param alpha is the parameter for confidence interval size estimation. 91 | @return return should be 1 if all went well. Values < 0 imply errors in fitting or data. -1 means some data was negative, -2 means bad data range (e.g. all the same) -3 or lower means MLE did not converge. 92 | 93 | */ 94 | int weibull_fit(double* weibullparms, double* wparm_confidenceintervals, double* inputData, double alpha, int size); 95 | 96 | 97 | /** 98 | Print information about this build to a file descriptor.
Used for checking what is loaded for supporting people 99 | */ 100 | void printWeibullBuildInfo(FILE *fh); 101 | #ifdef __cplusplus 102 | } 103 | #endif 104 | 105 | #endif 106 | #endif 107 | -------------------------------------------------------------------------------- /libMR/MetaRecognition.h: -------------------------------------------------------------------------------- 1 | /** 2 | * MetaRecognition.h: 3 | 4 | * @Author Terry Boult tboult at securics com 5 | * @Author Vijay Iyer viyer at securics com 6 | 7 | * 8 | * Copyright 2010, 2011, Securics Inc. 9 | 10 | * Copyright 2011, Securics Inc. 11 | See accompanying LICENSE agrement for details on rights. 12 | 13 | Parts of this technology are subject to SBIR data rights and as described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 80918 14 | 15 | The Government's rights to use, modify, reproduce, release, perform, display, or disclose technical data or computer software marked with this legend are restricted during the period shown as provided in paragraph (b)(4) of the Rights in Noncommercial Technical Data and Computer Software-Small Business Innovative Research (SBIR) Program clause contained in the above identified contract. Expiration of SBIR Data Rights: Expires four years after completion of the above cited project work for this or any other follow-on SBIR contract, whichever is later. 16 | 17 | No restrictions on government use apply after the expiration date shown above. Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings. 
18 | * 19 | */ 20 | 21 | #pragma once 22 | #ifndef MetaRecognition_H 23 | #define MetaRecognition_H 24 | 25 | 26 | #ifdef HAVE_CONFIG_H 27 | # include "config.h" 28 | #endif 29 | 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | #include 36 | 37 | 38 | #include "weibull.h" 39 | 40 | #ifdef _WIN32 41 | #define DLLEXPORT _declspec(dllexport) 42 | #else 43 | #define DLLEXPORT 44 | #endif 45 | 46 | #define MAX_LINE 256 47 | 48 | /// structure for svm data used by libSVM, used to allow easy MetaRecognition for SVM results (used as an argument for MetaRecognition::FitSVM) 49 | struct svm_node_libsvm 50 | { 51 | int index; //!< class label; classic is -1 for negative class and +1 for positive class, but it can be general for multi-class 52 | double value;//!< the SVM decision score 53 | }; 54 | 55 | /**! 56 | Class MetaRecognition provides an object-based interface for Meta-Recognition. The object can be ... 57 | 58 | TBD 59 | 60 | 61 | */ 62 | class DLLEXPORT MetaRecognition //! Primary object/methods for transforming and computing needed for any Meta recognition task 63 | { 64 | public: 65 | 66 | /** Ctor, can call with no arguments (uses default arguments for construction). 67 | All space is on the stack. 68 | Object will exist but is not valid until some fitting function is called 69 | */ 70 | 71 | MetaRecognition( int scores_to_drop=0, //!< is this object for prediction, if so how many top scores to drop when fitting 72 | int fitting_size=9, //!< tail size for fitting. With small data the defaults are fine.. if you have millions make it larger for better predictions 73 | bool verbose = false, //!< is the code chatty on errors during fitting, useful for debugging 74 | double alpha=5.0, //!< band for confidence intervals 75 | int translate_amount=10000 //!< shifting data to ensure all is positive.. if data is very broad and you want some probabilities for all points you can make it larger..
76 | ); 77 | 78 | ~MetaRecognition(); 79 | 80 | bool is_valid(); //!< is this object valid..i.e. has data been properly fit to determine parameters. 81 | void set_translate(double t); //!< Change translate_amount to t, invalidates object 82 | 83 | void Reset(); //!< reset to "invalid" state 84 | 85 | bool Predict_Match(double x, double threshold = .9999999); //!< Is X from the "match" distribution (i.e. we reject null hypothesis of non-match), 86 | double W_score(double x); //!< This is the commonly used function.. after fitting, it returns the probability of the given score being "correct". It is the same as CDF 87 | double CDF(double x); //!< This is the cumulative probability of the match being correct (or more precisely, the probability of the score (after transform) being an outlier for the distribution; given the transforms applied, so that bigger is better, this is the probability the score is correct). 88 | double Inv(double p); //!< This is the score for which one would obtain CDF probability p (i.e. x such that p = CDF(x)) 89 | 90 | int ReNormalize(double *invec, double *outvec, int length); //!< W-score Renormalize the vector invec[0:length-1] into outvec (in and out can be same) return is 1 for success, <0 for error code 91 | 92 | 93 | /// Use FitHigh if your data is such that larger is better. The code will still transform, and keep parameters to keep small data away from zero. 94 | // If you get complaints about scores being negative, make an MR object with a different (larger) translate amount 95 | /// returns 1 for success, <0 for error code 96 | int FitHigh(double* inputData, int inputDataSize, int fit_size=-1); 97 | 98 | ///Use FitLow if your data is such that smaller scores are better.. we'll transform it for you and keep the 99 | ///transform parameters in the class so later calls to W_score or CDF do the right thing.
100 | /// returns 1 for success, <0 for error code 101 | int FitLow(double* inputData, int inputDataSize, int fit_size=-1);// 102 | 103 | /// the types of fitting supported for SVM modeling 104 | typedef enum {complement_reject=1, positive_reject=2, complement_model=3, positive_model=4} MR_fitting_type; 105 | 106 | /// The function to use if you have SVM data, it separates out the data for the label of interest (or rejecting 107 | /// the complement of that label, which is the default) and uses that for fitting. 108 | /// Returns 1 if it worked, <0 for error codes. 109 | int FitSVM(svm_node_libsvm* SVMdata, int inputDataSize, int label_of_interest =1, bool label_has_positive_score=true, int fit_type = 1, int fit_size=9 ); 110 | 111 | 112 | friend std::ostream& operator<<( std::ostream&, const MetaRecognition& ); //!< various I/O functions 113 | friend std::istream& operator>>( std::istream&, MetaRecognition& ); //!< various I/O functions 114 | 115 | void Save(std::ostream &outputStream) const; //!< various I/O functions 116 | void Load(std::istream &inputStream); //!< various I/O functions 117 | void Save(FILE *outputFile) const; //!< various I/O functions 118 | void Load(FILE *inputFile); //!< various I/O functions 119 | void Save(char* filename) const; //!< various I/O functions 120 | void Load(char* filename); //!< various I/O functions 121 | int get_fitting_size(); //!< Get the fitting size (aka tail size) 122 | int set_fitting_size(int nsize); //!< reset object and define new fitting size 123 | int get_translate_amount(); //!< Get the internal translation amount (you probably don't need this, but just in case) 124 | int set_translate_amount(int ntrans); //!< reset object and define new translate amount.. if you get errors because of negative data, increase this 125 | int get_sign(); //!< Get the internal sign variable.
(you probably don't need this, but just in case) 126 | int set_sign(int nsign); //!< reset object and set sign (you probably don't need this, but just in case) 127 | double get_small_score(); //!< Get the internal smaller translation amount (you probably don't need this, but just in case) 128 | double set_small_score(double nscore); //!< reset object and reset internal smaller translation amount (you probably don't need this, but just in case) 129 | bool verbose; //!< do we print internal/debugging stuff. Default is false. (you probably don't need this, but just in case) 130 | std::string to_string(); //!< Convert this object to a C++ string 131 | void from_string(std::string in); //!< Convert this object from a C++ string 132 | 133 | protected: 134 | int EvtGeneric(double* inputData, int inputDataSize, int fit_inward=0, double x=0); 135 | double parmhat[2]; //!< parameters of the Weibull, scale then shape 136 | double parmci[4]; //!< confidence interval for parms scale high, scale low, shape high, shape low 137 | double alpha; //!< parameter for estimation of size of confidence interval 138 | int sign; //!< sign is positive if larger is better; negative means originally smaller was better (we transformed for fitting). 139 | MR_fitting_type ftype; //!< type of fitting used for SVM.. default is reject complement 140 | int fitting_size; //!< tail size for fitting in any of the FitXX functions 141 | int translate_amount; //!< we transform data so all fitting data is positive and bigger is better, this predefined constant helps ensure more of the end-user data is non-negative. 142 | double small_score; //!< the smallest score, so all fitting data is consistently positive. Part of our transform 143 | int scores_to_drop; //!< when fitting for recognition prediction, how many top scores are hypothesized to be a match, so we can fit on non-match data. Only used for fitting, no impact on transform. 144 | bool isvalid; //!< are the parameters in the object valid.
private: 145 | 146 | }; 147 | 148 | #endif 149 | -------------------------------------------------------------------------------- /proposal_methods.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | from scipy.signal import savgol_filter 5 | 6 | import options 7 | import utils.wsad_utils as utils 8 | import math 9 | from edl_loss import exp_evidence 10 | 11 | args = options.parser.parse_args() 12 | 13 | def filter_segments(segment_predict, vn): 14 | ambilist = args.path_dataset + '/Thumos14reduced-Annotations/Ambiguous_test.txt' 15 | try: 16 | ambilist = list(open(ambilist, "r")) 17 | ambilist = [a.strip("\n").split(" ") for a in ambilist] 18 | except: 19 | ambilist = [] 20 | ind = np.zeros(np.shape(segment_predict)[0]) 21 | for i in range(np.shape(segment_predict)[0]): 22 | for a in ambilist: 23 | if a[0] == vn: 24 | gt = range( 25 | int(round(float(a[2]) * 25 / 16)), int(round(float(a[3]) * 25 / 16)) 26 | ) 27 | pd = range(int(segment_predict[i][0]), int(segment_predict[i][1])) 28 | IoU = float(len(set(gt).intersection(set(pd)))) / float( 29 | len(set(gt).union(set(pd))) 30 | ) 31 | if IoU > 0: 32 | ind[i] = 1 33 | s = [ 34 | segment_predict[i, :] 35 | for i in range(np.shape(segment_predict)[0]) 36 | if ind[i] == 0 37 | ] 38 | return np.array(s) 39 | 40 | 41 | def smooth(v, order=2, lens=200): 42 | l = min(lens, len(v)) 43 | l = l - (1 - l % 2) 44 | if len(v) <= order: 45 | return v 46 | return savgol_filter(v, l, order) 47 | 48 | 49 | def get_topk_mean(x, k, axis=0): 50 | return np.mean(np.sort(x, axis=axis)[-int(k):, :], axis=0) 51 | 52 | 53 | def get_cls_score(element_cls, rat=20): 54 | topk_val, _ = torch.topk(element_cls, k=max(1, int(element_cls.shape[-2] // rat)), dim=-2) 55 | instance_logits = torch.mean(topk_val, dim=-2) 56 | pred_vid_score = torch.softmax(instance_logits, dim=-1)[..., :-1].squeeze().data.cpu().numpy() 57 | return pred_vid_score 58 | 59 
def __vector_minmax_norm(vector, min_val=None, max_val=None):
    """Min-max normalise ``vector``.

    Args:
        vector: Values to normalise.
        min_val: Lower bound of the range. If either bound is ``None`` both
            are recomputed from ``vector``.
        max_val: Upper bound of the range.

    Returns:
        ``(vector - min_val) / (max_val - min_val)``. A non-positive range is
        replaced by 1 so constant input yields zeros instead of NaN/inf.
    """
    if min_val is None or max_val is None:
        max_val = np.max(vector)
        min_val = np.min(vector)

    delta = max_val - min_val
    # Guard against division by zero. ``delta`` is a fresh object produced by
    # the subtraction above, so the in-place edit never touches caller data.
    if np.ndim(delta) == 0:
        if delta <= 0:
            delta = 1
    else:
        delta[delta <= 0] = 1

    return (vector - min_val) / delta


def _multiply(x, atn, dim=-1, include_min=False):
    """Scale ``x`` by the attention ``atn``.

    With ``include_min`` the minimum of ``x`` along ``dim`` is subtracted
    before scaling and added back afterwards; otherwise this is ``atn * x``.
    ``x`` must support the torch-style ``min(dim=..., keepdim=...)`` call.
    """
    if include_min:
        _min = x.min(dim=dim, keepdim=True)[0]
    else:
        _min = 0
    return atn * (x - _min) + _min


def sigmoid(x, thres_uct_list, max_score_class):
    """Apply a steep logistic (slope 16, centred at 0.45) to a shifted ``x``.

    ``x`` is first shifted by the difference between the mean of
    ``thres_uct_list`` and the entry selected by ``max_score_class``.
    """
    x = x - thres_uct_list[max_score_class] + thres_uct_list.mean()
    return 1 / (1 + torch.exp(-16 * (x - 0.45)))


def _get_predictions_with_label(prediction_by_label, cidx):
    """Get all predictions of the given label.

    Returns an empty DataFrame (same columns as the grouped frame when they
    can be recovered) if there are no predictions with the given label.
    """
    try:
        group = prediction_by_label.get_group(cidx)
    except KeyError:
        # ``get_group`` raises for labels that have no rows.
        source = getattr(prediction_by_label, "obj", None)
        if source is None:
            return pd.DataFrame()
        return source.iloc[0:0].reset_index(drop=True)
    return group.reset_index(drop=True)


def _select_topk_predictions(prediction, pred, n_pred, n_known_class,
                             known_flag, unknown_flag):
    """Keep the highest-scoring proposals of every predicted class.

    Args:
        prediction: DataFrame with at least ``label`` and ``score`` columns.
        pred: 1-D array of predicted class indices; ``n_known_class`` denotes
            the unknown class.
        n_pred: Proposal budget for this evaluation setting.
        n_known_class: Number of known classes.
        known_flag: Whether any known class was predicted.
        unknown_flag: Whether the unknown class was predicted.

    Returns:
        DataFrame holding, per class, its top proposals sorted by score.

    Raises:
        ValueError: If ``pred`` contains an index above ``n_known_class``.
        RuntimeError: If neither flag is set.
    """
    if known_flag and not unknown_flag:  # only known classes
        n_known_pred = int(n_pred / pred.shape[0])
        n_unknown_pred = 0
    elif unknown_flag and not known_flag:  # only the unknown class
        n_known_pred = 0
        n_unknown_pred = n_pred
    elif known_flag and unknown_flag:  # both present
        n_known_pred = int(n_pred * 0.5 / pred.shape[0])
        n_unknown_pred = n_pred - n_known_pred
    else:
        raise RuntimeError("neither a known nor an unknown class was predicted")

    prediction_by_label = prediction.groupby("label")
    selected = []
    for cidx in pred:
        if cidx < n_known_class:
            k = n_known_pred
        elif cidx == n_known_class:
            k = n_unknown_pred
        else:
            raise ValueError

        one_class_prediction = _get_predictions_with_label(prediction_by_label, cidx)
        if one_class_prediction.empty:
            # A predicted class may have no surviving proposal.
            continue
        sort_idx = one_class_prediction["score"].values.argsort()[::-1]  # high to low
        one_class_prediction = one_class_prediction.loc[sort_idx].reset_index(drop=True)
        one_class_topk = one_class_prediction[:k]
        if not one_class_topk.empty:
            selected.append(one_class_topk)

    if not selected:
        return prediction.iloc[0:0].reset_index(drop=True)
    return pd.concat(selected).reset_index(drop=True)


@torch.no_grad()
def multiple_threshold_hamnet(vid_name, data_dict, labels, args, thres):
    """Turn model outputs for one video into temporal action proposals.

    Args:
        vid_name: Video identifier as ``bytes`` (it is ``decode()``-d).
        data_dict: Model outputs; reads ``'cas'``, ``'attn'`` and ``'uct'``.
            Only batch element 0 is used and ``cas.shape[1]`` is taken as the
            number of segments -- presumably (batch, segments, classes);
            confirm against the model.
        labels: Unused; kept so existing callers keep working.
        args: Options; reads ``n_known_class``, ``gamma_oic`` and
            ``topk_test``. ``args.n_pred_list`` is overwritten as a side effect.
        thres: Video-level uncertainty threshold above which the video is
            assumed to contain an unknown class.

    Returns:
        A list of DataFrames with columns ``video-id``, ``t-start``,
        ``t-end``, ``label`` and ``score``: one frame when
        ``args.topk_test`` is false, otherwise one per entry of
        ``args.n_pred_list`` (the last one being the unrestricted set).
    """
    cas = data_dict['cas']
    atn = data_dict['attn']
    video_uct = data_dict['uct'][0].cpu().item()

    element_logits = cas * atn

    pred_vid_score = get_cls_score(element_logits, rat=10)
    # The video-level uncertainty acts as the score of the unknown class.
    pred_vid_score = np.concatenate((pred_vid_score, np.array([video_uct])))
    cas_supp = element_logits[..., :-1]

    known_flag = True
    if video_uct <= thres:  # uncertainty below threshold: known classes only
        unknown_flag = False
        pred = np.where(pred_vid_score[:-1] >= 0.2)[0]
        if len(pred) == 0:
            pred = np.array([np.argmax(pred_vid_score[:-1])])
    else:  # uncertainty above threshold: an unknown class is present
        unknown_flag = True
        pred = np.where(pred_vid_score[:-1] >= 0.5)[0]
        if len(pred) == 0:
            known_flag = False
        pred = np.concatenate([pred, np.array([args.n_known_class])])

    num_segments = cas.shape[1]

    cas_pred_atn = atn[0].cpu().numpy()[:, [0]]
    cas_pred_atn = np.reshape(cas_pred_atn, (num_segments, -1, 1))
    if known_flag and not unknown_flag:
        cas_pred = cas_supp[0].cpu().numpy()[:, pred]
        cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
    elif unknown_flag and not known_flag:
        cas_pred = cas_pred_atn
    elif known_flag and unknown_flag:
        # The last entry of ``pred`` is the unknown class, scored by attention.
        cas_pred = cas_supp[0].cpu().numpy()[:, pred[:-1]]
        cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
        cas_pred = np.hstack((cas_pred, cas_pred_atn))
    else:
        raise RuntimeError("neither a known nor an unknown class was predicted")

    # NOTE: threshold
    act_thresh = np.linspace(0.1, 0.9, 10)

    proposal_dict = {}
    for threshold in act_thresh:
        cas_temp = cas_pred.copy()
        # Every class shares the same attention-based candidate segments.
        above = cas_pred_atn[:, 0, 0] > threshold
        seg_list = [np.where(above) for _ in range(len(pred))]

        proposals = utils.get_proposal_oic_2(seg_list,
                                             cas_temp,
                                             pred_vid_score,
                                             pred,
                                             gamma=args.gamma_oic)

        for class_proposals in proposals:
            if len(class_proposals) == 0:
                continue
            class_id = class_proposals[0][0]
            proposal_dict.setdefault(class_id, []).extend(class_proposals)

    final_proposals = [
        utils.soft_nms(class_proposals, 0.7, sigma=0.3)
        for class_proposals in proposal_dict.values()
    ]

    # Proposals arrive as [c_pred, c_score, t_start, t_end].
    segment_predict = [
        [t_start, t_end, c_score, c_pred]
        for kept in final_proposals
        for (c_pred, c_score, t_start, t_end) in kept
    ]

    video_name = vid_name.decode()
    segment_predict = np.array(segment_predict)
    segment_predict = filter_segments(segment_predict, video_name)

    rows = range(np.shape(segment_predict)[0])
    prediction = pd.DataFrame(
        {
            "video-id": [video_name for _ in rows],
            "t-start": [segment_predict[i, 0] for i in rows],
            "t-end": [segment_predict[i, 1] for i in rows],
            "label": [segment_predict[i, 3] for i in rows],
            "score": [segment_predict[i, 2] for i in rows],
        }
    )

    if not args.topk_test:
        args.n_pred_list = ['unlimit']
        return [prediction]

    args.n_pred_list = [5, 10, 20, 50, 100, 'unlimit']
    if prediction.empty:
        return [prediction] * len(args.n_pred_list)

    topk_prediction_list = [
        _select_topk_predictions(prediction, pred, n_pred, args.n_known_class,
                                 known_flag, unknown_flag)
        for n_pred in args.n_pred_list[:-1]
    ]
    topk_prediction_list.append(prediction)

    return topk_prediction_list
11 | 12 | This license agreement also allows you to create derivative products for your own use, but does not permit re-distribute 13 | of modified code in any form. You may choose to destribute patch files, which can be applied to officially distributed code. Any the derivative products, must be distributed under the same conditions as specified in this agreement unless a separate commercial license is obtained from Securics Inc or its designates. 14 | 15 | As a condition of using this source code, you agree not to assert any patents or copyrights against the owners or any of the Owners’ licensees for use of derivative products. Any derivative products must include a copy of license and instructions for accessing the orignal source. You must also include attribution to the authors in any publication that results from the use of this code or data derived from the code. Any papers/research/report based on results that uses this software must cite: 16 | 17 | @article{Scheirer_2011_TPAMI, 18 | author = {Walter J. Scheirer and Anderson Rocha and Ross Michaels and Terrance E. 
Boult}, 19 | title = {Meta-Recognition: The Theory and Practice of Recognition Score Analysis}, 20 | journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)}, 21 | volume = {33}, 22 | issue = {8}, 23 | pages = {1689–1695}, 24 | year = {2011} 25 | } 26 | Parts of this technology are subject to SBIR data rights and as described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 80918 27 | 28 | The Government’s rights to use, modify, reproduce, release, perform, display, or disclose technical data or computer software marked with this legend are restricted during the period shown as provided in paragraph (b)(4) of the Rights in Noncommercial Technical Data and Computer Software-Small Business Innovative Research (SBIR) Program clause contained in the above identified contract. Expiration of SBIR Data Rights: Expires four years after completion of the above cited project work for this or any other follow-on SBIR contract, whichever is later. 29 | 30 | No restrictions on government use apply after the expiration date shown above. Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings. 31 | 32 | This license includes other conditions that should be read carefully. This SOFWARE usage agreement (the “Agreement”) applies to the libMR and is between YOU and the Licensor 33 | 34 | 1. DEFINITIONS 35 | 36 | “Software” means all or any portion of the human-readable source code files of the software programs including without limitation, associated flow charts, algorithms, comments and other written instructions and technical documentation, and all corrections, updates, and new versions incorporated into such programs. 
37 | 38 | “Derivative Work” means a work based upon the Software, such as a revision, modification, translation, abridgement, condensation, expansion, collection, compilation, or any other form in which the Software may be recast, transformed, adapted, or distributed as a part of a larger work and which, if prepared without proper authorization would constitute a copyright infringement. If identifiable sections of that work are not derived from the Software, and can be reasonably considered independent and separate works in themselves, then they are not considered Derivative Work. 39 | 40 | “Personal Use” means use of Software and/or Derivative Work by an individual solely for his or her personal, private and non-commercial use. An individual’s use in his or her capacity as an officer, employee, member, independent contractor or agent of a corporation, business or organization does not qualify as Personal Use. 41 | 42 | “You” or “Your” means an individual or a legal entity exercising rights under this License. For legal entities, “You” or “Your” includes any non-profit entity which controls, is controlled by, or is under common control with, You, where “control” means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of fifty percent (50%) or more of the beneficial ownership of such entity. 43 | 44 | 2. 
GRANT OF LICENSE: 45 | 46 | WHEREAS, the Licensor, desires to aid the academic and non-commercial research community and raise awareness of the PATENTED INVENTION and thereby agrees to grant a limited copyright license to the SOFTWARE for research and non-commercial purposes only, with the Owners retaining all ownership rights in the PATENTED INVENTION and the SOFTWARE; 47 | 48 | THEREFORE: 49 | The Licensor grants, and You accept, a personal, nonexclusive, nontransferable license: 50 | 51 | a) to use Software, at no charge, in accordance with the terms herein, solely for (i) Personal Use, or (ii) academic or non-commercial research, development and deployment; and 52 | 53 | b) to develop Derivative Works that may be used solely for (i) Personal Use or (ii) academic or non-commercial research, development and deployment; and 54 | 55 | c) to copy, distribute and sublicense Software and Derivative Works solely in accordance with the terms herein. Any Software or Derivative Works distributed shall be pursuant to a license agreement that contains all of the terms herein; and shall contain prominent notices stating how the Software, Derivative Works, or documentation was changed, the author and date of any such change and require acknowledgement of the orginal software/publicaitons by any users of the Derivative Works. 56 | 57 | d) You acknowledge that the Software is a valuable, proprietary asset of The Owners. You shall not market or sell the Software or Derivative Works. 58 | 59 | 3. LICENSE EXCLUSIONS 60 | 61 | a) EXCEPT AS EXPRESSLY PROVIDED HEREIN, YOU SHALL MAKE NO OTHER USE OF THE SOFTWARE. 62 | 63 | b) You must obtain permission from The Licensor before receiving payment for distribution of or services using the Software or Derivative Works. 
64 | 65 | c) You shall not allege or enjoin infringement or misappropriation by The Licensor in any Derivative Works, or by any third party obtaining Derivative Works, prepared by The Licensor and under license from The Licensor. 66 | 4. TITLE AND PROTECTION OF SOFTWARE 67 | 68 | a) The Owners retains all title, right and interest to the Software and the underlying patents. 69 | 70 | b) Except for the Software, You retain all title, right and interest to the Derivative Works, subject to the terms of this Agreement. 71 | 72 | 5. NO REPRESENTATIONS 73 | 74 | THE OWNERS DISCLAIMS ALL OTHER REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 75 | 6. ATTRIBUTION 76 | 77 | a) You agree to retain and reproduce in all copies of Software the copyright and other proprietary notices and disclaimers as they appear in the Software, and keep intact all notices in the Software that refer to this License. 78 | 79 | b) You agree to provide attribution to the authors of this Software in any article based on research performed using Software or Derivative Works or with any distribution of Software or Derivative works. 80 | 81 | 7. DEFAULT 82 | 83 | If YOU fail to perform any of its obligations under this Agreement, The Licensor, in addition to any other rights available to it under law or equity, may terminate this Agreement and the licenses granted hereunder by written notice to You. Unless otherwise provided in this Agreement, remedies shall be cumulative and there shall be no obligation to exercise a particular remedy. 84 | 85 | 8. TERMINATION 86 | 87 | a) In the event that this Agreement is terminated, any sublicenses granted or Derivative Works distributed by Licensee shall remain in full force and effect. 
88 | 89 | b) Within thirty (30) days of termination, You shall return to The Licensor or certify in writing to The Licensor that all copies or partial copies of Software in Your possession or control have been destroyed. c) In addition to this section, the sections entitled “Title and Protection of Software “No Representations” and “Limitation of Liability” shall survive termination of this Agreement. 90 | 91 | 9. GENERAL 92 | a) No agency, partnership or employment is created by this Agreement. 93 | 94 | b) You may not use any of The Owners’ names, the terms in Meta-Recognition, or W-score in any advertising, public relations or media release without the prior written consent of the Owner. 95 | 96 | c) This Agreement shall be governed by the laws of the State of Colorado. Venue for any action or proceeding shall be Denver, Colorado. This Agreement constitutes the entire agreement between the parties and may only be modified by a written instrument signed by each parties authorized officers. 97 | 98 | If you accept this license please opt-in for and you will receive email with instructions. The email will also be used for update emails on future changes to the code. 
import torch
import torch.nn.functional as F

from base import BaseWeightedLoss


def relu_evidence(y):
    """Map raw outputs to non-negative evidence with ReLU."""
    return F.relu(y)


def exp_evidence(y):
    """Map raw outputs to positive evidence with ``exp`` (input clamped to [-10, 10])."""
    return torch.exp(torch.clamp(y, -10, 10))


def softplus_evidence(y):
    """Map raw outputs to positive evidence with softplus."""
    return F.softplus(y)


class EvidenceLoss(BaseWeightedLoss):
    """Evidential deep learning loss.

    The model output is converted to evidence, ``alpha = evidence +
    redl_lamb`` parameterises a Dirichlet distribution, and one of several
    objectives (``'mse'``, ``'log'``, ``'digamma'``, ``'cross_entropy'``) is
    evaluated on it, optionally with an annealed KL regulariser.
    """

    def __init__(self, num_classes,
                 evidence='relu',
                 loss_type='log',
                 with_kldiv=False,
                 with_annealing=False,
                 disentangle=False,
                 annealing_method='step',
                 annealing_start=0.01,
                 annealing_step=10,
                 redl_lamb=1.0):
        super().__init__()
        self.num_classes = num_classes
        self.evidence = evidence
        self.loss_type = loss_type
        self.with_kldiv = with_kldiv
        self.with_annealing = with_annealing
        self.disentangle = disentangle
        self.annealing_method = annealing_method
        self.annealing_start = annealing_start
        self.annealing_step = annealing_step
        self.eps = 1e-10
        self.redl_lamb = redl_lamb

    def kl_divergence(self, alpha):
        """Return KL(Dir(alpha) || Dir(1)) per sample, shape (batch, 1)."""
        beta = torch.ones([1, self.num_classes], dtype=torch.float32).to(alpha.device)
        S_alpha = torch.sum(alpha, dim=1, keepdim=True)
        S_beta = torch.sum(beta, dim=1, keepdim=True)
        lnB = torch.lgamma(S_alpha) - \
            torch.sum(torch.lgamma(alpha), dim=1, keepdim=True)
        lnB_uni = torch.sum(torch.lgamma(beta), dim=1,
                            keepdim=True) - torch.lgamma(S_beta)

        dg0 = torch.digamma(S_alpha)
        dg1 = torch.digamma(alpha)

        kl = torch.sum((alpha - beta) * (dg1 - dg0), dim=1,
                       keepdim=True) + lnB + lnB_uni
        return kl

    def _annealed_kl(self, y, alpha, annealing_coef):
        """Return the KL regulariser on the evidence of non-target classes.

        ``annealing_coef`` is ``None`` when annealing is disabled; the term is
        then left unscaled, i.e. weighted by the value (1.0) that both
        annealing schedules converge to.
        """
        # Target-class evidence is removed so only misleading evidence is penalised.
        kl_alpha = (alpha - 1) * (1 - y) + 1
        kl = self.kl_divergence(kl_alpha)
        if annealing_coef is None:
            return kl
        return annealing_coef * kl

    def loglikelihood_loss(self, y, alpha):
        """Return the squared-error and variance terms of the expected MSE."""
        S = torch.sum(alpha, dim=1, keepdim=True)
        loglikelihood_err = torch.sum(
            (y - (alpha / S)) ** 2, dim=1, keepdim=True)
        loglikelihood_var = torch.sum(
            alpha * (S - alpha) / (S * S * (S + 1)), dim=1, keepdim=True)
        return loglikelihood_err, loglikelihood_var

    def mse_loss(self, y, alpha, annealing_coef):
        """Used only for loss_type == 'mse'
        y: the (multi-)hot labels (batchsize, num_classes)
        alpha: the predictions (batchsize, num_classes)
        annealing_coef: weight of the KL term, or None without annealing
        """
        losses = {}
        loglikelihood_err, loglikelihood_var = self.loglikelihood_loss(y, alpha)
        losses.update({'loss_cls': loglikelihood_err, 'loss_var': loglikelihood_var})

        losses.update({'lambda': annealing_coef})
        if self.with_kldiv:
            losses.update({'loss_kl': self._annealed_kl(y, alpha, annealing_coef)})
        return losses

    def ce_loss(self, target, y, alpha, annealing_coef):
        """Used only for loss_type == 'cross_entropy'
        target: the scalar labels (batchsize,)
        y: the label vectors (batchsize, num_classes)
        alpha: the predictions (batchsize, num_classes), alpha >= 1
        annealing_coef: weight of the KL term, or None without annealing
        """
        losses = {}
        # (1) the classification loss term
        S = torch.sum(alpha, dim=1, keepdim=True)
        pred_score = alpha / S
        loss_cls = F.nll_loss(torch.log(pred_score), target, reduction='none')
        losses.update({'loss_cls': loss_cls})

        # (2) the likelihood variance term
        loglikelihood_var = torch.sum(
            alpha * (S - alpha) / (S * S * (S + 1)), dim=1, keepdim=True)
        losses.update({'loss_var': loglikelihood_var})

        # (3) the KL divergence term
        kl_div = self._annealed_kl(y, alpha, annealing_coef)
        losses.update({'loss_kl': kl_div, 'lambda': annealing_coef})
        return losses

    def edl_loss(self, func, y, alpha, annealing_coef, target):
        """Used for both loss_type == 'log' and loss_type == 'digamma'
        func: function handler (torch.log, or torch.digamma)
        y: the label vectors (batch_size, num_classes), or None for no
            classification term
        alpha: the predictions (batch_size, num_classes)
        annealing_coef: weight of the KL term, or None without annealing
        target: unused; kept for interface compatibility
        """
        # Traditional EDL objective.
        losses = {}
        if y is None:
            # Placeholder on alpha's device so it can be combined with other losses.
            A = alpha.new_zeros(1)
        else:
            S = torch.sum(alpha, dim=1, keepdim=True)
            A = torch.sum(y * (func(S) - func(alpha)), dim=1, keepdim=True)

        losses.update({'loss_cls': A})

        if self.with_annealing:
            losses.update({'lambda': annealing_coef})

        # The KL term is defined relative to the labels; skip it without them.
        if self.with_kldiv and y is not None:
            losses.update({'loss_kl': self._annealed_kl(y, alpha, annealing_coef)})

        return losses

    def compute_annealing_coef(self, **kwargs):
        """Return the KL weight for ``kwargs['epoch']`` of ``kwargs['total_epoch']``.

        ``'step'`` grows linearly to 1 over ``annealing_step`` epochs;
        ``'exp'`` grows exponentially from ``annealing_start`` to 1.
        """
        assert 'epoch' in kwargs, "epoch number is missing!"
        assert 'total_epoch' in kwargs, "total epoch number is missing!"
        epoch_num, total_epoch = kwargs['epoch'], kwargs['total_epoch']
        # annealing coefficient
        if self.annealing_method == 'step':
            annealing_coef = torch.min(torch.tensor(
                1.0, dtype=torch.float32), torch.tensor(epoch_num / self.annealing_step, dtype=torch.float32))
        elif self.annealing_method == 'exp':
            annealing_start = torch.tensor(self.annealing_start, dtype=torch.float32)
            annealing_coef = annealing_start * torch.exp(-torch.log(annealing_start) / total_epoch * epoch_num)
        else:
            raise NotImplementedError
        return annealing_coef

    def _forward(self, output, target, output_is_evidence=False, **kwargs):
        """Forward function.
        Args:
            output (torch.Tensor): The class score (before softmax), or the
                evidence itself when ``output_is_evidence`` is true.
            target (torch.Tensor): The ground truth label vector.
            output_is_evidence (bool): Skip the evidence activation.
            **kwargs: ``epoch`` and ``total_epoch``; required when annealing
                is enabled.
        Returns:
            dict: The loss terms plus ``'uncertainty'`` and ``'evidence'``.
        """
        if output_is_evidence:
            evidence = output
        else:
            # get evidence
            if self.evidence == 'relu':
                evidence = relu_evidence(output)
            elif self.evidence == 'exp':
                evidence = exp_evidence(output)
            elif self.evidence == 'softplus':
                evidence = softplus_evidence(output)
            else:
                raise NotImplementedError

        alpha = evidence + self.redl_lamb

        # Our target is a vector, as result, no need for one-hot embedding
        y = target

        # compute annealing coefficient
        if self.with_annealing:
            annealing_coef = self.compute_annealing_coef(**kwargs)
        else:
            annealing_coef = None

        # compute the EDL loss
        if self.loss_type == 'mse':
            results = self.mse_loss(y, alpha, annealing_coef)
        elif self.loss_type == 'log':
            results = self.edl_loss(torch.log, y, alpha, annealing_coef, target)
        elif self.loss_type == 'digamma':
            results = self.edl_loss(torch.digamma, y, alpha, annealing_coef, target)
        elif self.loss_type == 'cross_entropy':
            results = self.ce_loss(target, y, alpha, annealing_coef)
        else:
            raise NotImplementedError

        uncertainty = self.redl_lamb * self.num_classes / torch.sum(alpha, dim=1, keepdim=True)
        results.update({'uncertainty': uncertainty})
        results.update({'evidence': evidence})

        return results
9 | # 10 | # See accompanying LICENSE agrement for details on rights. 11 | # 12 | # Parts of this technology are subject to SBIR data rights and as 13 | # described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which 14 | # apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 15 | # to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 16 | # 80918 17 | # 18 | # The Government's rights to use, modify, reproduce, release, perform, 19 | # display, or disclose technical data or computer software marked with 20 | # this legend are restricted during the period shown as provided in 21 | # paragraph (b)(4) of the Rights in Noncommercial Technical Data and 22 | # Computer Software-Small Business Innovative Research (SBIR) Program 23 | # clause contained in the above identified contract. Expiration of 24 | # SBIR Data Rights: Expires four years after completion of the above 25 | # cited project work for this or any other follow-on SBIR contract, 26 | # whichever is later. 27 | # 28 | # No restrictions on government use apply after the expiration date 29 | # shown above. Any reproduction of technical data, computer software, 30 | # or portions thereof marked with this legend must also reproduce the 31 | # markings. 
32 | 33 | from libc.stdlib cimport malloc,free 34 | from libcpp cimport bool 35 | from libcpp.string cimport string 36 | cimport numpy as np 37 | import numpy as np 38 | 39 | cdef extern from "MetaRecognition.h": 40 | cdef struct svm_node_libsvm: 41 | int index 42 | double value 43 | 44 | #cdef extern from "MetaRecognition.h": 45 | 46 | cdef extern from "MetaRecognition.h": 47 | 48 | ctypedef enum MR_fitting_type: 49 | complement_reject 50 | positive_reject 51 | complement_model 52 | positive_model 53 | 54 | cppclass MetaRecognition: 55 | MetaRecognition(int scores_to_drop, 56 | int fitting_size, 57 | bool verbose, 58 | double alpha, 59 | int translate_amount) except + 60 | bool is_valid() 61 | void set_translate(double t) 62 | void Reset() 63 | bool Predict_Match(double x, double threshold) 64 | double W_score(double x) 65 | double CDF(double x) 66 | double Inv(double p) 67 | 68 | int ReNormalize(double *invec, double *outvec, int length) 69 | 70 | int FitHigh(double* inputData, int inputDataSize, int fit_size) 71 | 72 | int FitLow(double* inputData, int inputDataSize, int fit_size) 73 | 74 | int FitSVM(svm_node_libsvm* svmdata, int inputDataSize, int label_of_interest, bool label_has_positive_score, 75 | int fit_type, int fit_size ) 76 | 77 | # void Save(FILE *outputFile) const 78 | # void Load(FILE *inputFile) 79 | void Save(char* filename) 80 | void Load(char* filename) 81 | int get_fitting_size() 82 | int set_fitting_size(int nsize) 83 | int get_translate_amount() 84 | int set_translate_amount(int ntrans) 85 | int get_sign() 86 | int set_sign(int nsign) 87 | double get_small_score() 88 | double set_small_score(double nscore) 89 | bool verbose 90 | string to_string() 91 | void from_string(string input) 92 | 93 | 94 | # This is the Python wrapper class. 
cdef bytes _to_bytes(object text):
    """Return ``text`` as bytes, UTF-8 encoding it when it is not bytes already."""
    if isinstance(text, bytes):
        return <bytes>text
    return text.encode("utf-8")


cdef class MR:
    """Python wrapper around the C++ ``MetaRecognition`` object."""
    cdef MetaRecognition *thisptr

    def __cinit__(self, int scores_to_drop=0,
                  int fitting_size=9,
                  bool verbose=False,
                  double alpha=5.0,
                  int translate_amount=10000):
        """
        Create a new MR object.
        """
        self.thisptr = new MetaRecognition(scores_to_drop, fitting_size, verbose, alpha, translate_amount)

    def __dealloc__(self):
        del self.thisptr

    def fit_low(self, inputData, int fit_size):
        """Use fit_low if your data is such that smaller is better. Fits a
        MR object to the given data. We'll transform it for you
        and keep the transform parameters in the class so later calls
        to W_score or CDF do the right thing."""
        cdef Py_ssize_t n = len(inputData)
        cdef Py_ssize_t i
        cdef double *data = <double*>malloc(sizeof(double) * n)
        # malloc(0) may legitimately return NULL, so only fail for n > 0.
        if data == NULL and n > 0:
            raise MemoryError()
        try:
            for i in range(n):
                data[i] = inputData[i]
            self.thisptr.FitLow(data, <int>n, fit_size)
        finally:
            # Release the buffer even if an element fails to convert.
            free(data)

    def fit_high(self, inputData, int fit_size):
        """Use fit_high if your data is such that larger is better. Fits a
        MR object to the given data. We'll transform it for you
        and keep the transform parameters in the class so later calls
        to W_score or CDF do the right thing.
        """
        cdef Py_ssize_t n = len(inputData)
        cdef Py_ssize_t i
        cdef double *data = <double*>malloc(sizeof(double) * n)
        if data == NULL and n > 0:
            raise MemoryError()
        try:
            for i in range(n):
                data[i] = inputData[i]
            self.thisptr.FitHigh(data, <int>n, fit_size)
        finally:
            free(data)

    def mr_save(self, filename):
        """
        save mr object to file
        """
        # Keep the bytes object alive while the char* is in use.
        cdef bytes encoded = _to_bytes(filename)
        cdef char *filetosave = encoded
        self.thisptr.Save(filetosave)

    def mr_load(self, filename):
        """
        load mr object from file
        """
        cdef bytes encoded = _to_bytes(filename)
        cdef char *filetoload = encoded
        self.thisptr.Load(filetoload)

    def fit_svm(self, svm_data, inputDataSize, label_of_interest,
                label_has_positive_score, fit_type, fit_size):
        """
        Input:
        --------
        svm_data: dict containing labels and decision scores.
                  eg. svm_data['scores'] = [], svm_data['labels'] = []
        inputDataSize : total no of decision scores
        label_of_interest : eg +1, -1
        label_has_positive_score : bool i.e 0 or 1
        fit_type : complement_reject=1, positive_reject=2, complement_model=3, positive_model=4
        fit_size : size of tail to be used

        Output:
        --------
        None
        You can access parameters from weibull fitting using other attributes.
        Loading/Saving of weibull model parameters can be done using load/save methods
        in MR class

        """
        cdef Py_ssize_t n = inputDataSize
        cdef Py_ssize_t i
        cdef svm_node_libsvm *svm_data_to_c

        # Validate before allocating so a failed check cannot leak the buffer.
        assert "scores" in svm_data
        assert "labels" in svm_data
        assert len(svm_data["scores"]) == len(svm_data["labels"])
        assert fit_type in [1, 2, 3, 4]

        svm_data_to_c = <svm_node_libsvm*>malloc(n * sizeof(svm_node_libsvm))
        if svm_data_to_c == NULL and n > 0:
            raise MemoryError()
        try:
            for i in range(n):
                svm_data_to_c[i].index = svm_data["labels"][i]
                svm_data_to_c[i].value = svm_data["scores"][i]

            print("Data initizalization complete. Now calling C++ code")
            self.thisptr.FitSVM(svm_data_to_c, <int>n, label_of_interest, label_has_positive_score, fit_type, fit_size)
        finally:
            free(svm_data_to_c)

    property is_valid:
        def __get__(self):
            return self.thisptr.is_valid()

    def reset(self):
        self.thisptr.Reset()

    def predict_match(self, double x, double threshold = .9999999):
        """
        Is X from the "match" distribution (i.e. we reject null hypothesis
        of non-match)

        """
        return self.thisptr.Predict_Match(x, threshold)

    def w_score(self, double x):
        """
        This is the commonly used function. After fitting, it returns the probability of the given score being "correct".  It is the same as CDF
        """
        return self.thisptr.W_score(x)

    def cdf(self, double x):
        """
        This is the cumulative probability of the match being correct (or more precisely the probability of the score (after transform) being an outlier for the distribution, which, given the transforms applied so that bigger is better, is the probability the score is correct).
        """
        return self.thisptr.CDF(x)

    def inv(self, double p):
        """
        This is score for which one would obtain CDF probability p (i.e. x such that p = CDF(x))
        """
        return self.thisptr.Inv(p)

    def w_score_vector(self, double[::1] invec):
        """
        Apply w_score to each element of invec, returning a new vector of W-scores
        """
        cdef Py_ssize_t n = invec.shape[0]
        cdef np.ndarray[np.double_t, ndim=1] new_vec = np.zeros(n, dtype='d')
        if n == 0:
            # &invec[0] is invalid for an empty buffer.
            return new_vec
        self.thisptr.ReNormalize(&invec[0], &new_vec[0], <int>n)
        return new_vec

    def __str__(self):
        """
        Serialize the MR object to a string. Use load_from_string to recover it.
        """
        # std::string converts to bytes; __str__ must return text on Python 3.
        cdef bytes raw = self.thisptr.to_string()
        return raw.decode("utf-8")

    def __repr__(self):
        # NOTE(review): the original format text was empty, which made "%"
        # raise TypeError; confirm the preferred wording.
        return "<MR object: %r>" % str(self)

    property tailsize:
        def __get__(self):
            return self.thisptr.get_fitting_size()
        def __set__(self, int nsize):
            self.thisptr.set_fitting_size(nsize)

    property translate_amount:
        def __get__(self):
            return self.thisptr.get_translate_amount()
        def __set__(self, int ntrans):
            self.thisptr.set_translate_amount(ntrans)

    property sign:
        def __get__(self):
            return self.thisptr.get_sign()
        def __set__(self, int nsign):
            self.thisptr.set_sign(nsign)

    property small_score:
        def __get__(self):
            return self.thisptr.get_small_score()
        def __set__(self, double nscore):
            self.thisptr.set_small_score(nscore)

    property verbose:
        def __get__(self):
            return self.thisptr.verbose
        def __set__(self, bool verbose):
            self.thisptr.verbose = verbose


def load_from_string(str input):
    """
    Deserialize an MR object. This turns a string back into an MR object; it is the inverse of str(MR())
    """
    # Typed so the C-level ``thisptr`` attribute is reachable.
    cdef MR pymr = MR()
    pymr.thisptr.from_string(_to_bytes(input))
    return pymr
def random_extract(feat, t_max):
    """Return a random contiguous window of ``t_max`` rows from ``feat``.

    The start offset is drawn uniformly from every valid position
    ``0 .. len(feat) - t_max`` inclusive, so the final window can be
    selected and an input whose length equals ``t_max`` is returned whole.

    Args:
        feat: Sequence or array that is sliced along its first axis.
        t_max: Number of consecutive rows to keep.

    Returns:
        ``feat[start:start + t_max]`` for the sampled ``start``.

    Raises:
        ValueError: If ``feat`` has fewer than ``t_max`` rows.
    """
    n_starts = len(feat) - t_max + 1
    if n_starts <= 0:
        raise ValueError(
            "cannot extract %d rows from a sequence of length %d"
            % (t_max, len(feat)))
    # np.random.randint excludes its upper bound, so pass the number of
    # valid start positions rather than the largest start index.
    start = np.random.randint(n_starts)
    return feat[start: start + t_max]
def nms(proposals, thresh):
    """Greedy 1-D non-maximum suppression over scored intervals.

    Each proposal is a row whose column 1 is the score, column 2 the
    interval start and column 3 the interval end; column 0 is carried
    through untouched. Proposals are visited from highest to lowest score,
    and every remaining proposal whose IoU with the current one is not
    below ``thresh`` is discarded.

    Args:
        proposals: Sequence of rows as described above (may be empty).
        thresh: IoU value at or above which a lower-scored proposal is
            suppressed.

    Returns:
        List of the retained rows (each a plain Python list), ordered by
        decreasing score. An empty input yields an empty list.
    """
    proposals = np.array(proposals)
    if proposals.size == 0:
        # An empty input becomes a 1-D array; column indexing would fail.
        return []

    starts = proposals[:, 2]
    ends = proposals[:, 3]
    scores = proposals[:, 1]

    # Interval lengths use the inclusive "+ 1" convention throughout.
    lengths = ends - starts + 1
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best, rest = order[0], order[1:]
        keep.append(proposals[best].tolist())

        # Intersection of the best interval with every remaining one.
        inter_start = np.maximum(starts[best], starts[rest])
        inter_end = np.minimum(ends[best], ends[rest])
        inter = np.maximum(0.0, inter_end - inter_start + 1)

        iou = inter / (lengths[best] + lengths[rest] - inter)

        # Keep only the candidates that do not overlap too much.
        order = rest[iou < thresh]

    return keep
def get_proposal_oic_2(tList,
                       wtcam,
                       final_score,
                       c_pred,
                       lambda_=0.25,
                       gamma=0.2,
                       loss_type="oic"):
    """Group selected indices into scored proposals, one list per entry.

    For every entry ``i`` of ``tList`` the first element of that entry is
    taken as an array of indices, split into runs of consecutive values by
    ``grouping``, and each run becomes a proposal
    ``[c_pred[i], score, start, end]`` where ``end`` is one past the last
    index of the run.

    Args:
        tList: Per-entry containers whose first element is the index array.
        wtcam: Array indexed as ``wtcam[index, i, 0]``; its first axis
            bounds the outer region.
        final_score: Indexable by ``c_pred[i]``; added with weight
            ``gamma`` when ``loss_type == "oic"``.
        c_pred: Label stored in each proposal of entry ``i``.
        lambda_: Fraction of the run length by which the run is extended on
            each side to form the outer region.
        gamma: Weight of ``final_score`` in the "oic" score.
        loss_type: ``"oic"`` for inner-minus-outer scoring; any other value
            scores a proposal by its inner mean only.

    Returns:
        List with one list of proposals per entry that had any index.
    """
    # NOTE(review): block nesting was reconstructed from flattened source;
    # confirm ``temp.append(c_temp)`` belongs inside the non-empty branch.
    last_idx = int(wtcam.shape[0] - 1)
    temp = []
    for i in range(len(tList)):
        c_temp = []
        temp_list = np.array(tList[i])[0]
        # Test for emptiness by size: ``.any()`` is also False for the
        # index array ``[0]``, which silently dropped a run at position 0.
        if temp_list.size > 0:
            for group in grouping(temp_list):
                inner_score = np.mean(wtcam[group, i, 0])

                len_proposal = len(group)
                outer_s = max(0, int(group[0] - lambda_ * len_proposal))
                outer_e = min(last_idx,
                              int(group[-1] + lambda_ * len_proposal))

                # Indices just before and just after the run.
                outer_temp_list = list(range(outer_s, int(group[0]))) + \
                    list(range(int(group[-1] + 1), outer_e + 1))

                if len(outer_temp_list) == 0:
                    outer_score = 0
                else:
                    outer_score = np.mean(wtcam[outer_temp_list, i, 0])

                if loss_type == "oic":
                    c_score = (inner_score - outer_score
                               + gamma * final_score[c_pred[i]])
                else:
                    c_score = inner_score
                t_start = group[0]
                t_end = group[-1] + 1
                c_temp.append([c_pred[i], c_score, t_start, t_end])
            temp.append(c_temp)
    return temp
def filter_segments(segment_predict, videonames, ambilist, factor):
    """Drop predicted segments that overlap an ambiguous annotation.

    Args:
        segment_predict: 2-D array whose rows start with
            ``[video_index, start, end, ...]``; an empty array is allowed.
        videonames: Sequence mapping ``video_index`` to a video name.
        ambilist: Rows whose element 0 is a video name and whose elements
            2 and 3 are the start and end of an ambiguous span (parsed with
            ``float`` and multiplied by ``factor``).
        factor: Scale applied to the ambiguous span before rounding.

    Returns:
        ``np.ndarray`` of the rows of ``segment_predict`` that share no
        integer position with any ambiguous span of the same video.
    """
    kept = []
    for i in range(np.shape(segment_predict)[0]):
        vn = videonames[int(segment_predict[i, 0])]
        p_start = int(segment_predict[i][1])
        p_end = int(segment_predict[i][2])

        ambiguous = False
        for a in ambilist:
            if a[0] != vn:
                continue
            g_start = int(round(float(a[2]) * factor))
            g_end = int(round(float(a[3]) * factor))
            # Two half-open integer ranges share a position exactly when
            # the later start precedes the earlier end. This equals
            # "IoU > 0" without building sets, and it cannot divide by zero
            # when both ranges are empty.
            if max(g_start, p_start) < min(g_end, p_end):
                ambiguous = True
                break

        if not ambiguous:
            kept.append(segment_predict[i, :])
    return np.array(kept)
allow_pickle=True) 52 | ambilist = annotation_path + '/Ambiguous_test.txt' 53 | if args.feature_type == 'UNT': 54 | factor = 10.0 / 4.0 55 | else: 56 | factor = 25.0 / 16.0 57 | try: 58 | ambilist = list(open(ambilist, 'r')) 59 | ambilist = [a.strip('\n').split(' ') for a in ambilist] 60 | except: 61 | ambilist = [] 62 | 63 | # keep training gtlabels for plotting 64 | gtltr = [] 65 | for i, s in enumerate(subset): 66 | if subset[i] == 'validation' and len(gtsegments[i]): 67 | gtltr.append(gtlabels[i]) 68 | gtlabelstr = gtltr 69 | 70 | # Keep only the test subset annotations 71 | gts, gtl, vn, dn = [], [], [], [] 72 | for i, s in enumerate(subset): 73 | if subset[i] == 'test': 74 | gts.append(gtsegments[i]) 75 | gtl.append(gtlabels[i]) 76 | vn.append(videoname[i]) 77 | dn.append(duration[i, 0]) 78 | gtsegments = gts 79 | gtlabels = gtl 80 | videoname = vn 81 | duration = dn 82 | 83 | # keep ground truth and predictions for instances with temporal annotations 84 | gts, gtl, vn, pred, dn = [], [], [], [], [] 85 | for i, s in enumerate(gtsegments): 86 | if len(s): 87 | gts.append(gtsegments[i]) 88 | gtl.append(gtlabels[i]) 89 | vn.append(videoname[i]) 90 | pred.append(predictions[i]) 91 | dn.append(duration[i]) 92 | gtsegments = gts 93 | gtlabels = gtl 94 | videoname = vn 95 | predictions = pred 96 | 97 | # which categories have temporal labels ? 98 | templabelcategories = sorted(list(set([l for gtl in gtlabels for l in gtl]))) 99 | 100 | # the number index for those categories. 
101 | templabelidx = [] 102 | for t in templabelcategories: 103 | templabelidx.append(str2ind(t, classlist)) 104 | 105 | # process the predictions such that classes having greater than a certain threshold are detected only 106 | predictions_mod = [] 107 | c_score = [] 108 | for p in predictions: 109 | pp = - p; 110 | [pp[:, i].sort() for i in range(np.shape(pp)[1])]; 111 | pp = -pp 112 | c_s = np.mean(pp[:int(np.shape(pp)[0] / 8), :], axis=0) 113 | ind = c_s > 0.0 114 | c_score.append(c_s) 115 | new_pred = np.zeros((np.shape(p)[0], np.shape(p)[1]), dtype='float32') 116 | predictions_mod.append(p * ind) 117 | predictions = predictions_mod 118 | 119 | detection_results = [] 120 | for i, vn in enumerate(videoname): 121 | detection_results.append([]) 122 | detection_results[i].append(vn) 123 | 124 | ap = [] 125 | for c in templabelidx: 126 | segment_predict = [] 127 | # Get list of all predictions for class c 128 | for i in range(len(predictions)): 129 | tmp = smooth(predictions[i][:, c]) 130 | threshold = np.max(tmp) - (np.max(tmp) - np.min(tmp)) * 0.5 131 | vid_pred = np.concatenate([np.zeros(1), (tmp > threshold).astype('float32'), np.zeros(1)], axis=0) 132 | vid_pred_diff = [vid_pred[idt] - vid_pred[idt - 1] for idt in range(1, len(vid_pred))] 133 | s = [idk for idk, item in enumerate(vid_pred_diff) if item == 1] 134 | e = [idk for idk, item in enumerate(vid_pred_diff) if item == -1] 135 | for j in range(len(s)): 136 | aggr_score = np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c] 137 | if e[j] - s[j] >= 2: 138 | segment_predict.append([i, s[j], e[j], np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c]]) 139 | detection_results[i].append( 140 | [classlist[c], s[j], e[j], np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c]]) 141 | segment_predict = np.array(segment_predict) 142 | segment_predict = filter_segments(segment_predict, videoname, ambilist, factor) 143 | 144 | # Sort the list of predictions for class c based on score 145 | if len(segment_predict) == 0: 146 | return 0 147 | 
segment_predict = segment_predict[np.argsort(-segment_predict[:, 3])] 148 | 149 | # Create gt (category c)list 150 | segment_gt = [[i, gtsegments[i][j][0], gtsegments[i][j][1]] for i in range(len(gtsegments)) for j in 151 | range(len(gtsegments[i])) if str2ind(gtlabels[i][j], classlist) == c] 152 | gtpos = len(segment_gt) 153 | # Compare predictions and gt 154 | tp, fp = [], [] 155 | for i in range(len(segment_predict)): 156 | flag = 0. 157 | for j in range(len(segment_gt)): 158 | if segment_predict[i][0] == segment_gt[j][0]: # The same video 159 | gt = range(int(round(segment_gt[j][1] * factor)), int(round(segment_gt[j][2] * factor))) 160 | p = range(int(segment_predict[i][1]), int(segment_predict[i][2])) 161 | IoU = float(len(set(gt).intersection(set(p)))) / float(len(set(gt).union(set(p)))) 162 | if IoU >= th: 163 | flag = 1. 164 | del segment_gt[j] 165 | break 166 | tp.append(flag) 167 | fp.append(1. - flag) 168 | tp_c = np.cumsum(tp) 169 | fp_c = np.cumsum(fp) 170 | if sum(tp) == 0: 171 | prc = 0. 
172 | else: 173 | prc = np.sum((tp_c / (fp_c + tp_c)) * tp) / gtpos 174 | ap.append(prc) 175 | 176 | return 100 * np.mean(ap) 177 | 178 | 179 | def AntgetLocMAP(predictions, th, annotation_path, args): 180 | gtsegments = np.load(annotation_path + '/segments.npy', allow_pickle=True) 181 | gtlabels = np.load(annotation_path + '/labels.npy', allow_pickle=True) 182 | gtlabels = np.load(annotation_path + '/labels.npy', allow_pickle=True) 183 | videoname = np.load(annotation_path + '/videoname.npy', allow_pickle=True); 184 | videoname = np.array([v.decode('utf-8') for v in videoname]) 185 | subset = np.load(annotation_path + '/subset.npy', allow_pickle=True); 186 | subset = np.array([s.decode('utf-8') for s in subset]) 187 | classlist = np.load(annotation_path + '/classlist.npy', allow_pickle=True); 188 | classlist = np.array([c.decode('utf-8') for c in classlist]) 189 | duration = np.load(annotation_path + '/duration.npy', allow_pickle=True) 190 | ambilist = annotation_path + '/Ambiguous_test.txt' 191 | if args.feature_type == 'UNT': 192 | factor = 10.0 / 4.0 193 | else: 194 | factor = 25.0 / 16.0 195 | try: 196 | ambilist = list(open(ambilist, 'r')) 197 | ambilist = [a.strip('\n').split(' ') for a in ambilist] 198 | except: 199 | ambilist = [] 200 | 201 | # keep training gtlabels for plotting 202 | gtltr = [] 203 | for i, s in enumerate(subset): 204 | if subset[i] == 'training' and len(gtsegments[i]): 205 | gtltr.append(gtlabels[i]) 206 | gtlabelstr = gtltr 207 | 208 | # Keep only the test subset annotations 209 | gts, gtl, vn, dn = [], [], [], [] 210 | for i, s in enumerate(subset): 211 | if subset[i] == 'validation': 212 | gts.append(gtsegments[i]) 213 | gtl.append(gtlabels[i]) 214 | vn.append(videoname[i]) 215 | dn.append(duration[i]) 216 | gtsegments = gts 217 | gtlabels = gtl 218 | videoname = vn 219 | duration = dn 220 | # pdb.set_trace() 221 | 222 | # keep ground truth and predictions for instances with temporal annotations 223 | gts, gtl, vn, pred, dn = [], [], 
[], [], [] 224 | for i, s in enumerate(gtsegments): 225 | if len(s): 226 | gts.append(gtsegments[i]) 227 | gtl.append(gtlabels[i]) 228 | vn.append(videoname[i]) 229 | pred.append(predictions[i]) 230 | dn.append(duration[i]) 231 | gtsegments = gts 232 | gtlabels = gtl 233 | videoname = vn 234 | predictions = pred 235 | 236 | # which categories have temporal labels ? 237 | templabelcategories = sorted(list(set([l for gtl in gtlabels for l in gtl]))) 238 | 239 | # the number index for those categories. 240 | templabelidx = [] 241 | for t in templabelcategories: 242 | templabelidx.append(str2ind(t, classlist)) 243 | 244 | # process the predictions such that classes having greater than a certain threshold are detected only 245 | predictions_mod = [] 246 | c_score = [] 247 | length = [] 248 | for p in predictions: 249 | length.append(len(p)) 250 | pp = - p; 251 | [pp[:, i].sort() for i in range(np.shape(pp)[1])]; 252 | pp = -pp 253 | if int(np.shape(pp)[0] / 8) > 0: 254 | c_s = np.mean(pp[:int(np.shape(pp)[0] / 8), :], axis=0) 255 | else: 256 | c_s = np.mean(pp[:np.shape(pp)[0], :], axis=0) 257 | ind = c_s > 0.0 258 | c_score.append(c_s) 259 | new_pred = np.zeros((np.shape(p)[0], np.shape(p)[1]), dtype='float32') 260 | predictions_mod.append(p * ind) 261 | predictions = predictions_mod 262 | 263 | detection_results = [] 264 | for i, vn in enumerate(videoname): 265 | detection_results.append([]) 266 | detection_results[i].append(vn) 267 | # pdb.set_trace() 268 | ap = [] 269 | for c in templabelidx: 270 | segment_predict = [] 271 | # Get list of all predictions for class c 272 | for i in range(len(predictions)): 273 | tmp = smooth(predictions[i][:, c]) 274 | threshold = np.max(tmp) - (np.max(tmp) - np.min(tmp)) * 0.5 275 | vid_pred = np.concatenate([np.zeros(1), (tmp > threshold).astype('float32'), np.zeros(1)], axis=0) 276 | vid_pred_diff = [vid_pred[idt] - vid_pred[idt - 1] for idt in range(1, len(vid_pred))] 277 | s = [idk for idk, item in enumerate(vid_pred_diff) if 
item == 1] 278 | e = [idk for idk, item in enumerate(vid_pred_diff) if item == -1] 279 | for j in range(len(s)): 280 | aggr_score = np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c] 281 | if e[j] - s[j] >= 2: 282 | segment_predict.append([i, s[j], e[j], np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c]]) 283 | detection_results[i].append( 284 | [classlist[c], s[j], e[j], np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c]]) 285 | segment_predict = np.array(segment_predict) 286 | segment_predict = filter_segments(segment_predict, videoname, ambilist, factor) 287 | 288 | # Sort the list of predictions for class c based on score 289 | if len(segment_predict) == 0: 290 | return 0 291 | pdb.set_trace() 292 | segment_predict = segment_predict[np.argsort(-segment_predict[:, 3])] 293 | 294 | # Create gt list 295 | segment_gt = [[i, gtsegments[i][j][0], gtsegments[i][j][1]] for i in range(len(gtsegments)) for j in 296 | range(len(gtsegments[i])) if str2ind(gtlabels[i][j], classlist) == c] 297 | gtpos = len(segment_gt) 298 | # Compare predictions and gt 299 | tp, fp = [], [] 300 | for i in range(len(segment_predict)): 301 | flag = 0. 302 | for j in range(len(segment_gt)): 303 | if segment_predict[i][0] == segment_gt[j][0]: 304 | gt = range(int(round(segment_gt[j][1] * factor)), int(round(segment_gt[j][2] * factor))) 305 | p = range(int(segment_predict[i][1]), int(segment_predict[i][2])) 306 | IoU = float(len(set(gt).intersection(set(p)))) / float(len(set(gt).union(set(p)))) 307 | if IoU >= th: 308 | flag = 1. 309 | del segment_gt[j] 310 | break 311 | tp.append(flag) 312 | fp.append(1. - flag) 313 | tp_c = np.cumsum(tp) 314 | fp_c = np.cumsum(fp) 315 | if sum(tp) == 0: 316 | prc = 0. 
def getDetectionMAP(predictions, annotation_path, args, pool):
    """Compute detection mAP at several IoU thresholds in parallel.

    Args:
        predictions: Per-video predictions forwarded to the evaluator.
        annotation_path: Annotation directory; the evaluator is chosen by
            whether this path contains ``'Thumos'``.
        args: Options object forwarded to the evaluator.
        pool: Object providing ``apply_async`` (e.g. a multiprocessing
            pool); one task is submitted per IoU threshold.

    Returns:
        Tuple ``(dmap_list, iou_list)`` with the mAP for each threshold and
        the thresholds themselves, in the same order.
    """
    iou_list = [0.1, 0.2, 0.3, 0.4, 0.5]
    # Dispatch the evaluator that matches the dataset. Previously the
    # selection was computed but getLocMAP was always submitted, so the
    # non-Thumos evaluator was never used.
    # NOTE(review): AntgetLocMAP contains a pdb.set_trace() call; confirm it
    # is unreachable before relying on this path inside pool workers.
    if 'Thumos' in annotation_path:
        func = getLocMAP
    else:
        func = AntgetLocMAP
    results = [
        pool.apply_async(func, args=(predictions, iou, annotation_path, args))
        for iou in iou_list
    ]
    dmap_list = [task.get() for task in results]

    return dmap_list, iou_list
Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings. 11 | * 12 | */ 13 | 14 | /** \mainpage 15 | 16 | 17 | This library provides support for meta-recognition, i.e. recognizing when a recognition system is working well and when it is not and using that self-knowledge to improve the system. It can be used for prediction of failure, fusion, score renormalization, SVM renormalization and converting SVM or recognition scores into statistically well-supported probability estimates. The analysis is based on an analysis of the recognition system scores. 18 | 19 | 20 | The fundamental ideas are described in 21 | 22 | "Meta-Recognition: The Theory and Practice of Recognition Score Analysis," 23 | Walter J. Scheirer, Anderson Rocha, Ross Micheals, Terrance E. Boult, 24 | IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI), 25 | 33(8), pp 1689--1695, Aug, 2011. 26 | 27 | and SVM support as described in 28 | 29 | "Multi-Attribute Spaces: Calibration for Attribute Fusion and Similarity Search," 30 | Walter J. Scheirer, Neeraj Kumar, Peter N. Belhumeur, Terrance E. Boult, 31 | Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 32 | June 2012. 33 | 34 | 35 | The underlying extreme value theory provides a strong theoretical basis for the computations, but to make it useful one must transform the data into the proper frame. The C++ version provides objects that can compute and store information about the transform and then provide for prediction, w-score values (probability estimates), or renormalization of a vector of data. 36 | 37 | The library also contains "C" interface functions for very basic Weibull usage for Meta-Recognition. 38 | The C-based library has a number of STRONG assumptions you must follow as we cannot test for all of them.
39 | 1) All fitting and testing are presuming "larger is better", If you are fitting something where smaller is better you need to transform it before fitting. 40 | 2) All data is positive (okay we can and do test for that, but better to know upfront what you are doing) 41 | 3) There must be sufficient range in your data to actually fit the weilbull. If all the data is the same, or nearly the same, it may fal to converge and will report errors. 42 | 43 | While free for non-commercial use this library is subject to the license restrictions, see LICENSE.TXT for details. 44 | 45 | */ 46 | 47 | #include "MetaRecognition.h" 48 | #include 49 | //#include 50 | #include 51 | 52 | #ifdef __cplusplus 53 | extern "C" { 54 | #endif 55 | extern int weibull_fit_verbose_debug; 56 | #ifdef __cplusplus 57 | } 58 | #endif 59 | MetaRecognition::MetaRecognition(int scores_to_dropx, int fitting_sizex, bool verb, double alphax, int translate_amountx): 60 | scores_to_drop(scores_to_dropx),verbose(verb),fitting_size(fitting_sizex),alpha(alphax),translate_amount(translate_amountx) 61 | { 62 | memset(parmhat,0,sizeof(parmhat)); 63 | memset(parmci,0,sizeof(parmci)); 64 | sign = 1; 65 | ftype = complement_reject; 66 | small_score=0; 67 | isvalid=false; 68 | if(verb) weibull_fit_verbose_debug=1; 69 | else weibull_fit_verbose_debug=0; 70 | } 71 | 72 | MetaRecognition::~MetaRecognition() 73 | { 74 | // free(parmhat); 75 | // free(parmci); 76 | } 77 | 78 | bool MetaRecognition::is_valid(){ 79 | return isvalid; 80 | } 81 | 82 | void MetaRecognition::set_translate(double t){ 83 | translate_amount = t; 84 | isvalid=false; 85 | }; 86 | 87 | 88 | void MetaRecognition::Reset(){ 89 | memset(parmhat,0,sizeof(parmhat)); 90 | memset(parmci,0,sizeof(parmci)); 91 | sign = 1; 92 | scores_to_drop = 0; 93 | small_score=0; 94 | isvalid=false; 95 | } 96 | 97 | 98 | int compare_sort_decending (const void * a, const void * b) 99 | { 100 | const double *da = (const double *) a; 101 | const double *db = (const double 
// Parse a decimal integer from s with atoi() and report whether it is
// non-zero. Text that atoi() cannot parse yields 0 and therefore false.
inline const bool IntToBool(const char * s)
{
  return atoi(s) != 0;
}
returns y such that CDF(y) = 134 | double MetaRecognition::Inv(double x) 135 | { 136 | if(!isvalid) return -9999.0; 137 | double score = weibull_inv(x, parmhat[0], parmhat[1]); 138 | return (score - translate_amount + small_score)*sign; 139 | } 140 | 141 | double MetaRecognition::CDF(double x) 142 | { 143 | if(!isvalid) return -9999.0; 144 | double translated_x = x*sign + translate_amount - small_score; 145 | double wscore=weibull_cdf(translated_x, parmhat[0], parmhat[1]); 146 | if(ftype==complement_model || ftype==positive_model) return 1-wscore; 147 | return wscore; 148 | }; 149 | 150 | double MetaRecognition::W_score(double x){ 151 | return CDF(x); 152 | }; 153 | 154 | bool MetaRecognition::Predict_Match(double x, double threshold){ 155 | double score = Inv(threshold); 156 | if(sign <0) return (x < score); 157 | return (x > score); 158 | }; 159 | 160 | int MetaRecognition::ReNormalize(double *invec, double *outvec, int length) 161 | { 162 | if(!isvalid) return -9997.0; 163 | int rval=1; 164 | for(int i=0; i< length; i++){ 165 | outvec[i] = W_score(invec[i]); 166 | } 167 | return rval; 168 | } 169 | 170 | 171 | //used by weibull__evt_low and weibull__evt_high, which sets the desired sign(low -1, high 1) 172 | //before passing to generic 173 | int MetaRecognition::EvtGeneric(double* inputData, int inputDataSize, int inward, double x) 174 | { 175 | double * inputDataCopy = (double *) malloc(sizeof(double) * inputDataSize); 176 | 177 | double * dataPtr = NULL; 178 | int icnt=0; 179 | if(!inward && (sign > 0) ) { 180 | icnt = inputDataSize; 181 | memcpy(inputDataCopy,inputData, inputDataSize*sizeof(double)); 182 | } 183 | if(!inward && (sign < 0) ){ 184 | for(int i=0; i < inputDataSize; i++) inputDataCopy[i] = (inputData[i]*sign); //doing extremes just flip sign if needed 185 | icnt = inputDataSize; 186 | } 187 | else if(inward && (sign < 0)) { /* this is fit above x but approaching x */ 188 | for(int i=0; i < inputDataSize; i++) { 189 | if(inputData[i] > x) { 190 | 
inputDataCopy[icnt++] = (inputData[i]*sign); //copy what is above x, and flip signs (so biggest is important) 191 | } 192 | } 193 | } else if(inward && (sign > 0)) { /* this is fit below x but approaching x */ 194 | for(int i=0; i < inputDataSize; i++) { 195 | if(inputData[i] < x) { 196 | inputDataCopy[icnt++] = (inputData[i]); //copy only what is above x. 197 | } 198 | } 199 | } 200 | 201 | //sort data and get smallest score 202 | qsort(inputDataCopy, icnt , sizeof(double), compare_sort_decending); 203 | 204 | //Want only the top fitting_size scores but als noneed to adap if dropping top score 205 | if(scores_to_drop>0){ 206 | dataPtr=inputDataCopy+scores_to_drop; 207 | } else { 208 | dataPtr=inputDataCopy; 209 | } 210 | 211 | small_score = dataPtr[fitting_size-1]; 212 | 213 | for(int i=0; i < fitting_size; i++) 214 | { 215 | //translate and subtract small score 216 | dataPtr[i] = dataPtr[i] + translate_amount - small_score; 217 | } 218 | 219 | 220 | int rval = weibull_fit(parmhat, parmci, dataPtr, alpha, fitting_size); 221 | isvalid= true; 222 | if(rval != 1) Reset(); 223 | free(inputDataCopy); 224 | return rval; 225 | } 226 | 227 | //Wrapper fitting functions EvtLow and EvtHigh to make it simpler for new users of the library. 
228 | int MetaRecognition::FitLow(double* inputData, int inputDataSize, int fsize) 229 | { 230 | if(fsize>0) fitting_size=fsize; 231 | sign = -1; 232 | return EvtGeneric(inputData, inputDataSize); 233 | } 234 | 235 | int MetaRecognition::FitHigh(double* inputData, int inputDataSize, int fsize) 236 | { 237 | if(fsize>0) fitting_size=fsize; 238 | sign = 1; 239 | return EvtGeneric(inputData, inputDataSize); 240 | } 241 | 242 | int MetaRecognition::FitSVM(svm_node_libsvm* SVMdata, int inputDataSize, int label_of_interest, bool label_has_positive_score, int fit_type, int fit_size ) 243 | { 244 | 245 | Reset(); 246 | ftype = (MR_fitting_type)fit_type; 247 | fitting_size = fit_size; 248 | double * inputDataCopy = (double *) malloc(sizeof(double) * inputDataSize); 249 | int sign_of_label_of_interest=0; 250 | double * dataPtr = NULL; 251 | int sign_of_expected_score=-1; 252 | if(label_has_positive_score) sign_of_expected_score=1; 253 | 254 | int icnt=0; 255 | bool rejection=(ftype==complement_reject || ftype == positive_reject); 256 | if(rejection) { // default we fit on the complement class and then do rejection to determine probability 257 | for(int i=0; i < inputDataSize; i++) { 258 | if(SVMdata[i].index != label_of_interest) inputDataCopy[icnt++] = (SVMdata[i].value); //doing extremes just flip sign if needed 259 | else { 260 | if(SVMdata[i].value >0) sign_of_label_of_interest++; 261 | else sign_of_label_of_interest--; 262 | } 263 | } 264 | } else { 265 | for(int i=0; i < inputDataSize; i++) { 266 | if(SVMdata[i].index == label_of_interest) inputDataCopy[icnt++] = (SVMdata[i].value); //doing extremes just flip sign if needed 267 | else { 268 | if(SVMdata[i].value >0) sign_of_label_of_interest++; 269 | else sign_of_label_of_interest--; 270 | } 271 | } 272 | } 273 | if (verbose && sign_of_label_of_interest * sign_of_expected_score > 0){ 274 | printf("In MetaRecognition::FitSVM, warning: possible inconsistency average of the non-matching data has score %d, but expected 
sign is %d\n", 275 | sign_of_label_of_interest, -sign_of_expected_score); 276 | } 277 | 278 | 279 | /* expected sign combines with reject_complement to determine if we have to flip or not. 280 | We flip if positives scores, with smaller is better, is the goal, 281 | we flip if sign_of_expected_score >0 and !force_rejection 282 | we flip if sign_of_expected_score <0 and force_rejection */ 283 | 284 | if((!label_has_positive_score && rejection) 285 | || (label_has_positive_score && !rejection)) { 286 | sign = -1; 287 | for(int i=0; i < icnt; i++) { 288 | inputDataCopy[i] *= -1; //doing extremes just flip sign if needed 289 | } 290 | } else sign=1; 291 | 292 | //sort data and get smallest score 293 | qsort(inputDataCopy, icnt , sizeof(double), compare_sort_decending); 294 | 295 | //Want only the top fitting_size scores but als noneed to adap if dropping top score 296 | if(scores_to_drop){ 297 | dataPtr=inputDataCopy+scores_to_drop; 298 | } else { 299 | dataPtr=inputDataCopy; 300 | } 301 | 302 | small_score = dataPtr[fitting_size - 1]; 303 | 304 | for(int i=0; i < fitting_size; i++) 305 | { 306 | //translate and subtract small score 307 | dataPtr[i] = dataPtr[i] + translate_amount - small_score; 308 | } 309 | 310 | int rval = weibull_fit(parmhat, parmci, dataPtr, alpha, fitting_size); 311 | 312 | isvalid= true; 313 | if(rval != 1) Reset(); 314 | free(inputDataCopy); 315 | printf("Completed weibull fitting\n"); 316 | return rval; 317 | }; 318 | 319 | void MetaRecognition::Save(std::ostream &outputStream) const 320 | { 321 | if(outputStream.good() && isvalid) 322 | { 323 | try { 324 | outputStream.precision(21); 325 | outputStream.setf(std::ios::scientific,std::ios::floatfield); 326 | outputStream << parmhat[0] << " " << parmhat[1] << " " 327 | << parmci[0] << " " << parmci[1] << " " 328 | << parmci[2] << " " << parmci[3] << " " 329 | << sign << " " 330 | << alpha << " " 331 | << (int) ftype << " " 332 | << fitting_size << " " 333 | << translate_amount << " " 334 | << 
small_score<< " " 335 | << scores_to_drop 336 | << std::endl; 337 | } catch(std::bad_alloc& e) { 338 | std::cout << "Could not allocate the required memory, failed with error: '" << e.what() << "'" << std::endl; 339 | } 340 | } 341 | } 342 | 343 | std::ostream& operator<< ( std::ostream& os, const MetaRecognition& mr ) 344 | { 345 | mr.Save(os); 346 | return os; 347 | } 348 | 349 | std::istream& operator>> ( std::istream& is, MetaRecognition& mr ) 350 | { 351 | mr.Load(is); 352 | return is; 353 | } 354 | 355 | 356 | void MetaRecognition::Load(std::istream &inputStream) 357 | { 358 | isvalid=false; 359 | int temp; 360 | if(inputStream.good()) 361 | { 362 | int iftype; 363 | inputStream >> parmhat[0] >> parmhat[1] 364 | >> parmci[0] >> parmci[1] 365 | >> parmci[2] >> parmci[3] 366 | >> sign 367 | >> alpha 368 | >> iftype 369 | >> fitting_size 370 | >> translate_amount 371 | >> small_score 372 | >> scores_to_drop; 373 | isvalid=true; 374 | ftype = (MR_fitting_type) iftype; 375 | } 376 | } 377 | 378 | void MetaRecognition::Save(FILE *outputFile) const 379 | { 380 | if((outputFile != NULL) && !feof(outputFile)) 381 | { 382 | fprintf(outputFile, 383 | "%21.18g %21.18g " //parmaht 384 | "%21.18g %21.18g " //parmci 385 | "%21.18g %21.18g " 386 | "%d %f %d %d " //sign, alpha, fitting size 387 | "%d %21.18g %d\n", //translate, small_score, scores_to_drop 388 | parmhat[0], parmhat[1], 389 | parmci[0],parmci[1], 390 | parmci[2],parmci[3], 391 | sign, alpha, (int) ftype,fitting_size, 392 | translate_amount, small_score, scores_to_drop); 393 | } 394 | } 395 | 396 | void MetaRecognition::Load(FILE *inputFile) 397 | { 398 | int temp, iftype; 399 | int retcode=0; 400 | isvalid=false; 401 | if((inputFile != NULL) && !feof(inputFile)) 402 | { 403 | 404 | retcode = fscanf(inputFile, 405 | "%lf %lf " //parmaht 406 | "%lf %lf " //parmci 407 | "%lf %lf " 408 | "%d %lf %d %d " //sign, alpha, fitting size 409 | "%d %lf %d ", //translate, small_score, scores_to_drop, 410 | parmhat, 
parmhat+1, 411 | parmci,parmci+1, 412 | parmci+2,parmci+3, 413 | &sign, &alpha, &iftype, &fitting_size, 414 | &translate_amount, &small_score, &scores_to_drop); 415 | isvalid=true; 416 | ftype = (MR_fitting_type) iftype; 417 | } 418 | } 419 | 420 | 421 | void MetaRecognition::Save(char* filename) const 422 | { 423 | FILE* fp = fopen(filename,"w"); 424 | if(fp) { 425 | Save(fp); 426 | fclose(fp); 427 | } else if(strlen(filename)>0) 428 | fprintf(stderr,"SaveWeibull could not open file |%s|\n",filename); 429 | else fprintf(stderr,"SaveWeibull called with null filename\n"); 430 | } 431 | 432 | void MetaRecognition::Load(char* filename){ 433 | FILE* fp = fopen(filename,"r"); 434 | isvalid=false; 435 | if(fp) { 436 | Load(fp); 437 | isvalid=true; 438 | fclose(fp); 439 | } else if(strlen(filename)>0) 440 | fprintf(stderr,"LoadWeibull could not open file |%s|\n",filename); 441 | else fprintf(stderr,"LoadWeibull called with null filename\n"); 442 | 443 | } 444 | 445 | std::string MetaRecognition::to_string() { 446 | std::stringstream oss; 447 | this->Save(oss); 448 | return oss.str(); 449 | } 450 | void MetaRecognition::from_string(std::string input) { 451 | std::stringstream iss(input); 452 | this->Load(iss); 453 | } 454 | 455 | 456 | int MetaRecognition::set_fitting_size(int nsize){ isvalid=false; return fitting_size=nsize;} 457 | int MetaRecognition::get_fitting_size(){ return fitting_size;} 458 | int MetaRecognition::get_translate_amount(){ return translate_amount;} 459 | int MetaRecognition::set_translate_amount(int ntrans) {isvalid=false; return translate_amount=ntrans;} 460 | double MetaRecognition::get_small_score(){return small_score;} 461 | double MetaRecognition::set_small_score(double nscore){isvalid=false; return small_score=nscore;} 462 | int MetaRecognition::get_sign(){return sign;} 463 | int MetaRecognition::set_sign(int nsign){return sign=nsign;} 464 | -------------------------------------------------------------------------------- /eval/eval_detection.py: 
def str2ind(categoryname, classlist):
    """Return the index of the first entry of ``classlist`` equal to ``categoryname``.

    Raises:
        IndexError: if ``categoryname`` does not occur in ``classlist``.
    """
    for i in range(len(classlist)):
        if categoryname == classlist[i]:
            return i
    raise IndexError("category %r not found in classlist" % (categoryname,))


def strlist2indlist(strlist, classlist):
    """Map every name in ``strlist`` to its index in ``classlist``."""
    return [str2ind(s, classlist) for s in strlist]


def sigmoid(x, eps=1e-10):
    """Logistic function with ``eps`` added to the denominator."""
    return 1 / (1 + np.exp(-x) + eps)


def smooth(v, order=2, lens=200):
    """Savitzky-Golay smoothing of ``v``.

    The window is ``min(lens, len(v))`` rounded down to an odd number.
    Inputs with at most ``order`` samples are returned unchanged.
    """
    if len(v) <= order:
        return v
    l = min(lens, len(v))
    l = l - (1 - l % 2)  # savgol_filter needs an odd window
    return savgol_filter(v, l, order)


def smooth_medfilt(v, lens=200):
    """Median-filter ``v``; inputs no longer than ``lens`` are returned unchanged."""
    if len(v) <= lens:
        return v
    l = min(lens, len(v))
    l = l - (1 - l % 2)  # medfilt needs an odd kernel
    return medfilt(v, l)


def filter_segments(segment_predict, videonames, ambilist):
    """Drop predicted segments that overlap an ambiguous interval of their video.

    Args:
        segment_predict: 2-D array; column 0 is an index into ``videonames``,
            columns 1 and 2 are the segment start and end.
        videonames: sequence of video names.
        ambilist: rows whose field 0 is a video name and whose fields 2 and 3
            are the interval start and end; they are scaled by 25 / 16 before
            being compared with the predictions.

    Returns:
        ``np.array`` of the rows of ``segment_predict`` that overlap no
        ambiguous interval (an empty 1-D array when nothing is left).
    """
    n_seg = np.shape(segment_predict)[0]
    keep = np.ones(n_seg, dtype=bool)
    for i in range(n_seg):
        vn = videonames[int(segment_predict[i, 0])]
        p_start = int(segment_predict[i][1])
        p_end = int(segment_predict[i][2])
        for a in ambilist:
            if a[0] != vn:
                continue
            g_start = int(round(float(a[2]) * 25 / 16))
            g_end = int(round(float(a[3]) * 25 / 16))
            # Two half-open integer ranges share an element exactly when this
            # holds. It is the same condition as IoU > 0 but never divides,
            # so two empty ranges no longer raise ZeroDivisionError.
            if max(g_start, p_start) < min(g_end, p_end):
                keep[i] = False
                break
    s = [segment_predict[i, :] for i in range(n_seg) if keep[i]]
    return np.array(s)


def moving_smooth(y, box_size):
    """Box-filter ``y`` with an odd ``box_size``, padding both ends by edge values."""
    assert box_size % 2 == 1, 'The box size should be odd'
    box = np.ones(box_size) / box_size
    y = np.array([y[0]] * (box_size // 2) + y.tolist() + [y[-1]] * (box_size // 2))
    y_smooth = np.convolve(y, box, mode='valid')
    return y_smooth


def gaussian_smooth(score, sigma=30):
    """Gaussian-smooth the interior of the 1-D array ``score`` in place.

    The half-width ``r`` is 125, shrunk for short inputs and forced to be odd.
    Only indices ``r .. len(score) - r - 1`` are rewritten; the borders keep
    their values. The same array object is returned.
    """
    r = 125
    if r > score.shape[0] // 2:
        r = score.shape[0] // 2 - 1
    if r % 2 == 0:
        r += 1
    if r < 1:
        # fewer than two samples: no window fits, nothing to smooth
        return score
    offsets = np.arange(r * 2 - 1) - r
    gaussian_temp = np.exp(-offsets ** 2 / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))
    # Read from a snapshot: writing into the array being read would feed
    # already-smoothed samples into the following windows.
    source = score.copy()
    for i in range(r, score.shape[0] - r):
        score[i] = np.dot(source[i - r:i + r - 1], gaussian_temp)
    return score


def min_max_norm(p):
    """Rescale ``p`` linearly to [0, 1]; a constant input maps to all zeros."""
    min_p = np.min(p)
    max_p = np.max(p)
    span = max_p - min_p
    if span == 0:
        # avoid 0 / 0 (NaN) when every value is identical
        return np.zeros_like(p, dtype=float)
    return (p - min_p) / span
"/segments.npy", allow_pickle=True) 134 | gtlabels = np.load(annotation_path + "/labels.npy", allow_pickle=True) 135 | videoname = np.load(annotation_path + "/videoname.npy", allow_pickle=True) 136 | videoname = np.array([i.decode("utf8") for i in videoname]) 137 | subset = np.load(annotation_path + "/subset.npy", allow_pickle=True) 138 | subset = np.array([s.decode("utf-8") for s in subset]) 139 | # classlist = np.load(annotation_path + "/classlist.npy", allow_pickle=True) 140 | # classlist = np.array([c.decode("utf-8") for c in classlist]) 141 | # classlist = np.load("./new_classlist.npy", allow_pickle=True) 142 | classlist = self.args.classlist 143 | duration = np.load(annotation_path + "/duration.npy", allow_pickle=True) 144 | ambilist = annotation_path + "/Ambiguous_test.txt" 145 | 146 | try: 147 | ambilist = list(open(ambilist, "r")) 148 | ambilist = [a.strip("\n").split(" ") for a in ambilist] 149 | except: 150 | ambilist = [] 151 | 152 | self.ambilist = ambilist 153 | self.classlist = classlist 154 | 155 | subset_ind = (subset == self.subset) 156 | gtsegments = gtsegments[subset_ind] 157 | gtlabels = gtlabels[subset_ind] 158 | videoname = videoname[subset_ind] 159 | duration = duration[subset_ind] 160 | 161 | # 数据清洗:删除长度为0的segment。 162 | self.idx_to_take = [i for i, s in enumerate(gtsegments) 163 | if len(s) > 0] 164 | gtsegments = gtsegments[self.idx_to_take] 165 | gtlabels = gtlabels[self.idx_to_take] 166 | videoname = videoname[self.idx_to_take] 167 | 168 | # 计算Ground Truth的先验分布 169 | # prior = np.zeros((20, 1000)) 170 | # for vid_idx in range(gtsegments.shape[0]): 171 | # for seg_idx in range(len(gtsegments[vid_idx])): 172 | # start_time = gtsegments[vid_idx][seg_idx][0] 173 | # end_time = gtsegments[vid_idx][seg_idx][1] 174 | # vid_duration = duration[vid_idx][0] 175 | # start_time_percentage = round(start_time / vid_duration * 1000) 176 | # end_time_percentage = round(end_time / vid_duration * 1000) 177 | # seg_label = 
str2ind(gtlabels[vid_idx][seg_idx], self.classlist) 178 | # prior[seg_label][start_time_percentage:end_time_percentage-1] += 1 179 | # np.save('./prior.npy', prior) 180 | 181 | self.videoname = videoname 182 | # which categories have temporal labels ? 183 | # templabelcategories = sorted(list(set([l for gtl in gtlabels for l in gtl]))) 184 | 185 | # the number index for those categories. 186 | # templabelidx = [] 187 | # for t in templabelcategories: 188 | # templabelidx.append(str2ind(t, classlist)) 189 | 190 | video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], [] 191 | 192 | for i in range(len(gtsegments)): 193 | for j in range(len(gtsegments[i])): 194 | video_lst.append(str(videoname[i])) 195 | t_start_lst.append(round(gtsegments[i][j][0] * 25 / 16)) 196 | t_end_lst.append(round(gtsegments[i][j][1] * 25 / 16)) 197 | # 如果segment类别为Unknown类,将其类别置为15(第16类) 198 | this_label = str2ind(gtlabels[i][j], self.classlist) 199 | if this_label > self.n_known_class - 1: 200 | this_label = self.n_known_class 201 | # label_lst.append(str2ind(gtlabels[i][j], self.classlist)) 202 | label_lst.append(this_label) 203 | ground_truth = pd.DataFrame( 204 | { 205 | "video-id": video_lst, 206 | "t-start": t_start_lst, 207 | "t-end": t_end_lst, 208 | "label": label_lst, 209 | } 210 | ) 211 | self.ground_truth = ground_truth 212 | # self.activity_index = {i: templabelidx[i] for i in range(len(templabelidx))} 213 | 214 | def get_topk_mean(self, x, k, axis=0): 215 | return np.mean(np.sort(x, axis=axis)[-int(k):, :], axis=0) 216 | 217 | def _get_vid_score(self, pred): 218 | # pred : (n, class) 219 | if self.args is None: 220 | k = 8 221 | topk_mean = self.get_topk_mean(pred, k) 222 | # ind = topk_mean > -50 223 | return pred, topk_mean 224 | 225 | win_size = int(self.args.topk) 226 | split_list = [i * win_size for i in range(1, int(pred.shape[0] // win_size))] 227 | splits = np.split(pred, split_list, axis=0) 228 | 229 | tops = [] 230 | # select the avg over topk2 segments in each 
window 231 | for each_split in splits: 232 | top_mean = self.get_topk_mean(each_split, self.args.topk2) 233 | tops.append(top_mean) 234 | tops = np.array(tops) 235 | c_s = np.max(tops, axis=0) 236 | return pred, c_s 237 | 238 | def _get_vid_score_1(self, p): 239 | pp = - p 240 | [pp[:, i].sort() for i in range(np.shape(pp)[1])] 241 | pp = -pp 242 | if int(np.shape(pp)[0] / 8) > 0: 243 | c_s = np.mean(pp[:int(np.shape(pp)[0] / 8), :], axis=0) 244 | else: 245 | c_s = np.mean(pp[:np.shape(pp)[0], :], axis=0) 246 | return p, c_s 247 | 248 | def _get_att_topk_mean(self, p, att_logits, k): 249 | args_topk = np.argsort(att_logits, axis=0)[-k:] 250 | topk_mean = 1 / (1 + np.exp(-np.mean(att_logits[args_topk], axis=0))) * 1 / ( 251 | 1 + np.exp(-np.mean(p[args_topk], axis=0))) 252 | return topk_mean 253 | 254 | def _get_vid_score_2(self, p, att_logits): 255 | if self.args is None: 256 | k = 8 257 | topk_mean = self._get_att_topk_mean(p, att_logits, k) 258 | return p, topk_mean 259 | win_size = int(self.args.topk) 260 | split_list = [i * win_size for i in range(1, int(p.shape[0] // win_size))] 261 | p_splits = np.split(p, split_list, axis=0) 262 | att_splits = np.split(att_logits, split_list, axis=0) 263 | 264 | tops = [] 265 | for p_s, a_s in zip(p_splits, att_splits): 266 | top_mean = self._get_att_topk_mean(p_s, a_s, self.args.topk2) 267 | tops.append(top_mean) 268 | tops = np.array(tops) 269 | c_s = np.max(tops, axis=0) 270 | return p, c_s 271 | 272 | def OIC_Cofidence(self, s, e, cls_pred, c_s, _lambda=0.25): 273 | for i in range(len(s)): 274 | seg = cls_pred[s[i]:e[i]] 275 | inner_score = np.mean(seg) 276 | proposal_len = e[i] - s[i] 277 | outer_s = max(0, int(s[i] - proposal_len * _lambda)) 278 | outer_e = min(cls_pred.shape[0], int(e[i] - proposal_len * _lambda)) 279 | 280 | front_outer_score = np.mean(cls_pred[outer_s:s[i]]) 281 | back_outer_score = np.mean(cls_pred[e[i]:outer_e]) 282 | 283 | # def _get_predictions_with_label(self, prediction_by_label, label_name, 
cidx): 284 | def _get_predictions_with_label(self, prediction_by_label, cidx): 285 | """Get all predicitons of the given label. Return empty DataFrame if there 286 | is no predcitions with the given label. 287 | """ 288 | try: 289 | return prediction_by_label.get_group(cidx).reset_index(drop=True) 290 | except: 291 | # print("Warning: No predictions of label '%s' were provdied." % label_name) 292 | print("Warning: No predictions of label '%s' were provdied." % cidx) 293 | return pd.DataFrame() 294 | 295 | def wrapper_compute_average_precision(self): 296 | """Computes average precision for each class in the subset. 297 | """ 298 | # ap = np.zeros((len(self.tiou_thresholds), len(self.activity_index))) 299 | ap = np.zeros((len(self.tiou_thresholds), self.n_known_class + 1)) 300 | 301 | # Adaptation to query faster 302 | ground_truth_by_label = self.ground_truth.groupby("label") 303 | prediction_by_label = self.prediction.groupby("label") 304 | 305 | results = Parallel(n_jobs=3)( 306 | delayed(compute_average_precision_detection)( 307 | ground_truth=ground_truth_by_label.get_group(cidx).reset_index( 308 | drop=True 309 | ), 310 | prediction=self._get_predictions_with_label( 311 | prediction_by_label, cidx 312 | ), 313 | tiou_thresholds=self.tiou_thresholds, 314 | ) 315 | for cidx in range(self.n_known_class + 1) 316 | ) 317 | 318 | for cidx in range(self.n_known_class + 1): 319 | ap[:, cidx] = results[cidx] 320 | 321 | # results = Parallel(n_jobs=3)( 322 | # delayed(compute_average_precision_detection)( 323 | # ground_truth=ground_truth_by_label.get_group(cidx).reset_index( 324 | # drop=True 325 | # ), 326 | # prediction=self._get_predictions_with_label( 327 | # prediction_by_label, label_name, cidx 328 | # ), 329 | # tiou_thresholds=self.tiou_thresholds, 330 | # ) 331 | # for label_name, cidx in self.activity_index.items() 332 | # ) 333 | 334 | # for i, cidx in enumerate(self.activity_index.values()): 335 | # ap[:, cidx] = results[i] 336 | 337 | return ap 338 | 339 | 
def compute_average_precision_detection(
        ground_truth, prediction, tiou_thresholds=np.linspace(0.5, 0.95, 10)
):
    """Compute average precision (detection task) between ground truth and
    prediction data frames. If several predictions match the same ground-truth
    segment, only the one with the highest score counts as a true positive.
    This code is greatly inspired by the Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end']
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    tiou_thresholds : 1darray, optional
        Temporal intersection over union thresholds.

    Outputs
    -------
    ap : 1darray
        Average precision per threshold. All zeros when either frame is empty.
    """
    ap = np.zeros(len(tiou_thresholds))
    # no ground truth would make recall 0 / 0, so treat it like "no predictions"
    if prediction.empty or len(ground_truth) == 0:
        return ap

    # lock_gt is addressed by row position, so make the index labels positional
    ground_truth = ground_truth.reset_index(drop=True)
    npos = float(len(ground_truth))
    lock_gt = np.ones((len(tiou_thresholds), len(ground_truth))) * -1

    # Sort predictions by decreasing score. argsort yields positions, hence
    # iloc: it is correct whatever index the caller's frame carries.
    sort_idx = prediction["score"].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros((len(tiou_thresholds), len(prediction)))
    fp = np.zeros((len(tiou_thresholds), len(prediction)))

    # Adaptation to query faster
    ground_truth_gbvn = ground_truth.groupby("video-id")

    # Assign each prediction to at most one ground-truth instance per threshold.
    for idx, this_pred in prediction.iterrows():

        try:
            # Check if there is at least one ground truth in the video associated.
            ground_truth_videoid = ground_truth_gbvn.get_group(this_pred["video-id"])
        except KeyError:
            fp[:, idx] = 1
            continue

        this_gt = ground_truth_videoid.reset_index()
        # positions of this video's segments inside ground_truth / lock_gt
        gt_pos = this_gt["index"].values
        tiou_arr = segment_iou(
            this_pred[["t-start", "t-end"]].values, this_gt[["t-start", "t-end"]].values
        )
        # Visit the ground-truth segments from highest to lowest tIoU.
        tiou_sorted_idx = tiou_arr.argsort()[::-1]
        for tidx, tiou_thr in enumerate(tiou_thresholds):
            for jdx in tiou_sorted_idx:
                if tiou_arr[jdx] < tiou_thr:
                    fp[tidx, idx] = 1
                    break
                if lock_gt[tidx, gt_pos[jdx]] >= 0:
                    # already claimed by a higher-scoring prediction
                    continue
                # Assign as true positive after the filters above.
                tp[tidx, idx] = 1
                lock_gt[tidx, gt_pos[jdx]] = idx
                break

            # every candidate was above threshold but already claimed
            if fp[tidx, idx] == 0 and tp[tidx, idx] == 0:
                fp[tidx, idx] = 1

    tp_cumsum = np.cumsum(tp, axis=1).astype(float)
    fp_cumsum = np.cumsum(fp, axis=1).astype(float)
    recall_cumsum = tp_cumsum / npos

    precision_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum)

    for tidx in range(len(tiou_thresholds)):
        ap[tidx] = interpolated_prec_rec(
            precision_cumsum[tidx, :], recall_cumsum[tidx, :]
        )

    return ap
self.path_to_annotations + "labels_all.npy", allow_pickle=True 29 | ) 30 | # Specific to Thumos14 31 | 32 | self._labels = np.load( 33 | self.path_to_annotations + "labels.npy", allow_pickle=True 34 | ) 35 | # self.classlist = np.load( 36 | # self.path_to_annotations + "classlist.npy", allow_pickle=True 37 | # ) 38 | self.subset = np.load( 39 | self.path_to_annotations + "subset.npy", allow_pickle=True 40 | ) 41 | self.videonames = np.load( 42 | self.path_to_annotations + "videoname.npy", allow_pickle=True 43 | ) 44 | 45 | split_path = f'./thumos_splits/split_{args.split_idx}' 46 | # split_path = f'./activitynet_splits/split_{args.split_idx}' 47 | # 从txt文件读入Known类别 48 | self.known_classes = [] 49 | # with open('./split_0/Class_Known.txt', 'rb') as file: 50 | with open(os.path.join(split_path, 'Class_Known.txt'), 'rb') as file: 51 | for line in file.readlines(): 52 | self.known_classes.append(line.decode().strip()) 53 | 54 | # 从txt文件读入Unknown类别 55 | self.unknown_classes = [] 56 | # with open('./split_0/Class_Unknown.txt', 'rb') as file: 57 | with open(os.path.join(split_path, 'Class_Unknown.txt'), 'rb') as file: 58 | for line in file.readlines(): 59 | self.unknown_classes.append(line.decode().strip()) 60 | 61 | # 组织新的classlist 62 | self.classlist = self.known_classes + self.unknown_classes 63 | args.classlist = self.classlist 64 | # np.save('./new_classlist.npy', self.classlist) 65 | 66 | self.batch_size = args.batch_size 67 | self.trainidx = [] 68 | self.testidx = [] 69 | self.classwiseidx = [] 70 | self.currenttestidx = 0 71 | self.currenttrainidx = 0 72 | 73 | # 原作用是将string形式的标签转化为multi-hot形式。 74 | # 注意:multi-hot标签需要按新的classlist排序,使前15类是Known类别,后5类是Unknown类别。 75 | # 训练集中,只看前15项标签即可。测试集中,将后5项标签归为1类即可。 76 | self.labels_multihot = [ 77 | utils.strlist2multihot(labs, self.classlist) 78 | for labs in self.labels 79 | ] 80 | 81 | # 原作用是划分训练集和测试集。注意:训练集中,只保留含有Known类别动作的视频 82 | self.train_test_idx() 83 | 84 | np.save('train_video_names_split_' + str(args.split_idx) + 
def train_test_idx(self):
    """Fill ``self.trainidx`` / ``self.testidx`` from ``self.subset``.

    Training videos are the "validation" entries that carry at least one
    known class; test videos are all "test" entries (specific to Thumos14).
    """
    known = set(self.known_classes)  # built once instead of once per video
    for i, s in enumerate(self.subset):
        subset_name = s.decode("utf-8")
        if subset_name == "validation" and not known.isdisjoint(self.labels[i]):
            self.trainidx.append(i)
        elif subset_name == "test":
            self.testidx.append(i)

def classwise_feature_mapping(self):
    """Append to ``self.classwiseidx`` one list per known class holding the
    training indices whose labels contain that class."""
    for category in self.known_classes:
        idx = [
            i for i in self.trainidx
            if any(label == category for label in self.labels[i])
        ]
        self.classwiseidx.append(idx)

def load_data_for_threshold(self):
    """Return the next training video as ``(feat, labels, videoname, done)``.

    ``done`` is True on the last training video, after which the cursor
    wraps back to the first one.
    """
    cur = self.trainidx[self.currenttrainidx]
    labs = self.labels_multihot[cur]
    feat = self.features[cur]
    vn = self.videonames[cur]
    if self.currenttrainidx == len(self.trainidx) - 1:
        done = True
        self.currenttrainidx = 0
    else:
        done = False
        self.currenttrainidx += 1
    feat = np.array(feat)
    if self.mode == "rgb":
        feat = feat[..., : self.feature_size]
    elif self.mode == "flow":
        feat = feat[..., self.feature_size:]
    return feat, np.array(labs), vn, done

def load_data(self, n_similar=0, is_training=True, similar_size=2):
    """Load a training batch or the next test video.

    Training returns ``(feat, labels, rand_sampleid)``: ``n_similar`` groups
    of ``similar_size`` videos sharing a known class, topped up with random
    training videos to ``self.batch_size``. Labels are cut to the known
    classes. ``self.batch_size`` is raised permanently when it is smaller
    than ``similar_size * n_similar``.

    Testing returns ``(feat, labels, videoname, done)`` and advances (and
    finally wraps) the test cursor.

    Raises:
        AssertionError: if ``self.sampling`` is not 'random', 'uniform' or 'all'.
    """
    if is_training:
        idx = []

        # Load similar pairs
        if n_similar != 0:
            rand_classid = np.random.choice(
                len(self.classwiseidx), size=n_similar
            )
            for rid in rand_classid:
                rand_sampleid = np.random.choice(
                    len(self.classwiseidx[rid]),
                    size=similar_size,
                    replace=False,
                )

                for k in rand_sampleid:
                    idx.append(self.classwiseidx[rid][k])

        # Load rest pairs
        if self.batch_size - similar_size * n_similar < 0:
            self.batch_size = similar_size * n_similar

        rand_sampleid = np.random.choice(
            len(self.trainidx),
            size=self.batch_size - similar_size * n_similar,
        )

        for r in rand_sampleid:
            idx.append(self.trainidx[r])
        feat = []
        for i in idx:
            ifeat = self.features[i]
            if self.sampling == 'random':
                sample_idx = self.random_perturb(ifeat.shape[0])
            elif self.sampling == 'uniform':
                sample_idx = self.uniform_sampling(ifeat.shape[0])
            elif self.sampling == "all":
                sample_idx = np.arange(ifeat.shape[0])
            else:
                raise AssertionError('Not supported sampling !')
            ifeat = ifeat[sample_idx]
            feat.append(ifeat)
        feat = np.array(feat)
        n_known_class = len(self.known_classes)
        labels = np.array([self.labels_multihot[i][:n_known_class] for i in idx])
        if self.mode == "rgb":
            feat = feat[..., : self.feature_size]
        elif self.mode == "flow":
            feat = feat[..., self.feature_size:]
        return feat, labels, rand_sampleid

    else:
        cur = self.testidx[self.currenttestidx]
        labs = self.labels_multihot[cur]
        feat = self.features[cur]
        vn = self.videonames[cur]
        if self.currenttestidx == len(self.testidx) - 1:
            done = True
            self.currenttestidx = 0
        else:
            done = False
            self.currenttestidx += 1
        feat = np.array(feat)
        if self.mode == "rgb":
            feat = feat[..., : self.feature_size]
        elif self.mode == "flow":
            feat = feat[..., self.feature_size:]
        return feat, np.array(labs), vn, done

def random_avg(self, x, segm=None):
    """Resample ``x`` (and optionally ``segm``) to ``self.num_segments`` rows.

    Shorter inputs are index-sampled with ``random_perturb``. Longer inputs
    are split into ``num_segments`` chunks and averaged, with ``segm`` taking
    the row at each chunk centre.

    Returns:
        ``(x_n, segm_n)``; ``segm_n`` is None when ``segm`` is None.
    """
    if len(x) < self.num_segments:
        ind = self.random_perturb(len(x))
        x_n = x[ind]
        segm = segm[ind] if segm is not None else None
        return x_n, segm
    else:
        inds = np.array_split(np.arange(len(x)), self.num_segments)
        x_n = np.zeros((self.num_segments, x.shape[-1])).astype(x.dtype)
        # allocate only when there is something to resample: touching
        # segm.shape with the default segm=None raised AttributeError
        segm_n = None
        if segm is not None:
            segm_n = np.zeros(
                (self.num_segments, segm.shape[-1])).astype(x.dtype)
        for i, ind in enumerate(inds):
            x_n[i] = np.mean(x[ind], axis=0)
            if segm is not None:
                segm_n[i] = segm[(ind[0] + ind[-1]) // 2]
        return x_n, segm_n

def random_pad(self, x, segm=None):
    """Crop or zero-pad ``x`` (and ``segm``) along axis 0 to ``self.num_segments``.

    Longer inputs get a random contiguous crop, shorter ones are padded at
    the end; an exact-length input is returned as is.
    """
    length = self.num_segments
    if x.shape[0] > length:
        # NOTE(review): randint's upper bound is exclusive, so the very last
        # window is never drawn -- confirm whether that is intended.
        strt = np.random.randint(0, x.shape[0] - length)
        x_ret = x[strt:strt + length]
        if segm is not None:
            segm = segm[strt:strt + length]
        return x_ret, segm
    elif x.shape[0] == length:
        return x, segm
    else:
        pad_len = length - x.shape[0]
        x_ret = np.pad(x, ((0, pad_len), (0, 0)), mode='constant')
        if segm is not None:
            segm = np.pad(segm, ((0, pad_len), (0, 0)), mode='constant')
        return x_ret, segm

def random_perturb(self, length):
    """Return ``self.num_segments`` indices into a sequence of ``length``.

    Index ``i`` is drawn at random between evenly spaced grid points
    ``i`` and ``i + 1`` (the last one up to ``length - 1``); when two grid
    points coincide the grid value itself is used.
    """
    if self.num_segments == length:
        return np.arange(self.num_segments).astype(int)
    samples = np.arange(self.num_segments) * length / self.num_segments
    last = self.num_segments - 1
    for i in range(self.num_segments):
        lo = int(samples[i])
        if i < last:
            hi = int(samples[i + 1])
            if lo != hi:
                samples[i] = np.random.choice(range(lo, hi + 1))
            else:
                samples[i] = lo
        else:
            if lo < length - 1:
                samples[i] = np.random.choice(range(lo, length))
            else:
                samples[i] = lo
    return samples.astype(int)

def uniform_sampling(self, length):
    """Return ``self.num_segments`` evenly spaced (floored) indices into ``length``."""
    if self.num_segments == length:
        return np.arange(self.num_segments).astype(int)
    samples = np.arange(self.num_segments) * length / self.num_segments
    samples = np.floor(samples)
    return samples.astype(int)
self.unknown_classes.append(line.decode().strip()) 311 | 312 | # 组织新的classlist,格式为string list,保存为new_classlist.npy,以供其它模块读取 313 | self.classlist = self.known_classes + self.unknown_classes 314 | np.save('./new_classlist.npy', self.classlist) 315 | 316 | self.trainidx = [] 317 | self.testidx = [] 318 | self.classwiseidx = [] 319 | self.currenttestidx = 0 320 | self.labels_multihot = [ 321 | utils.strlist2multihot(labs, self.classlist) 322 | for labs in self.labels 323 | ] 324 | try: 325 | ambilist = self.path_to_annotations + "/Ambiguous_test.txt" 326 | ambilist = list(open(ambilist, "r")) 327 | ambilist = [a.strip("\n").split(" ")[0] for a in ambilist] 328 | except: 329 | ambilist = [] 330 | self.train_test_idx() 331 | self.classwise_feature_mapping() 332 | 333 | self.normalize = False 334 | self.mode = mode 335 | if mode == "rgb" or mode == "flow": 336 | self.feature_size = 1024 337 | self.filter() 338 | 339 | def filter(self): 340 | new_testidx = [] 341 | for idx in self.testidx: 342 | feat = self.features[idx] 343 | if len(feat) > 10: 344 | new_testidx.append(idx) 345 | self.testidx = new_testidx 346 | 347 | new_trainidx = [] 348 | for idx in self.trainidx: 349 | feat = self.features[idx] 350 | if len(feat) > 10: 351 | new_trainidx.append(idx) 352 | self.trainidx = new_trainidx 353 | 354 | def train_test_idx(self): 355 | for i, s in enumerate(self.subset): 356 | if s.decode("utf-8") == "training" and list(set(self.labels[i]) & set(self.known_classes)): 357 | self.trainidx.append(i) 358 | elif s.decode("utf-8") == "validation": 359 | self.testidx.append(i) 360 | 361 | def classwise_feature_mapping(self): 362 | # for category in self.classlist: 363 | for category in self.known_classes: 364 | idx = [] 365 | for i in self.trainidx: 366 | if self.features[i].sum() == 0: 367 | continue 368 | for label in self.labels[i]: 369 | # if label == category.decode("utf-8"): 370 | if label == category: 371 | idx.append(i) 372 | break 373 | self.classwiseidx.append(idx) 374 | 
375 | def load_data(self, n_similar=0, is_training=True, similar_size=2): 376 | if is_training: 377 | labels = [] 378 | idx = [] 379 | # Load similar pairs 380 | if n_similar != 0: 381 | rand_classid = np.random.choice( 382 | len(self.classwiseidx), size=n_similar 383 | ) 384 | for rid in rand_classid: 385 | rand_sampleid = np.random.choice( 386 | len(self.classwiseidx[rid]), 387 | size=similar_size, 388 | replace=False, 389 | ) 390 | 391 | for k in rand_sampleid: 392 | idx.append(self.classwiseidx[rid][k]) 393 | 394 | # Load rest pairs 395 | if self.batch_size - similar_size * n_similar < 0: 396 | self.batch_size = similar_size * n_similar 397 | 398 | rand_sampleid = np.random.choice( 399 | len(self.trainidx), 400 | size=self.batch_size - similar_size * n_similar, 401 | ) 402 | 403 | for r in rand_sampleid: 404 | idx.append(self.trainidx[r]) 405 | feat = [] 406 | for i in idx: 407 | ifeat = self.features[i] 408 | if self.sampling == 'random': 409 | sample_idx = self.random_perturb(ifeat.shape[0]) 410 | elif self.sampling == 'uniform': 411 | sample_idx = self.uniform_sampling(ifeat.shape[0]) 412 | elif self.sampling == "all": 413 | sample_idx = np.arange(ifeat.shape[0]) 414 | else: 415 | raise AssertionError('Not supported sampling !') 416 | ifeat = ifeat[sample_idx] 417 | feat.append(ifeat) 418 | feat = np.array(feat) 419 | 420 | n_known_class = len(self.known_classes) 421 | labels = np.array([self.labels_multihot[i][:n_known_class] for i in idx]) 422 | 423 | # labels = np.array([self.labels_multihot[i] for i in idx]) 424 | if self.mode == "rgb": 425 | feat = feat[..., : self.feature_size] 426 | elif self.mode == "flow": 427 | feat = feat[..., self.feature_size:] 428 | return feat, labels, rand_sampleid 429 | 430 | else: 431 | labs = self.labels_multihot[self.testidx[self.currenttestidx]] 432 | feat = self.features[self.testidx[self.currenttestidx]] 433 | # feat = utils.process_feat(feat, normalize=self.normalize) 434 | # feature = feature[sample_idx] 435 | vn = 
self.videonames[self.testidx[self.currenttestidx]] 436 | if self.currenttestidx == len(self.testidx) - 1: 437 | done = True 438 | self.currenttestidx = 0 439 | else: 440 | done = False 441 | self.currenttestidx += 1 442 | feat = np.array(feat) 443 | if self.mode == "rgb": 444 | feat = feat[..., : self.feature_size] 445 | elif self.mode == "flow": 446 | feat = feat[..., self.feature_size:] 447 | return feat, np.array(labs), vn, done 448 | 449 | def random_avg(self, x, segm=None): 450 | if len(x) < self.num_segments: 451 | ind = self.random_perturb(len(x)) 452 | x_n = x[ind] 453 | segm = segm[ind] if segm is not None else None 454 | return x_n, segm 455 | else: 456 | inds = np.array_split(np.arange(len(x)), self.num_segments) 457 | x_n = np.zeros((self.num_segments, x.shape[-1])).astype(x.dtype) 458 | segm_n = np.zeros( 459 | (self.num_segments, segm.shape[-1])).astype(x.dtype) 460 | for i, ind in enumerate(inds): 461 | x_n[i] = np.mean(x[ind], axis=0) 462 | if segm is not None: 463 | segm_n[i] = segm[(ind[0] + ind[-1]) // 2] 464 | return x_n, segm_n if segm is not None else None 465 | 466 | def random_pad(self, x, segm=None): 467 | length = self.num_segments 468 | if x.shape[0] > length: 469 | strt = np.random.randint(0, x.shape[0] - length) 470 | x_ret = x[strt:strt + length] 471 | if segm is not None: 472 | segm = segm[strt:strt + length] 473 | return x_ret, segm 474 | elif x.shape[0] == length: 475 | return x, segm 476 | else: 477 | pad_len = length - x.shape[0] 478 | x_ret = np.pad(x, ((0, pad_len), (0, 0)), mode='constant') 479 | if segm is not None: 480 | segm = np.pad(segm, ((0, pad_len), (0, 0)), mode='constant') 481 | return x_ret, segm 482 | 483 | def random_perturb(self, length): 484 | if self.num_segments == length: 485 | return np.arange(self.num_segments).astype(int) 486 | samples = np.arange(self.num_segments) * length / self.num_segments 487 | for i in range(self.num_segments): 488 | if i < self.num_segments - 1: 489 | if int(samples[i]) != 
int(samples[i + 1]): 490 | samples[i] = np.random.choice( 491 | range(int(samples[i]), 492 | int(samples[i + 1]) + 1)) 493 | else: 494 | samples[i] = int(samples[i]) 495 | else: 496 | if int(samples[i]) < length - 1: 497 | samples[i] = np.random.choice( 498 | range(int(samples[i]), length)) 499 | else: 500 | samples[i] = int(samples[i]) 501 | return samples.astype(int) 502 | 503 | def uniform_sampling(self, length): 504 | if self.num_segments == length: 505 | return np.arange(self.num_segments).astype(int) 506 | samples = np.arange(self.num_segments) * length / self.num_segments 507 | samples = np.floor(samples) 508 | return samples.astype(int) 509 | 510 | 511 | if __name__ == '__main__': 512 | args = options.parser.parse_args() 513 | dt = SampleDataset(args) 514 | data = dt.load_data() 515 | print(data) 516 | import pdb 517 | 518 | pdb.set_trace() 519 | print(dt) 520 | -------------------------------------------------------------------------------- /libMR/malloc.h: -------------------------------------------------------------------------------- 1 | /* 2 | Default header file for malloc-2.8.x, written by Doug Lea 3 | and released to the public domain, as explained at 4 | http://creativecommons.org/publicdomain/zero/1.0/ 5 | 6 | This header is for ANSI C/C++ only. You can set any of 7 | the following #defines before including: 8 | 9 | * If USE_DL_PREFIX is defined, it is assumed that malloc.c 10 | was also compiled with this option, so all routines 11 | have names starting with "dl". 12 | 13 | * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this 14 | file will be #included AFTER . This is needed only if 15 | your system defines a struct mallinfo that is incompatible with the 16 | standard one declared here. Otherwise, you can include this file 17 | INSTEAD of your system system . At least on ANSI, all 18 | declarations should be compatible with system versions 19 | 20 | * If MSPACES is defined, declarations for mspace versions are included. 
21 | */ 22 | 23 | #ifndef MALLOC_280_H 24 | #define MALLOC_280_H 25 | 26 | #ifdef __cplusplus 27 | extern "C" { 28 | #endif 29 | 30 | #include /* for size_t */ 31 | 32 | #ifndef ONLY_MSPACES 33 | #define ONLY_MSPACES 0 /* define to a value */ 34 | #elif ONLY_MSPACES != 0 35 | #define ONLY_MSPACES 1 36 | #endif /* ONLY_MSPACES */ 37 | #ifndef NO_MALLINFO 38 | #define NO_MALLINFO 0 39 | #endif /* NO_MALLINFO */ 40 | 41 | #ifndef MSPACES 42 | #if ONLY_MSPACES 43 | #define MSPACES 1 44 | #else /* ONLY_MSPACES */ 45 | #define MSPACES 0 46 | #endif /* ONLY_MSPACES */ 47 | #endif /* MSPACES */ 48 | 49 | #if !ONLY_MSPACES 50 | 51 | #ifndef USE_DL_PREFIX 52 | #define dlcalloc calloc 53 | #define dlfree free 54 | #define dlmalloc malloc 55 | #define dlmemalign memalign 56 | #define dlposix_memalign posix_memalign 57 | #define dlrealloc realloc 58 | #define dlvalloc valloc 59 | #define dlpvalloc pvalloc 60 | #define dlmallinfo mallinfo 61 | #define dlmallopt mallopt 62 | #define dlmalloc_trim malloc_trim 63 | #define dlmalloc_stats malloc_stats 64 | #define dlmalloc_usable_size malloc_usable_size 65 | #define dlmalloc_footprint malloc_footprint 66 | #define dlmalloc_max_footprint malloc_max_footprint 67 | #define dlmalloc_footprint_limit malloc_footprint_limit 68 | #define dlmalloc_set_footprint_limit malloc_set_footprint_limit 69 | #define dlmalloc_inspect_all malloc_inspect_all 70 | #define dlindependent_calloc independent_calloc 71 | #define dlindependent_comalloc independent_comalloc 72 | #define dlbulk_free bulk_free 73 | #endif /* USE_DL_PREFIX */ 74 | 75 | #if !NO_MALLINFO 76 | #ifndef HAVE_USR_INCLUDE_MALLOC_H 77 | #ifndef _MALLOC_H 78 | #ifndef MALLINFO_FIELD_TYPE 79 | #define MALLINFO_FIELD_TYPE size_t 80 | #endif /* MALLINFO_FIELD_TYPE */ 81 | #ifndef STRUCT_MALLINFO_DECLARED 82 | #define STRUCT_MALLINFO_DECLARED 1 83 | struct mallinfo { 84 | MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ 85 | MALLINFO_FIELD_TYPE ordblks; /* number of free 
chunks */ 86 | MALLINFO_FIELD_TYPE smblks; /* always 0 */ 87 | MALLINFO_FIELD_TYPE hblks; /* always 0 */ 88 | MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ 89 | MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ 90 | MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ 91 | MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ 92 | MALLINFO_FIELD_TYPE fordblks; /* total free space */ 93 | MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ 94 | }; 95 | #endif /* STRUCT_MALLINFO_DECLARED */ 96 | #endif /* _MALLOC_H */ 97 | #endif /* HAVE_USR_INCLUDE_MALLOC_H */ 98 | #endif /* !NO_MALLINFO */ 99 | 100 | /* 101 | malloc(size_t n) 102 | Returns a pointer to a newly allocated chunk of at least n bytes, or 103 | null if no space is available, in which case errno is set to ENOMEM 104 | on ANSI C systems. 105 | 106 | If n is zero, malloc returns a minimum-sized chunk. (The minimum 107 | size is 16 bytes on most 32bit systems, and 32 bytes on 64bit 108 | systems.) Note that size_t is an unsigned type, so calls with 109 | arguments that would be negative if signed are interpreted as 110 | requests for huge amounts of space, which will often fail. The 111 | maximum supported value of n differs across systems, but is in all 112 | cases less than the maximum representable value of a size_t. 113 | */ 114 | void* dlmalloc(size_t); 115 | 116 | /* 117 | free(void* p) 118 | Releases the chunk of memory pointed to by p, that had been previously 119 | allocated using malloc or a related routine such as realloc. 120 | It has no effect if p is null. If p was not malloced or already 121 | freed, free(p) will by default cause the current program to abort. 122 | */ 123 | void dlfree(void*); 124 | 125 | /* 126 | calloc(size_t n_elements, size_t element_size); 127 | Returns a pointer to n_elements * element_size bytes, with all locations 128 | set to zero.
129 | */ 130 | void* dlcalloc(size_t, size_t); 131 | 132 | /* 133 | realloc(void* p, size_t n) 134 | Returns a pointer to a chunk of size n that contains the same data 135 | as does chunk p up to the minimum of (n, p's size) bytes, or null 136 | if no space is available. 137 | 138 | The returned pointer may or may not be the same as p. The algorithm 139 | prefers extending p in most cases when possible, otherwise it 140 | employs the equivalent of a malloc-copy-free sequence. 141 | 142 | If p is null, realloc is equivalent to malloc. 143 | 144 | If space is not available, realloc returns null, errno is set (if on 145 | ANSI) and p is NOT freed. 146 | 147 | if n is for fewer bytes than already held by p, the newly unused 148 | space is lopped off and freed if possible. realloc with a size 149 | argument of zero (re)allocates a minimum-sized chunk. 150 | 151 | The old unix realloc convention of allowing the last-free'd chunk 152 | to be used as an argument to realloc is not supported. 153 | */ 154 | void* dlrealloc(void*, size_t); 155 | 156 | /* 157 | realloc_in_place(void* p, size_t n) 158 | Resizes the space allocated for p to size n, only if this can be 159 | done without moving p (i.e., only if there is adjacent space 160 | available if n is greater than p's current allocated size, or n is 161 | less than or equal to p's size). This may be used instead of plain 162 | realloc if an alternative allocation strategy is needed upon failure 163 | to expand space; for example, reallocation of a buffer that must be 164 | memory-aligned or cleared. You can use realloc_in_place to trigger 165 | these alternatives only when needed. 166 | 167 | Returns p if successful; otherwise null. 168 | */ 169 | void* dlrealloc_in_place(void*, size_t); 170 | 171 | /* 172 | memalign(size_t alignment, size_t n); 173 | Returns a pointer to a newly allocated chunk of n bytes, aligned 174 | in accord with the alignment argument. 175 | 176 | The alignment argument should be a power of two. 
If the argument is 177 | not a power of two, the nearest greater power is used. 178 | 8-byte alignment is guaranteed by normal malloc calls, so don't 179 | bother calling memalign with an argument of 8 or less. 180 | 181 | Overreliance on memalign is a sure way to fragment space. 182 | */ 183 | void* dlmemalign(size_t, size_t); 184 | 185 | /* 186 | int posix_memalign(void** pp, size_t alignment, size_t n); 187 | Allocates a chunk of n bytes, aligned in accord with the alignment 188 | argument. Differs from memalign only in that it (1) assigns the 189 | allocated memory to *pp rather than returning it, (2) fails and 190 | returns EINVAL if the alignment is not a power of two (3) fails and 191 | returns ENOMEM if memory cannot be allocated. 192 | */ 193 | int dlposix_memalign(void**, size_t, size_t); 194 | 195 | /* 196 | valloc(size_t n); 197 | Equivalent to memalign(pagesize, n), where pagesize is the page 198 | size of the system. If the pagesize is unknown, 4096 is used. 199 | */ 200 | void* dlvalloc(size_t); 201 | 202 | /* 203 | mallopt(int parameter_number, int parameter_value) 204 | Sets tunable parameters. The format is to provide a 205 | (parameter-number, parameter-value) pair. mallopt then sets the 206 | corresponding parameter to the argument value if it can (i.e., so 207 | long as the value is meaningful), and returns 1 if successful else 208 | 0. SVID/XPG/ANSI defines four standard param numbers for mallopt, 209 | normally defined in malloc.h. None of these are used in this malloc, 210 | so setting them has no effect.
But this malloc also supports other 211 | options in mallopt: 212 | 213 | Symbol param # default allowed param values 214 | M_TRIM_THRESHOLD -1 2*1024*1024 any (-1U disables trimming) 215 | M_GRANULARITY -2 page size any power of 2 >= page size 216 | M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) 217 | */ 218 | int dlmallopt(int, int); 219 | 220 | #define M_TRIM_THRESHOLD (-1) 221 | #define M_GRANULARITY (-2) 222 | #define M_MMAP_THRESHOLD (-3) 223 | 224 | 225 | /* 226 | malloc_footprint(); 227 | Returns the number of bytes obtained from the system. The total 228 | number of bytes allocated by malloc, realloc etc., is less than this 229 | value. Unlike mallinfo, this function returns only a precomputed 230 | result, so can be called frequently to monitor memory consumption. 231 | Even if locks are otherwise defined, this function does not use them, 232 | so results might not be up to date. 233 | */ 234 | size_t dlmalloc_footprint(void); 235 | 236 | /* 237 | malloc_max_footprint(); 238 | Returns the maximum number of bytes obtained from the system. This 239 | value will be greater than current footprint if deallocated space 240 | has been reclaimed by the system. The peak number of bytes allocated 241 | by malloc, realloc etc., is less than this value. Unlike mallinfo, 242 | this function returns only a precomputed result, so can be called 243 | frequently to monitor memory consumption. Even if locks are 244 | otherwise defined, this function does not use them, so results might 245 | not be up to date. 246 | */ 247 | size_t dlmalloc_max_footprint(void); 248 | 249 | /* 250 | malloc_footprint_limit(); 251 | Returns the number of bytes that the heap is allowed to obtain from 252 | the system, returning the last value returned by 253 | malloc_set_footprint_limit, or the maximum size_t value if 254 | never set. The returned value reflects a permission. There is no 255 | guarantee that this number of bytes can actually be obtained from 256 | the system. 
257 | */ 258 | size_t dlmalloc_footprint_limit(void); 259 | 260 | /* 261 | malloc_set_footprint_limit(); 262 | Sets the maximum number of bytes to obtain from the system, causing 263 | failure returns from malloc and related functions upon attempts to 264 | exceed this value. The argument value may be subject to page 265 | rounding to an enforceable limit; this actual value is returned. 266 | Using an argument of the maximum possible size_t effectively 267 | disables checks. If the argument is less than or equal to the 268 | current malloc_footprint, then all future allocations that require 269 | additional system memory will fail. However, invocation cannot 270 | retroactively deallocate existing used memory. 271 | */ 272 | size_t dlmalloc_set_footprint_limit(size_t bytes); 273 | 274 | /* 275 | malloc_inspect_all(void(*handler)(void *start, 276 | void *end, 277 | size_t used_bytes, 278 | void* callback_arg), 279 | void* arg); 280 | Traverses the heap and calls the given handler for each managed 281 | region, skipping all bytes that are (or may be) used for bookkeeping 282 | purposes. Traversal does not include chunks that have been 283 | directly memory mapped. Each reported region begins at the start 284 | address, and continues up to but not including the end address. The 285 | first used_bytes of the region contain allocated data. If 286 | used_bytes is zero, the region is unallocated. The handler is 287 | invoked with the given callback argument. If locks are defined, they 288 | are held during the entire traversal. It is a bad idea to invoke 289 | other malloc functions from within the handler.
290 | 291 | For example, to count the number of in-use chunks with size greater 292 | than 1000, you could write: 293 | static int count = 0; 294 | void count_chunks(void* start, void* end, size_t used, void* arg) { 295 | if (used >= 1000) ++count; 296 | } 297 | then: 298 | malloc_inspect_all(count_chunks, NULL); 299 | 300 | malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined. 301 | */ 302 | void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*), 303 | void* arg); 304 | 305 | #if !NO_MALLINFO 306 | /* 307 | mallinfo() 308 | Returns (by copy) a struct containing various summary statistics: 309 | 310 | arena: current total non-mmapped bytes allocated from system 311 | ordblks: the number of free chunks 312 | smblks: always zero. 313 | hblks: current number of mmapped regions 314 | hblkhd: total bytes held in mmapped regions 315 | usmblks: the maximum total allocated space. This will be greater 316 | than current total if trimming has occurred. 317 | fsmblks: always zero 318 | uordblks: current total allocated space (normal or mmapped) 319 | fordblks: total free space 320 | keepcost: the maximum number of bytes that could ideally be released 321 | back to system via malloc_trim. ("ideally" means that 322 | it ignores page restrictions etc.) 323 | 324 | Because these fields are ints, but internal bookkeeping may 325 | be kept as longs, the reported values may wrap around zero and 326 | thus be inaccurate. 327 | */ 328 | 329 | struct mallinfo dlmallinfo(void); 330 | #endif /* NO_MALLINFO */ 331 | 332 | /* 333 | independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); 334 | 335 | independent_calloc is similar to calloc, but instead of returning a 336 | single cleared space, it returns an array of pointers to n_elements 337 | independent elements that can hold contents of size elem_size, each 338 | of which starts out cleared, and can be independently freed, 339 | realloc'ed etc. 
The elements are guaranteed to be adjacently 340 | allocated (this is not guaranteed to occur with multiple callocs or 341 | mallocs), which may also improve cache locality in some 342 | applications. 343 | 344 | The "chunks" argument is optional (i.e., may be null, which is 345 | probably the most typical usage). If it is null, the returned array 346 | is itself dynamically allocated and should also be freed when it is 347 | no longer needed. Otherwise, the chunks array must be of at least 348 | n_elements in length. It is filled in with the pointers to the 349 | chunks. 350 | 351 | In either case, independent_calloc returns this pointer array, or 352 | null if the allocation failed. If n_elements is zero and "chunks" 353 | is null, it returns a chunk representing an array with zero elements 354 | (which should be freed if not wanted). 355 | 356 | Each element must be freed when it is no longer needed. This can be 357 | done all at once using bulk_free. 358 | 359 | independent_calloc simplifies and speeds up implementations of many 360 | kinds of pools. It may also be useful when constructing large data 361 | structures that initially have a fixed number of fixed-sized nodes, 362 | but the number is not known at compile time, and some of the nodes 363 | may later need to be freed. For example: 364 | 365 | struct Node { int item; struct Node* next; }; 366 | 367 | struct Node* build_list() { 368 | struct Node** pool; 369 | int n = read_number_of_nodes_needed(); 370 | if (n <= 0) return 0; 371 | pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); 372 | if (pool == 0) die(); 373 | // organize into a linked list... 
374 | struct Node* first = pool[0]; 375 | for (i = 0; i < n-1; ++i) 376 | pool[i]->next = pool[i+1]; 377 | free(pool); // Can now free the array (or not, if it is needed later) 378 | return first; 379 | } 380 | */ 381 | void** dlindependent_calloc(size_t, size_t, void**); 382 | 383 | /* 384 | independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); 385 | 386 | independent_comalloc allocates, all at once, a set of n_elements 387 | chunks with sizes indicated in the "sizes" array. It returns 388 | an array of pointers to these elements, each of which can be 389 | independently freed, realloc'ed etc. The elements are guaranteed to 390 | be adjacently allocated (this is not guaranteed to occur with 391 | multiple callocs or mallocs), which may also improve cache locality 392 | in some applications. 393 | 394 | The "chunks" argument is optional (i.e., may be null). If it is null 395 | the returned array is itself dynamically allocated and should also 396 | be freed when it is no longer needed. Otherwise, the chunks array 397 | must be of at least n_elements in length. It is filled in with the 398 | pointers to the chunks. 399 | 400 | In either case, independent_comalloc returns this pointer array, or 401 | null if the allocation failed. If n_elements is zero and chunks is 402 | null, it returns a chunk representing an array with zero elements 403 | (which should be freed if not wanted). 404 | 405 | Each element must be freed when it is no longer needed. This can be 406 | done all at once using bulk_free. 407 | 408 | independent_comalloc differs from independent_calloc in that each 409 | element may have a different size, and also that it does not 410 | automatically clear elements. 411 | 412 | independent_comalloc can be used to speed up allocation in cases 413 | where several structs or objects must always be allocated at the 414 | same time. For example: 415 | 416 | struct Head { ... } 417 | struct Foot { ...
} 418 | 419 | void send_message(char* msg) { 420 | int msglen = strlen(msg); 421 | size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; 422 | void* chunks[3]; 423 | if (independent_comalloc(3, sizes, chunks) == 0) 424 | die(); 425 | struct Head* head = (struct Head*)(chunks[0]); 426 | char* body = (char*)(chunks[1]); 427 | struct Foot* foot = (struct Foot*)(chunks[2]); 428 | // ... 429 | } 430 | 431 | In general though, independent_comalloc is worth using only for 432 | larger values of n_elements. For small values, you probably won't 433 | detect enough difference from series of malloc calls to bother. 434 | 435 | Overuse of independent_comalloc can increase overall memory usage, 436 | since it cannot reuse existing noncontiguous small chunks that 437 | might be available for some of the elements. 438 | */ 439 | void** dlindependent_comalloc(size_t, size_t*, void**); 440 | 441 | /* 442 | bulk_free(void* array[], size_t n_elements) 443 | Frees and clears (sets to null) each non-null pointer in the given 444 | array. This is likely to be faster than freeing them one-by-one. 445 | If footers are used, pointers that have been allocated in different 446 | mspaces are not freed or cleared, and the count of all such pointers 447 | is returned. For large arrays of pointers with poor locality, it 448 | may be worthwhile to sort this array before calling bulk_free. 449 | */ 450 | size_t dlbulk_free(void**, size_t n_elements); 451 | 452 | /* 453 | pvalloc(size_t n); 454 | Equivalent to valloc(minimum-page-that-holds(n)), that is, 455 | round up n to nearest pagesize. 456 | */ 457 | void* dlpvalloc(size_t); 458 | 459 | /* 460 | malloc_trim(size_t pad); 461 | 462 | If possible, gives memory back to the system (via negative arguments 463 | to sbrk) if there is unused memory at the `high' end of the malloc 464 | pool or in unused MMAP segments. 
You can call this after freeing 465 | large blocks of memory to potentially reduce the system-level memory 466 | requirements of a program. However, it cannot guarantee to reduce 467 | memory. Under some allocation patterns, some large free blocks of 468 | memory will be locked between two used chunks, so they cannot be 469 | given back to the system. 470 | 471 | The `pad' argument to malloc_trim represents the amount of free 472 | trailing space to leave untrimmed. If this argument is zero, only 473 | the minimum amount of memory to maintain internal data structures 474 | will be left. Non-zero arguments can be supplied to maintain enough 475 | trailing space to service future expected allocations without having 476 | to re-obtain memory from the system. 477 | 478 | Malloc_trim returns 1 if it actually released any memory, else 0. 479 | */ 480 | int dlmalloc_trim(size_t); 481 | 482 | /* 483 | malloc_stats(); 484 | Prints on stderr the amount of space obtained from the system (both 485 | via sbrk and mmap), the maximum amount (which may be more than 486 | current if malloc_trim and/or munmap got called), and the current 487 | number of bytes allocated via malloc (or realloc, etc) but not yet 488 | freed. Note that this is the number of bytes allocated, not the 489 | number requested. It will be larger than the number requested 490 | because of alignment and bookkeeping overhead. Because it includes 491 | alignment wastage as being in use, this figure may be greater than 492 | zero even when no user-level chunks are allocated. 493 | 494 | The reported current and maximum system memory can be inaccurate if 495 | a program makes other calls to system memory allocation functions 496 | (normally sbrk) outside of malloc. 497 | 498 | malloc_stats prints only the most commonly interesting statistics. 499 | More information can be obtained by calling mallinfo. 500 | 501 | malloc_stats is not compiled if NO_MALLOC_STATS is defined. 
502 | */ 503 | void dlmalloc_stats(void); 504 | 505 | #endif /* !ONLY_MSPACES */ 506 | 507 | /* 508 | malloc_usable_size(void* p); 509 | 510 | Returns the number of bytes you can actually use in 511 | an allocated chunk, which may be more than you requested (although 512 | often not) due to alignment and minimum size constraints. 513 | You can use this many bytes without worrying about 514 | overwriting other allocated objects. This is not a particularly great 515 | programming practice. malloc_usable_size can be more useful in 516 | debugging and assertions, for example: 517 | 518 | p = malloc(n); 519 | assert(malloc_usable_size(p) >= 256); 520 | */ 521 | size_t dlmalloc_usable_size(const void*); 522 | 523 | #if MSPACES 524 | 525 | /* 526 | mspace is an opaque type representing an independent 527 | region of space that supports mspace_malloc, etc. 528 | */ 529 | typedef void* mspace; 530 | 531 | /* 532 | create_mspace creates and returns a new independent space with the 533 | given initial capacity, or, if 0, the default granularity size. It 534 | returns null if there is no system memory available to create the 535 | space. If argument locked is non-zero, the space uses a separate 536 | lock to control access. The capacity of the space will grow 537 | dynamically as needed to service mspace_malloc requests. You can 538 | control the sizes of incremental increases of this space by 539 | compiling with a different DEFAULT_GRANULARITY or dynamically 540 | setting with mallopt(M_GRANULARITY, value). 541 | */ 542 | mspace create_mspace(size_t capacity, int locked); 543 | 544 | /* 545 | destroy_mspace destroys the given space, and attempts to return all 546 | of its memory back to the system, returning the total number of 547 | bytes freed. After destruction, the results of access to all memory 548 | used by the space become undefined. 
549 | */ 550 | size_t destroy_mspace(mspace msp); 551 | 552 | /* 553 | create_mspace_with_base uses the memory supplied as the initial base 554 | of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this 555 | space is used for bookkeeping, so the capacity must be at least this 556 | large. (Otherwise 0 is returned.) When this initial space is 557 | exhausted, additional memory will be obtained from the system. 558 | Destroying this space will deallocate all additionally allocated 559 | space (if possible) but not the initial base. 560 | */ 561 | mspace create_mspace_with_base(void* base, size_t capacity, int locked); 562 | 563 | /* 564 | mspace_track_large_chunks controls whether requests for large chunks 565 | are allocated in their own untracked mmapped regions, separate from 566 | others in this mspace. By default large chunks are not tracked, 567 | which reduces fragmentation. However, such chunks are not 568 | necessarily released to the system upon destroy_mspace. Enabling 569 | tracking by setting to true may increase fragmentation, but avoids 570 | leakage when relying on destroy_mspace to release all memory 571 | allocated using this space. The function returns the previous 572 | setting. 573 | */ 574 | int mspace_track_large_chunks(mspace msp, int enable); 575 | 576 | #if !NO_MALLINFO 577 | /* 578 | mspace_mallinfo behaves as mallinfo, but reports properties of 579 | the given space. 580 | */ 581 | struct mallinfo mspace_mallinfo(mspace msp); 582 | #endif /* NO_MALLINFO */ 583 | 584 | /* 585 | An alias for mallopt. 
586 | */ 587 | int mspace_mallopt(int, int); 588 | 589 | /* 590 | The following operate identically to their malloc counterparts 591 | but operate only for the given mspace argument 592 | */ 593 | void* mspace_malloc(mspace msp, size_t bytes); 594 | void mspace_free(mspace msp, void* mem); 595 | void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); 596 | void* mspace_realloc(mspace msp, void* mem, size_t newsize); 597 | void* mspace_realloc_in_place(mspace msp, void* mem, size_t newsize); 598 | void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); 599 | void** mspace_independent_calloc(mspace msp, size_t n_elements, 600 | size_t elem_size, void* chunks[]); 601 | void** mspace_independent_comalloc(mspace msp, size_t n_elements, 602 | size_t sizes[], void* chunks[]); 603 | size_t mspace_bulk_free(mspace msp, void**, size_t n_elements); 604 | size_t mspace_usable_size(const void* mem); 605 | void mspace_malloc_stats(mspace msp); 606 | int mspace_trim(mspace msp, size_t pad); 607 | size_t mspace_footprint(mspace msp); 608 | size_t mspace_max_footprint(mspace msp); 609 | size_t mspace_footprint_limit(mspace msp); 610 | size_t mspace_set_footprint_limit(mspace msp, size_t bytes); 611 | void mspace_inspect_all(mspace msp, 612 | void(*handler)(void *, void *, size_t, void*), 613 | void* arg); 614 | #endif /* MSPACES */ 615 | 616 | #ifdef __cplusplus 617 | }; /* end of extern "C" */ 618 | #endif 619 | 620 | #endif /* MALLOC_280_H */ 621 | --------------------------------------------------------------------------------