├── eval
    ├── __init__.py
    ├── classificationMAP.py
    ├── utils_eval.py
    ├── eval_anno_file_generation.py
    ├── detectionMAP.py
    └── eval_detection.py
├── utils
    ├── __init__.py
    └── wsad_utils.py
├── libMR
    ├── libmr.c
    ├── libmr.pxd
    ├── Makefile
    ├── compile.sh
    ├── setup.py
    ├── build_libmr_python.sh
    ├── estimate_wscores.py
    ├── test_libmr.py
    ├── weibull.h
    ├── MetaRecognition.h
    ├── COPYRIGHT_Libmr.txt
    ├── libmr.pyx
    ├── MetaRecognition.cpp
    └── malloc.h
├── thumos_splits
    ├── split_0
    │   ├── Class_Unknown.txt
    │   └── Class_Known.txt
    ├── split_2
    │   ├── Class_Unknown.txt
    │   └── Class_Known.txt
    └── split_1
    │   ├── Class_Unknown.txt
    │   └── Class_Known.txt
├── scripts
    ├── test_split0.sh
    ├── test_split1.sh
    ├── test_split2.sh
    ├── train_split0.sh
    ├── train_split1.sh
    └── train_split2.sh
├── Dist.py
├── README.md
├── train.py
├── LICENSE
├── PL.py
├── base.py
├── main.py
├── options.py
├── test.py
├── proposal_methods.py
├── edl_loss.py
└── wsad_dataset.py


/eval/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/libMR/libmr.c:
--------------------------------------------------------------------------------
1 | #error Do not use this file, it is the result of a failed Cython compilation.
2 | 


--------------------------------------------------------------------------------
/thumos_splits/split_0/Class_Unknown.txt:
--------------------------------------------------------------------------------
1 | CleanAndJerk
2 | CliffDiving
3 | Diving
4 | LongJump
5 | SoccerPenalty


--------------------------------------------------------------------------------
/thumos_splits/split_2/Class_Unknown.txt:
--------------------------------------------------------------------------------
1 | CricketShot
2 | JavelinThrow
3 | LongJump
4 | PoleVault
5 | Shotput
6 | 


--------------------------------------------------------------------------------
/thumos_splits/split_1/Class_Unknown.txt:
--------------------------------------------------------------------------------
1 | CliffDiving
2 | CricketBowling
3 | Diving
4 | HammerThrow
5 | JavelinThrow
6 | 


--------------------------------------------------------------------------------
/libMR/libmr.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "MetaRecognition.h":
2 |     cdef struct svm_node_libsvm:
3 |         int index
4 |         double value


--------------------------------------------------------------------------------
/libMR/Makefile:
--------------------------------------------------------------------------------
# Build the libMR sources with g++.
CXX= g++
SRC= MetaRecognition.cpp weibull.c

libmr: $(SRC) weibull.h malloc.h MetaRecognition.h
	$(CXX) -o libmr $(SRC) -I.

# `clean` produces no file; declaring it phony keeps it working even if a
# file named "clean" ever appears in this directory.
.PHONY: clean
clean:
	rm -f *~ *.o libmr


--------------------------------------------------------------------------------
/thumos_splits/split_0/Class_Known.txt:
--------------------------------------------------------------------------------
 1 | BaseballPitch
 2 | BasketballDunk
 3 | Billiards
 4 | CricketBowling
 5 | CricketShot
 6 | FrisbeeCatch
 7 | GolfSwing
 8 | HammerThrow
 9 | HighJump
10 | JavelinThrow
11 | PoleVault
12 | Shotput
13 | TennisSwing
14 | ThrowDiscus
15 | VolleyballSpiking


--------------------------------------------------------------------------------
/thumos_splits/split_1/Class_Known.txt:
--------------------------------------------------------------------------------
 1 | BaseballPitch
 2 | BasketballDunk
 3 | Billiards
 4 | CleanAndJerk
 5 | CricketShot
 6 | FrisbeeCatch
 7 | GolfSwing
 8 | HighJump
 9 | LongJump
10 | PoleVault
11 | Shotput
12 | SoccerPenalty
13 | TennisSwing
14 | ThrowDiscus
15 | VolleyballSpiking
16 | 


--------------------------------------------------------------------------------
/thumos_splits/split_2/Class_Known.txt:
--------------------------------------------------------------------------------
 1 | BaseballPitch
 2 | BasketballDunk
 3 | Billiards
 4 | CleanAndJerk
 5 | CliffDiving
 6 | CricketBowling
 7 | Diving
 8 | FrisbeeCatch
 9 | GolfSwing
10 | HammerThrow
11 | HighJump
12 | SoccerPenalty
13 | TennisSwing
14 | ThrowDiscus
15 | VolleyballSpiking
16 | 


--------------------------------------------------------------------------------
/scripts/test_split0.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | cd ..
 3 | CUDA_VISIBLE_DEVICES=0 \
 4 | python test.py \
 5 | --max_seqlen 500 \
 6 | --lr 0.00005 \
 7 | --k 7 \
 8 | --dataset_name Thumos14reduced \
 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \
10 | --use_model CO2 \
11 | --dataset SampleDataset \
12 | --weight_decay 0.001 \
13 | --AWM BWA_fusion_dropout_feat_v2 \
14 | --seed 0 \
15 | --test_ckpt ./ckpt/split0_ckpt.pkl \
16 | --split_idx 0 \
17 | --without_wandb \
18 | --topk_test


--------------------------------------------------------------------------------
/scripts/test_split1.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | cd ..
 3 | CUDA_VISIBLE_DEVICES=0 \
 4 | python test.py \
 5 | --max_seqlen 500 \
 6 | --lr 0.00005 \
 7 | --k 7 \
 8 | --dataset_name Thumos14reduced \
 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \
10 | --use_model CO2 \
11 | --dataset SampleDataset \
12 | --weight_decay 0.001 \
13 | --AWM BWA_fusion_dropout_feat_v2 \
14 | --seed 0 \
15 | --test_ckpt ./ckpt/split1_ckpt.pkl \
16 | --split_idx 1 \
17 | --without_wandb \
18 | --topk_test


--------------------------------------------------------------------------------
/scripts/test_split2.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | cd ..
 3 | CUDA_VISIBLE_DEVICES=0 \
 4 | python test.py \
 5 | --max_seqlen 500 \
 6 | --lr 0.00005 \
 7 | --k 7 \
 8 | --dataset_name Thumos14reduced \
 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \
10 | --use_model CO2 \
11 | --dataset SampleDataset \
12 | --weight_decay 0.001 \
13 | --AWM BWA_fusion_dropout_feat_v2 \
14 | --seed 0 \
15 | --test_ckpt ./ckpt/split2_ckpt.pkl \
16 | --split_idx 2 \
17 | --without_wandb \
18 | --topk_test


--------------------------------------------------------------------------------
/scripts/train_split0.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | cd ..
 3 | CUDA_VISIBLE_DEVICES=0 \
 4 | python main.py \
 5 | --max_seqlen 500 \
 6 | --lr 0.00005 \
 7 | --k 7 \
 8 | --dataset_name Thumos14reduced \
 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \
10 | --use_model CO2 \
11 | --dataset SampleDataset \
12 | --weight_decay 0.001 \
13 | --AWM BWA_fusion_dropout_feat_v2 \
14 | --group_name CELL \
15 | --model_name split0_ckpt \
16 | --split_idx 0 \
17 | --k_edl 7 \
18 | --num_centers 2 \
19 | --seed 0 \
20 | --without_wandb


--------------------------------------------------------------------------------
/scripts/train_split1.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | cd ..
 3 | CUDA_VISIBLE_DEVICES=0 \
 4 | python main.py \
 5 | --max_seqlen 500 \
 6 | --lr 0.00005 \
 7 | --k 7 \
 8 | --dataset_name Thumos14reduced \
 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \
10 | --use_model CO2 \
11 | --dataset SampleDataset \
12 | --weight_decay 0.001 \
13 | --AWM BWA_fusion_dropout_feat_v2 \
14 | --group_name CELL \
15 | --model_name split1_ckpt \
16 | --split_idx 1 \
17 | --k_edl 7 \
18 | --num_centers 2 \
19 | --seed 0 \
20 | --without_wandb


--------------------------------------------------------------------------------
/scripts/train_split2.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | cd ..
 3 | CUDA_VISIBLE_DEVICES=0 \
 4 | python main.py \
 5 | --max_seqlen 500 \
 6 | --lr 0.00005 \
 7 | --k 7 \
 8 | --dataset_name Thumos14reduced \
 9 | --path_dataset /data_SSD1/cmy/CO2-THUMOS-14 \
10 | --use_model CO2 \
11 | --dataset SampleDataset \
12 | --weight_decay 0.001 \
13 | --AWM BWA_fusion_dropout_feat_v2 \
14 | --group_name CELL \
15 | --model_name split2_ckpt \
16 | --split_idx 2 \
17 | --k_edl 7 \
18 | --num_centers 2 \
19 | --seed 0 \
20 | --without_wandb


--------------------------------------------------------------------------------
/libMR/compile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "----- Removing previously compiled libmr.so -----\n"
 4 | rm -r build
 5 | rm *.model
 6 | rm libmr.so
 7 | rm *.dump
 8 | rm ../libmr.so
 9 | 
10 | echo "----- Building and compiling libmr ------- \n"
11 | python setup.py build_ext -i
12 | # cp libmr.so ../
13 | 
14 | # echo "----- Completed Compiling libmr -------- \n"
15 | # echo "Now trying python -c \"import libmr\""
16 | # python test_libmr.py
17 | # echo "----- Compiling Done. Now import *.so file in your application -----\n"
18 | 


--------------------------------------------------------------------------------
/Dist.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class Dist(nn.Module):
 6 |     def __init__(self, num_classes=10, num_centers=1, feat_dim=2):
 7 |         super(Dist, self).__init__()
 8 |         self.feat_dim = feat_dim
 9 |         self.num_classes = num_classes
10 |         self.num_centers = num_centers
11 | 
12 |         self.pos_centers = nn.Parameter(0.1 * torch.randn(num_classes * num_centers, self.feat_dim))
13 |         self.neg_centers = nn.Parameter(0.1 * torch.randn(num_classes * num_centers, self.feat_dim))
14 | 
15 |     def forward(self, features, center):
16 |         dist = features.matmul(center.t())
17 |         dist = torch.reshape(dist, [-1, self.num_classes, self.num_centers])
18 |         dist = torch.mean(dist, dim=2)
19 | 
20 |         return dist
21 | 


--------------------------------------------------------------------------------
/eval/classificationMAP.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def getAP(conf, labels):
 5 |     assert len(conf) == len(labels)
 6 |     sortind = np.argsort(-conf)
 7 |     tp = labels[sortind] == 1
 8 |     fp = labels[sortind] != 1
 9 |     npos = np.sum(labels)
10 | 
11 |     fp = np.cumsum(fp).astype('float32')
12 |     tp = np.cumsum(tp).astype('float32')
13 |     rec = tp / npos
14 |     prec = tp / (fp + tp)
15 |     tmp = (labels[sortind] == 1).astype('float32')
16 | 
17 |     return np.sum(tmp * prec) / npos
18 | 
19 | 
def getClassificationMAP(confidence, labels):
    """Return the mean average precision over all label columns, in percent.

    ``confidence`` and ``labels`` are of dimension n_samples x n_label; the
    AP of each column is computed with ``getAP`` and the results averaged.
    """
    n_label = np.shape(labels)[1]
    per_class_ap = [getAP(confidence[:, col], labels[:, col]) for col in range(n_label)]
    return 100 * sum(per_class_ap) / len(per_class_ap)
27 | 


--------------------------------------------------------------------------------
/libMR/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | from distutils.extension import Extension
 3 | from Cython.Distutils import build_ext
 4 | from Cython.Build import cythonize
 5 | import sys
 6 | import numpy
 7 | #ext_modules = [Extension("libmr", ["libmr.pyx", "MetaRecognition.cpp"])]
 8 | 
 9 | setup(
10 |       ext_modules = cythonize(Extension('libmr',
11 |                                         ["libmr.pyx",
12 |                                          "MetaRecognition.cpp",
13 |                                          "weibull.c"
14 |                                          ],
15 |                                         include_dirs = [".", numpy.get_include()],
16 |                                         language="c++",
17 |                   )),
18 |       data_files = [('.', ['MetaRecognition.h', 'weibull.h'])],
19 | 
20 | )
21 | 


--------------------------------------------------------------------------------
/libMR/build_libmr_python.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # This script creates a clean temporary environment Python, and then
 4 | # builds LibMR's python bindings.
 5 | 
 6 | if [ '!' -f setup.py ]; then
 7 |   echo Put this script into the same folder as setup.py
 8 |   exit 1
 9 | fi
10 | 
11 | echo Step 1: Download virtualenv
12 | wget -O virtualenv-1.9.1.tar.gz --no-check-certificate https://pypi.python.org/packages/source/v/virtualenv/virtualenv-1.9.1.tar.gz
13 | tar xvf virtualenv-1.9.1.tar.gz
14 | 
15 | echo Step 2: Create virtualenv
16 | python virtualenv-1.9.1/virtualenv.py --system-site-packages venv
17 | 
18 | echo Step 3: Entering virtualenv and installing dependencies
19 | source venv/bin/activate
20 | pip install cython==0.19.1
21 | 
22 | echo Step 5: Build the extension
23 | rm -f python/libmr.cpp
24 | python setup.py build_ext -i
25 | 
26 | deactivate
27 | 
28 | echo The .so should be built in the current folder.
29 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CVPR2023-OWTAL
 2 | An implementation of a baseline method for OWTAL. Note that this is not yet the complete version.
 3 | 
 4 | ### THUMOS-14 Dataset:
 5 | We use the 2048-d features provided by the MM 2021 paper *Cross-modal Consensus Network for Weakly Supervised Temporal Action Localization*. You can download the dataset from [Google Drive](https://drive.google.com/file/d/1SFEsQNLsG8vgBbqx056L9fjA4TzVZQEu/view?usp=sharing) or [Baidu Disk](https://pan.baidu.com/s/1nspCSpzgwh5AHpSBPPibrQ?pwd=2dej). The annotations are included in this package.
 6 | 
 7 | ### Pre-trained models:
 8 | Pre-trained models can be downloaded from [Google Drive](https://drive.google.com/file/d/1GjiNATcUdJlFpX6rK0FIik7ma2QO-L5c/view?usp=sharing).
 9 | They need to be unzipped and put in the directory './ckpt/'.
10 | 
11 | ### Quick start
12 | To test pre-trained models, run:
13 |    ```
14 |    cd scripts
15 |    bash test_split0.sh   # or test_split1.sh / test_split2.sh
16 |    ```
17 | 
18 | To train from scratch, run:
19 |    ```
20 |    cd scripts
21 |    bash train_split0.sh   # or train_split1.sh / train_split2.sh
22 |    ```
23 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | torch.set_default_dtype(torch.float32)
 5 | 
 6 | 
 7 | def train(itr, dataset, args, model, optimizer, device):
 8 |     model.train()
 9 |     features, labels, pairs_id = dataset.load_data(n_similar=args.num_similar)
10 |     seq_len = np.sum(np.max(np.abs(features), axis=2) > 0, axis=1)
11 |     features = features[:, :np.max(seq_len), :]
12 | 
13 |     features = torch.from_numpy(features).float().to(device)
14 |     labels = torch.from_numpy(labels).float().to(device)
15 | 
16 |     outputs = model(features, seq_len=seq_len, is_training=True, itr=itr, opt=args, labels=labels)
17 |     total_loss, loss_dict = model.criterion(outputs, labels, seq_len=seq_len, device=device, opt=args, itr=itr,
18 |                                             pairs_id=pairs_id, inputs=features)
19 | 
20 |     optimizer.zero_grad()
21 |     total_loss.backward()
22 |     optimizer.step()
23 | 
24 |     if not args.without_wandb:
25 |         if itr % 20 == 0 and itr != 0:
26 |             wandb.log(loss_dict)
27 | 
28 |     return total_loss.data.cpu().numpy()
29 | 


--------------------------------------------------------------------------------
/libMR/estimate_wscores.py:
--------------------------------------------------------------------------------
 1 | import os, sys
 2 | import scipy as sp
 3 | import libmr
 4 | 
 5 | def main():
 6 | 
 7 |     posscores = sp.asarray([0.245 ,  0.2632,  0.3233,  0.3573,  0.4014,  0.4055,  0.4212, 0.5677])
 8 |     test_distances = sp.asarray([ 0.05,  0.1 ,  0.25,  0.4 ,  0.75,  1.  ,  1.5 ,  2.])
 9 | 
10 |     mr = libmr.MR()
11 |     # since higher is worse and we want to fit the higher tail,
12 |     # use fit_high()
13 |     mr.fit_high(posscores, posscores.shape[0])
14 |     wscores = mr.w_score_vector(test_distances)
15 |     for i in range(wscores.shape[0]):
16 |         print "%.2f %.2f %.2f" %(test_distances[i], wscores[i], mr.inv(wscores[i]))
17 |     # wscores are the ones to be used in the equation
18 |     # s_i * (1 - rho_i)
19 |     print "Low wscore --> Low probability that the score is outlier i.e. sample IS NOT outlier"
20 |     print "High wscore --> High probability that the score is outlier i.e. sample IS an outlier"
21 |     print "posscores: ", posscores
22 |     print "test_distances: ", test_distances
23 |     print "wscores: ", wscores
24 | 
25 | if __name__ == "__main__":
26 |     main()
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Mengyuan Chen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/PL.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | from Dist import Dist
 5 | 
 6 | class PL(nn.CrossEntropyLoss):
 7 |     def __init__(self, args):
 8 |         super(PL, self).__init__()
 9 |         self.args = args
10 |         self.use_gpu = True
11 |         self.weight_pl = float(args.weight_pl)
12 |         self.temp = args.temp
13 |         self.Dist = Dist(num_classes=args.n_known_class, num_centers=args.num_centers, feat_dim=args.feature_size)
14 |         self.pos_points = self.Dist.pos_centers
15 |         self.neg_points = self.Dist.neg_centers
16 | 
17 |     def forward(self, x, labels=None):
18 |         dist_dot_pos = self.Dist(x, center=self.pos_points)
19 |         dist_dot_neg = self.Dist(x, center=self.neg_points)
20 |         logits = dist_dot_pos - dist_dot_neg  # (batch_size, class_num)
21 |         logits = logits / self.temp
22 | 
23 |         if labels is None:
24 |             return logits, 0
25 | 
26 |         true_logits = (torch.exp(logits) * labels).sum(dim=0)
27 |         false_logits = torch.exp(logits).sum(dim=0)
28 | 
29 |         loss = - torch.log(true_logits / false_logits + 1e-3).mean()
30 | 
31 |         # loss = F.cross_entropy(logits, labels)
32 | 
33 |         return logits, loss
34 | 


--------------------------------------------------------------------------------
/libMR/test_libmr.py:
--------------------------------------------------------------------------------
 1 | import scipy as sp
 2 | import sys, os
 3 | try:
 4 |     import libmr
 5 |     print("Imported libmr succesfully")
 6 | except ImportError:
 7 |     print("Cannot import libmr")
 8 |     sys.exit()
 9 | 
10 | import pickle
11 | svm_data = {}
12 | svm_data["labels"] = [1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1,
13 |                       1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1,
14 |                       1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1,
15 |                       1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1,
16 |                       1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1, -1 , -1, -1, -1, -1, -1]
17 | svm_data["scores"] = sp.randn(100).tolist()
18 | fit_data = sp.rand(3)
def main():
    """Fit libmr on the module-level SVM data, then save the model and a pickle dump."""

    mr = libmr.MR()
    datasize = len(svm_data["scores"])
    mr.fit_svm(svm_data, datasize, 1, 1, 1, 10)
    print(fit_data)
    print(mr.w_score_vector(fit_data))
    mr.mr_save("meta_rec.model")
    datadump = {"data": fit_data}

    # pickle writes bytes, so the file must be opened in binary mode (text
    # mode raises TypeError on Python 3); `with` closes it even on error.
    with open("data.dump", "wb") as f:
        pickle.dump(datadump, f)
    print(dir(mr))
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     main()
38 | 


--------------------------------------------------------------------------------
/base.py:
--------------------------------------------------------------------------------
 1 | from abc import ABCMeta, abstractmethod
 2 | 
 3 | import torch.nn as nn
 4 | 
 5 | 
 6 | class BaseWeightedLoss(nn.Module, metaclass=ABCMeta):
 7 |     """Base class for loss.
 8 | 
 9 |     All subclass should overwrite the ``_forward()`` method which returns the
10 |     normal loss without loss weights.
11 | 
12 |     Args:
13 |         loss_weight (float): Factor scalar multiplied on the loss.
14 |             Default: 1.0.
15 |     """
16 | 
17 |     def __init__(self, loss_weight=1.0):
18 |         super().__init__()
19 |         self.loss_weight = loss_weight
20 | 
21 |     @abstractmethod
22 |     def _forward(self, *args, **kwargs):
23 |         pass
24 | 
25 |     def forward(self, *args, **kwargs):
26 |         """Defines the computation performed at every call.
27 | 
28 |         Args:
29 |             *args: The positional arguments for the corresponding
30 |                 loss.
31 |             **kwargs: The keyword arguments for the corresponding
32 |                 loss.
33 | 
34 |         Returns:
35 |             torch.Tensor: The calculated loss.
36 |         """
37 |         ret = self._forward(*args, **kwargs)
38 |         if isinstance(ret, dict):
39 |             for k in ret:
40 |                 if 'loss' in k:
41 |                     ret[k] *= self.loss_weight
42 |         else:
43 |             ret *= self.loss_weight
44 |         return ret
45 | 


--------------------------------------------------------------------------------
/eval/utils_eval.py:
--------------------------------------------------------------------------------
 1 | # This code is originally from the official ActivityNet repo
 2 | # https://github.com/activitynet/ActivityNet
 3 | 
 4 | import json
 5 | import urllib.request
 6 | 
 7 | import numpy as np
 8 | 
 9 | API = 'http://ec2-52-11-11-89.us-west-2.compute.amazonaws.com/challenge17/api.py'
10 | 
def get_blocked_videos(api=API):
    """Fetch and decode the JSON reply of the ``get_blocked`` API action.

    Args:
        api: Base URL of the API endpoint.

    Returns:
        The decoded JSON payload.
    """
    api_url = '{}?action=get_blocked'.format(api)
    req = urllib.request.Request(api_url)
    # The context manager closes the HTTP response (and its socket) even when
    # reading or decoding fails.
    with urllib.request.urlopen(req) as response:
        return json.loads(response.read().decode('utf-8'))
16 | 
def interpolated_prec_rec(prec, rec):
    """Interpolated AP - VOCdevkit from VOC 2011.

    Precision is padded with zeros and recall with 0 and 1; precision is then
    made non-increasing from left to right, and AP is the sum of the
    precision at each recall change weighted by the size of that change.
    """
    padded_prec = np.hstack([[0], prec, [0]])
    padded_rec = np.hstack([[0], rec, [1]])

    # Right-to-left sweep: every entry becomes the maximum of itself and all
    # entries after it.
    for pos in reversed(range(len(padded_prec) - 1)):
        padded_prec[pos] = max(padded_prec[pos], padded_prec[pos + 1])

    # Indices at which recall differs from the preceding entry.
    steps = np.flatnonzero(padded_rec[1:] != padded_rec[:-1]) + 1
    recall_gain = padded_rec[steps] - padded_rec[steps - 1]
    return np.sum(recall_gain * padded_prec[steps])
27 | 
def segment_iou(target_segment, candidate_segments):
    """Temporal intersection over union of one target against N candidates.

    Parameters
    ----------
    target_segment : 1d array
        Temporal target segment containing [starting, ending] times.
    candidate_segments : 2d array
        Temporal candidate segments containing N x [starting, ending] times.

    Outputs
    -------
    tiou : 1d array
        Temporal intersection over union score of the N's candidate segments.
    """
    target_start, target_end = target_segment[0], target_segment[1]
    cand_start = candidate_segments[:, 0]
    cand_end = candidate_segments[:, 1]

    # Overlap length; disjoint pairs give a negative span, clipped to zero.
    latest_start = np.maximum(target_start, cand_start)
    earliest_end = np.minimum(target_end, cand_end)
    overlap = (earliest_end - latest_start).clip(0)

    # Union = both lengths minus the part counted twice.
    union = (cand_end - cand_start) + (target_end - target_start) - overlap

    return overlap.astype(float) / union
55 | 
def wrapper_segment_iou(target_segments, candidate_segments):
    """Compute intersection over union between two sets of segments.

    Parameters
    ----------
    target_segments : ndarray
        2-dim array in format [m x 2:=[init, end]]
    candidate_segments : ndarray
        2-dim array in format [n x 2:=[init, end]]
    Outputs
    -------
    tiou : ndarray
        2-dim array [n x m] with IOU ratio.
    Note: It assumes that candidate-segments are more scarce that target-segments
    """
    if not (candidate_segments.ndim == 2 and target_segments.ndim == 2):
        raise ValueError('Dimension of arguments is incorrect')

    n_candidates = candidate_segments.shape[0]
    n_targets = target_segments.shape[0]
    tiou = np.empty((n_candidates, n_targets))
    # One column per target, filled with its IoU against every candidate.
    for col, target in enumerate(target_segments):
        tiou[:, col] = segment_iou(target, candidate_segments)

    return tiou


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | import random
  5 | 
  6 | import numpy as np
  7 | import torch
  8 | from tqdm import tqdm
  9 | 
 10 | import model
 11 | import options
 12 | import wsad_dataset
 13 | from test import test
 14 | from train import train
 15 | 
 16 | torch.set_default_dtype(torch.float32)
 17 | 
 18 | 
 19 | def setup_seed(seed):
 20 |     random.seed(seed)
 21 |     os.environ['PYTHONHASHSEED'] = str(seed)
 22 |     np.random.seed(seed)
 23 |     torch.manual_seed(seed)
 24 |     torch.cuda.manual_seed(seed)
 25 |     torch.cuda.manual_seed_all(seed)
 26 |     torch.backends.cudnn.benchmark = False
 27 |     torch.backends.cudnn.deterministic = True
 28 | 
 29 | 
 30 | import torch.optim as optim
 31 | 
 32 | if __name__ == '__main__':
 33 |     args = options.parser.parse_args()
 34 | 
 35 |     seed = args.seed
 36 |     print('=============seed: {}, pid: {}============='.format(seed, os.getpid()))
 37 |     setup_seed(seed)
 38 |     device = torch.device("cuda")
 39 |     dataset = getattr(wsad_dataset, args.dataset)(args)
 40 |     if 'Thumos' in args.dataset_name:
 41 |         max_map = [0] * 9
 42 |     else:
 43 |         max_map = [0] * 10
 44 |     max_uct_rank_acc = 0
 45 |     ckpt_folder_path = args.path_dataset + '/aaai23osr/ckpt/' + args.group_name
 46 |     if not os.path.exists(ckpt_folder_path):
 47 |         os.makedirs(ckpt_folder_path)
 48 |     print(args)
 49 |     model = getattr(model, args.use_model)(dataset.feature_size, dataset.num_class, opt=args).to(device)
 50 | 
 51 |     if args.pretrained_ckpt is not None:
 52 |         model.load_state_dict(torch.load(args.pretrained_ckpt))
 53 | 
 54 |     optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
 55 | 
 56 |     total_loss = 0
 57 |     lrs = [args.lr, args.lr / 5, args.lr / 5 / 5]
 58 |     print(model)
 59 |     for itr in tqdm(range(args.max_iter)):
 60 | 
 61 |         loss = train(itr, dataset, args, model, optimizer, device)
 62 |         total_loss += loss
 63 |         if itr % args.interval == 0 and not itr == 0:
 64 |             print('Iteration: %d, Loss: %.5f' % (itr, total_loss / args.interval))
 65 |             total_loss = 0
 66 |             # torch.save(model.state_dict(), ckpt_folder_path + '/last_' + args.model_name + '.pkl')
 67 |             torch.save(model.state_dict(), ckpt_folder_path + '/last_' + args.model_name + '.pkl')
 68 | 
 69 |             iou, dmap, uct_rank_acc = test(itr, dataset, args, model, device)
 70 | 
 71 |             if 'Thumos' in args.dataset_name:
 72 |                 map_update_cond = sum(dmap[:7]) > sum(max_map[:7])
 73 |             else:
 74 |                 map_update_cond = np.mean(dmap) > np.mean(max_map)
 75 |             uct_update_cond = uct_rank_acc > max_uct_rank_acc
 76 | 
 77 |             if args.main_evaluate_indicator == 'map':
 78 |                 ckpt_save_cond = map_update_cond
 79 |             elif args.main_evaluate_indicator == 'uct':
 80 |                 ckpt_save_cond = uct_update_cond
 81 |             else:
 82 |                 raise "Unknown indicator!"
 83 | 
 84 |             if ckpt_save_cond:
 85 |                 torch.save(model.state_dict(), ckpt_folder_path + '/best_' + args.model_name + '.pkl')
 86 |             if map_update_cond:
 87 |                 max_map = dmap
 88 |             if uct_update_cond:
 89 |                 max_uct_rank_acc = uct_rank_acc
 90 | 
 91 |             print(f'MAX uct_rank_acc: {max_uct_rank_acc:.3f}')
 92 | 
 93 |             print('----------------------------------------------------------------')
 94 |             print('For all classes (MAX):')
 95 |             print('||'.join(['MAX map @ {} = {:.3f} '.format(iou[i], max_map[i]) for i in range(len(iou))]))
 96 |             max_map = np.array(max_map)
 97 |             if 'Thumos' in args.dataset_name:
 98 |                 print('Max mAP Avg 0.1-0.5: {:.3f}, Max mAP Avg 0.1-0.7: {:.3f}, Max mAP Avg 0.1-0.9: {:.3f}'
 99 |                       .format(np.mean(max_map[:5]), np.mean(max_map[:7]), np.mean(max_map)))
100 |             print("------------------pid: {}--------------------".format(os.getpid()))
101 | 


--------------------------------------------------------------------------------
/options.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | parser = argparse.ArgumentParser(description='CO2-NET')
 4 | parser.add_argument('--path_dataset', type=str, default='/data_SSD1/cmy/CO2-THUMOS-14', help='the path of data feature')
 5 | # '/data_SDD3/mmc_mychen/CO2-THUMOS-14'
 6 | parser.add_argument('--lr', type=float, default=0.00005, help='learning rate (default: 0.0001)')
 7 | parser.add_argument('--batch_size', type=int, default=10, help='number of instances in a batch of data (default: 10)')
 8 | parser.add_argument('--model_name', default='default', help='name to save model')
 9 | parser.add_argument('--group_name', default='default', help='name to save model')
10 | parser.add_argument('--pretrained_ckpt', default=None, help='ckpt for pretrained model')
11 | parser.add_argument('--feature_size', default=2048, help='size of feature (default: 2048)')
12 | parser.add_argument('--num_class', type=int, default=20, help='number of classes (default: )')
13 | parser.add_argument('--dataset_name', default='Thumos14reduced', help='dataset to train on (default: )')
14 | parser.add_argument('--max_seqlen', type=int, default=320,
15 |                     help='maximum sequence length during training (default: 750)')
16 | parser.add_argument('--num_similar', default=3, type=int,
17 |                     help='number of similar pairs in a batch of data  (default: 3)')
18 | parser.add_argument('--seed', type=int, default=3552, help='random seed (default: 1)')
19 | parser.add_argument('--max_iter', type=int, default=5000, help='maximum iteration to train (default: 50000)')
20 | parser.add_argument('--feature_type', type=str, default='I3D',
21 |                     help='type of feature to be used I3D or UNT (default: I3D)')
22 | parser.add_argument('--use_model', type=str, help='model used to train the network')
23 | parser.add_argument('--interval', type=int, default=50, help='time interval of performing the test')
24 | parser.add_argument('--similar_size', type=int, default=2)
25 | 
26 | parser.add_argument('--weight_decay', type=float, default=5e-4)
27 | parser.add_argument('--dataset', type=str, default='SampleDataset')
28 | parser.add_argument('--proposal_method', type=str, default='multiple_threshold_hamnet')
29 | 
30 | # for proposal genration
31 | parser.add_argument('--scale', type=float, default=1)
32 | parser.add_argument("--feature_fps", type=int, default=25)
33 | parser.add_argument('--gamma-oic', type=float, default=0.2)
34 | 
35 | parser.add_argument('--k', type=float, default=7)
36 | # for testing time usage
37 | parser.add_argument("--topk2", type=float, default=10)
38 | parser.add_argument("--topk", type=float, default=60)
39 | 
40 | parser.add_argument('--dropout_ratio', type=float, default=0.7)
41 | parser.add_argument('--reduce_ratio', type=int, default=16)
42 | # for pooling kernel size calculate
43 | parser.add_argument('--t', type=int, default=5)
44 | 
45 | # -------------loss weight---------------
46 | parser.add_argument("--alpha1", type=float, default=0.8)
47 | parser.add_argument("--alpha2", type=float, default=0.8)
48 | parser.add_argument("--alpha3", type=float, default=1)
49 | parser.add_argument('--alpha4', type=float, default=1)
50 | 
51 | parser.add_argument("--AWM", type=str, default='BWA_fusion_dropout_feat_v2')
52 | 
53 | # --------------new arguments------------
54 | parser.add_argument('--alpha_cls', type=float, default=1)
55 | parser.add_argument("--n_known_class", type=int, default=15)
56 | parser.add_argument("--without_wandb", action='store_true')
57 | parser.add_argument("--split_idx", type=int, default=0)
58 | parser.add_argument("--main_evaluate_indicator", type=str, default='map')
59 | parser.add_argument('--k_edl', type=int, default=7)
60 | 
61 | # --------------arpl arguments------------
62 | parser.add_argument('--temp', type=float, default=1.0)
63 | parser.add_argument("--num_centers", type=int, default=2)
64 | parser.add_argument('--weight_pl', type=float, default=0.1)
65 | 
66 | # --------------balance parameters--------
67 | parser.add_argument('--alpha_ori_edl', type=float, default=1)
68 | parser.add_argument('--alpha_cali_edl', type=float, default=1)
69 | parser.add_argument('--alpha_pl', type=float, default=0.5)
70 | 
71 | parser.add_argument('--topk_test', action='store_true')
72 | 
73 | parser.add_argument('--test_ckpt', default=None, help='ckpt for testing')


--------------------------------------------------------------------------------
/eval/eval_anno_file_generation.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | from joblib import Parallel, delayed
  3 | from scipy.signal import savgol_filter
  4 | import os
  5 | import numpy as np
  6 | import pandas as pd
  7 | 
  8 | def str2ind(categoryname, classlist):
  9 |     return [i for i in range(len(classlist)) if categoryname == classlist[i]][0]
 10 | 
 11 | 
 12 | def strlist2indlist(strlist, classlist):
 13 |     return [str2ind(s, classlist) for s in strlist]
 14 | 
 15 | def filter_segments(segment_predict, videonames, ambilist):
 16 |     ind = np.zeros(np.shape(segment_predict)[0])
 17 |     for i in range(np.shape(segment_predict)[0]):
 18 |         vn = videonames[int(segment_predict[i, 0])]
 19 |         for a in ambilist:
 20 |             if a[0] == vn:
 21 |                 gt = range(
 22 |                     int(round(float(a[2]) * 25 / 16)), int(round(float(a[3]) * 25 / 16))
 23 |                 )
 24 |                 pd = range(int(segment_predict[i][1]), int(segment_predict[i][2]))
 25 |                 IoU = float(len(set(gt).intersection(set(pd)))) / float(
 26 |                     len(set(gt).union(set(pd)))
 27 |                 )
 28 |                 if IoU > 0:
 29 |                     ind[i] = 1
 30 |     s = [
 31 |         segment_predict[i, :]
 32 |         for i in range(np.shape(segment_predict)[0])
 33 |         if ind[i] == 0
 34 |     ]
 35 |     return np.array(s)
 36 | 
 37 | def generate_single_ground_truth_file(annotation_path,args,subset,verbose,output_annotation_path):
 38 |     '''the content have to be stored:
 39 |     1. idx_to_take
 40 |     2. videoname
 41 |     3. ambilist
 42 |     4. ground_truth
 43 |     5. activity_index
 44 |     '''
 45 | 
 46 |     gtsegments = np.load(annotation_path + "/segments.npy", allow_pickle=True)
 47 |     gtlabels = np.load(annotation_path + "/labels.npy", allow_pickle=True)
 48 |     videoname = np.load(annotation_path + "/videoname.npy", allow_pickle=True)
 49 |     videoname = np.array([i.decode("utf8") for i in videoname])
 50 |     gt_subset = np.load(annotation_path + "/subset.npy", allow_pickle=True)
 51 |     gt_subset = np.array([s.decode("utf-8") for s in gt_subset])
 52 |     # classlist = np.load(annotation_path + "/classlist.npy", allow_pickle=True)
 53 |     # classlist = np.array([c.decode("utf-8") for c in classlist])
 54 |     # classlist = np.load("./new_classlist.npy", allow_pickle=True)
 55 |     classlist = args.classlist
 56 |     duration = np.load(annotation_path + "/duration.npy", allow_pickle=True)
 57 |     ambilist = annotation_path + "/Ambiguous_test.txt"
 58 | 
 59 |     try:
 60 |         ambilist = list(open(ambilist, "r"))
 61 |         ambilist = [a.strip("\n").split(" ") for a in ambilist]
 62 |     except:
 63 |         ambilist = []
 64 | 
 65 |     subset_ind = (subset == gt_subset)
 66 |     gtsegments = gtsegments[subset_ind]
 67 |     gtlabels = gtlabels[subset_ind]
 68 |     videoname = videoname[subset_ind]
 69 |     duration = duration[subset_ind]
 70 | 
 71 |     idx_to_take = [i for i, s in enumerate(gtsegments)
 72 |                        if len(s) > 0]
 73 | 
 74 |     gtsegments = gtsegments[idx_to_take]
 75 |     gtlabels = gtlabels[idx_to_take]
 76 |     videoname = videoname[idx_to_take]
 77 | 
 78 | 
 79 | 
 80 |     # which categories have temporal labels ?
 81 |     templabelcategories = sorted(list(set([l for gtl in gtlabels for l in gtl])))
 82 | 
 83 |     # # the number index for those categories.
 84 |     templabelidx = []
 85 |     for t in templabelcategories:
 86 |         templabelidx.append(str2ind(t, classlist))
 87 | 
 88 | 
 89 |     video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], []
 90 | 
 91 |     for i in range(len(gtsegments)):
 92 |         for j in range(len(gtsegments[i])):
 93 |             video_lst.append(str(videoname[i]))
 94 |             t_start_lst.append(round(gtsegments[i][j][0] * 25 / 16))
 95 |             t_end_lst.append(round(gtsegments[i][j][1] * 25 / 16))
 96 |             label_lst.append(str2ind(gtlabels[i][j], classlist))
 97 |     ground_truth = pd.DataFrame(
 98 |         {
 99 |             "video-id": video_lst,
100 |             "t-start": t_start_lst,
101 |             "t-end": t_end_lst,
102 |             "label": label_lst,
103 |         }
104 |     )
105 |     activity_index = {i: templabelidx[i] for i in range(len(templabelidx))}
106 | 
107 |     # to store all these things into a single pkl file
108 |     stored_content={'idx_to_take':idx_to_take,'videoname':videoname,
109 |                     'ambilist':ambilist,'ground_truth':ground_truth,'activity_index':activity_index}
110 |     # store in the target path
111 |     np.save(output_annotation_path,stored_content)
112 | 
113 | 
114 | 


--------------------------------------------------------------------------------
/test.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import os
  3 | from collections import defaultdict
  4 | 
  5 | import numpy as np
  6 | import pandas as pd
  7 | import torch
  8 | from prettytable import PrettyTable
  9 | from torch.autograd import Variable
 10 | 
 11 | import model
 12 | import options
 13 | import proposal_methods as PM
 14 | import wsad_dataset
 15 | from eval.eval_detection import ANETdetection
 16 | 
 17 | torch.set_default_dtype(torch.float32)
 18 | 
 19 | 
 20 | def _get_predictions_with_label(prediction_by_label, cidx):
 21 |     """Get all predicitons of the given label. Return empty DataFrame if there
 22 |     is no predcitions with the given label.
 23 |     """
 24 |     try:
 25 |         return prediction_by_label.get_group(cidx).reset_index(drop=True)
 26 |     except:
 27 |         print("Warning: No predictions of label '%s' were provdied." % cidx)
 28 |         return pd.DataFrame()
 29 | 
 30 | def get_video_detections(args, tmp):
 31 |     proposal_list = []
 32 |     for i in range(tmp.shape[0]):
 33 |         tmp_proposal = {}
 34 |         tmp_proposal['label'] = args.classlist[int(tmp.loc[i]['label'])]
 35 |         tmp_proposal['score'] = float(tmp.loc[i]['score'])
 36 |         tmp_proposal['segment'] = [float(tmp.loc[i]['t-start'] / 1.5626), float(tmp.loc[i]['t-end'] / 1.5626)]
 37 |         tmp_proposal['uncertainty'] = float(tmp.loc[i]['uct'])
 38 |         tmp_proposal['actionness'] = float(tmp.loc[i]['act'])
 39 |         proposal_list.append(tmp_proposal)
 40 |     return proposal_list
 41 | 
 42 | 
 43 | @torch.no_grad()
 44 | def test(itr, dataset, args, model, device):
 45 |     model.eval()
 46 |     done = False
 47 |     if args.topk_test:
 48 |         topk_proposals_list = [[], [], [], [], [], []]
 49 |     else:
 50 |         topk_proposals_list = [[], ]
 51 |     results = defaultdict(dict)
 52 | 
 53 |     train_uct_list = []
 54 |     train_ori_uct_list = []
 55 |     while not done:
 56 |         features, labels, vn, done = dataset.load_data_for_threshold()
 57 |         seq_len = [features.shape[0]]
 58 |         if seq_len == 0:
 59 |             continue
 60 |         features = torch.from_numpy(features).float().to(device).unsqueeze(0)
 61 |         with torch.no_grad():
 62 |             outputs = model(Variable(features), is_training=True, seq_len=seq_len, itr=itr, opt=args, labels=None)
 63 |             this_uct = outputs['uct'][0].cpu().item()
 64 |             this_ori_uct = outputs['ori_uct'][0].cpu().item()
 65 |             train_uct_list.append(this_uct)
 66 |             train_ori_uct_list.append(this_ori_uct)
 67 |     train_uct_list = np.sort(np.array(train_uct_list), axis=0)
 68 |     train_ori_uct_list = np.sort(np.array(train_ori_uct_list), axis=0)
 69 |     thres = train_uct_list[int(0.95 * len(train_uct_list))]
 70 |     print(f"We select {thres:.4f} as the uncertainty threshold.")
 71 | 
 72 |     mu = train_ori_uct_list[int(0.5 * len(train_ori_uct_list))]
 73 |     mu_path = './temp/' + args.group_name + '/' + args.model_name
 74 |     if not os.path.exists(mu_path):
 75 |         os.makedirs(mu_path)
 76 |     np.save(os.path.join(mu_path, 'mu.npy'), mu)
 77 |     print(f"We select {mu:.4f} as the mean of the gaussian function.")
 78 | 
 79 |     n_correct = 0
 80 |     n_test_vid = 0
 81 |     test_uct_list = []
 82 |     done = False
 83 | 
 84 |     result_dict = {}
 85 |     while not done:
 86 |         n_test_vid += 1
 87 |         features, labels, vn, done = dataset.load_data(is_training=False)
 88 |         seq_len = [features.shape[0]]
 89 |         if seq_len == 0:
 90 |             continue
 91 |         features = torch.from_numpy(features).float().to(device).unsqueeze(0)
 92 |         with torch.no_grad():
 93 |             outputs = model(Variable(features), is_training=False, seq_len=seq_len, itr=itr, opt=args, labels=None)
 94 |             results[vn] = {'cas': outputs['cas'], 'attn': outputs['attn']}
 95 |             video_uct = outputs['uct'][0].cpu().item()
 96 |             prediction_list = getattr(PM, args.proposal_method)(vn, outputs, labels, args, thres)
 97 | 
 98 |             if video_uct <= thres and labels[:args.n_known_class].sum() > 0:
 99 |                 n_correct += 1
100 |             elif video_uct > thres and labels[args.n_known_class:].sum() > 0:
101 |                 n_correct += 1
102 |             else:
103 |                 n_correct += 0
104 | 
105 |         test_uct_list.append(video_uct)
106 |         for idx, prediction in enumerate(prediction_list):
107 |             topk_proposals_list[idx].append(prediction)
108 | 
109 |     if not os.path.exists('temp'):
110 |         os.mkdir('temp')
111 |     np.save('temp/{}.npy'.format(args.model_name), results)
112 | 
113 |     if 'Thumos14' in args.dataset_name:
114 |         iou = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
115 |         dmap_detect = ANETdetection(dataset.path_to_annotations, iou, args=args, verbose=True)
116 |     else:
117 |         iou = [0.5, 0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95]
118 |         dmap_detect = ANETdetection(dataset.path_to_annotations, iou, args=args, subset='validation', verbose=True)
119 | 
120 |     # video-id, t-start, t-end, label, score
121 |     table = PrettyTable(['k', 'split', 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, '0.1-0.5', '0.3-0.7', '0.1-0.7'])
122 | 
123 |     log_all_mAP = []
124 |     for idx, topk_proposals in enumerate(topk_proposals_list):
125 |         proposals = pd.concat(topk_proposals).reset_index(drop=True)
126 | 
127 |         dmap_detect.prediction = proposals
128 |         known_mAP, unknown_mAP = dmap_detect.evaluate()
129 | 
130 |         known_mAP *= 100
131 |         unknown_mAP *= 100
132 |         all_mAP = known_mAP * 0.5 + unknown_mAP * 0.5
133 |         mAP_list = [known_mAP, unknown_mAP, all_mAP]
134 |         for j, split in enumerate(['known', 'unknown', 'all']):
135 |             # if j == 2:
136 |             table.add_row([args.n_pred_list[idx], split] + list(np.around(mAP_list[j][:7], decimals=2)) +
137 |                           list(np.around([mAP_list[j][:5].mean(), mAP_list[j][2:7].mean(), mAP_list[j][:7].mean()], decimals=2))
138 |                           )
139 | 
140 |         if args.n_pred_list[idx] == 'unlimit':
141 |             log_known_mAP, log_unknown_mAP, log_all_mAP = mAP_list
142 | 
143 |     np.set_printoptions(precision=2, suppress=True)
144 |     print(table)
145 |     uct_rank_acc = n_correct / n_test_vid * 100
146 |     print(f'Accuracy of binary classification: {uct_rank_acc:.4f}%')
147 | 
148 |     return iou, log_all_mAP, uct_rank_acc
149 | 
150 | 
if __name__ == '__main__':
    # Stand-alone evaluation: build the dataset and model named on the command
    # line, restore the checkpoint given by --test_ckpt, and run one test pass.
    args = options.parser.parse_args()
    device = torch.device("cuda")

    dataset_cls = getattr(wsad_dataset, args.dataset)
    dataset = dataset_cls(args)

    model_cls = getattr(model, args.use_model)
    model = model_cls(dataset.feature_size, dataset.num_class, opt=args).to(device)
    checkpoint_state = torch.load(args.test_ckpt)
    model.load_state_dict(checkpoint_state)

    iou, dmap, _ = test(-1, dataset, args, model, device)
159 | 


--------------------------------------------------------------------------------
/libMR/weibull.h:
--------------------------------------------------------------------------------
  1 | /*! \file
  2 |  * weibull.h provides the headers for the  core functionality for the internal computing weibull fittings, as well as CDF and INF given parameters 
  3 |  * this file is not intended for direct usage...
  4 |  *  
  5 |  * 
  6 |  * Author Brian Heflin bheflin  at securics com
  7 |  * Author Walter Scheirer walter at securics com
  8 |  * Author Terry Boult tboult  at securics com
  9 |  *
 10 |  * Copyright 2010, 2011, Securics Inc.
 11 |  *
 12 |  * @section LICENSE
 13 |  *  See accompanying LICENSE agreement for full details on rights.
 14 |  *
 15 |  * Parts of this technology are subject to SBIR data rights and as described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 80918
 16 |  *
 17 |  *The Government's rights to use, modify, reproduce, release, perform, display, or disclose technical data or computer software marked with this legend are restricted during the period shown as provided in paragraph (b)(4) of the Rights in Non-commercial Technical Data and Computer Software-Small Business Innovative Research (SBIR) Program clause contained in the above identified contract.  Expiration of SBIR Data Rights: Expires four years after completion of the above cited project work for this or any other follow-on SBIR contract, whichever is later.
 18 |  *
 19 |  * No restrictions on government use apply after the expiration date shown above.  Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings.
 20 |  *
 21 |  * @section Summary Description
 22 |  * This file contains the "C" interface functions for very basic Weibull usage for Meta-Recognition.   The weibull_fit and weibull_cdf are the primary functions to use. 
 23 |  *
 24 |  * The code herein has a number of STRONG assumptions you must follow as we cannot test for all of them which is why we don't recommend use it directly
 25 |  *   1) All fitting and testing are presuming  "larger is better",  If you are fitting something where smaller is better you need to transform it.
 26 |  *   2) All data is positive (okay we can and do test for that, but better to know up front what you are doing) 
 27 |  *   3) There must be sufficient range in your data to actually fit the Weibull.  If all the data is the same, or nearly the same, it may fail to converge and will report errors.
 28 |  *   4) For efficient fitting, we must satisfy a regularity condition (see N. M. Kiefer, Maximum likelihood estimation (MLE),  http://instruct1.cit.cornell.edu/courses/econ620/reviewm5.pdf, 2007),  and to do that the lower bound in Weibull data/fitting cannot be too small  so we recommend you translated the data to be well away from zero (part of why we only fit on upper side and, in the MetaRecognition class we translate more than just the min..)
 29 |  * 
 30 |  *
 31 |  */
 32 | 
 33 | #pragma once
 34 | #ifndef WEIBULL_H
 35 | #define WEIBULL_H
 36 | 
 37 | #ifdef _WIN32
 38 | #ifdef __cplusplus
 39 | extern "C" {
 40 | #endif
 41 | _declspec(dllexport) double weibull_inv(double x, double scale, double shape);
 42 | _declspec(dllexport) double weibull_cdf(double x, double scale, double shape);
 43 | _declspec(dllexport) int weibull_fit(double* weibull_parms, double* wparm_confidenceintervals, double* inputData, double alpha, int size);
 44 | _declspec(dllexport) void printWeibullBuildInfo(FILE *fh);
 45 | #ifdef __cplusplus
 46 | }
 47 | #endif
 48 | #else
 49 | #ifdef __cplusplus
 50 | extern "C" {
 51 | #endif
 52 | 
 53 |   /** if WEIBULL_USE_ASSERTS is defined, the code will use asserts to ensure its requirements are true, otherwise it returns error codes. Default is not defined */ 
 54 |   /** if WEIBULL_IGNORE_ERRORS is defined, the code will just presume things will work out and not waste time on testing for error. Default is not defined */ 
 55 | 
 56 | 
 57 |   /*#define WEIBULL_USE_ASSERTS  //!< \def define this to force asserts rather than error codes. */
 58 |   /*#define WEIBULL_IGNORE_ERRORS //!< \def defien this to skip printing/return code for errors */
 59 | 
 60 | 
 61 |   /**  weibull_cdf computes the probability (given our assumptions) that the value x is an outlier ABOVE the fit distribution.  If the distribution was fit to non-match data, then it provides the probability that x is a match score.   If the data was match data, then it would be the probability of it being a larger non-match. 
 62 |   computes @f[ 1-e^{-{\left(\frac{x}{scale}\right)}^{shape}} @f]
 63 | 
 64 |   @param x  the location at which to compute the probability of being an outlier
 65 |   @param scale the scale parameter of the weibull.  This is the first element in weibull_parms (as computed by our wlbfit) 
 66 |   @param shape the shape parameter of the weibull.  This is the second element in weibull_parms (as computed by our wlbfit) 
 67 |   @return if in the range [0-1] it is the probability of X being an outlier.  Any value < 0 is an error code.  returns -1 for invalid scale <=0 ,  -2 for invalid shape <=0 
 68 |   *
 69 |   */ 
 70 | double weibull_cdf(double x, double scale, double shape);
 71 | 
 72 | 
 73 |   /**  weibull_inv computes the inverse weibull, i.e. returns the score S (given our assumptions) such that x=wlbcdf(s,scale,shape). Note it estimates from above, so if x=1.0 expect an answer of Inf (infinity). 
 74 | 
 75 |   @param x  the location at which you compute the inverse (must be in the range [0,1])
 76 |   @param scale the scale parameter of the weibull.  This is the first element in weibull_parms (as computed by our wlbfit) 
 77 |   @param shape the shape parameter of the weibull.  This is the second element in weibull_parms (as computed by our wlbfit) 
 78 |   @return if X in the range [0-1], return S such that x=wlbcdf(s,scale,shape).  The return value is in the range [0,Inf].  Any return value < 0 is an error code.  returns -1 for invalid scale <=0 ,  -2 for invalid shape <=0  -3 for  X<0, -4 for x >1
 79 |   *
 80 |   */ 
 81 | double weibull_inv(double x, double scale, double shape);
 82 | 
 83 |   /**
 84 |      weibull_fit does a maximum likelihood fitting to estimate the shape and scale parameters of a weibull probability distribution  @f[ \frac{shape}{scale} {\left(\frac{x}{scale}\right)}^{shape-1} e^{-{\left(\frac{x}{scale}\right)}^{shape}} @f]     
 85 |      
 86 |   @param weibull_parms is an array of 2 doubles, which must be preallocated.  On successful completion it will have shape and scale respectively.
 87 |   @param wparm_confidenceintervals is an array of 4 doubles, which must be preallocated.  On successful completion it will have the confidence interval for shape in the first two items and the CI for scale in the second two items
 88 |   @param inputData is a pointer the data to use for fitting the distribution. It must have at least size elements
 89 |   @param size is the size of the data to be used for fitting.
 90 |   @param alpha is parameter for Confidence interval size estimation. 
 91 |   @return return should be  1 if all went well. Values < 0 imply errors in fitting or data.  -1 means some data was negative, -2 means bad data range (e.g. all the same)  -3 or lower means MLE did not converge.
 92 | 
 93 |    */
 94 | int weibull_fit(double* weibullparms, double* wparm_confidenceintervals, double* inputData, double alpha, int size);
 95 | 
 96 | 
 97 |   /**
 98 |      Print information about this build to a file descriptor.  Used for checking what is loaded for supporting people
 99 |   */
100 | void printWeibullBuildInfo(FILE *fh); 
101 | #ifdef __cplusplus
102 | }
103 | #endif
104 | 
105 | #endif
106 | #endif
107 | 


--------------------------------------------------------------------------------
/libMR/MetaRecognition.h:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * MetaRecognition.h: 
  3 | 
  4 |  * @Author Terry Boult tboult at securics com
  5 |  * @Author Vijay Iyer viyer at securics com
  6 | 
  7 |  *
  8 |  * Copyright 2010, 2011, Securics Inc.
  9 | 
 10 |  * Copyright 2011, Securics Inc.
 11 |    See accompanying LICENSE agreement for details on rights.
 12 | 
 13 | Parts of this technology are subject to SBIR data rights and as described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 80918
 14 | 
 15 | The Government's rights to use, modify, reproduce, release, perform, display, or disclose technical data or computer software marked with this legend are restricted during the period shown as provided in paragraph (b)(4) of the Rights in Noncommercial Technical Data and Computer Software-Small Business Innovative Research (SBIR) Program clause contained in the above identified contract.  Expiration of SBIR Data Rights: Expires four years after completion of the above cited project work for this or any other follow-on SBIR contract, whichever is later.
 16 | 
 17 | No restrictions on government use apply after the expiration date shown above.  Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings.
 18 |  *
 19 | */
 20 | 
 21 | #pragma once
 22 | #ifndef MetaRecognition_H
 23 | #define MetaRecognition_H
 24 | 
 25 | 
 26 | #ifdef HAVE_CONFIG_H
 27 | # include "config.h"
 28 | #endif
 29 | 
 30 | #include <stdio.h>
 31 | #include <istream>
 32 | #include <ostream>
 33 | #include <iostream>
 34 | #include <sstream>
 35 | #include <string>
 36 | 
 37 | 
 38 | #include "weibull.h"
 39 | 
 40 | #ifdef _WIN32
 41 | #define DLLEXPORT _declspec(dllexport)
 42 | #else 
 43 | #define DLLEXPORT
 44 | #endif
 45 | 
 46 | #define MAX_LINE 256
 47 | 
/// Structure for SVM data as used by libSVM; allows easy Meta-Recognition on SVM results (used as an argument for MetaRecognition::FitSVM).
struct svm_node_libsvm
{
  int index;  //!< class label; classically -1 for the negative class and +1 for the positive class, but it can be general for multi-class use
  double value;//!< the SVM decision score
};
 54 | 
 55 | /**!
 56 |   Class MetaRecognition provides a object-based interface for Meta-Recognition.  The object can be ...
 57 |  
 58 |   TBD
 59 | 
 60 | 
 61 | */
 62 | class DLLEXPORT MetaRecognition //!  Primary object/methods for tranforming and computing needed for any Meta recogntion task 
 63 | {
 64 | public:
 65 | 
 66 | /**  Ctor,  can call with no arguments (uses default arguments for construciton).  
 67 |      All space is on the stack. 
 68 |      Object will exist but is not valid until some fitting fucntion is called 
 69 | */
 70 | 
 71 |   MetaRecognition( int scores_to_drop=0,   //!< is this object for prediction, if so  how many top scores to drop when fitting
 72 |                    int fitting_size=9,     //!< tail size for fitting.  With small data the defaults are fine.. if you have millions make it larger for better predictions 
 73 |                    bool verbose = false,    //!< is the code chatty on errors during fitting, useful for debugging
 74 |                    double alpha=5.0,        //!< band for confidence interfals
 75 |                    int translate_amount=10000 //!< shifting data to ensure all is positive.. if data is very broad and you want some probabilities for all points you can make it larger.. 
 76 |                    );
 77 | 
 78 | 	~MetaRecognition();
 79 | 
 80 |         bool is_valid(); //!< is this object valid..i.e. has data been properly fit to determine parameters.
 81 |         void set_translate(double t); //!< Change translate_amount to x, invalidates object
 82 | 
 83 |         void Reset(); //!< reset to "invalid" state
 84 | 
 85 | 	bool Predict_Match(double x, double threshold = .9999999);     //!< Is X from the "match" distribution (i.e. we reject null hypothesis of non-match), 
 86 | 	double W_score(double x); //!< This is the commonly used function.. after fitting, it returns the probability of the given score being "correct".  It is the same as CDF
 87 | 	double CDF(double x);     //!< This is the cummumlative probablity of match being corrrect (or more precisely the probility the score (after transform) being an outlier for the distribution, which given the transforms applied, so bigger is better, this is the probablity the score is correct. 
 88 | 	double Inv(double p);     //!< This is score for which one would obtain CDF probability p (i.e. x such that p = CDF(x))
 89 | 
 90 | 	int ReNormalize(double *invec, double *outvec, int length);     //!< W-score Renormalize the vecotor invec[0:length-1] into outvec (in and out can be same) return is 1 for success, <0 for error code
 91 | 
 92 | 
 93 |         /// Use FitHight if your data is such that is larger is better.  The code will still transform, and keep parmeters to keep small data away from zero.  
 94 |         // If you get scores that are complain about it being negative, make a MR object with different (larger) translate amount
 95 |         /// returns 1 for success, <0 for error code
 96 | 	int FitHigh(double* inputData, int inputDataSize,  int fit_size=-1); 
 97 | 
 98 |         ///Use FitLow if your data is such that smaller scores are better.. we'll transform it for you and keep the
 99 |         ///transform parameters in the class so later calls to W_score or CDF do the right thing.  
100 |         /// returns 1 for success, <0 for error code
101 | 	int FitLow(double* inputData, int inputDataSize,  int fit_size=-1);// 
102 | 
103 |         /// the types of fitting supported for SVM modeling 
104 |         typedef enum  {complement_reject=1, positive_reject=2, complement_model=3, positive_model=4} MR_fitting_type; 
105 | 
106 |         /// The function to use if you have SVM data, it separated out the data for the label of interst (or rejecting
107 |         /// the complement of that label, which is the default) and uses that for fitting.  
108 |         /// Returns 1 if it worked, <0 for error codes. 
109 |         int FitSVM(svm_node_libsvm* SVMdata, int inputDataSize, int label_of_interest =1, bool label_has_positive_score=true, int fit_type = 1, int fit_size=9 ); 
110 | 
111 | 
112 |         friend std::ostream& operator<<( std::ostream&, const MetaRecognition& );         //!< various I/O functions
113 |         friend std::istream& operator>>( std::istream&, MetaRecognition& );        //!< various I/O functions
114 | 
115 | 	void Save(std::ostream &outputStream) const;         //!< various I/O functions
116 | 	void Load(std::istream &inputStream);        //!< various I/O functions
117 | 	void Save(FILE *outputFile) const;        //!< various I/O functions
118 | 	void Load(FILE *inputFile);        //!< various I/O functions
119 | 	void Save(char* filename) const;        //!< various I/O functions
120 | 	void Load(char* filename);        //!< various I/O functions
121 |         int get_fitting_size();  //!<  Get get_fitting_size (aka tail size)
122 |         int set_fitting_size(int nsize);  //!<  reset object and define new fitting size
123 |         int get_translate_amount();  //!<  Get get_internal tranlation amount (you probably don't need this, but just in case)
124 |         int set_translate_amount(int ntrans);  //!<  reset object and define new translate amount.. if you get errors because of negative data, increase this
125 |         int get_sign();   //!<  Get get_internal sign variable. (you probably don't need this, but just in case)
126 |         int set_sign(int nsign); //!<  reset object and  set sign  (you probably don't need this, but just in case)
127 |         double get_small_score();   //!<  Get get_internal smaller translation amount (you probably don't need this, but just in case)
128 |         double set_small_score(double nscore); //!<  reset object and  reset internal smaller translation amount (you probably don't need this, but just in case)
129 |         bool verbose;  //!<  do we print internal/debugging stuff.  Default is false. (you probably don't need this, but just in case)
130 |         std::string to_string(); //!< Convert this object to a C++ string
131 |         void from_string(std::string in); //!< Convert this object from a C++ string
132 | 
133 | protected:
134 |         int EvtGeneric(double* inputData, int inputDataSize, int fit_inward=0, double x=0);
135 | 	double parmhat[2];          //!<  parameters of the Weibull,  scale then shape
136 | 	double parmci[4];    //!< confidence interval for parms  scale high, scale low, shape high, shape low
137 | 	double alpha;  //!< parameter for estimation of size of confidence interval
138 | 	int sign;   //!< sign is postive is larger is better,  negative means orginally smaller was better (we transformed for fitting).
139 |         MR_fitting_type ftype;  //!< type of fitting used for SVM.. default is reject complement
140 | 	int fitting_size;   //!< tail size for fitting in any of the FitXX functions
141 | 	int translate_amount; //!< we transform data so all fittng data data is positive and bigger is better, this predefined constant helps ensure more of the end-user data is non-negative.  
142 | 	double small_score;   //!< the smallest score, so all fitting data is consistently postive. part of our transform
143 | 	int scores_to_drop; //!< when fitting for recognition prediction, how many top score are hypothesized to be a match, so we can fit on non-match data.  Only used in for fitting, no impact on transform. 
144 |         bool isvalid; //!< is the parameters in the object valid. private:
145 | 
146 | };
147 | 
148 | #endif
149 | 


--------------------------------------------------------------------------------
/proposal_methods.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import pandas as pd
  3 | import torch
  4 | from scipy.signal import savgol_filter
  5 | 
  6 | import options
  7 | import utils.wsad_utils as utils
  8 | import math
  9 | from edl_loss import exp_evidence
 10 | 
# Command-line options are parsed once, at import time. filter_segments reads
# args.path_dataset from this module-level namespace.
args = options.parser.parse_args()
 12 | 
 13 | def filter_segments(segment_predict, vn):
 14 |     ambilist = args.path_dataset + '/Thumos14reduced-Annotations/Ambiguous_test.txt'
 15 |     try:
 16 |         ambilist = list(open(ambilist, "r"))
 17 |         ambilist = [a.strip("\n").split(" ") for a in ambilist]
 18 |     except:
 19 |         ambilist = []
 20 |     ind = np.zeros(np.shape(segment_predict)[0])
 21 |     for i in range(np.shape(segment_predict)[0]):
 22 |         for a in ambilist:
 23 |             if a[0] == vn:
 24 |                 gt = range(
 25 |                     int(round(float(a[2]) * 25 / 16)), int(round(float(a[3]) * 25 / 16))
 26 |                 )
 27 |                 pd = range(int(segment_predict[i][0]), int(segment_predict[i][1]))
 28 |                 IoU = float(len(set(gt).intersection(set(pd)))) / float(
 29 |                     len(set(gt).union(set(pd)))
 30 |                 )
 31 |                 if IoU > 0:
 32 |                     ind[i] = 1
 33 |     s = [
 34 |         segment_predict[i, :]
 35 |         for i in range(np.shape(segment_predict)[0])
 36 |         if ind[i] == 0
 37 |     ]
 38 |     return np.array(s)
 39 | 
 40 | 
 41 | def smooth(v, order=2, lens=200):
 42 |     l = min(lens, len(v))
 43 |     l = l - (1 - l % 2)
 44 |     if len(v) <= order:
 45 |         return v
 46 |     return savgol_filter(v, l, order)
 47 | 
 48 | 
 49 | def get_topk_mean(x, k, axis=0):
 50 |     return np.mean(np.sort(x, axis=axis)[-int(k):, :], axis=0)
 51 | 
 52 | 
 53 | def get_cls_score(element_cls, rat=20):
 54 |     topk_val, _ = torch.topk(element_cls, k=max(1, int(element_cls.shape[-2] // rat)), dim=-2)
 55 |     instance_logits = torch.mean(topk_val, dim=-2)
 56 |     pred_vid_score = torch.softmax(instance_logits, dim=-1)[..., :-1].squeeze().data.cpu().numpy()
 57 |     return pred_vid_score
 58 | 
 59 | 
 60 | def __vector_minmax_norm(vector, min_val=None, max_val=None):
 61 |     if min_val is None or max_val is None:
 62 |         max_val = np.max(vector)
 63 |         min_val = np.min(vector)
 64 | 
 65 |     delta = max_val - min_val
 66 |     # delta[delta <= 0] = 1
 67 |     ret = (vector - min_val) / delta
 68 | 
 69 |     return ret
 70 | 
 71 | 
 72 | def _multiply(x, atn, dim=-1, include_min=False):
 73 |     if include_min:
 74 |         _min = x.min(dim=dim, keepdim=True)[0]
 75 |     else:
 76 |         _min = 0
 77 |     return atn * (x - _min) + _min
 78 | 
 79 | 
 80 | def sigmoid(x, thres_uct_list, max_score_class):
 81 |     # x = (x - 0.25) / (0.75 - 0.25)
 82 |     x = x - thres_uct_list[max_score_class] + thres_uct_list.mean()
 83 |     return 1 / (1 + torch.exp(-16 * (x - 0.45)))
 84 | 
 85 | 
 86 | @torch.no_grad()
 87 | def multiple_threshold_hamnet(vid_name, data_dict, labels, args, thres):
 88 |     labels = torch.tensor(labels)
 89 |     open_labels = torch.zeros(args.n_known_class + 1)
 90 |     open_labels[:args.n_known_class] = labels[:args.n_known_class]
 91 |     if labels[args.n_known_class:].sum() > 0:
 92 |         open_labels[-1] = 1
 93 | 
 94 |     cas = data_dict['cas']
 95 |     atn = data_dict['attn']
 96 |     video_uct = data_dict['uct'][0].cpu().item()
 97 |     # video_uct = obtain_uct(args, data_dict)
 98 | 
 99 |     element_logits = cas * atn
100 | 
101 |     pred_vid_score = get_cls_score(element_logits, rat=10)
102 | 
103 |     pred_vid_score = np.concatenate((pred_vid_score, np.array([video_uct])))
104 |     cas_supp = element_logits[..., :-1]
105 | 
106 |     known_flag = True
107 |     if video_uct <= thres:  # uct小于阈值，只有已知类
108 |         unknown_flag = False
109 |         pred = np.where(pred_vid_score[:-1] >= 0.2)[0]
110 |         if len(pred) == 0:
111 |             pred = np.array([np.argmax(pred_vid_score[:-1])])
112 |     else:  # uct大于阈值，有未知类
113 |         unknown_flag = True
114 |         pred = np.where(pred_vid_score[:-1] >= 0.5)[0]
115 |         if len(pred) == 0:
116 |             known_flag = False
117 |         pred = np.concatenate([pred, np.array([args.n_known_class])])
118 | # -----------------------------------------------------------------------
119 | 
120 |     num_segments = cas.shape[1]
121 | 
122 |     cas_pred_atn = atn[0].cpu().numpy()[:, [0]]
123 |     cas_pred_atn = np.reshape(cas_pred_atn, (num_segments, -1, 1))
124 |     if known_flag and not unknown_flag:
125 |         cas_pred = cas_supp[0].cpu().numpy()[:, pred]
126 |         cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
127 |     elif not known_flag and unknown_flag:
128 |         cas_pred = cas_pred_atn
129 |     elif known_flag and unknown_flag:
130 |         cas_pred = cas_supp[0].cpu().numpy()[:, pred[:-1]]
131 |         cas_pred = np.reshape(cas_pred, (num_segments, -1, 1))
132 |         cas_pred = np.hstack((cas_pred, cas_pred_atn))
133 |     else:
134 |         raise "Error"
135 | 
136 |     # NOTE: threshold
137 |     act_thresh = np.linspace(0.1, 0.9, 10)
138 | 
139 |     proposal_dict = {}
140 | 
141 |     for i in range(len(act_thresh)):
142 |         cas_temp = cas_pred.copy()
143 |         cas_temp_atn = cas_pred_atn.copy()
144 |         seg_list = []
145 |         for c in range(len(pred)):
146 |             pos = np.where(cas_temp_atn[:, 0, 0] > act_thresh[i])
147 |             seg_list.append(pos)
148 | 
149 |         proposals = utils.get_proposal_oic_2(seg_list,
150 |                                              cas_temp,
151 |                                              pred_vid_score,
152 |                                              pred,
153 |                                              gamma=args.gamma_oic)
154 | 
155 |         for j in range(len(proposals)):
156 |             class_id = proposals[j][0][0]
157 | 
158 |             if class_id not in proposal_dict.keys():
159 |                 proposal_dict[class_id] = []
160 | 
161 |             proposal_dict[class_id] += proposals[j]
162 | 
163 |     final_proposals = []
164 |     for class_id in proposal_dict.keys():
165 |         final_proposals.append(
166 |             utils.soft_nms(proposal_dict[class_id], 0.7, sigma=0.3))
167 | 
168 |     # [c_pred[i], c_score, t_start, t_end]
169 |     segment_predict = []
170 |     for i in range(len(final_proposals)):
171 |         for j in range(len(final_proposals[i])):
172 |             [c_pred, c_score, t_start, t_end] = final_proposals[i][j]
173 |             segment_predict.append([t_start, t_end, c_score, c_pred])
174 | 
175 |     segment_predict = np.array(segment_predict)
176 |     segment_predict = filter_segments(segment_predict, vid_name.decode())
177 | 
178 |     video_lst, t_start_lst, t_end_lst = [], [], []
179 |     label_lst, score_lst = [], []
180 |     uct_lst, act_lst = [], []
181 |     for i in range(np.shape(segment_predict)[0]):
182 |         video_lst.append(vid_name.decode())
183 |         t_start_lst.append(segment_predict[i, 0])
184 |         t_end_lst.append(segment_predict[i, 1])
185 |         score_lst.append(segment_predict[i, 2])
186 |         label_lst.append(segment_predict[i, 3])
187 |     prediction = pd.DataFrame(
188 |         {
189 |             "video-id": video_lst,
190 |             "t-start": t_start_lst,
191 |             "t-end": t_end_lst,
192 |             "label": label_lst,
193 |             "score": score_lst,
194 |         }
195 |     )
196 | 
197 |     if not args.topk_test:
198 |         args.n_pred_list = ['unlimit']
199 |         return [prediction]
200 |     else:
201 |         args.n_pred_list = [5, 10, 20, 50, 100, 'unlimit']
202 |         if prediction.empty:
203 |             return [prediction] * 6
204 | 
205 |         topk_prediction_list = []
206 |         for n_pred in args.n_pred_list[:-1]:
207 |             if known_flag and not unknown_flag:  # 只有已知类
208 |                 n_known_pred = int(n_pred / pred.shape[0])
209 |                 n_unknown_pred = 0
210 |             elif not known_flag and unknown_flag:  # 只有未知类
211 |                 n_known_pred = 0
212 |                 n_unknown_pred = n_pred
213 |             elif known_flag and unknown_flag:  # 同时存在
214 |                 n_known_pred = int(n_pred * 0.5 / pred.shape[0])
215 |                 n_unknown_pred = n_pred - n_known_pred
216 |             else:
217 |                 raise "Error"
218 | 
219 |             all_class_topk_proposal = []
220 |             prediction_by_label = prediction.groupby("label")
221 |             for i, cidx in enumerate(pred):
222 |                 one_class_prediction = _get_predictions_with_label(prediction_by_label, cidx)
223 |                 sort_idx = one_class_prediction["score"].values.argsort()[::-1]  # idx from high to low
224 |                 one_class_prediction = one_class_prediction.loc[sort_idx].reset_index(drop=True)  # value from high to low
225 |                 if cidx < args.n_known_class:
226 |                     k = n_known_pred
227 |                 elif cidx == args.n_known_class:
228 |                     k = n_unknown_pred
229 |                 else:
230 |                     raise ValueError
231 |                 one_class_topk_proposal = one_class_prediction[: k]
232 |                 all_class_topk_proposal.append(one_class_topk_proposal)
233 |             topk_prediction = pd.concat(all_class_topk_proposal).reset_index(drop=True)
234 | 
235 |             topk_prediction_list.append(topk_prediction)
236 |         topk_prediction_list.append(prediction)
237 | 
238 |     return topk_prediction_list
239 | 
240 | 
241 | def _get_predictions_with_label(prediction_by_label, cidx):
242 |     """Get all predicitons of the given label. Return empty DataFrame if there
243 |     is no predcitions with the given label.
244 |     """
245 |     return prediction_by_label.get_group(cidx).reset_index(drop=True)
246 | 


--------------------------------------------------------------------------------
/libMR/COPYRIGHT_Libmr.txt:
--------------------------------------------------------------------------------
 1 | SOURCE CODE LICENSE AGREEMENT
 2 | PREAMBLE
 3 | 
 4 | This SOFTWARE implements concepts of statistical Meta-recognition for which Securics/Univ. of Colorado have a pending patent (CU TTO File CU2338C). Securics and University of Colorado, hereafter The Owners, have joint interest in the invention and the software. Securics currently holds the excusive license commercial to both the patent and the code. Securcs, hereafter the Licensor, is offering a non-exclusive right to use for non-commecial use.
 5 | This license agreement allows you to use the source code for personal or non profit purposes. This includes any use that does not involve making money, and does not include uses like:
 6 | • deploying the software for use by a for-profit organization
 7 | • providing a service to a paying customer
 8 | For-profit companies may not use this source code. If you work for a for-profit company, you may only use this software as an individual, for your personal use.
 9 | 
10 | This is a right to use license. It does not not include the right to redistribute copies. Non-profit users can only use the copies obtained from authroized sourcehs which include: securics.com, metarecognition.com or vast.uccs.edu.
11 | 
12 | This license agreement also allows you to create derivative products for your own use, but does not permit re-distribute
13 | of modified code in any form. You may choose to destribute patch files, which can be applied to officially distributed code. Any the derivative products, must be distributed under the same conditions as specified in this agreement unless a separate commercial license is obtained from Securics Inc or its designates.
14 | 
15 | As a condition of using this source code, you agree not to assert any patents or copyrights against the owners or any of the Owners’ licensees for use of derivative products. Any derivative products must include a copy of license and instructions for accessing the orignal source. You must also include attribution to the authors in any publication that results from the use of this code or data derived from the code. Any papers/research/report based on results that uses this software must cite:
16 | 
17 | @article{Scheirer_2011_TPAMI,
18 | author = {Walter J. Scheirer and Anderson Rocha and Ross Michaels and Terrance E. Boult},
19 | title = {Meta-Recognition: The Theory and Practice of Recognition Score Analysis},
20 | journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (PAMI)},
21 | volume = {33},
22 | issue = {8},
23 | pages = {1689–1695},
24 | year = {2011}
25 | }
26 | Parts of this technology are subject to SBIR data rights and as described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 80918
27 | 
28 | The Government’s rights to use, modify, reproduce, release, perform, display, or disclose technical data or computer software marked with this legend are restricted during the period shown as provided in paragraph (b)(4) of the Rights in Noncommercial Technical Data and Computer Software-Small Business Innovative Research (SBIR) Program clause contained in the above identified contract. Expiration of SBIR Data Rights: Expires four years after completion of the above cited project work for this or any other follow-on SBIR contract, whichever is later.
29 | 
30 | No restrictions on government use apply after the expiration date shown above. Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings.
31 | 
32 | This license includes other conditions that should be read carefully. This SOFWARE usage agreement (the “Agreement”) applies to the libMR and is between YOU and the Licensor
33 | 
34 | 1. DEFINITIONS
35 | 
36 | “Software” means all or any portion of the human-readable source code files of the software programs including without limitation, associated flow charts, algorithms, comments and other written instructions and technical documentation, and all corrections, updates, and new versions incorporated into such programs.
37 | 
38 | “Derivative Work” means a work based upon the Software, such as a revision, modification, translation, abridgement, condensation, expansion, collection, compilation, or any other form in which the Software may be recast, transformed, adapted, or distributed as a part of a larger work and which, if prepared without proper authorization would constitute a copyright infringement. If identifiable sections of that work are not derived from the Software, and can be reasonably considered independent and separate works in themselves, then they are not considered Derivative Work.
39 | 
40 | “Personal Use” means use of Software and/or Derivative Work by an individual solely for his or her personal, private and non-commercial use. An individual’s use in his or her capacity as an officer, employee, member, independent contractor or agent of a corporation, business or organization does not qualify as Personal Use.
41 | 
42 | “You” or “Your” means an individual or a legal entity exercising rights under this License. For legal entities, “You” or “Your” includes any non-profit entity which controls, is controlled by, or is under common control with, You, where “control” means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of fifty percent (50%) or more of the beneficial ownership of such entity.
43 | 
44 | 2. GRANT OF LICENSE:
45 | 
46 | WHEREAS, the Licensor, desires to aid the academic and non-commercial research community and raise awareness of the PATENTED INVENTION and thereby agrees to grant a limited copyright license to the SOFTWARE for research and non-commercial purposes only, with the Owners retaining all ownership rights in the PATENTED INVENTION and the SOFTWARE;
47 | 
48 | THEREFORE:
49 | The Licensor grants, and You accept, a personal, nonexclusive, nontransferable license:
50 | 
51 | a) to use Software, at no charge, in accordance with the terms herein, solely for (i) Personal Use, or (ii) academic or non-commercial research, development and deployment; and
52 | 
53 | b) to develop Derivative Works that may be used solely for (i) Personal Use or (ii) academic or non-commercial research, development and deployment; and
54 | 
55 | c) to copy, distribute and sublicense Software and Derivative Works solely in accordance with the terms herein. Any Software or Derivative Works distributed shall be pursuant to a license agreement that contains all of the terms herein; and shall contain prominent notices stating how the Software, Derivative Works, or documentation was changed, the author and date of any such change and require acknowledgement of the orginal software/publicaitons by any users of the Derivative Works.
56 | 
57 | d) You acknowledge that the Software is a valuable, proprietary asset of The Owners. You shall not market or sell the Software or Derivative Works.
58 | 
59 | 3. LICENSE EXCLUSIONS
60 | 
61 | a) EXCEPT AS EXPRESSLY PROVIDED HEREIN, YOU SHALL MAKE NO OTHER USE OF THE SOFTWARE.
62 | 
63 | b) You must obtain permission from The Licensor before receiving payment for distribution of or services using the Software or Derivative Works.
64 | 
65 | c) You shall not allege or enjoin infringement or misappropriation by The Licensor in any Derivative Works, or by any third party obtaining Derivative Works, prepared by The Licensor and under license from The Licensor.
66 | 4. TITLE AND PROTECTION OF SOFTWARE
67 | 
68 | a) The Owners retains all title, right and interest to the Software and the underlying patents.
69 | 
70 | b) Except for the Software, You retain all title, right and interest to the Derivative Works, subject to the terms of this Agreement.
71 | 
72 | 5. NO REPRESENTATIONS
73 | 
74 | THE OWNERS DISCLAIMS ALL OTHER REPRESENTATIONS OR WARRANTIES, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
75 | 6. ATTRIBUTION
76 | 
77 | a) You agree to retain and reproduce in all copies of Software the copyright and other proprietary notices and disclaimers as they appear in the Software, and keep intact all notices in the Software that refer to this License.
78 | 
79 | b) You agree to provide attribution to the authors of this Software in any article based on research performed using Software or Derivative Works or with any distribution of Software or Derivative works.
80 | 
81 | 7. DEFAULT
82 | 
83 | If YOU fail to perform any of its obligations under this Agreement, The Licensor, in addition to any other rights available to it under law or equity, may terminate this Agreement and the licenses granted hereunder by written notice to You. Unless otherwise provided in this Agreement, remedies shall be cumulative and there shall be no obligation to exercise a particular remedy.
84 | 
85 | 8. TERMINATION
86 | 
87 | a) In the event that this Agreement is terminated, any sublicenses granted or Derivative Works distributed by Licensee shall remain in full force and effect.
88 | 
89 | b) Within thirty (30) days of termination, You shall return to The Licensor or certify in writing to The Licensor that all copies or partial copies of Software in Your possession or control have been destroyed. c) In addition to this section, the sections entitled “Title and Protection of Software “No Representations” and “Limitation of Liability” shall survive termination of this Agreement.
90 | 
91 | 9. GENERAL
92 | a) No agency, partnership or employment is created by this Agreement.
93 | 
94 | b) You may not use any of The Owners’ names, the terms in Meta-Recognition, or W-score in any advertising, public relations or media release without the prior written consent of the Owner.
95 | 
96 | c) This Agreement shall be governed by the laws of the State of Colorado. Venue for any action or proceeding shall be Denver, Colorado. This Agreement constitutes the entire agreement between the parties and may only be modified by a written instrument signed by each parties authorized officers.
97 | 
98 | If you accept this license please opt-in for and you will receive email with instructions. The email will also be used for update emails on future changes to the code.
99 | 


--------------------------------------------------------------------------------
/edl_loss.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn.functional as F
  3 | 
  4 | from base import BaseWeightedLoss
  5 | 
  6 | 
  7 | def relu_evidence(y):
  8 |     return F.relu(y)
  9 | 
 10 | 
 11 | def exp_evidence(y):
 12 |     return torch.exp(torch.clamp(y, -10, 10))
 13 | 
 14 | 
 15 | def softplus_evidence(y):
 16 |     return F.softplus(y)
 17 | 
 18 | 
 19 | class EvidenceLoss(BaseWeightedLoss):
 20 |     """Evidential MSE Loss."""
 21 | 
 22 |     def __init__(self, num_classes,
 23 |                  evidence='relu',
 24 |                  loss_type='log',
 25 |                  with_kldiv=False,
 26 |                  with_annealing=False,
 27 |                  disentangle=False,
 28 |                  annealing_method='step',
 29 |                  annealing_start=0.01,
 30 |                  annealing_step=10,
 31 |                  redl_lamb=1.0):
 32 |         super().__init__()
 33 |         self.num_classes = num_classes
 34 |         self.evidence = evidence
 35 |         self.loss_type = loss_type
 36 |         self.with_kldiv = with_kldiv
 37 |         self.with_annealing = with_annealing
 38 |         self.disentangle = disentangle
 39 |         self.annealing_method = annealing_method
 40 |         self.annealing_start = annealing_start
 41 |         self.annealing_step = annealing_step
 42 |         self.eps = 1e-10
 43 |         self.redl_lamb = redl_lamb
 44 | 
 45 |     def kl_divergence(self, alpha):
 46 |         beta = torch.ones([1, self.num_classes], dtype=torch.float32).to(alpha.device)
 47 |         S_alpha = torch.sum(alpha, dim=1, keepdim=True)
 48 |         S_beta = torch.sum(beta, dim=1, keepdim=True)
 49 |         lnB = torch.lgamma(S_alpha) - \
 50 |               torch.sum(torch.lgamma(alpha), dim=1, keepdim=True)
 51 |         lnB_uni = torch.sum(torch.lgamma(beta), dim=1,
 52 |                             keepdim=True) - torch.lgamma(S_beta)
 53 | 
 54 |         dg0 = torch.digamma(S_alpha)
 55 |         dg1 = torch.digamma(alpha)
 56 | 
 57 |         kl = torch.sum((alpha - beta) * (dg1 - dg0), dim=1,
 58 |                        keepdim=True) + lnB + lnB_uni
 59 |         return kl
 60 | 
 61 |     def loglikelihood_loss(self, y, alpha):
 62 |         S = torch.sum(alpha, dim=1, keepdim=True)
 63 |         loglikelihood_err = torch.sum(
 64 |             (y - (alpha / S)) ** 2, dim=1, keepdim=True)
 65 |         loglikelihood_var = torch.sum(
 66 |             alpha * (S - alpha) / (S * S * (S + 1)), dim=1, keepdim=True)
 67 |         return loglikelihood_err, loglikelihood_var
 68 | 
 69 |     def mse_loss(self, y, alpha, annealing_coef):
 70 |         """Used only for loss_type == 'mse'
 71 |         y: the one-hot labels (batchsize, num_classes)
 72 |         alpha: the predictions (batchsize, num_classes)
 73 |         epoch_num: the current training epoch
 74 |         """
 75 |         losses = {}
 76 |         loglikelihood_err, loglikelihood_var = self.loglikelihood_loss(y, alpha)
 77 |         losses.update({'loss_cls': loglikelihood_err, 'loss_var': loglikelihood_var})
 78 | 
 79 |         losses.update({'lambda': annealing_coef})
 80 |         if self.with_kldiv:
 81 |             kl_alpha = (alpha - 1) * (1 - y) + 1
 82 |             kl_div = annealing_coef * \
 83 |                      self.kl_divergence(kl_alpha)
 84 |             losses.update({'loss_kl': kl_div})
 85 | 
 86 |         if self.with_avuloss:
 87 |             S = torch.sum(alpha, dim=1, keepdim=True)  # Dirichlet strength
 88 |             pred_score = alpha / S
 89 |             uncertainty = self.num_classes / S
 90 |             # avu_loss = annealing_coef * 
 91 |         return losses
 92 | 
 93 |     def ce_loss(self, target, y, alpha, annealing_coef):
 94 |         """Used only for loss_type == 'ce'
 95 |         target: the scalar labels (batchsize,)
 96 |         alpha: the predictions (batchsize, num_classes), alpha >= 1
 97 |         epoch_num: the current training epoch
 98 |         """
 99 |         losses = {}
100 |         # (1) the classification loss term
101 |         S = torch.sum(alpha, dim=1, keepdim=True)
102 |         pred_score = alpha / S
103 |         loss_cls = F.nll_loss(torch.log(pred_score), target, reduction='none')
104 |         losses.update({'loss_cls': loss_cls})
105 | 
106 |         # (2) the likelihood variance term
107 |         loglikelihood_var = torch.sum(
108 |             alpha * (S - alpha) / (S * S * (S + 1)), dim=1, keepdim=True)
109 |         losses.update({'loss_var': loglikelihood_var})
110 | 
111 |         # (3) the KL divergence term
112 |         kl_alpha = (alpha - 1) * (1 - y) + 1
113 |         kl_div = annealing_coef * \
114 |                  self.kl_divergence(kl_alpha)
115 |         losses.update({'loss_kl': kl_div, 'lambda': annealing_coef})
116 |         return losses
117 | 
118 |     def edl_loss(self, func, y, alpha, annealing_coef, target):
119 |         """Used for both loss_type == 'log' and loss_type == 'digamma'
120 |         func: function handler (torch.log, or torch.digamma)
121 |         y: the one-hot labels (batch_size, num_classes)
122 |         alpha: the predictions (batch_size, num_classes)
123 |         epoch_num: the current training epoch
124 |         """
125 |         # BALD Uncertainty
126 |         # --------------------------------------------------------------------------------
127 |         # losses = {}
128 |         # S = torch.sum(alpha, dim=1, keepdim=True)
129 |         # pred = alpha / S
130 |         # uncertainty = compute_BALD_uncertainty(pred)
131 |         # label_num = torch.sum(y, dim=1, keepdim=True)
132 |         # temp = 1 / alpha * y
133 |         # g = (1 - uncertainty) * label_num * torch.div(temp, torch.sum(temp, dim=1, keepdim=True))
134 |         # A = torch.sum(g * (func(S) - func(alpha)), dim=1, keepdim=True)
135 | 
136 |         # Final DELU
137 |         # --------------------------------------------------------------------------------
138 |         # losses = {}
139 |         # S = torch.sum(alpha, dim=1, keepdim=True)
140 |         # uncertainty = self.num_classes / S
141 |         # label_num = torch.sum(y, dim=1, keepdim=True)
142 |         # temp = 1 / alpha * y
143 |         # g = (1 - uncertainty.detach()) * label_num * torch.div(temp, torch.sum(temp, dim=1, keepdim=True))
144 |         # A = torch.sum(g * (func(S) - func(alpha)), dim=1, keepdim=True)
145 | 
146 |         # Traditional EDL
147 |         # --------------------------------------------------------------------------------
148 | 
149 |         losses = {}
150 |         if y is None:
151 |             A = torch.tensor([0.])
152 |         else:
153 |             S = torch.sum(alpha, dim=1, keepdim=True)
154 |             A = torch.sum(y * (func(S) - func(alpha)), dim=1, keepdim=True)
155 | 
156 |         # --------------------------------------------------------------------------------
157 | 
158 |         losses.update({'loss_cls': A})
159 | 
160 |         if self.with_annealing:
161 |             losses.update({'lambda': annealing_coef})
162 | 
163 |         if self.with_kldiv:
164 |             kl_alpha = (alpha - 1) * (1 - y) + 1
165 |             kl_div = annealing_coef * \
166 |                      self.kl_divergence(kl_alpha)
167 |             losses.update({'loss_kl': kl_div})
168 | 
169 |         return losses
170 | 
171 |     def compute_annealing_coef(self, **kwargs):
172 |         assert 'epoch' in kwargs, "epoch number is missing!"
173 |         assert 'total_epoch' in kwargs, "total epoch number is missing!"
174 |         epoch_num, total_epoch = kwargs['epoch'], kwargs['total_epoch']
175 |         # annealing coefficient
176 |         if self.annealing_method == 'step':
177 |             annealing_coef = torch.min(torch.tensor(
178 |                 1.0, dtype=torch.float32), torch.tensor(epoch_num / self.annealing_step, dtype=torch.float32))
179 |         elif self.annealing_method == 'exp':
180 |             annealing_start = torch.tensor(self.annealing_start, dtype=torch.float32)
181 |             annealing_coef = annealing_start * torch.exp(-torch.log(annealing_start) / total_epoch * epoch_num)
182 |         else:
183 |             raise NotImplementedError
184 |         return annealing_coef
185 | 
186 |     def _forward(self, output, target, output_is_evidence=False, **kwargs):
187 |         """Forward function.
188 |         Args:
189 |             output (torch.Tensor): The class score (before softmax).
190 |             target (torch.Tensor): The ground truth label.
191 |             epoch_num: The number of epochs during training.
192 |         Returns:
193 |             torch.Tensor: The returned EvidenceLoss loss.
194 |         """
195 |         if output_is_evidence:
196 |             evidence = output
197 |         else:
198 |             # get evidence
199 |             if self.evidence == 'relu':
200 |                 evidence = relu_evidence(output)
201 |             elif self.evidence == 'exp':
202 |                 evidence = exp_evidence(output)
203 |             elif self.evidence == 'softplus':
204 |                 evidence = softplus_evidence(output)
205 |             else:
206 |                 raise NotImplementedError
207 | 
208 |         alpha = evidence + self.redl_lamb
209 | 
210 |         # Our target is a vector, as result, no need for one-hot embedding
211 |         y = target
212 | 
213 |         # # one-hot embedding for the target
214 |         # y = torch.eye(self.num_classes).to(output.device)
215 |         # y = y[target]
216 | 
217 |         # compute annealing coefficient
218 |         if self.with_annealing:
219 |             annealing_coef = self.compute_annealing_coef(**kwargs)
220 |         else:
221 |             annealing_coef = None
222 | 
223 |         # compute the EDL loss
224 |         if self.loss_type == 'mse':
225 |             results = self.mse_loss(y, alpha, annealing_coef)
226 |         elif self.loss_type == 'log':
227 |             results = self.edl_loss(torch.log, y, alpha, annealing_coef, target)
228 |         elif self.loss_type == 'digamma':
229 |             results = self.edl_loss(torch.digamma, y, alpha, annealing_coef, target)
230 |         elif self.loss_type == 'cross_entropy':
231 |             results = self.ce_loss(target, y, alpha, annealing_coef)
232 |         else:
233 |             raise NotImplementedError
234 | 
235 |         uncertainty = self.redl_lamb * self.num_classes / torch.sum(alpha, dim=1, keepdim=True)
236 |         results.update({'uncertainty': uncertainty})
237 |         results.update({'evidence': evidence})
238 | 
239 |         return results
240 | 


--------------------------------------------------------------------------------
/libMR/libmr.pyx:
--------------------------------------------------------------------------------
  1 | #
  2 | # libmr.pyx:
  3 | #
  4 | # @Author Terry Boult tboult at securics com
  5 | # @Author Vijay Iyer viyer at securics com
  6 | # @Author Michael Wilber mwilber at securics.com
  7 | #
  8 | # Copyright 2013, Securics Inc.
  9 | #
 10 | #   See accompanying LICENSE agrement for details on rights.
 11 | #
 12 | # Parts of this technology are subject to SBIR data rights and as
 13 | # described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which
 14 | # apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421
 15 | # to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO
 16 | # 80918
 17 | #
 18 | # The Government's rights to use, modify, reproduce, release, perform,
 19 | # display, or disclose technical data or computer software marked with
 20 | # this legend are restricted during the period shown as provided in
 21 | # paragraph (b)(4) of the Rights in Noncommercial Technical Data and
 22 | # Computer Software-Small Business Innovative Research (SBIR) Program
 23 | # clause contained in the above identified contract. Expiration of
 24 | # SBIR Data Rights: Expires four years after completion of the above
 25 | # cited project work for this or any other follow-on SBIR contract,
 26 | # whichever is later.
 27 | #
 28 | # No restrictions on government use apply after the expiration date
 29 | # shown above. Any reproduction of technical data, computer software,
 30 | # or portions thereof marked with this legend must also reproduce the
 31 | # markings.
 32 | 
 33 | from libc.stdlib cimport malloc,free
 34 | from libcpp cimport bool
 35 | from libcpp.string cimport string
 36 | cimport numpy as np
 37 | import numpy as np
 38 | 
# (index, value) node struct declared in MetaRecognition.h. MR.fit_svm fills
# `index` with a label and `value` with the matching decision score.
cdef extern from "MetaRecognition.h":
    cdef struct svm_node_libsvm:
        int index
        double value
 43 | 
 44 | #cdef extern from "MetaRecognition.h":
 45 | 
# C++ declarations taken from MetaRecognition.h; the MR wrapper class in this
# module forwards its methods and properties to them.
cdef extern from "MetaRecognition.h":

    # NOTE(review): presumably the modes selected by FitSVM's `fit_type`
    # argument (declared there as a plain int). MR.fit_svm documents them as
    # 1..4 -- confirm the header assigns matching explicit values.
    ctypedef enum MR_fitting_type:
        complement_reject
        positive_reject 
        complement_model 
        positive_model

    cppclass MetaRecognition:
        # `except +` lets Cython turn a C++ exception thrown by the
        # constructor into a Python exception.
        MetaRecognition(int scores_to_drop,
                        int fitting_size,
                        bool verbose,
                        double alpha,
                        int translate_amount) except +
        bool is_valid()
        void set_translate(double t)
        void Reset()
        bool Predict_Match(double x, double threshold)
        double W_score(double x)
        double CDF(double x)
        double Inv(double p)

        # Called by MR.w_score_vector with an input and an output buffer of
        # `length` doubles each.
        int ReNormalize(double *invec, double *outvec, int length)

        int FitHigh(double* inputData, int inputDataSize,  int fit_size)

        int FitLow(double* inputData, int inputDataSize,  int fit_size)
        
        int FitSVM(svm_node_libsvm* svmdata, int inputDataSize, int label_of_interest, bool label_has_positive_score, 
                   int fit_type, int fit_size )

        # void Save(FILE *outputFile) const
        # void Load(FILE *inputFile)
        # Only the filename-based overloads are exposed to the wrapper.
        void Save(char* filename)
        void Load(char* filename)
        int get_fitting_size()
        int set_fitting_size(int nsize)
        int get_translate_amount()
        int set_translate_amount(int ntrans)
        int get_sign()
        int set_sign(int nsign)
        double get_small_score()
        double set_small_score(double nscore)
        bool verbose
        # String (de)serialization used by MR.__str__ and load_from_string.
        string to_string()
        void from_string(string input)
 92 | 
 93 | 
 94 | # This is the Python wrapper class.
 95 | cdef class MR:
 96 |     cdef MetaRecognition *thisptr
 97 |     def __cinit__(self, int scores_to_drop=0,
 98 |                   int fitting_size=9,
 99 |                   bool verbose=False,
100 |                   double alpha=5.0,
101 |                   int translate_amount=10000):
102 |         """
103 |         Create a new MR object.
104 |         """
105 |         self.thisptr = new MetaRecognition(scores_to_drop,fitting_size,verbose,alpha,translate_amount)
106 |     def __dealloc__(self):
107 |         del self.thisptr
108 |     def fit_low(self, inputData, int fit_size):
109 |         """Use fit_low if your data is such that is smaller is better. Fits a
110 |         MR object to the given data. We'll transform it for you
111 |         and keep the transform parameters in the class so later calls
112 |         to W_score or CDF do the right thing."""
113 |         cdef double *data
114 |         data = <double*>malloc(sizeof(double)*len(inputData))
115 |         for i in xrange(len(inputData)):
116 |             data[i] = inputData[i]
117 |         self.thisptr.FitLow(data, len(inputData), fit_size)
118 |         free(data)
119 |     def fit_high(self, inputData, int fit_size):
120 |         """Use fit_high if your data is such that is larger is better. Fits a
121 |         MR object to the given data. We'll transform it for you
122 |         and keep the transform parameters in the class so later calls
123 |         to W_score or CDF do the right thing.
124 |         """
125 |         cdef double *data
126 |         data = <double*>malloc(sizeof(double)*len(inputData))
127 |         for i in xrange(len(inputData)):
128 |             data[i] = inputData[i]
129 |         self.thisptr.FitHigh(data, len(inputData), fit_size)
130 |         free(data)
131 | 
132 |     def mr_save(self, filename):
133 |         """
134 |         save mr object to file
135 |         """
136 |         cdef char *filetosave
137 |         filetosave = filename
138 |         self.thisptr.Save(filetosave)
139 | 
140 |     def mr_load(self, filename):
141 |         """
142 |         save mr object to file
143 |         """
144 |         cdef char *filetosave
145 |         filetosave = filename
146 |         self.thisptr.Load(filetosave)
147 | 
148 |     def fit_svm(self, svm_data, inputDataSize, label_of_interest,  
149 |                 label_has_positive_score, fit_type, fit_size ):
150 |         """
151 |         Input:
152 |         --------
153 |         svm_data: dict containing labels and decision scores. 
154 |                   eg. svm_data['scores'] = [], svm_data['labels'] = []
155 |         inputDataSize : total no of decision scores
156 |         label_of_interest : eg +1, -1
157 |         label_has_positive_score : bool i.e 0 or 1
158 |         fit_type : complement_reject=1, positive_reject=2, complement_model=3, positive_model=4
159 |         fit_size : size of tail to be used
160 | 
161 |         Output:
162 |         --------
163 |         None
164 |         You can access parameters from weibull fitting using other attributes.
165 |         Loading/Saving of weibull model parameters can be done using load/save methods
166 |         in MR class
167 | 
168 |         """
169 | 
170 |         # initialize svm_data
171 |         cdef svm_node_libsvm *svm_data_to_c
172 | 
173 |         svm_data_to_c =  < svm_node_libsvm* >malloc(inputDataSize * sizeof(svm_node_libsvm) )
174 | 
175 |         assert svm_data.has_key("scores")
176 |         assert svm_data.has_key("scores")
177 |         assert len(svm_data["scores"]) == len(svm_data["labels"])
178 |         assert fit_type in [1, 2, 3, 4]
179 |         for i in range(inputDataSize):
180 |             svm_data_to_c[i].index  = svm_data["labels"][i]
181 |             svm_data_to_c[i].value = svm_data["scores"][i]
182 | 
183 |         print "Data initizalization complete. Now calling C++ code"
184 |         self.thisptr.FitSVM(svm_data_to_c, inputDataSize, label_of_interest, label_has_positive_score, fit_type, fit_size)
185 |         free(svm_data_to_c)
186 | 
187 |     property is_valid:
188 |         def __get__(self):
189 |             return self.thisptr.is_valid()
190 |     def reset(self):
191 |         self.thisptr.Reset()
192 |     def predict_match(self, double x, double threshold = .9999999):
193 |         """
194 |         Is X from the "match" distribution (i.e. we reject null hypothesis
195 |         of non-match)
196 | 
197 |         """
198 |         return self.thisptr.Predict_Match(x,threshold)
199 |     def w_score(self, double x):
200 |         """
201 | 	This is the commonly used function. After fitting, it returns the probability of the given score being "correct".  It is the same as CDF
202 |         """
203 |         return self.thisptr.W_score(x)
204 |     def cdf(self, double x):
205 |         """
206 |         This is the cummumlative probablity of match being corrrect (or more precisely the probility the score (after transform) being an outlier for the distribution, which given the transforms applied, so bigger is better, this is the probablity the score is correct.
207 |         """
208 |         return self.thisptr.CDF(x)
209 |     def inv(self, double p):
210 |         """
211 |         This is score for which one would obtain CDF probability p (i.e. x such that p = CDF(x))
212 |         """
213 |         return self.thisptr.Inv(p)
214 |     def w_score_vector(self, double[::1] invec):
215 |         """
216 |         Apply w_score to each element of invec, returning a new vector of W-scores
217 |         """
218 |         cdef np.ndarray[np.double_t,ndim=1]new_vec = np.zeros(len(invec), dtype='d')
219 |         self.thisptr.ReNormalize(&invec[0], &new_vec[0], len(invec))
220 |         return new_vec
221 |     def __str__(self):
222 |         """
223 |         Serialize the MR object to a string. Use load_from_string to recover it.
224 |         """
225 |         return self.thisptr.to_string()
226 |     def __repr__(self):
227 |         return "<MR object: %r>" % str(self)
228 |     property tailsize:
229 |         def __get__(self):
230 |             return self.thisptr.get_fitting_size()
231 |         def __set__(self, int nsize):
232 |             self.thisptr.set_fitting_size(nsize)
233 |     property translate_amount:
234 |         def __get__(self):
235 |             return self.thisptr.get_translate_amount()
236 |         def __set__(self, int ntrans):
237 |             self.thisptr.set_translate_amount(ntrans)
238 |     property sign:
239 |         def __get__(self):
240 |             return self.thisptr.get_sign()
241 |         def __set__(self, int nsign):
242 |             self.thisptr.set_sign(nsign)
243 |     property small_score:
244 |         def __get__(self):
245 |             return self.thisptr.get_small_score()
246 |         def __set__(self, double nscore):
247 |             self.thisptr.set_small_score(nscore)
248 |     property verbose:
249 |         def __get__(self):
250 |             return self.thisptr.verbose
251 |         def __set__(self, bool verbose):
252 |             self.thisptr.verbose = verbose
253 | 
def load_from_string(str input):
    """
    Rebuild an MR object from its serialized string form.

    This is the inverse of str(MR()): a default-constructed MR is created and
    its state is then overwritten from ``input``.
    """
    cdef MR restored = MR()
    restored.thisptr.from_string(input)
    return restored
261 | 
262 | 


--------------------------------------------------------------------------------
/utils/wsad_utils.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import torch
  3 | import torch.nn as nn
  4 | from scipy.interpolate import interp1d
  5 | 
  6 | 
  7 | def sigmoid_rampup(current, rampup_length):
  8 |     """Exponential rampup from https://arxiv.org/abs/1610.02242"""
  9 |     if rampup_length == 0:
 10 |         return 1.0
 11 |     else:
 12 |         current = np.clip(current, 0.0, rampup_length)
 13 |         phase = 1.0 - current / rampup_length
 14 |         return float(np.exp(-5.0 * phase * phase))
 15 | 
 16 | 
 17 | def linear_rampup(current, rampup_length):
 18 |     """Linear rampup"""
 19 |     assert current >= 0 and rampup_length >= 0
 20 |     if current >= rampup_length:
 21 |         return 1.0
 22 |     else:
 23 |         return current / rampup_length
 24 | 
 25 | 
 26 | def cosine_rampdown(current, rampdown_length):
 27 |     """Cosine rampdown from https://arxiv.org/abs/1608.03983"""
 28 |     assert 0 <= current <= rampdown_length
 29 |     return float(.5 * (np.cos(np.pi * current / rampdown_length) + 1))
 30 | 
 31 | 
 32 | def str2ind(categoryname, classlist):
 33 |     return [
 34 |         i for i in range(len(classlist))
 35 |         # if categoryname == classlist[i].decode("utf-8")
 36 |         if categoryname == classlist[i]
 37 |     ][0]
 38 | 
 39 | 
 40 | def strlist2indlist(strlist, classlist):
 41 |     return [str2ind(s, classlist) for s in strlist]
 42 | 
 43 | 
 44 | def strlist2multihot(strlist, classlist):
 45 |     return np.sum(np.eye(len(classlist))[strlist2indlist(strlist, classlist)],
 46 |                   axis=0)
 47 | 
 48 | 
 49 | def idx2multihot(id_list, num_class):
 50 |     return np.sum(np.eye(num_class)[id_list], axis=0)
 51 | 
 52 | 
 53 | def random_extract(feat, t_max):
 54 |     # ind = np.arange(feat.shape[0])
 55 |     # splits = np.array_split(ind, t_max)
 56 |     # nind = np.array([np.random.choice(split, 1)[0] for split in splits])
 57 |     # return feat[nind]
 58 | 
 59 |     # ind = np.random.choice(feat.shape[0], size=t_max)
 60 |     # ind = sorted(ind)
 61 |     # return feat[ind]
 62 |     r = np.random.randint(len(feat) - t_max)
 63 |     return feat[r: r + t_max]
 64 | 
 65 | 
 66 | def pad(feat, min_len):
 67 |     if feat.shape[0] <= min_len:
 68 |         return np.pad(
 69 |             feat,
 70 |             ((0, min_len - feat.shape[0]), (0, 0)),
 71 |             mode="constant",
 72 |             constant_values=0,
 73 |         )
 74 |     else:
 75 |         return feat
 76 | 
 77 | 
 78 | def fn_normalize(x):
 79 |     return (x - np.mean(x, 0, keepdims=True)) / \
 80 |            (np.std(x, 0, keepdims=True) + 1e-10)
 81 | 
 82 | 
 83 | def process_feat(feat, length=None, normalize=False):
 84 |     if length is not None:
 85 |         if len(feat) > length:
 86 |             x = random_extract(feat, length)
 87 |         else:
 88 |             x = pad(feat, length)
 89 |     else:
 90 |         x = feat
 91 |     if normalize:
 92 |         x = fn_normalize(x)
 93 |     return x
 94 | 
 95 | 
 96 | def write_to_file(dname, dmap, cmap, itr):
 97 |     fid = open(dname + "-results.log", "a+")
 98 |     string_to_write = str(itr)
 99 |     # if dmap:
100 |     for item in dmap:
101 |         string_to_write += " " + "%.2f" % item
102 |     string_to_write += " " + "%.2f" % cmap
103 |     fid.write(string_to_write + "\n")
104 |     fid.close()
105 | 
106 | 
def soft_nms(dets, iou_thr=0.7, method='gaussian', sigma=0.3):
    """Re-score 1-D proposals with (soft) non-maximum suppression.

    Args:
        dets: Sequence of proposals. Column 1 is the score and columns 2 and
            3 are the segment start and end; column 0 is carried through
            unchanged.
        iou_thr: IoU above which a proposal is penalized by the 'linear' and
            hard-NMS variants.
        method: 'linear', 'gaussian', or anything else for traditional NMS.
        sigma: Width of the Gaussian penalty.

    Returns:
        list: Every input proposal as a list of floats, ordered by the score
        it had when it was picked (highest remaining score first). No
        proposal is dropped: overlapping ones only get their score lowered
        (to zero with traditional NMS). An empty input yields ``[]``.
    """
    dets = np.array(dets)
    if dets.size == 0:
        return []

    # Append each segment's length as an extra last column so it stays
    # aligned with its row while rows are swapped and removed below.
    lengths = dets[:, 3] - dets[:, 2] + 1
    dets = np.concatenate((dets, lengths[:, None]), axis=1)

    retained_box = []
    while dets.size > 0:
        # Move the best remaining proposal to row 0 and emit it.
        max_idx = np.argmax(dets[:, 1], axis=0)
        dets[[0, max_idx], :] = dets[[max_idx, 0], :]
        retained_box.append(dets[0, :-1].tolist())

        # Temporal IoU between the picked proposal and all remaining ones.
        xx1 = np.maximum(dets[0, 2], dets[1:, 2])
        xx2 = np.minimum(dets[0, 3], dets[1:, 3])
        inter = np.maximum(xx2 - xx1 + 1, 0.0)
        iou = inter / (dets[0, -1] + dets[1:, -1] - inter)

        if method == 'linear':
            weight = np.where(iou > iou_thr, 1.0 - iou, 1.0)
        elif method == 'gaussian':
            weight = np.exp(-(iou * iou) / sigma)
        else:  # traditional nms
            weight = np.where(iou > iou_thr, 0.0, 1.0)

        dets[1:, 1] *= weight
        dets = dets[1:, :]

    return retained_box
144 | 
145 | 
def minmax_norm(act_map, min_val=None, max_val=None):
    """Min-max normalize ``act_map`` and clamp the result to ``[0, 1]``.

    When either bound is missing, both are recomputed as the ReLU of the
    maximum / minimum of ``act_map`` along dim 1. A non-positive range is
    replaced by 1 so the division is always defined.
    """
    if min_val is None or max_val is None:
        max_val = nn.functional.relu(torch.max(act_map, dim=1)[0])
        min_val = nn.functional.relu(torch.min(act_map, dim=1)[0])

    span = max_val - min_val
    span[span <= 0] = 1
    normalized = (act_map - min_val) / span

    # Clamp values that fall outside the unit interval.
    normalized[normalized > 1] = 1
    normalized[normalized < 0] = 0
    return normalized
160 | 
161 | 
def upgrade_resolution(arr, scale):
    """Linearly resample ``arr`` along axis 0 at ``scale`` times its
    original resolution.

    Sample positions run from 0 up to (not including) ``arr.shape[0]`` in
    steps of ``1 / scale``; positions past the last original sample are
    extrapolated.
    """
    num_steps = arr.shape[0]
    interpolate = interp1d(np.arange(0, num_steps), arr, kind='linear',
                           axis=0, fill_value='extrapolate')
    return interpolate(np.arange(0, num_steps, 1 / scale))
168 | 
169 | 
def nms(proposals, thresh):
    """Greedy non-maximum suppression over 1-D proposals.

    Args:
        proposals: Sequence of proposals. Column 1 is the score and columns
            2 and 3 are the segment start and end.
        thresh: A proposal is discarded when its IoU with an already kept,
            higher-scoring proposal is not below this value.

    Returns:
        list: The kept proposals as lists of floats, highest score first.
        An empty input yields ``[]``.
    """
    proposals = np.array(proposals)
    if proposals.size == 0:
        return []

    x1 = proposals[:, 2]
    x2 = proposals[:, 3]
    scores = proposals[:, 1]

    areas = x2 - x1 + 1
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        best = order[0]
        rest = order[1:]
        keep.append(proposals[best].tolist())

        xx1 = np.maximum(x1[best], x1[rest])
        xx2 = np.minimum(x2[best], x2[rest])

        inter = np.maximum(0.0, xx2 - xx1 + 1)  # intersection length
        iou = inter / (areas[best] + areas[rest] - inter)

        # Continue only with proposals that do not overlap too much.
        order = rest[iou < thresh]

    return keep
194 | 
195 | 
def get_proposal_oic(tList, wtcam, final_score, c_pred, _lambda=0.25, gamma=0.2):
    """Turn per-class index lists into scored proposals (outer-inner
    contrast).

    For every entry ``i`` of ``tList`` the index array ``tList[i][0]`` is
    split into runs of consecutive indices. Runs with fewer than two indices
    are ignored. Each remaining run is scored as

        mean(wtcam inside) - mean(wtcam in the surrounding margin)
        + gamma * final_score[c_pred[i]]

    where the margin extends ``_lambda`` times the run length on both sides
    (clipped to the valid range of ``wtcam``'s first axis) and ``wtcam`` is
    read as ``wtcam[indices, i, 0]``.

    Returns:
        list: One list per entry that produced at least one proposal; each
        proposal is ``[c_pred[i], score, start, end]`` with ``end`` being the
        last index of the run plus one.
    """
    proposals = []
    for i, entry in enumerate(tList):
        positions = np.array(entry)[0]
        # An index array without any non-zero entry yields no proposals.
        if not positions.any():
            continue

        class_proposals = []
        for segment in grouping(positions):
            seg_len = len(segment)
            if seg_len < 2:
                continue
            first, last = segment[0], segment[-1]
            inner_score = np.mean(wtcam[segment, i, 0])

            # Margin around the segment used as the "outer" region.
            outer_s = max(0, int(first - _lambda * seg_len))
            outer_e = min(int(wtcam.shape[0] - 1), int(last + _lambda * seg_len))
            outer_idx = list(range(outer_s, int(first))) + list(
                range(int(last + 1), outer_e + 1))

            outer_score = np.mean(wtcam[outer_idx, i, 0]) if len(outer_idx) != 0 else 0

            c_score = inner_score - outer_score + gamma * final_score[c_pred[i]]
            class_proposals.append([c_pred[i], c_score, first, last + 1])

        if len(class_proposals) > 0:
            proposals.append(class_proposals)
    return proposals
229 | 
230 | 
def get_proposal_oic_2(tList,
                       wtcam,
                       final_score,
                       c_pred,
                       lambda_=0.25,
                       gamma=0.2,
                       loss_type="oic"):
    """Turn per-class index lists into scored proposals.

    For every entry ``i`` of ``tList`` the index array ``tList[i][0]`` is
    split into runs of consecutive indices and every run becomes a proposal
    ``[c_pred[i], score, start, end]`` (``end`` is the last index plus one).

    With ``loss_type == "oic"`` the score is the mean of ``wtcam`` inside the
    run minus the mean in a margin of ``lambda_`` times the run length on
    both sides, plus ``gamma * final_score[c_pred[i]]``. For any other
    ``loss_type`` the score is just the inner mean.

    Returns:
        list: One (possibly empty) proposal list per entry whose index array
        contains a non-zero value.
    """
    proposals = []
    for i, entry in enumerate(tList):
        positions = np.array(entry)[0]
        if not positions.any():
            continue

        class_proposals = []
        for segment in grouping(positions):
            first, last = segment[0], segment[-1]
            seg_len = len(segment)
            inner_score = np.mean(wtcam[segment, i, 0])

            # Margin around the segment used as the "outer" region.
            outer_s = max(0, int(first - lambda_ * seg_len))
            outer_e = min(
                int(wtcam.shape[0] - 1),
                int(last + lambda_ * seg_len),
            )
            outer_idx = list(range(outer_s, int(first))) + list(
                range(int(last + 1), outer_e + 1))

            outer_score = np.mean(wtcam[outer_idx, i, 0]) if len(outer_idx) != 0 else 0

            if loss_type == "oic":
                c_score = inner_score - outer_score + gamma * final_score[c_pred[i]]
            else:
                c_score = inner_score
            class_proposals.append([c_pred[i], c_score, first, last + 1])

        proposals.append(class_proposals)
    return proposals
274 | 
275 | 
def grouping(arr):
    """Split ``arr`` into runs of consecutive values.

    A new run starts wherever the difference to the previous element is
    not exactly 1.
    """
    run_starts = np.where(np.diff(arr) != 1)[0] + 1
    return np.split(arr, run_starts)
278 | 
279 | 
280 | """
281 | ramp up
282 | """
283 | 
284 | 
def get_current_consistency_weight(epoch, args):
    """Return ``args.consistency`` scaled by a linear ramp-up over
    ``args.consistency_rampup`` epochs.

    Consistency ramp-up from https://arxiv.org/abs/1610.02242
    """
    max_weight = args.consistency
    return max_weight * linear_rampup(epoch, args.consistency_rampup)
288 | 
289 | 
def sigmoid_rampup(current, rampup_length):
    """Exponential rampup from https://arxiv.org/abs/1610.02242

    A ``rampup_length`` of 0 yields 1.0. Otherwise ``current`` is clipped to
    ``[0, rampup_length]`` and the ramp value is returned as a float.
    """
    # NOTE: a function of the same name is defined earlier in this module;
    # this later definition is the one that stays bound.
    if rampup_length == 0:
        return 1.0
    progress = np.clip(current, 0.0, rampup_length)
    phase = 1.0 - progress / rampup_length
    return float(np.exp(-5.0 * phase * phase))
298 | 
299 | 
def linear_rampup(current, rampup_length):
    """Linear rampup: ``current / rampup_length``, saturating at 1.0."""
    # NOTE: a function of the same name is defined earlier in this module;
    # this later definition is the one that stays bound.
    assert current >= 0 and rampup_length >= 0
    if current >= rampup_length:
        return 1.0
    return current / rampup_length
307 | 
308 | 
def cosine_rampdown(current, rampdown_length):
    """Cosine rampdown from https://arxiv.org/abs/1608.03983

    Returns a float going from 1.0 at ``current == 0`` to 0.0 at
    ``current == rampdown_length``.
    """
    # NOTE: a function of the same name is defined earlier in this module;
    # this later definition is the one that stays bound.
    assert 0 <= current <= rampdown_length
    angle = np.pi * current / rampdown_length
    return float(.5 * (np.cos(angle) + 1))
313 | 


--------------------------------------------------------------------------------
/eval/detectionMAP.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import time
  3 | from scipy.signal import savgol_filter
  4 | import sys
  5 | import scipy.io as sio
  6 | from joblib import Parallel, delayed
  7 | import time
  8 | import multiprocessing as mp
  9 | import pdb
 10 | 
 11 | 
 12 | def str2ind(categoryname, classlist):
 13 |     return [i for i in range(len(classlist)) if categoryname == classlist[i]][0]
 14 | 
 15 | 
 16 | def smooth(v):
 17 |     return v
 18 |     # l = min(351, len(v)); l = l - (1-l%2)
 19 |     # if len(v) <= 3:
 20 |     #   return v
 21 |     # return savgol_filter(v, l, 1) #savgol_filter(v, l, 1) #0.5*(np.concatenate([v[1:],v[-1:]],axis=0) + v)
 22 | 
 23 | 
 24 | def filter_segments(segment_predict, videonames, ambilist, factor):
 25 |     ind = np.zeros(np.shape(segment_predict)[0])
 26 |     for i in range(np.shape(segment_predict)[0]):
 27 |         vn = videonames[int(segment_predict[i, 0])]
 28 |         for a in ambilist:
 29 |             if a[0] == vn:
 30 |                 gt = range(int(round(float(a[2]) * factor)), int(round(float(a[3]) * factor)))
 31 |                 pd = range(int(segment_predict[i][1]), int(segment_predict[i][2]))
 32 |                 IoU = float(len(set(gt).intersection(set(pd)))) / float(len(set(gt).union(set(pd))))
 33 |                 if IoU > 0:
 34 |                     ind[i] = 1
 35 |     s = [segment_predict[i, :] for i in range(np.shape(segment_predict)[0]) if ind[i] == 0]
 36 |     return np.array(s)
 37 | 
 38 | 
 39 | def getLocMAP(predictions, th, annotation_path, args):
 40 |     gtsegments = np.load(annotation_path + '/segments.npy', allow_pickle=True)
 41 |     gtlabels = np.load(annotation_path + '/labels.npy', allow_pickle=True)
 42 |     gtlabels = np.load(annotation_path + '/labels.npy', allow_pickle=True)
 43 |     videoname = np.load(annotation_path + '/videoname.npy', allow_pickle=True)
 44 |     videoname = np.array([v.decode('utf-8') for v in videoname])
 45 |     subset = np.load(annotation_path + '/subset.npy', allow_pickle=True)
 46 |     subset = np.array([s.decode('utf-8') for s in subset])
 47 |     # classlist = np.load(annotation_path + '/classlist.npy', allow_pickle=True)
 48 |     # classlist = np.array([c.decode('utf-8') for c in classlist])
 49 |     # classlist = np.load('./new_classlist.npy', allow_pickle=True)
 50 |     classlist = args.classlist
 51 |     duration = np.load(annotation_path + '/duration.npy', allow_pickle=True)
 52 |     ambilist = annotation_path + '/Ambiguous_test.txt'
 53 |     if args.feature_type == 'UNT':
 54 |         factor = 10.0 / 4.0
 55 |     else:
 56 |         factor = 25.0 / 16.0
 57 |     try:
 58 |         ambilist = list(open(ambilist, 'r'))
 59 |         ambilist = [a.strip('\n').split(' ') for a in ambilist]
 60 |     except:
 61 |         ambilist = []
 62 | 
 63 |     # keep training gtlabels for plotting
 64 |     gtltr = []
 65 |     for i, s in enumerate(subset):
 66 |         if subset[i] == 'validation' and len(gtsegments[i]):
 67 |             gtltr.append(gtlabels[i])
 68 |     gtlabelstr = gtltr
 69 | 
 70 |     # Keep only the test subset annotations
 71 |     gts, gtl, vn, dn = [], [], [], []
 72 |     for i, s in enumerate(subset):
 73 |         if subset[i] == 'test':
 74 |             gts.append(gtsegments[i])
 75 |             gtl.append(gtlabels[i])
 76 |             vn.append(videoname[i])
 77 |             dn.append(duration[i, 0])
 78 |     gtsegments = gts
 79 |     gtlabels = gtl
 80 |     videoname = vn
 81 |     duration = dn
 82 | 
 83 |     # keep ground truth and predictions for instances with temporal annotations
 84 |     gts, gtl, vn, pred, dn = [], [], [], [], []
 85 |     for i, s in enumerate(gtsegments):
 86 |         if len(s):
 87 |             gts.append(gtsegments[i])
 88 |             gtl.append(gtlabels[i])
 89 |             vn.append(videoname[i])
 90 |             pred.append(predictions[i])
 91 |             dn.append(duration[i])
 92 |     gtsegments = gts
 93 |     gtlabels = gtl
 94 |     videoname = vn
 95 |     predictions = pred
 96 | 
 97 |     # which categories have temporal labels ?
 98 |     templabelcategories = sorted(list(set([l for gtl in gtlabels for l in gtl])))
 99 | 
100 |     # the number index for those categories.
101 |     templabelidx = []
102 |     for t in templabelcategories:
103 |         templabelidx.append(str2ind(t, classlist))
104 | 
105 |     # process the predictions such that classes having greater than a certain threshold are detected only
106 |     predictions_mod = []
107 |     c_score = []
108 |     for p in predictions:
109 |         pp = - p;
110 |         [pp[:, i].sort() for i in range(np.shape(pp)[1])];
111 |         pp = -pp
112 |         c_s = np.mean(pp[:int(np.shape(pp)[0] / 8), :], axis=0)
113 |         ind = c_s > 0.0
114 |         c_score.append(c_s)
115 |         new_pred = np.zeros((np.shape(p)[0], np.shape(p)[1]), dtype='float32')
116 |         predictions_mod.append(p * ind)
117 |     predictions = predictions_mod
118 | 
119 |     detection_results = []
120 |     for i, vn in enumerate(videoname):
121 |         detection_results.append([])
122 |         detection_results[i].append(vn)
123 | 
124 |     ap = []
125 |     for c in templabelidx:
126 |         segment_predict = []
127 |         # Get list of all predictions for class c
128 |         for i in range(len(predictions)):
129 |             tmp = smooth(predictions[i][:, c])
130 |             threshold = np.max(tmp) - (np.max(tmp) - np.min(tmp)) * 0.5
131 |             vid_pred = np.concatenate([np.zeros(1), (tmp > threshold).astype('float32'), np.zeros(1)], axis=0)
132 |             vid_pred_diff = [vid_pred[idt] - vid_pred[idt - 1] for idt in range(1, len(vid_pred))]
133 |             s = [idk for idk, item in enumerate(vid_pred_diff) if item == 1]
134 |             e = [idk for idk, item in enumerate(vid_pred_diff) if item == -1]
135 |             for j in range(len(s)):
136 |                 aggr_score = np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c]
137 |                 if e[j] - s[j] >= 2:
138 |                     segment_predict.append([i, s[j], e[j], np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c]])
139 |                     detection_results[i].append(
140 |                         [classlist[c], s[j], e[j], np.max(tmp[s[j]:e[j]]) + 0.7 * c_score[i][c]])
141 |         segment_predict = np.array(segment_predict)
142 |         segment_predict = filter_segments(segment_predict, videoname, ambilist, factor)
143 | 
144 |         # Sort the list of predictions for class c based on score
145 |         if len(segment_predict) == 0:
146 |             return 0
147 |         segment_predict = segment_predict[np.argsort(-segment_predict[:, 3])]
148 | 
149 |         # Create gt (category c)list
150 |         segment_gt = [[i, gtsegments[i][j][0], gtsegments[i][j][1]] for i in range(len(gtsegments)) for j in
151 |                       range(len(gtsegments[i])) if str2ind(gtlabels[i][j], classlist) == c]
152 |         gtpos = len(segment_gt)
153 |         # Compare predictions and gt
154 |         tp, fp = [], []
155 |         for i in range(len(segment_predict)):
156 |             flag = 0.
157 |             for j in range(len(segment_gt)):
158 |                 if segment_predict[i][0] == segment_gt[j][0]:  # The same video
159 |                     gt = range(int(round(segment_gt[j][1] * factor)), int(round(segment_gt[j][2] * factor)))
160 |                     p = range(int(segment_predict[i][1]), int(segment_predict[i][2]))
161 |                     IoU = float(len(set(gt).intersection(set(p)))) / float(len(set(gt).union(set(p))))
162 |                     if IoU >= th:
163 |                         flag = 1.
164 |                         del segment_gt[j]
165 |                         break
166 |             tp.append(flag)
167 |             fp.append(1. - flag)
168 |         tp_c = np.cumsum(tp)
169 |         fp_c = np.cumsum(fp)
170 |         if sum(tp) == 0:
171 |             prc = 0.
172 |         else:
173 |             prc = np.sum((tp_c / (fp_c + tp_c)) * tp) / gtpos
174 |         ap.append(prc)
175 | 
176 |     return 100 * np.mean(ap)
177 | 
178 | 
def AntgetLocMAP(predictions, th, annotation_path, args):
    """Compute the class-averaged localization AP (in percent) at one IoU threshold.

    Ground truth is read from the ``.npy`` files under ``annotation_path``.
    Only videos whose subset is ``'validation'`` and that carry at least one
    annotated segment are evaluated.

    Args:
        predictions: per-video score arrays, ordered like the validation
            videos in ``subset.npy``. Each one is sliced as ``p[:, c]``
            (presumably (time, class) -- confirm against the caller).
        th: IoU a detection must reach to count as a true positive.
        annotation_path: directory holding ``segments.npy``, ``labels.npy``,
            ``videoname.npy``, ``subset.npy``, ``classlist.npy`` and,
            optionally, ``Ambiguous_test.txt``.
        args: options object; only ``args.feature_type`` is read.

    Returns:
        ``100 * mean(AP over classes)``, or ``0`` as soon as one class has no
        detection left after filtering.
    """
    gtsegments = np.load(annotation_path + '/segments.npy', allow_pickle=True)
    gtlabels = np.load(annotation_path + '/labels.npy', allow_pickle=True)
    videoname = np.load(annotation_path + '/videoname.npy', allow_pickle=True)
    videoname = np.array([v.decode('utf-8') for v in videoname])
    subset = np.load(annotation_path + '/subset.npy', allow_pickle=True)
    subset = np.array([s.decode('utf-8') for s in subset])
    classlist = np.load(annotation_path + '/classlist.npy', allow_pickle=True)
    classlist = np.array([c.decode('utf-8') for c in classlist])

    # Scale applied to ground-truth segment boundaries before they are
    # compared with prediction indices.
    if args.feature_type == 'UNT':
        factor = 10.0 / 4.0
    else:
        factor = 25.0 / 16.0

    # The ambiguity list is optional: a missing/unreadable file means "none".
    try:
        with open(annotation_path + '/Ambiguous_test.txt', 'r') as ambi_file:
            ambilist = [line.strip('\n').split(' ') for line in ambi_file]
    except (IOError, OSError):
        ambilist = []

    # Keep only the validation subset annotations.
    val_idx = [i for i, s in enumerate(subset) if s == 'validation']
    gtsegments = [gtsegments[i] for i in val_idx]
    gtlabels = [gtlabels[i] for i in val_idx]
    videoname = [videoname[i] for i in val_idx]

    # Keep ground truth and predictions for videos with temporal annotations.
    # predictions is indexed by position in the validation list.
    keep = [i for i, segs in enumerate(gtsegments) if len(segs)]
    gtsegments = [gtsegments[i] for i in keep]
    gtlabels = [gtlabels[i] for i in keep]
    videoname = [videoname[i] for i in keep]
    predictions = [predictions[i] for i in keep]

    # Categories that have temporal labels, and their indices in classlist.
    templabelcategories = sorted(set(l for labels in gtlabels for l in labels))
    templabelidx = [str2ind(t, classlist) for t in templabelcategories]

    # Video-level class score = mean of the top 1/8 of the scores per class
    # (all scores when the video is shorter than 8 steps). Classes whose
    # video-level score is not positive are zeroed out.
    predictions_mod = []
    c_score = []
    for p in predictions:
        sorted_desc = -np.sort(-p, axis=0)
        top_k = int(np.shape(p)[0] / 8)
        if top_k <= 0:
            top_k = np.shape(p)[0]
        c_s = np.mean(sorted_desc[:top_k, :], axis=0)
        c_score.append(c_s)
        predictions_mod.append(p * (c_s > 0.0))
    predictions = predictions_mod

    ap = []
    for c in templabelidx:
        # Collect all candidate segments for class c.
        segment_predict = []
        for i, video_pred in enumerate(predictions):
            tmp = smooth(video_pred[:, c])
            threshold = np.max(tmp) - (np.max(tmp) - np.min(tmp)) * 0.5
            # Zero padding guarantees every run has a rising and a falling edge.
            vid_pred = np.concatenate([np.zeros(1), (tmp > threshold).astype('float32'), np.zeros(1)], axis=0)
            vid_pred_diff = np.diff(vid_pred)
            starts = np.flatnonzero(vid_pred_diff == 1)
            ends = np.flatnonzero(vid_pred_diff == -1)
            for s, e in zip(starts, ends):
                if e - s >= 2:
                    aggr_score = np.max(tmp[s:e]) + 0.7 * c_score[i][c]
                    segment_predict.append([i, s, e, aggr_score])
        segment_predict = np.array(segment_predict)
        segment_predict = filter_segments(segment_predict, videoname, ambilist, factor)

        # NOTE(review): a single class without detections zeroes the whole
        # mAP; kept as-is because callers may depend on it.
        if len(segment_predict) == 0:
            return 0
        # Sort the predictions for class c by descending score.
        segment_predict = segment_predict[np.argsort(-segment_predict[:, 3])]

        # Ground-truth list for class c: [video index, start, end].
        segment_gt = [[i, gtsegments[i][j][0], gtsegments[i][j][1]] for i in range(len(gtsegments)) for j in
                      range(len(gtsegments[i])) if str2ind(gtlabels[i][j], classlist) == c]
        gtpos = len(segment_gt)

        # Greedy matching: each ground-truth segment can be claimed once.
        tp, fp = [], []
        for pred_seg in segment_predict:
            flag = 0.
            pred_frames = set(range(int(pred_seg[1]), int(pred_seg[2])))
            for j, gt_seg in enumerate(segment_gt):
                if pred_seg[0] != gt_seg[0]:
                    continue
                gt_frames = set(range(int(round(gt_seg[1] * factor)), int(round(gt_seg[2] * factor))))
                IoU = float(len(gt_frames & pred_frames)) / float(len(gt_frames | pred_frames))
                if IoU >= th:
                    flag = 1.
                    del segment_gt[j]
                    break
            tp.append(flag)
            fp.append(1. - flag)
        tp_c = np.cumsum(tp)
        fp_c = np.cumsum(fp)
        if sum(tp) == 0:
            prc = 0.
        else:
            prc = np.sum((tp_c / (fp_c + tp_c)) * tp) / gtpos
        ap.append(prc)

    return 100 * np.mean(ap)
323 | 
def getDetectionMAP(predictions, annotation_path, args, pool):
    """Evaluate localization mAP at IoU 0.1 ... 0.5 in parallel.

    Args:
        predictions: per-video predictions, forwarded unchanged.
        annotation_path: annotation directory; a path containing ``'Thumos'``
            selects ``getLocMAP``, anything else ``AntgetLocMAP``.
        args: options object, forwarded unchanged.
        pool: object exposing ``apply_async(func, args=...)`` whose results
            provide ``get()`` (e.g. a ``multiprocessing.Pool``).

    Returns:
        ``(dmap_list, iou_list)`` with one mAP value per IoU threshold.
    """
    iou_list = [0.1, 0.2, 0.3, 0.4, 0.5]
    func = getLocMAP if 'Thumos' in annotation_path else AntgetLocMAP
    # Dispatch the selected evaluator (previously getLocMAP was always used).
    results = [pool.apply_async(func, args=(predictions, iou, annotation_path, args)) for iou in iou_list]
    dmap_list = [r.get() for r in results]

    return dmap_list, iou_list
338 | 


--------------------------------------------------------------------------------
/libMR/MetaRecognition.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * MetaRecognition.cpp
  3 |  * Copyright 2011, Securics Inc.
   See accompanying LICENSE agreement for details on rights.
  5 | 
  6 | Parts of this technology are subject to SBIR data rights and as described in DFARS 252.227-7018 (June 1995) SBIR Data Rights which apply to Contract Number: N00014-11-C-0243 and STTR N00014-07-M-0421 to Securics Inc, 1870 Austin Bluffs Parkway, Colorado Springs, CO 80918
  7 | 
  8 | The Government's rights to use, modify, reproduce, release, perform, display, or disclose technical data or computer software marked with this legend are restricted during the period shown as provided in paragraph (b)(4) of the Rights in Noncommercial Technical Data and Computer Software-Small Business Innovative Research (SBIR) Program clause contained in the above identified contract.  Expiration of SBIR Data Rights: Expires four years after completion of the above cited project work for this or any other follow-on SBIR contract, whichever is later.
  9 | 
 10 | No restrictions on government use apply after the expiration date shown above.  Any reproduction of technical data, computer software, or portions thereof marked with this legend must also reproduce the markings.
 11 |  *
 12 | */
 13 | 
 14 | /** \mainpage
 15 | 
 16 |   
    This library provides support for meta-recognition, i.e. recognizing when a recognition system is working well and when it is not, and using that self-knowledge to improve the system.  It can be used for prediction of failure, fusion, score renormalization, SVM renormalization and converting SVM or recognition scores into statistically well-supported probability estimates.  The analysis is based on the recognition system's scores.
 18 | 
 19 | 
 20 | The fundamental ideas are described in 
 21 | 
 22 | "Meta-Recognition: The Theory and Practice of Recognition Score Analysis,"
 23 | Walter J. Scheirer, Anderson Rocha, Ross Micheals, Terrance E. Boult,
 24 | IEEE Transactions on Pattern Analysis and Machine Intelligence (T-PAMI),
 25 | 33(8), pp 1689--1695, Aug, 2011.
 26 | 
 27 | and SVM support as described in 
 28 | 
 29 | "Multi-Attribute Spaces: Calibration for Attribute Fusion and Similarity Search,"
 30 | Walter J. Scheirer, Neeraj Kumar, Peter N. Belhumeur, Terrance E. Boult,
 31 | Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR),
 32 | June 2012.
 33 |  
 34 | 
The underlying extreme value theory provides a strong theoretical basis for the computations, but to make it useful one must transform the data into the proper frame.   The C++ version provides objects that can compute and store information about the transform and then provide for prediction, w-score values (probability estimates), or renormalization of a vector of data.
 36 |    
  The library also contains "C" interface functions for very basic Weibull usage for Meta-Recognition.
 38 |   The C-based library  has a number of STRONG assumptions you must follow as we cannot test for all of them.
 39 |     1) All fitting and testing are presuming  "larger is better",  If you are fitting something where smaller is better you need to transform it before fitting. 
 40 |     2) All data is positive (okay we can and do test for that, but better to know upfront what you are doing) 
    3) There must be sufficient range in your data to actually fit the Weibull.  If all the data is the same, or nearly the same, it may fail to converge and will report errors.
 42 | 
 43 |     While free for non-commercial use this library is subject to the license restrictions, see LICENSE.TXT  for details.  
 44 |     
 45 | */
 46 | 
 47 | #include "MetaRecognition.h"
 48 | #include <string.h> 
 49 | //#include <stdio.h>
 50 | #include <stdlib.h>
 51 | 
 52 | #ifdef __cplusplus
 53 | extern "C" {
 54 | #endif
 55 | extern int weibull_fit_verbose_debug;
 56 | #ifdef __cplusplus
 57 | }
 58 | #endif
 59 | MetaRecognition::MetaRecognition(int scores_to_dropx,  int fitting_sizex, bool verb, double alphax, int translate_amountx):
 60 |   scores_to_drop(scores_to_dropx),verbose(verb),fitting_size(fitting_sizex),alpha(alphax),translate_amount(translate_amountx)
 61 | {
 62 |   memset(parmhat,0,sizeof(parmhat));
 63 |   memset(parmci,0,sizeof(parmci));
 64 |   sign = 1;
 65 |   ftype = complement_reject;
 66 |   small_score=0;
 67 |   isvalid=false;
 68 |   if(verb) weibull_fit_verbose_debug=1;
 69 |   else weibull_fit_verbose_debug=0;
 70 | }
 71 | 
 72 | MetaRecognition::~MetaRecognition()
 73 | {
 74 |   //  free(parmhat);
 75 |   //  free(parmci);
 76 | }
 77 | 
 78 | bool MetaRecognition::is_valid(){
 79 |   return isvalid;
 80 | }
 81 | 
 82 | void MetaRecognition::set_translate(double t){
 83 |   translate_amount = t;
 84 |   isvalid=false;
 85 | };
 86 | 
 87 | 
 88 | void MetaRecognition::Reset(){
 89 |   memset(parmhat,0,sizeof(parmhat));
 90 |   memset(parmci,0,sizeof(parmci));
 91 |   sign = 1;
 92 |   scores_to_drop = 0;
 93 |   small_score=0;
 94 |   isvalid=false;
 95 | }
 96 | 
 97 | 
 98 | int compare_sort_decending (const void * a, const void * b)
 99 | {
100 | 	const double *da = (const double *) a;
101 | 	const double *db = (const double *) b;
102 | 	return  (*da < *db) - (*da > *db);
103 | }
104 | 
// qsort comparator for doubles, ascending order: returns 1 when *a > *b,
// -1 when *a < *b and 0 otherwise (including unordered NaN comparisons).
int compare_sort_assending (const void * a, const void * b)
{
	const double lhs = *static_cast<const double *>(a);
	const double rhs = *static_cast<const double *>(b);
	if (lhs > rhs) return 1;
	if (lhs < rhs) return -1;
	return 0;
}
111 | 
// Map a bool to the string literal "true" or "false".
inline const char * const BoolToString(bool b)
{
	if (b) {
		return "true";
	}
	return "false";
}
116 | 
// Map a bool to 1 (true) or 0 (false).
inline int  const BoolToInt(bool b)
{
	if (b) {
		return 1;
	}
	return 0;
}
121 | 
// Parse s with atoi and report whether the resulting integer is non-zero.
inline const bool IntToBool(const char * s)
{
  return atoi(s) != 0;
}
128 | 
//Wraps calls to the real weibull_inv and weibull_cdf functions and handles properly translating the data passed.
//It may eventually be a good idea to move the real implementations of the functions here
//IF we do away with the C implementation. For now this allows for backward compatibility with
//older code.
// Inv computes the inverse CDF, i.e. returns y such that CDF(y) = x
134 | double MetaRecognition::Inv(double x)
135 | {
136 |   if(!isvalid) return -9999.0;
137 |   double score = weibull_inv(x, parmhat[0], parmhat[1]);
138 |   return (score - translate_amount + small_score)*sign;
139 | }
140 | 
141 | double MetaRecognition::CDF(double x)
142 | {
143 |   if(!isvalid) return -9999.0;
144 |   double translated_x = x*sign + translate_amount - small_score;
145 |   double wscore=weibull_cdf(translated_x, parmhat[0], parmhat[1]);
146 |   if(ftype==complement_model || ftype==positive_model) return 1-wscore;
147 |   return wscore;
148 | };
149 | 
150 | double MetaRecognition::W_score(double x){
151 |     return CDF(x);
152 | };
153 | 
154 | bool MetaRecognition::Predict_Match(double x, double threshold){
155 |   double score = Inv(threshold);
156 |   if(sign <0)   return (x < score);
157 |   return (x > score);
158 | };
159 | 
160 | int MetaRecognition::ReNormalize(double *invec, double *outvec, int length)
161 | {
162 |   if(!isvalid) return -9997.0;
163 |   int rval=1;
164 |   for(int i=0; i< length; i++){
165 |     outvec[i] = W_score(invec[i]);
166 |   }
167 |   return rval;
168 | }
169 | 
170 | 
171 | //used by weibull__evt_low and weibull__evt_high, which sets the desired sign(low -1, high 1)
172 | //before passing to generic
173 | int MetaRecognition::EvtGeneric(double* inputData, int inputDataSize, int inward, double x)
174 | {
175 |   double * inputDataCopy = (double *) malloc(sizeof(double) * inputDataSize);
176 | 
177 |   double * dataPtr = NULL;
178 |   int icnt=0;
179 |   if(!inward && (sign > 0) ) {
180 |     icnt = inputDataSize;
181 |     memcpy(inputDataCopy,inputData, inputDataSize*sizeof(double));
182 |   }
183 |   if(!inward && (sign < 0) ){
184 |     for(int i=0; i < inputDataSize; i++)       inputDataCopy[i] = (inputData[i]*sign);       //doing extremes just flip sign if needed
185 |     icnt = inputDataSize;
186 |   }
187 |   else if(inward  && (sign < 0)) { /* this is fit above x but  approaching x */ 
188 |     for(int i=0; i < inputDataSize; i++)       {
189 |       if(inputData[i] > x) {
190 |         inputDataCopy[icnt++] = (inputData[i]*sign);       //copy what is above x, and flip signs (so biggest is important)
191 |       } 
192 |     }
193 |   } else if(inward  && (sign > 0)) { /* this is fit below x but  approaching x */ 
194 |       for(int i=0; i < inputDataSize; i++)       {
195 |         if(inputData[i] < x) {
196 |           inputDataCopy[icnt++] = (inputData[i]);       //copy only what is above x. 
197 |         } 
198 |       }
199 |   } 
200 | 
201 |   //sort data and get smallest score
202 |   qsort(inputDataCopy, icnt , sizeof(double), compare_sort_decending);
203 | 
204 |   //Want only the top fitting_size scores but als noneed to adap if dropping top score
205 |   if(scores_to_drop>0){
206 |     dataPtr=inputDataCopy+scores_to_drop;
207 |   } else {
208 |     dataPtr=inputDataCopy;
209 |   }
210 | 
211 |   small_score = dataPtr[fitting_size-1];
212 |   
213 |   for(int i=0; i < fitting_size; i++)
214 |     {	
215 |       //translate and subtract small score
216 |       dataPtr[i] = dataPtr[i] + translate_amount - small_score;
217 |     }
218 |   
219 |  
220 |   int rval =   weibull_fit(parmhat, parmci, dataPtr, alpha, fitting_size);
221 |   isvalid= true;
222 |   if(rval != 1) Reset();
223 |   free(inputDataCopy);
224 |   return rval;
225 | }
226 | 
//Wrapper fitting functions FitLow and FitHigh to make it simpler for new users of the library.
228 | int MetaRecognition::FitLow(double* inputData, int inputDataSize, int fsize)
229 | {
230 |   if(fsize>0)    fitting_size=fsize;
231 |   sign = -1;
232 |   return EvtGeneric(inputData, inputDataSize);
233 | }
234 | 
235 | int MetaRecognition::FitHigh(double* inputData, int inputDataSize, int fsize)
236 | {
237 |   if(fsize>0)    fitting_size=fsize;
238 |   sign = 1;
239 |   return EvtGeneric(inputData, inputDataSize);
240 | }
241 | 
242 | int MetaRecognition::FitSVM(svm_node_libsvm* SVMdata, int inputDataSize, int label_of_interest, bool label_has_positive_score, int fit_type, int fit_size )
243 | {
244 | 
245 |   Reset();
246 |   ftype = (MR_fitting_type)fit_type;
247 |   fitting_size = fit_size;
248 |   double * inputDataCopy = (double *) malloc(sizeof(double) * inputDataSize);
249 |   int sign_of_label_of_interest=0;
250 |   double * dataPtr = NULL;
251 |   int sign_of_expected_score=-1;
252 |   if(label_has_positive_score) sign_of_expected_score=1;
253 | 
254 |   int icnt=0;
255 |   bool rejection=(ftype==complement_reject || ftype == positive_reject);
256 |   if(rejection) {  // default we fit on the complement class and then do rejection to determine probability
257 |     for(int i=0; i < inputDataSize; i++) {
258 |       if(SVMdata[i].index != label_of_interest) inputDataCopy[icnt++] = (SVMdata[i].value);       //doing extremes just flip sign if needed
259 |       else {
260 |         if(SVMdata[i].value >0) sign_of_label_of_interest++;
261 |         else sign_of_label_of_interest--;
262 |       }
263 |     }
264 |   } else {
265 |     for(int i=0; i < inputDataSize; i++) {
266 |       if(SVMdata[i].index == label_of_interest) inputDataCopy[icnt++] = (SVMdata[i].value);       //doing extremes just flip sign if needed
267 |       else {
268 |         if(SVMdata[i].value >0) sign_of_label_of_interest++;
269 |         else sign_of_label_of_interest--;
270 |       }
271 |     }
272 |   }
273 |   if (verbose && sign_of_label_of_interest * sign_of_expected_score > 0){
274 |     printf("In MetaRecognition::FitSVM,  warning: possible inconsistency average of the non-matching data has score %d, but expected sign is %d\n",
275 |            sign_of_label_of_interest, -sign_of_expected_score);
276 |   }
277 | 
278 | 
279 |   /* expected sign combines with reject_complement to determine if we have to flip or not.
280 |      We flip if positives scores, with smaller is better, is the goal, 
281 |      we flip if sign_of_expected_score >0 and !force_rejection
282 |      we flip if sign_of_expected_score <0 and force_rejection */
283 | 
284 |   if((!label_has_positive_score  && rejection)
285 |      || (label_has_positive_score  && !rejection)) {
286 |     sign = -1;
287 |     for(int i=0; i < icnt; i++) {
288 |       inputDataCopy[i] *= -1;       //doing extremes just flip sign if needed
289 |     }
290 |   } else sign=1;
291 | 
292 |   //sort data and get smallest score
293 |   qsort(inputDataCopy, icnt , sizeof(double), compare_sort_decending);
294 | 
295 |   //Want only the top fitting_size scores but als noneed to adap if dropping top score
296 |   if(scores_to_drop){
297 |     dataPtr=inputDataCopy+scores_to_drop;
298 |   } else {
299 |     dataPtr=inputDataCopy;
300 |   }
301 | 
302 |   small_score = dataPtr[fitting_size - 1];
303 |   
304 |   for(int i=0; i < fitting_size; i++)
305 |     {	
306 |       //translate and subtract small score
307 |       dataPtr[i] = dataPtr[i] + translate_amount - small_score;
308 |     }
309 |   
310 |   int rval = weibull_fit(parmhat, parmci, dataPtr, alpha, fitting_size);
311 | 
312 |   isvalid= true;
313 |   if(rval != 1) Reset();
314 |   free(inputDataCopy);
315 |   printf("Completed weibull fitting\n");  
316 |   return rval;
317 | };
318 | 
319 | void MetaRecognition::Save(std::ostream &outputStream) const
320 | {
321 | 	if(outputStream.good() && isvalid)
322 | 	{
323 | 		try {
324 | 		outputStream.precision(21);
325 | 		outputStream.setf(std::ios::scientific,std::ios::floatfield); 
326 | 		outputStream << parmhat[0] << " " << parmhat[1] <<   "  "  
327 |                              << parmci[0] << " " << parmci[1] << " " 
328 |                              << parmci[2] << " " << parmci[3] << "  " 
329 |                              << sign << " " 
330 |                              << alpha << " " 
331 |                              << (int) ftype << " " 
332 |                              << fitting_size << " " 
333 |                              << translate_amount << " " 
334 |                              << small_score<< " "
335 |                              << scores_to_drop
336 |                              << std::endl;
337 | 		} catch(std::bad_alloc& e) {
338 | 			std::cout << "Could not allocate the required memory, failed with error: '" << e.what() << "'" << std::endl;
339 | 		}
340 | 	}
341 | }
342 | 
343 | std::ostream& operator<< ( std::ostream& os, const MetaRecognition& mr )
344 |   {
345 |     mr.Save(os);
346 |     return os;
347 |   }
348 | 
349 | std::istream& operator>> ( std::istream& is, MetaRecognition& mr )
350 |   {
351 |     mr.Load(is);
352 |     return is;
353 |   }
354 | 
355 | 
356 | void MetaRecognition::Load(std::istream &inputStream)
357 | {
358 |   isvalid=false;
359 |   int temp;
360 |   if(inputStream.good())
361 |     {
362 |       int iftype;
363 |       inputStream >> parmhat[0] >> parmhat[1]
364 |                   >> parmci[0] >> parmci[1]
365 |                   >> parmci[2] >> parmci[3]
366 |                   >> sign 
367 |                   >> alpha 
368 |                   >> iftype 
369 |                   >> fitting_size 
370 |                   >> translate_amount 
371 |                   >> small_score
372 |                   >> scores_to_drop;
373 |       isvalid=true;
374 |       ftype =  (MR_fitting_type) iftype;
375 |     }
376 | }
377 | 
378 | void MetaRecognition::Save(FILE *outputFile) const
379 | {
380 | 	if((outputFile != NULL) && !feof(outputFile))
381 | 	{
382 |           fprintf(outputFile, 
383 |                   "%21.18g %21.18g  " //parmaht
384 |                   "%21.18g %21.18g " //parmci 
385 |                   "%21.18g %21.18g  "
386 |                   "%d %f %d %d "  //sign, alpha, fitting size
387 |                   "%d %21.18g %d\n", //translate,  small_score, scores_to_drop
388 |                   parmhat[0], parmhat[1],
389 |                   parmci[0],parmci[1],
390 |                   parmci[2],parmci[3],
391 |                   sign, alpha, (int) ftype,fitting_size,
392 |                   translate_amount, small_score, scores_to_drop);
393 | 	}
394 | }
395 | 
396 | void MetaRecognition::Load(FILE *inputFile)
397 | {
398 |   int temp, iftype;
399 |   int retcode=0;
400 |   isvalid=false;
401 |   if((inputFile != NULL) && !feof(inputFile))
402 |     {
403 |       
404 |       retcode = fscanf(inputFile, 
405 |                        "%lf %lf " //parmaht
406 |                        "%lf %lf " //parmci 
407 |                        "%lf %lf "
408 |                        "%d %lf %d %d "  //sign, alpha, fitting size
409 |                        "%d %lf %d ", //translate, small_score, scores_to_drop, 
410 |                        parmhat, parmhat+1,
411 |                        parmci,parmci+1,
412 |                        parmci+2,parmci+3,
413 |                        &sign, &alpha, &iftype, &fitting_size,
414 |                        &translate_amount, &small_score, &scores_to_drop);
415 |       isvalid=true;
416 |       ftype =  (MR_fitting_type) iftype;
417 |     }
418 | }
419 | 
420 | 
421 | void MetaRecognition::Save(char* filename) const
422 | {
423 |   FILE*  fp = fopen(filename,"w");
424 |   if(fp) {
425 |     Save(fp);
426 |     fclose(fp);
427 |   } else if(strlen(filename)>0) 
428 |     fprintf(stderr,"SaveWeibull could not open file |%s|\n",filename);
429 |   else     fprintf(stderr,"SaveWeibull called with null filename\n");
430 | }
431 | 
432 | void MetaRecognition::Load(char* filename){
433 |   FILE*  fp = fopen(filename,"r");
434 |   isvalid=false;
435 |   if(fp) {
436 |     Load(fp);
437 |     isvalid=true;
438 |     fclose(fp);
439 |   } else if(strlen(filename)>0) 
440 |     fprintf(stderr,"LoadWeibull could not open file |%s|\n",filename);
441 |   else     fprintf(stderr,"LoadWeibull called with null filename\n");
442 | 
443 | }
444 | 
445 | std::string MetaRecognition::to_string() {
446 |     std::stringstream oss;
447 |     this->Save(oss);
448 |     return oss.str();
449 | }
450 | void MetaRecognition::from_string(std::string input) {
451 |     std::stringstream iss(input);
452 |     this->Load(iss);
453 | }
454 | 
455 | 
456 | int MetaRecognition::set_fitting_size(int nsize){ isvalid=false; return fitting_size=nsize;}
457 | int MetaRecognition::get_fitting_size(){ return fitting_size;}
458 | int MetaRecognition::get_translate_amount(){ return translate_amount;}
459 | int MetaRecognition::set_translate_amount(int ntrans) {isvalid=false; return translate_amount=ntrans;}
460 | double MetaRecognition::get_small_score(){return small_score;}
461 | double MetaRecognition::set_small_score(double nscore){isvalid=false;  return small_score=nscore;}
462 | int MetaRecognition::get_sign(){return sign;}
463 | int MetaRecognition::set_sign(int nsign){return sign=nsign;}
464 | 


--------------------------------------------------------------------------------
/eval/eval_detection.py:
--------------------------------------------------------------------------------
  1 | # This code is originally from the official ActivityNet repo
  2 | # https://github.com/activitynet/ActivityNet
  3 | # Small modification from ActivityNet Code
  4 | from __future__ import print_function
  5 | import json
  6 | import numpy as np
  7 | import pandas as pd
  8 | from joblib import Parallel, delayed
  9 | from scipy.signal import savgol_filter, medfilt
 10 | import sys
 11 | import scipy.io as sio
 12 | import os
 13 | from eval.utils_eval import get_blocked_videos
 14 | from eval.utils_eval import interpolated_prec_rec
 15 | from eval.utils_eval import segment_iou
 16 | import pdb
 17 | 
 18 | 
 19 | def str2ind(categoryname, classlist):
 20 |     return [i for i in range(len(classlist)) if categoryname == classlist[i]][0]
 21 | 
 22 | 
 23 | def strlist2indlist(strlist, classlist):
 24 |     return [str2ind(s, classlist) for s in strlist]
 25 | 
 26 | 
 27 | def sigmoid(x, eps=1e-10):
 28 |     return 1 / (1 + np.exp(-x) + eps)
 29 | 
 30 | 
 31 | def smooth(v, order=2, lens=200):
 32 |     # return v
 33 |     l = min(lens, len(v))
 34 |     l = l - (1 - l % 2)
 35 |     if len(v) <= order:
 36 |         return v
 37 |     return savgol_filter(v, l, order)
 38 | 
 39 | 
 40 | def smooth_medfilt(v, lens=200):
 41 |     l = min(lens, len(v))
 42 |     l = l - (1 - l % 2)
 43 |     if len(v) <= lens:
 44 |         return v
 45 |     return medfilt(v, l)
 46 | 
 47 | 
 48 | def filter_segments(segment_predict, videonames, ambilist):
 49 |     ind = np.zeros(np.shape(segment_predict)[0])
 50 |     for i in range(np.shape(segment_predict)[0]):
 51 |         vn = videonames[int(segment_predict[i, 0])]
 52 |         for a in ambilist:
 53 |             if a[0] == vn:
 54 |                 gt = range(
 55 |                     int(round(float(a[2]) * 25 / 16)), int(round(float(a[3]) * 25 / 16))
 56 |                 )
 57 |                 gt = range(
 58 |                     int(round(float(a[2]) * 25 / 16)), int(round(float(a[3]) * 25 / 16))
 59 |                 )
 60 |                 pd = range(int(segment_predict[i][1]), int(segment_predict[i][2]))
 61 |                 IoU = float(len(set(gt).intersection(set(pd)))) / float(
 62 |                     len(set(gt).union(set(pd)))
 63 |                 )
 64 |                 if IoU > 0:
 65 |                     ind[i] = 1
 66 |     s = [
 67 |         segment_predict[i, :]
 68 |         for i in range(np.shape(segment_predict)[0])
 69 |         if ind[i] == 0
 70 |     ]
 71 |     return np.array(s)
 72 | 
 73 | 
 74 | def moving_smooth(y, box_size):
 75 |     assert box_size % 2 == 1, 'The bosx size should be ood'
 76 |     box = np.ones(box_size) / box_size
 77 |     y = np.array([y[0]] * (box_size // 2) + y.tolist() + [y[-1]] * (box_size // 2))
 78 |     y_smooth = np.convolve(y, box, mode='valid')
 79 |     return y_smooth
 80 | 
 81 | 
 82 | def gaussian_smooth(score, sigma=30):
 83 |     # r = score.shape[0] //39
 84 |     # if r%2==0:
 85 |     #     r+=1
 86 |     r = 125
 87 |     if r > score.shape[0] // 2:
 88 |         r = score.shape[0] // 2 - 1
 89 |     if r % 2 == 0:
 90 |         r += 1
 91 |     gaussian_temp = np.ones(r * 2 - 1)
 92 |     for i in range(r * 2 - 1):
 93 |         gaussian_temp[i] = np.exp(-(i - r) ** 2 / (2 * sigma ** 2)) / (sigma * np.sqrt(2 * np.pi))
 94 |     new_score = score
 95 |     for i in range(r, score.shape[0] - r):
 96 |         new_score[i] = np.dot(score[i - r:i + r - 1], gaussian_temp)
 97 |     return new_score
 98 | 
 99 | 
def min_max_norm(p):
    """Linearly rescale ``p`` so its minimum maps to 0 and its maximum to 1.

    Parameters
    ----------
    p : array_like
        Non-empty numeric data.

    Returns
    -------
    np.ndarray
        ``(p - min) / (max - min)``.  When every element of ``p`` is equal
        the range is zero; an all-zero float array of the same shape is
        returned instead of the NaNs a 0 / 0 division would produce.
    """
    min_p = np.min(p)
    max_p = np.max(p)
    span = max_p - min_p
    if span == 0:
        return np.zeros(np.shape(p), dtype=float)
    return (p - min_p) / span
104 | 
105 | 
class ANETdetection(object):
    """Evaluate temporal action detections against open-set ground truth.

    Ground-truth segments are loaded from ``.npy`` files at construction
    time.  Labels are indices into ``args.classlist``; every index at or
    above ``args.n_known_class`` is collapsed into the single label
    ``n_known_class`` (the "unknown" class).  ``self.prediction`` is
    initialised to ``None`` and must be assigned by the caller (a DataFrame
    with at least a ``"label"`` column) before :meth:`evaluate` is used.
    """

    def __init__(
            self,
            annotation_path='./Thumos14reduced-Annotations',
            tiou_thresholds=np.array([0.1, 0.3, 0.5]),
            args=None,
            subset="test",
            verbose=False
    ):
        """Store the configuration and load the ground truth.

        Parameters
        ----------
        annotation_path : str
            Annotation directory, joined onto ``args.path_dataset``.
        tiou_thresholds : 1darray
            Temporal IoU thresholds at which AP is computed.
        args : namespace
            Must provide ``path_dataset``, ``n_known_class`` and
            ``classlist``; ``topk`` / ``topk2`` are read by the video-score
            helpers.  Required in practice despite the ``None`` default,
            because its attributes are read unconditionally here.
        subset : str
            Only annotations whose subset entry equals this value are kept.
        verbose : bool
            Print instance counts in :meth:`evaluate`.
        """
        self.subset = subset
        self.args = args
        self.tiou_thresholds = tiou_thresholds
        self.verbose = verbose
        self.ap = None
        self.annotation_path = os.path.join(args.path_dataset, annotation_path)
        self.prediction = None
        # Number of known classes; label ``n_known_class`` means "unknown".
        self.n_known_class = args.n_known_class

        self._import_ground_truth(self.annotation_path)

    def _import_ground_truth(self, annotation_path):
        """Read the annotation files and build ``self.ground_truth``.

        Sets ``self.ambilist``, ``self.classlist``, ``self.idx_to_take``,
        ``self.videoname`` and ``self.ground_truth`` (a DataFrame with the
        columns ``video-id``, ``t-start``, ``t-end`` and ``label``).
        Segment boundaries are multiplied by ``25 / 16`` and rounded.
        """
        def load(name):
            # NOTE: allow_pickle=True executes pickled content on load; only
            # use annotation files from a trusted source.
            return np.load(os.path.join(annotation_path, name), allow_pickle=True)

        gtsegments = load("segments.npy")
        gtlabels = load("labels.npy")
        videoname = np.array([v.decode("utf8") for v in load("videoname.npy")])
        subset = np.array([s.decode("utf-8") for s in load("subset.npy")])
        classlist = self.args.classlist

        # The ambiguous-segment list is optional: a missing or unreadable
        # file simply means "no ambiguous segments".
        try:
            with open(os.path.join(annotation_path, "Ambiguous_test.txt"), "r") as fh:
                ambilist = [line.strip("\n").split(" ") for line in fh]
        except OSError:
            ambilist = []

        self.ambilist = ambilist
        self.classlist = classlist

        subset_ind = (subset == self.subset)
        gtsegments = gtsegments[subset_ind]
        gtlabels = gtlabels[subset_ind]
        videoname = videoname[subset_ind]

        # Data cleaning: drop videos that have no annotated segment.
        self.idx_to_take = [i for i, s in enumerate(gtsegments)
                            if len(s) > 0]
        gtsegments = gtsegments[self.idx_to_take]
        gtlabels = gtlabels[self.idx_to_take]
        videoname = videoname[self.idx_to_take]

        self.videoname = videoname

        video_lst, t_start_lst, t_end_lst, label_lst = [], [], [], []
        for name, segments, labels in zip(videoname, gtsegments, gtlabels):
            for j, segment in enumerate(segments):
                video_lst.append(str(name))
                t_start_lst.append(round(segment[0] * 25 / 16))
                t_end_lst.append(round(segment[1] * 25 / 16))
                # Segments of any unknown class share the single label
                # ``n_known_class`` (the class right after the known ones).
                this_label = str2ind(labels[j], self.classlist)
                if this_label > self.n_known_class - 1:
                    this_label = self.n_known_class
                label_lst.append(this_label)

        self.ground_truth = pd.DataFrame(
            {
                "video-id": video_lst,
                "t-start": t_start_lst,
                "t-end": t_end_lst,
                "label": label_lst,
            }
        )

    def get_topk_mean(self, x, k, axis=0):
        """Return the mean of the ``k`` largest values of ``x`` along ``axis``.

        If ``k`` exceeds the length of that axis all values are averaged.
        """
        # Bring the reduced axis to the front so that slicing and averaging
        # act on the same axis that was sorted.
        ordered = np.moveaxis(np.sort(x, axis=axis), axis, 0)
        return np.mean(ordered[-int(k):], axis=0)

    def _get_vid_score(self, pred):
        """Video-level class scores from per-snippet predictions.

        ``pred`` is ``(n, class)``.  Without ``args`` the score is the mean
        of the top 8 rows per class.  Otherwise ``pred`` is cut into windows
        of ``args.topk`` rows (the last window takes the remainder), the
        top-``args.topk2`` mean is taken in each window, and the per-class
        maximum over windows is returned.

        Returns ``(pred, class_scores)``.
        """
        if self.args is None:
            k = 8
            topk_mean = self.get_topk_mean(pred, k)
            return pred, topk_mean

        win_size = int(self.args.topk)
        split_list = [i * win_size for i in range(1, int(pred.shape[0] // win_size))]
        splits = np.split(pred, split_list, axis=0)

        # Average over the topk2 rows of each window, then keep the best window.
        tops = np.array(
            [self.get_topk_mean(each_split, self.args.topk2) for each_split in splits]
        )
        c_s = np.max(tops, axis=0)
        return pred, c_s

    def _get_vid_score_1(self, p):
        """Video-level scores as the mean of the top eighth of rows per column.

        When ``p`` has fewer than 8 rows every row is averaged.  ``p`` itself
        is not modified.  Returns ``(p, class_scores)``.
        """
        descending = -np.sort(-p, axis=0)
        n_rows = np.shape(descending)[0]
        top = int(n_rows / 8)
        if top > 0:
            c_s = np.mean(descending[:top, :], axis=0)
        else:
            c_s = np.mean(descending[:n_rows, :], axis=0)
        return p, c_s

    def _get_att_topk_mean(self, p, att_logits, k):
        """Attention-weighted score over the ``k`` rows with highest attention.

        The rows are selected by ``argsort`` of ``att_logits`` along axis 0.
        The result is ``sigmoid(mean attention logit) * sigmoid(mean class
        logit)`` over those rows.
        """
        # int() mirrors get_topk_mean so that a float-valued k is accepted.
        args_topk = np.argsort(att_logits, axis=0)[-int(k):]
        att_term = 1 / (1 + np.exp(-np.mean(att_logits[args_topk], axis=0)))
        return att_term / (1 + np.exp(-np.mean(p[args_topk], axis=0)))

    def _get_vid_score_2(self, p, att_logits):
        """Windowed counterpart of :meth:`_get_vid_score` using attention.

        Uses :meth:`_get_att_topk_mean` with ``k = 8`` on the whole video
        when ``args`` is ``None``; otherwise per window of ``args.topk`` rows
        with ``k = args.topk2``, followed by the maximum over windows.

        Returns ``(p, class_scores)``.
        """
        if self.args is None:
            k = 8
            topk_mean = self._get_att_topk_mean(p, att_logits, k)
            return p, topk_mean

        win_size = int(self.args.topk)
        split_list = [i * win_size for i in range(1, int(p.shape[0] // win_size))]
        p_splits = np.split(p, split_list, axis=0)
        att_splits = np.split(att_logits, split_list, axis=0)

        tops = np.array(
            [
                self._get_att_topk_mean(p_s, a_s, self.args.topk2)
                for p_s, a_s in zip(p_splits, att_splits)
            ]
        )
        c_s = np.max(tops, axis=0)
        return p, c_s

    def OIC_Cofidence(self, s, e, cls_pred, c_s, _lambda=0.25):
        """Compute inner/outer mean scores for each proposal ``[s[i], e[i])``.

        The outer regions extend the proposal by ``_lambda`` times its length
        before the start and after the end, clipped to the array bounds.

        NOTE(review): the method is unfinished -- the three scores are
        computed but never combined or returned, ``c_s`` is unused, and the
        return value is ``None``.
        """
        for i in range(len(s)):
            seg = cls_pred[s[i]:e[i]]
            inner_score = np.mean(seg)
            proposal_len = e[i] - s[i]
            outer_s = max(0, int(s[i] - proposal_len * _lambda))
            # The back outer region lies *after* the proposal end, hence "+".
            outer_e = min(cls_pred.shape[0], int(e[i] + proposal_len * _lambda))

            front_outer_score = np.mean(cls_pred[outer_s:s[i]])
            back_outer_score = np.mean(cls_pred[e[i]:outer_e])

    def _get_predictions_with_label(self, prediction_by_label, cidx):
        """Return all predictions with label ``cidx``.

        An empty DataFrame is returned (and a warning printed) when the
        grouped predictions contain no such label.
        """
        try:
            return prediction_by_label.get_group(cidx).reset_index(drop=True)
        except KeyError:
            print("Warning: No predictions of label '%s' were provdied." % cidx)
            return pd.DataFrame()

    def wrapper_compute_average_precision(self):
        """Compute the average precision of every class label.

        Labels ``0 .. n_known_class`` are evaluated, the last one being the
        merged unknown class.  Every label must occur in the ground truth,
        otherwise ``get_group`` raises ``KeyError``.

        Returns an array of shape ``(len(tiou_thresholds), n_known_class + 1)``.
        """
        n_labels = self.n_known_class + 1
        ap = np.zeros((len(self.tiou_thresholds), n_labels))

        # Group once so that the per-label lookups are cheap.
        ground_truth_by_label = self.ground_truth.groupby("label")
        prediction_by_label = self.prediction.groupby("label")

        results = Parallel(n_jobs=3)(
            delayed(compute_average_precision_detection)(
                ground_truth=ground_truth_by_label.get_group(cidx).reset_index(
                    drop=True
                ),
                prediction=self._get_predictions_with_label(
                    prediction_by_label, cidx
                ),
                tiou_thresholds=self.tiou_thresholds,
            )
            for cidx in range(n_labels)
        )

        for cidx, class_ap in enumerate(results):
            ap[:, cidx] = class_ap

        return ap

    def evaluate(self):
        """Evaluate ``self.prediction`` against the ground truth.

        Stores the per-threshold, per-label AP matrix in ``self.ap``.

        Returns
        -------
        known_mAP : 1darray
            For each tIoU threshold, the mean AP over the known classes.
        unknown_mAP : 1darray
            For each tIoU threshold, the AP of the merged unknown class.
        """
        if self.verbose:
            nr_gt = len(self.ground_truth)
            print("\tNumber of ground truth instances: {}".format(nr_gt))
            nr_pred = len(self.prediction)
            print("\tNumber of predictions: {}".format(nr_pred))

        self.ap = self.wrapper_compute_average_precision()

        # mAP over the known classes (every column but the last).
        known_mAP = self.ap[:, :-1].mean(axis=1)
        # AP of the unknown class (last column).
        unknown_mAP = self.ap[:, -1]

        return known_mAP, unknown_mAP

    def save_info(self, fname):
        """Pickle ``self.prediction`` and ``self.ground_truth`` to ``fname``."""
        import pickle
        Dat = {
            "prediction": self.prediction,
            "gt": self.ground_truth
        }
        with open(fname, 'wb') as fp:
            pickle.dump(Dat, fp)
382 | 
383 | 
def compute_average_precision_detection(
        ground_truth, prediction, tiou_thresholds=np.linspace(0.5, 0.95, 10)
):
    """Compute average precision (detection task) between ground truth and
    prediction data frames. If several predictions match the same ground
    truth segment, only the one with the highest score counts as a true
    positive. This code is greatly inspired by the Pascal VOC devkit.

    Parameters
    ----------
    ground_truth : df
        Data frame containing the ground truth instances.
        Required fields: ['video-id', 't-start', 't-end'].  Its index labels
        are used as column positions of the matching table, so it must carry
        the default ``0 .. len - 1`` index.
    prediction : df
        Data frame containing the prediction instances.
        Required fields: ['video-id', 't-start', 't-end', 'score']
    tiou_thresholds : 1darray, optional
        Temporal intersection over union thresholds.

    Outputs
    -------
    ap : 1darray
        Average precision at each threshold; all zeros when ``prediction``
        is empty.
    """
    ap = np.zeros(len(tiou_thresholds))
    if prediction.empty:
        return ap

    npos = float(len(ground_truth))
    # lock_gt[t, g] holds the index of the prediction matched to ground
    # truth g at threshold t, or -1 while g is still unmatched.
    lock_gt = np.ones((len(tiou_thresholds), len(ground_truth))) * -1
    # Sort predictions by decreasing score.  ``argsort`` yields positions,
    # so they must be applied with ``iloc`` rather than label-based ``loc``.
    sort_idx = prediction["score"].values.argsort()[::-1]
    prediction = prediction.iloc[sort_idx].reset_index(drop=True)

    # Initialize true positive and false positive vectors.
    tp = np.zeros((len(tiou_thresholds), len(prediction)))
    fp = np.zeros((len(tiou_thresholds), len(prediction)))

    # Adaptation to query faster
    ground_truth_gbvn = ground_truth.groupby("video-id")

    # Assign true positives to the ground truth instances they match.
    for idx, this_pred in prediction.iterrows():

        try:
            # Check if there is at least one ground truth in the video associated.
            ground_truth_videoid = ground_truth_gbvn.get_group(this_pred["video-id"])
        except KeyError:
            # No ground truth in this video: a false positive at every threshold.
            fp[:, idx] = 1
            continue

        this_gt = ground_truth_videoid.reset_index()
        # Original row label of each ground truth instance of this video.
        gt_rows = this_gt["index"].values
        tiou_arr = segment_iou(
            this_pred[["t-start", "t-end"]].values, this_gt[["t-start", "t-end"]].values
        )
        # Visit the ground truth instances from highest to lowest tIoU.
        tiou_sorted_idx = tiou_arr.argsort()[::-1]
        for tidx, tiou_thr in enumerate(tiou_thresholds):
            for jdx in tiou_sorted_idx:
                if tiou_arr[jdx] < tiou_thr:
                    fp[tidx, idx] = 1
                    break
                if lock_gt[tidx, gt_rows[jdx]] >= 0:
                    # Already claimed by a higher-scoring prediction.
                    continue
                # Assign as true positive after the filters above.
                tp[tidx, idx] = 1
                lock_gt[tidx, gt_rows[jdx]] = idx
                break

            # Every sufficiently overlapping instance was already claimed.
            if fp[tidx, idx] == 0 and tp[tidx, idx] == 0:
                fp[tidx, idx] = 1

    tp_cumsum = np.cumsum(tp, axis=1).astype(float)
    fp_cumsum = np.cumsum(fp, axis=1).astype(float)
    recall_cumsum = tp_cumsum / npos

    precision_cumsum = tp_cumsum / (tp_cumsum + fp_cumsum)

    for tidx in range(len(tiou_thresholds)):
        ap[tidx] = interpolated_prec_rec(
            precision_cumsum[tidx, :], recall_cumsum[tidx, :]
        )

    return ap
468 | 


--------------------------------------------------------------------------------
/wsad_dataset.py:
--------------------------------------------------------------------------------
  1 | from __future__ import print_function
  2 | 
  3 | import os
  4 | 
  5 | import numpy as np
  6 | 
  7 | import options
  8 | import utils.wsad_utils as utils
  9 | 
 10 | 
 11 | class SampleDataset:
 12 |     def __init__(self, args, mode="both", sampling='random'):
 13 |         self.args = args
 14 |         self.dataset_name = args.dataset_name
 15 |         self.num_class = args.num_class
 16 |         self.sampling = sampling
 17 |         self.num_segments = args.max_seqlen
 18 |         self.feature_size = args.feature_size
 19 |         self.path_to_features = os.path.join(args.path_dataset, self.dataset_name + "-I3D-JOINTFeatures.npy")
 20 |         self.path_to_annotations = os.path.join(args.path_dataset, self.dataset_name + "-Annotations/")
 21 |         self.features = np.load(
 22 |             self.path_to_features, encoding="bytes", allow_pickle=True
 23 |         )
 24 |         self.segments = np.load(
 25 |             self.path_to_annotations + "segments.npy", allow_pickle=True
 26 |         )
 27 |         self.labels = np.load(
 28 |             self.path_to_annotations + "labels_all.npy", allow_pickle=True
 29 |         )
 30 |         # Specific to Thumos14
 31 | 
 32 |         self._labels = np.load(
 33 |             self.path_to_annotations + "labels.npy", allow_pickle=True
 34 |         )
 35 |         # self.classlist = np.load(
 36 |         #     self.path_to_annotations + "classlist.npy", allow_pickle=True
 37 |         # )
 38 |         self.subset = np.load(
 39 |             self.path_to_annotations + "subset.npy", allow_pickle=True
 40 |         )
 41 |         self.videonames = np.load(
 42 |             self.path_to_annotations + "videoname.npy", allow_pickle=True
 43 |         )
 44 | 
 45 |         split_path = f'./thumos_splits/split_{args.split_idx}'
 46 |         # split_path = f'./activitynet_splits/split_{args.split_idx}'
 47 |         # 从txt文件读入Known类别
 48 |         self.known_classes = []
 49 |         # with open('./split_0/Class_Known.txt', 'rb') as file:
 50 |         with open(os.path.join(split_path, 'Class_Known.txt'), 'rb') as file:
 51 |             for line in file.readlines():
 52 |                 self.known_classes.append(line.decode().strip())
 53 | 
 54 |         # 从txt文件读入Unknown类别
 55 |         self.unknown_classes = []
 56 |         # with open('./split_0/Class_Unknown.txt', 'rb') as file:
 57 |         with open(os.path.join(split_path, 'Class_Unknown.txt'), 'rb') as file:
 58 |             for line in file.readlines():
 59 |                 self.unknown_classes.append(line.decode().strip())
 60 | 
 61 |         # 组织新的classlist
 62 |         self.classlist = self.known_classes + self.unknown_classes
 63 |         args.classlist = self.classlist
 64 |         # np.save('./new_classlist.npy', self.classlist)
 65 | 
 66 |         self.batch_size = args.batch_size
 67 |         self.trainidx = []
 68 |         self.testidx = []
 69 |         self.classwiseidx = []
 70 |         self.currenttestidx = 0
 71 |         self.currenttrainidx = 0
 72 | 
 73 |         # 原作用是将string形式的标签转化为multi-hot形式。
 74 |         # 注意：multi-hot标签需要按新的classlist排序，使前15类是Known类别，后5类是Unknown类别。
 75 |         # 训练集中，只看前15项标签即可。测试集中，将后5项标签归为1类即可。
 76 |         self.labels_multihot = [
 77 |             utils.strlist2multihot(labs, self.classlist)
 78 |             for labs in self.labels
 79 |         ]
 80 | 
 81 |         # 原作用是划分训练集和测试集。注意：训练集中，只保留含有Known类别动作的视频
 82 |         self.train_test_idx()
 83 | 
 84 |         np.save('train_video_names_split_' + str(args.split_idx) + '.npy', self.videonames[self.trainidx])
 85 | 
 86 |         # 原作用是将训练集数据按类别进行划分。注意：训练集中，只看Known类别。
 87 |         self.classwise_feature_mapping()
 88 | 
 89 |         self.normalize = False
 90 |         self.mode = mode
 91 |         if mode == "rgb" or mode == "flow":
 92 |             self.feature_size = 1024
 93 | 
 94 |     def train_test_idx(self):
 95 |         for i, s in enumerate(self.subset):
 96 |             # Specific to Thumos14
 97 |             if s.decode("utf-8") == "validation" and list(set(self.labels[i]) & set(self.known_classes)):
 98 |                 self.trainidx.append(i)
 99 |             elif s.decode("utf-8") == "test":
100 |                 self.testidx.append(i)
101 | 
102 |     def classwise_feature_mapping(self):
103 |         # for category in self.classlist:
104 |         for category in self.known_classes:
105 |             idx = []
106 |             for i in self.trainidx:
107 |                 for label in self.labels[i]:
108 |                     # if label == category.decode("utf-8"):
109 |                     if label == category:
110 |                         idx.append(i)
111 |                         break
112 |             self.classwiseidx.append(idx)
113 | 
114 |     def load_data_for_threshold(self):
115 |         labs = self.labels_multihot[self.trainidx[self.currenttrainidx]]
116 |         feat = self.features[self.trainidx[self.currenttrainidx]]
117 |         vn = self.videonames[self.trainidx[self.currenttrainidx]]
118 |         if self.currenttrainidx == len(self.trainidx) - 1:
119 |             done = True
120 |             self.currenttrainidx = 0
121 |         else:
122 |             done = False
123 |             self.currenttrainidx += 1
124 |         feat = np.array(feat)
125 |         if self.mode == "rgb":
126 |             feat = feat[..., : self.feature_size]
127 |         elif self.mode == "flow":
128 |             feat = feat[..., self.feature_size:]
129 |         return feat, np.array(labs), vn, done
130 | 
131 |     def load_data(self, n_similar=0, is_training=True, similar_size=2):
132 |         if is_training:
133 |             idx = []
134 | 
135 |             # Load similar pairs
136 |             if n_similar != 0:
137 |                 rand_classid = np.random.choice(
138 |                     len(self.classwiseidx), size=n_similar
139 |                 )
140 |                 for rid in rand_classid:
141 |                     rand_sampleid = np.random.choice(
142 |                         len(self.classwiseidx[rid]),
143 |                         size=similar_size,
144 |                         replace=False,
145 |                     )
146 | 
147 |                     for k in rand_sampleid:
148 |                         idx.append(self.classwiseidx[rid][k])
149 | 
150 |             # Load rest pairs
151 |             if self.batch_size - similar_size * n_similar < 0:
152 |                 self.batch_size = similar_size * n_similar
153 | 
154 |             rand_sampleid = np.random.choice(
155 |                 len(self.trainidx),
156 |                 size=self.batch_size - similar_size * n_similar,
157 |             )
158 | 
159 |             for r in rand_sampleid:
160 |                 idx.append(self.trainidx[r])
161 |             feat = []
162 |             for i in idx:
163 |                 ifeat = self.features[i]
164 |                 if self.sampling == 'random':
165 |                     sample_idx = self.random_perturb(ifeat.shape[0])
166 |                 elif self.sampling == 'uniform':
167 |                     sample_idx = self.uniform_sampling(ifeat.shape[0])
168 |                 elif self.sampling == "all":
169 |                     sample_idx = np.arange(ifeat.shape[0])
170 |                 else:
171 |                     raise AssertionError('Not supported sampling !')
172 |                 ifeat = ifeat[sample_idx]
173 |                 feat.append(ifeat)
174 |             feat = np.array(feat)
175 |             n_known_class = len(self.known_classes)
176 |             labels = np.array([self.labels_multihot[i][:n_known_class] for i in idx])
177 |             if self.mode == "rgb":
178 |                 feat = feat[..., : self.feature_size]
179 |             elif self.mode == "flow":
180 |                 feat = feat[..., self.feature_size:]
181 |             return feat, labels, rand_sampleid
182 | 
183 |         else:
184 |             labs = self.labels_multihot[self.testidx[self.currenttestidx]]
185 |             feat = self.features[self.testidx[self.currenttestidx]]
186 |             # feat = utils.process_feat(feat, normalize=self.normalize)
187 |             # feature = feature[sample_idx]
188 |             vn = self.videonames[self.testidx[self.currenttestidx]]
189 |             if self.currenttestidx == len(self.testidx) - 1:
190 |                 done = True
191 |                 self.currenttestidx = 0
192 |             else:
193 |                 done = False
194 |                 self.currenttestidx += 1
195 |             feat = np.array(feat)
196 |             if self.mode == "rgb":
197 |                 feat = feat[..., : self.feature_size]
198 |             elif self.mode == "flow":
199 |                 feat = feat[..., self.feature_size:]
200 |             return feat, np.array(labs), vn, done
201 | 
202 |     def random_avg(self, x, segm=None):
203 |         if len(x) < self.num_segments:
204 |             ind = self.random_perturb(len(x))
205 |             x_n = x[ind]
206 |             segm = segm[ind] if segm is not None else None
207 |             return x_n, segm
208 |         else:
209 |             inds = np.array_split(np.arange(len(x)), self.num_segments)
210 |             x_n = np.zeros((self.num_segments, x.shape[-1])).astype(x.dtype)
211 |             segm_n = np.zeros(
212 |                 (self.num_segments, segm.shape[-1])).astype(x.dtype)
213 |             for i, ind in enumerate(inds):
214 |                 x_n[i] = np.mean(x[ind], axis=0)
215 |                 if segm is not None:
216 |                     segm_n[i] = segm[(ind[0] + ind[-1]) // 2]
217 |             return x_n, segm_n if segm is not None else None
218 | 
219 |     def random_pad(self, x, segm=None):
220 |         length = self.num_segments
221 |         if x.shape[0] > length:
222 |             strt = np.random.randint(0, x.shape[0] - length)
223 |             x_ret = x[strt:strt + length]
224 |             if segm is not None:
225 |                 segm = segm[strt:strt + length]
226 |                 return x_ret, segm
227 |         elif x.shape[0] == length:
228 |             return x, segm
229 |         else:
230 |             pad_len = length - x.shape[0]
231 |             x_ret = np.pad(x, ((0, pad_len), (0, 0)), mode='constant')
232 |             if segm is not None:
233 |                 segm = np.pad(segm, ((0, pad_len), (0, 0)), mode='constant')
234 |             return x_ret, segm
235 | 
236 |     def random_perturb(self, length):
237 |         if self.num_segments == length:
238 |             return np.arange(self.num_segments).astype(int)
239 |         samples = np.arange(self.num_segments) * length / self.num_segments
240 |         for i in range(self.num_segments):
241 |             if i < self.num_segments - 1:
242 |                 if int(samples[i]) != int(samples[i + 1]):
243 |                     samples[i] = np.random.choice(
244 |                         range(int(samples[i]),
245 |                               int(samples[i + 1]) + 1))
246 |                 else:
247 |                     samples[i] = int(samples[i])
248 |             else:
249 |                 if int(samples[i]) < length - 1:
250 |                     samples[i] = np.random.choice(
251 |                         range(int(samples[i]), length))
252 |                 else:
253 |                     samples[i] = int(samples[i])
254 |         return samples.astype(int)
255 | 
256 |     def uniform_sampling(self, length):
257 |         if self.num_segments == length:
258 |             return np.arange(self.num_segments).astype(int)
259 |         samples = np.arange(self.num_segments) * length / self.num_segments
260 |         samples = np.floor(samples)
261 |         return samples.astype(int)
262 | 
263 | 
264 | class AntSampleDataset:
265 |     def __init__(self, args, mode="both", sampling='random'):
266 |         self.dataset_name = args.dataset_name
267 |         self.num_class = args.num_class
268 |         self.sampling = sampling
269 |         self.num_segments = args.max_seqlen
270 |         self.feature_size = args.feature_size
271 |         self.path_to_features = os.path.join(args.path_dataset, self.dataset_name + "-I3D-JOINTFeatures.npy")
272 |         self.path_to_annotations = os.path.join(args.path_dataset, self.dataset_name + "-Annotations/")
273 |         self.features = np.load(
274 |             self.path_to_features, encoding="bytes", allow_pickle=True
275 |         )
276 |         self.segments = np.load(
277 |             self.path_to_annotations + "segments.npy", allow_pickle=True
278 |         )
279 |         self.labels = np.load(
280 |             self.path_to_annotations + "labels_all.npy", allow_pickle=True
281 |         )
282 |         # Specific to Thumos14
283 | 
284 |         self._labels = np.load(
285 |             self.path_to_annotations + "labels.npy", allow_pickle=True
286 |         )
287 |         # self.classlist = np.load(
288 |         #     self.path_to_annotations + "classlist.npy", allow_pickle=True
289 |         # )
290 |         self.subset = np.load(
291 |             self.path_to_annotations + "subset.npy", allow_pickle=True
292 |         )
293 |         self.videonames = np.load(
294 |             self.path_to_annotations + "videoname.npy", allow_pickle=True
295 |         )
296 |         self.batch_size = args.batch_size
297 |         self.t_max = args.max_seqlen
298 | 
299 |         split_path = f'./activitynet_splits/split_{args.split_idx}'
300 |         # 从txt文件读入Known类别
301 |         self.known_classes = []
302 |         with open(os.path.join(split_path, 'Class_Known.txt'), 'rb') as file:
303 |             for line in file.readlines():
304 |                 self.known_classes.append(line.decode().strip())
305 | 
306 |         # 从txt文件读入Unknown类别
307 |         self.unknown_classes = []
308 |         with open(os.path.join(split_path, 'Class_Unknown.txt'), 'rb') as file:
309 |             for line in file.readlines():
310 |                 self.unknown_classes.append(line.decode().strip())
311 | 
312 |         # Build the new classlist (a list of strings) and save it as new_classlist.npy so other modules can read it
313 |         self.classlist = self.known_classes + self.unknown_classes
314 |         np.save('./new_classlist.npy', self.classlist)
315 | 
316 |         self.trainidx = []
317 |         self.testidx = []
318 |         self.classwiseidx = []
319 |         self.currenttestidx = 0
320 |         self.labels_multihot = [
321 |             utils.strlist2multihot(labs, self.classlist)
322 |             for labs in self.labels
323 |         ]
324 |         try:
325 |             ambilist = self.path_to_annotations + "/Ambiguous_test.txt"
326 |             ambilist = list(open(ambilist, "r"))
327 |             ambilist = [a.strip("\n").split(" ")[0] for a in ambilist]
328 |         except:
329 |             ambilist = []
330 |         self.train_test_idx()
331 |         self.classwise_feature_mapping()
332 | 
333 |         self.normalize = False
334 |         self.mode = mode
335 |         if mode == "rgb" or mode == "flow":
336 |             self.feature_size = 1024
337 |         self.filter()
338 | 
339 |     def filter(self):
340 |         new_testidx = []
341 |         for idx in self.testidx:
342 |             feat = self.features[idx]
343 |             if len(feat) > 10:
344 |                 new_testidx.append(idx)
345 |         self.testidx = new_testidx
346 | 
347 |         new_trainidx = []
348 |         for idx in self.trainidx:
349 |             feat = self.features[idx]
350 |             if len(feat) > 10:
351 |                 new_trainidx.append(idx)
352 |         self.trainidx = new_trainidx
353 | 
354 |     def train_test_idx(self):
355 |         for i, s in enumerate(self.subset):
356 |             if s.decode("utf-8") == "training" and list(set(self.labels[i]) & set(self.known_classes)):
357 |                 self.trainidx.append(i)
358 |             elif s.decode("utf-8") == "validation":
359 |                 self.testidx.append(i)
360 | 
361 |     def classwise_feature_mapping(self):
362 |         # for category in self.classlist:
363 |         for category in self.known_classes:
364 |             idx = []
365 |             for i in self.trainidx:
366 |                 if self.features[i].sum() == 0:
367 |                     continue
368 |                 for label in self.labels[i]:
369 |                     # if label == category.decode("utf-8"):
370 |                     if label == category:
371 |                         idx.append(i)
372 |                         break
373 |             self.classwiseidx.append(idx)
374 | 
375 |     def load_data(self, n_similar=0, is_training=True, similar_size=2):
376 |         if is_training:
377 |             labels = []
378 |             idx = []
379 |             # Load similar pairs
380 |             if n_similar != 0:
381 |                 rand_classid = np.random.choice(
382 |                     len(self.classwiseidx), size=n_similar
383 |                 )
384 |                 for rid in rand_classid:
385 |                     rand_sampleid = np.random.choice(
386 |                         len(self.classwiseidx[rid]),
387 |                         size=similar_size,
388 |                         replace=False,
389 |                     )
390 | 
391 |                     for k in rand_sampleid:
392 |                         idx.append(self.classwiseidx[rid][k])
393 | 
394 |             # Load rest pairs
395 |             if self.batch_size - similar_size * n_similar < 0:
396 |                 self.batch_size = similar_size * n_similar
397 | 
398 |             rand_sampleid = np.random.choice(
399 |                 len(self.trainidx),
400 |                 size=self.batch_size - similar_size * n_similar,
401 |             )
402 | 
403 |             for r in rand_sampleid:
404 |                 idx.append(self.trainidx[r])
405 |             feat = []
406 |             for i in idx:
407 |                 ifeat = self.features[i]
408 |                 if self.sampling == 'random':
409 |                     sample_idx = self.random_perturb(ifeat.shape[0])
410 |                 elif self.sampling == 'uniform':
411 |                     sample_idx = self.uniform_sampling(ifeat.shape[0])
412 |                 elif self.sampling == "all":
413 |                     sample_idx = np.arange(ifeat.shape[0])
414 |                 else:
415 |                     raise AssertionError('Not supported sampling !')
416 |                 ifeat = ifeat[sample_idx]
417 |                 feat.append(ifeat)
418 |             feat = np.array(feat)
419 | 
420 |             n_known_class = len(self.known_classes)
421 |             labels = np.array([self.labels_multihot[i][:n_known_class] for i in idx])
422 | 
423 |             # labels = np.array([self.labels_multihot[i] for i in idx])
424 |             if self.mode == "rgb":
425 |                 feat = feat[..., : self.feature_size]
426 |             elif self.mode == "flow":
427 |                 feat = feat[..., self.feature_size:]
428 |             return feat, labels, rand_sampleid
429 | 
430 |         else:
431 |             labs = self.labels_multihot[self.testidx[self.currenttestidx]]
432 |             feat = self.features[self.testidx[self.currenttestidx]]
433 |             # feat = utils.process_feat(feat, normalize=self.normalize)
434 |             # feature = feature[sample_idx]
435 |             vn = self.videonames[self.testidx[self.currenttestidx]]
436 |             if self.currenttestidx == len(self.testidx) - 1:
437 |                 done = True
438 |                 self.currenttestidx = 0
439 |             else:
440 |                 done = False
441 |                 self.currenttestidx += 1
442 |             feat = np.array(feat)
443 |             if self.mode == "rgb":
444 |                 feat = feat[..., : self.feature_size]
445 |             elif self.mode == "flow":
446 |                 feat = feat[..., self.feature_size:]
447 |             return feat, np.array(labs), vn, done
448 | 
449 |     def random_avg(self, x, segm=None):
450 |         if len(x) < self.num_segments:
451 |             ind = self.random_perturb(len(x))
452 |             x_n = x[ind]
453 |             segm = segm[ind] if segm is not None else None
454 |             return x_n, segm
455 |         else:
456 |             inds = np.array_split(np.arange(len(x)), self.num_segments)
457 |             x_n = np.zeros((self.num_segments, x.shape[-1])).astype(x.dtype)
458 |             segm_n = np.zeros(
459 |                 (self.num_segments, segm.shape[-1])).astype(x.dtype)
460 |             for i, ind in enumerate(inds):
461 |                 x_n[i] = np.mean(x[ind], axis=0)
462 |                 if segm is not None:
463 |                     segm_n[i] = segm[(ind[0] + ind[-1]) // 2]
464 |             return x_n, segm_n if segm is not None else None
465 | 
466 |     def random_pad(self, x, segm=None):
467 |         length = self.num_segments
468 |         if x.shape[0] > length:
469 |             strt = np.random.randint(0, x.shape[0] - length)
470 |             x_ret = x[strt:strt + length]
471 |             if segm is not None:
472 |                 segm = segm[strt:strt + length]
473 |                 return x_ret, segm
474 |         elif x.shape[0] == length:
475 |             return x, segm
476 |         else:
477 |             pad_len = length - x.shape[0]
478 |             x_ret = np.pad(x, ((0, pad_len), (0, 0)), mode='constant')
479 |             if segm is not None:
480 |                 segm = np.pad(segm, ((0, pad_len), (0, 0)), mode='constant')
481 |             return x_ret, segm
482 | 
483 |     def random_perturb(self, length):
484 |         if self.num_segments == length:
485 |             return np.arange(self.num_segments).astype(int)
486 |         samples = np.arange(self.num_segments) * length / self.num_segments
487 |         for i in range(self.num_segments):
488 |             if i < self.num_segments - 1:
489 |                 if int(samples[i]) != int(samples[i + 1]):
490 |                     samples[i] = np.random.choice(
491 |                         range(int(samples[i]),
492 |                               int(samples[i + 1]) + 1))
493 |                 else:
494 |                     samples[i] = int(samples[i])
495 |             else:
496 |                 if int(samples[i]) < length - 1:
497 |                     samples[i] = np.random.choice(
498 |                         range(int(samples[i]), length))
499 |                 else:
500 |                     samples[i] = int(samples[i])
501 |         return samples.astype(int)
502 | 
503 |     def uniform_sampling(self, length):
504 |         if self.num_segments == length:
505 |             return np.arange(self.num_segments).astype(int)
506 |         samples = np.arange(self.num_segments) * length / self.num_segments
507 |         samples = np.floor(samples)
508 |         return samples.astype(int)
509 | 
510 | 
if __name__ == '__main__':
    # Manual smoke test: parse the command-line options, build a
    # SampleDataset, draw one batch with the default load_data() arguments
    # and print it.
    args = options.parser.parse_args()
    dt = SampleDataset(args)
    data = dt.load_data()
    print(data)
    import pdb

    # Stops in the interactive debugger so `args`, `dt` and `data` can be
    # inspected by hand; the dataset object is printed once the session
    # continues.
    pdb.set_trace()
    print(dt)
520 | 


--------------------------------------------------------------------------------
/libMR/malloc.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |   Default header file for malloc-2.8.x, written by Doug Lea
  3 |   and released to the public domain, as explained at
  4 |   http://creativecommons.org/publicdomain/zero/1.0/ 
  5 |  
  6 |   This header is for ANSI C/C++ only.  You can set any of
  7 |   the following #defines before including:
  8 | 
  9 |   * If USE_DL_PREFIX is defined, it is assumed that malloc.c 
 10 |     was also compiled with this option, so all routines
 11 |     have names starting with "dl".
 12 | 
 13 |   * If HAVE_USR_INCLUDE_MALLOC_H is defined, it is assumed that this
 14 |     file will be #included AFTER <malloc.h>. This is needed only if
 15 |     your system defines a struct mallinfo that is incompatible with the
 16 |     standard one declared here.  Otherwise, you can include this file
 17 |     INSTEAD of your system <malloc.h>.  At least on ANSI, all
 18 |     declarations should be compatible with system versions
 19 | 
 20 |   * If MSPACES is defined, declarations for mspace versions are included.
 21 | */
 22 | 
 23 | #ifndef MALLOC_280_H
 24 | #define MALLOC_280_H
 25 | 
 26 | #ifdef __cplusplus
 27 | extern "C" {
 28 | #endif
 29 | 
 30 | #include <stddef.h>   /* for size_t */
 31 | 
 32 | #ifndef ONLY_MSPACES
 33 | #define ONLY_MSPACES 0     /* define to a value */
 34 | #elif ONLY_MSPACES != 0
 35 | #define ONLY_MSPACES 1
 36 | #endif  /* ONLY_MSPACES */
 37 | #ifndef NO_MALLINFO
 38 | #define NO_MALLINFO 0
 39 | #endif  /* NO_MALLINFO */
 40 | 
 41 | #ifndef MSPACES
 42 | #if ONLY_MSPACES
 43 | #define MSPACES 1
 44 | #else   /* ONLY_MSPACES */
 45 | #define MSPACES 0
 46 | #endif  /* ONLY_MSPACES */
 47 | #endif  /* MSPACES */
 48 | 
 49 | #if !ONLY_MSPACES
 50 | 
 51 | #ifndef USE_DL_PREFIX
 52 | #define dlcalloc               calloc
 53 | #define dlfree                 free
 54 | #define dlmalloc               malloc
 55 | #define dlmemalign             memalign
 56 | #define dlposix_memalign       posix_memalign
 57 | #define dlrealloc              realloc
 58 | #define dlvalloc               valloc
 59 | #define dlpvalloc              pvalloc
 60 | #define dlmallinfo             mallinfo
 61 | #define dlmallopt              mallopt
 62 | #define dlmalloc_trim          malloc_trim
 63 | #define dlmalloc_stats         malloc_stats
 64 | #define dlmalloc_usable_size   malloc_usable_size
 65 | #define dlmalloc_footprint     malloc_footprint
 66 | #define dlmalloc_max_footprint malloc_max_footprint
 67 | #define dlmalloc_footprint_limit malloc_footprint_limit
 68 | #define dlmalloc_set_footprint_limit malloc_set_footprint_limit
 69 | #define dlmalloc_inspect_all   malloc_inspect_all
 70 | #define dlindependent_calloc   independent_calloc
 71 | #define dlindependent_comalloc independent_comalloc
 72 | #define dlbulk_free            bulk_free
 73 | #endif /* USE_DL_PREFIX */
 74 | 
 75 | #if !NO_MALLINFO 
 76 | #ifndef HAVE_USR_INCLUDE_MALLOC_H
 77 | #ifndef _MALLOC_H
 78 | #ifndef MALLINFO_FIELD_TYPE
 79 | #define MALLINFO_FIELD_TYPE size_t
 80 | #endif /* MALLINFO_FIELD_TYPE */
 81 | #ifndef STRUCT_MALLINFO_DECLARED
 82 | #define STRUCT_MALLINFO_DECLARED 1
/*
  Summary statistics returned by dlmallinfo() (which is plain mallinfo()
  unless USE_DL_PREFIX is defined).  Every field has type
  MALLINFO_FIELD_TYPE, which defaults to size_t when the includer has not
  defined it before this header.
*/
struct mallinfo {
  MALLINFO_FIELD_TYPE arena;    /* non-mmapped space allocated from system */
  MALLINFO_FIELD_TYPE ordblks;  /* number of free chunks */
  MALLINFO_FIELD_TYPE smblks;   /* always 0 */
  MALLINFO_FIELD_TYPE hblks;    /* always 0 */
  MALLINFO_FIELD_TYPE hblkhd;   /* space in mmapped regions */
  MALLINFO_FIELD_TYPE usmblks;  /* maximum total allocated space */
  MALLINFO_FIELD_TYPE fsmblks;  /* always 0 */
  MALLINFO_FIELD_TYPE uordblks; /* total allocated space */
  MALLINFO_FIELD_TYPE fordblks; /* total free space */
  MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */
};
 95 | #endif /* STRUCT_MALLINFO_DECLARED */
 96 | #endif  /* _MALLOC_H */
 97 | #endif  /* HAVE_USR_INCLUDE_MALLOC_H */
 98 | #endif  /* !NO_MALLINFO */
 99 | 
100 | /*
101 |   malloc(size_t n)
102 |   Returns a pointer to a newly allocated chunk of at least n bytes, or
103 |   null if no space is available, in which case errno is set to ENOMEM
104 |   on ANSI C systems.
105 | 
106 |   If n is zero, malloc returns a minimum-sized chunk. (The minimum
107 |   size is 16 bytes on most 32bit systems, and 32 bytes on 64bit
108 |   systems.)  Note that size_t is an unsigned type, so calls with
109 |   arguments that would be negative if signed are interpreted as
110 |   requests for huge amounts of space, which will often fail. The
111 |   maximum supported value of n differs across systems, but is in all
112 |   cases less than the maximum representable value of a size_t.
113 | */
114 | void* dlmalloc(size_t);
115 | 
116 | /*
117 |   free(void* p)
118 |   Releases the chunk of memory pointed to by p, that had been previously
119 |   allocated using malloc or a related routine such as realloc.
120 |   It has no effect if p is null. If p was not malloced or already
121 |   freed, free(p) will by default cause the current program to abort.
122 | */
123 | void  dlfree(void*);
124 | 
125 | /*
126 |   calloc(size_t n_elements, size_t element_size);
127 |   Returns a pointer to n_elements * element_size bytes, with all locations
128 |   set to zero.
129 | */
130 | void* dlcalloc(size_t, size_t);
131 | 
132 | /*
133 |   realloc(void* p, size_t n)
134 |   Returns a pointer to a chunk of size n that contains the same data
135 |   as does chunk p up to the minimum of (n, p's size) bytes, or null
136 |   if no space is available.
137 | 
138 |   The returned pointer may or may not be the same as p. The algorithm
139 |   prefers extending p in most cases when possible, otherwise it
140 |   employs the equivalent of a malloc-copy-free sequence.
141 | 
142 |   If p is null, realloc is equivalent to malloc.
143 | 
144 |   If space is not available, realloc returns null, errno is set (if on
145 |   ANSI) and p is NOT freed.
146 | 
147 |   if n is for fewer bytes than already held by p, the newly unused
148 |   space is lopped off and freed if possible.  realloc with a size
149 |   argument of zero (re)allocates a minimum-sized chunk.
150 | 
151 |   The old unix realloc convention of allowing the last-free'd chunk
152 |   to be used as an argument to realloc is not supported.
153 | */
154 | void* dlrealloc(void*, size_t);
155 | 
156 | /*
157 |   realloc_in_place(void* p, size_t n)
158 |   Resizes the space allocated for p to size n, only if this can be
159 |   done without moving p (i.e., only if there is adjacent space
160 |   available if n is greater than p's current allocated size, or n is
161 |   less than or equal to p's size). This may be used instead of plain
162 |   realloc if an alternative allocation strategy is needed upon failure
163 |   to expand space; for example, reallocation of a buffer that must be
164 |   memory-aligned or cleared. You can use realloc_in_place to trigger
165 |   these alternatives only when needed.
166 | 
167 |   Returns p if successful; otherwise null.
168 | */
169 | void* dlrealloc_in_place(void*, size_t);
170 | 
171 | /*
172 |   memalign(size_t alignment, size_t n);
173 |   Returns a pointer to a newly allocated chunk of n bytes, aligned
174 |   in accord with the alignment argument.
175 | 
176 |   The alignment argument should be a power of two. If the argument is
177 |   not a power of two, the nearest greater power is used.
178 |   8-byte alignment is guaranteed by normal malloc calls, so don't
179 |   bother calling memalign with an argument of 8 or less.
180 | 
181 |   Overreliance on memalign is a sure way to fragment space.
182 | */
183 | void* dlmemalign(size_t, size_t);
184 | 
185 | /*
186 |   int posix_memalign(void** pp, size_t alignment, size_t n);
187 |   Allocates a chunk of n bytes, aligned in accord with the alignment
188 |   argument. Differs from memalign only in that it (1) assigns the
189 |   allocated memory to *pp rather than returning it, (2) fails and
190 |   returns EINVAL if the alignment is not a power of two (3) fails and
191 |   returns ENOMEM if memory cannot be allocated.
192 | */
193 | int dlposix_memalign(void**, size_t, size_t);
194 | 
195 | /*
196 |   valloc(size_t n);
197 |   Equivalent to memalign(pagesize, n), where pagesize is the page
198 |   size of the system. If the pagesize is unknown, 4096 is used.
199 | */
200 | void* dlvalloc(size_t);
201 | 
202 | /*
203 |   mallopt(int parameter_number, int parameter_value)
204 |   Sets tunable parameters. The format is to provide a
205 |   (parameter-number, parameter-value) pair.  mallopt then sets the
206 |   corresponding parameter to the argument value if it can (i.e., so
207 |   long as the value is meaningful), and returns 1 if successful else
208 |   0.  SVID/XPG/ANSI defines four standard param numbers for mallopt,
209 |   normally defined in malloc.h.  None of these are used in this malloc,
210 |   so setting them has no effect. But this malloc also supports other
211 |   options in mallopt:
212 | 
213 |   Symbol            param #  default    allowed param values
214 |   M_TRIM_THRESHOLD     -1   2*1024*1024   any   (-1U disables trimming)
215 |   M_GRANULARITY        -2     page size   any power of 2 >= page size
216 |   M_MMAP_THRESHOLD     -3      256*1024   any   (or 0 if no MMAP support)
217 | */
218 | int dlmallopt(int, int);
219 | 
220 | #define M_TRIM_THRESHOLD     (-1)
221 | #define M_GRANULARITY        (-2)
222 | #define M_MMAP_THRESHOLD     (-3)
223 | 
224 | 
225 | /*
226 |   malloc_footprint();
227 |   Returns the number of bytes obtained from the system.  The total
228 |   number of bytes allocated by malloc, realloc etc., is less than this
229 |   value. Unlike mallinfo, this function returns only a precomputed
230 |   result, so can be called frequently to monitor memory consumption.
231 |   Even if locks are otherwise defined, this function does not use them,
232 |   so results might not be up to date.
233 | */
234 | size_t dlmalloc_footprint(void);
235 | 
236 | /*
237 |   malloc_max_footprint();
238 |   Returns the maximum number of bytes obtained from the system. This
239 |   value will be greater than current footprint if deallocated space
240 |   has been reclaimed by the system. The peak number of bytes allocated
241 |   by malloc, realloc etc., is less than this value. Unlike mallinfo,
242 |   this function returns only a precomputed result, so can be called
243 |   frequently to monitor memory consumption.  Even if locks are
244 |   otherwise defined, this function does not use them, so results might
245 |   not be up to date.
246 | */
247 | size_t dlmalloc_max_footprint(void);
248 | 
249 | /*
250 |   malloc_footprint_limit();
251 |   Returns the number of bytes that the heap is allowed to obtain from
252 |   the system, returning the last value returned by
253 |   malloc_set_footprint_limit, or the maximum size_t value if
254 |   never set. The returned value reflects a permission. There is no
255 |   guarantee that this number of bytes can actually be obtained from
256 |   the system.  
257 | */
258 | size_t dlmalloc_footprint_limit(void);
259 | 
260 | /*
261 |   malloc_set_footprint_limit();
262 |   Sets the maximum number of bytes to obtain from the system, causing
263 |   failure returns from malloc and related functions upon attempts to
264 |   exceed this value. The argument value may be subject to page
265 |   rounding to an enforceable limit; this actual value is returned.
266 |   Using an argument of the maximum possible size_t effectively
267 |   disables checks. If the argument is less than or equal to the
268 |   current malloc_footprint, then all future allocations that require
269 |   additional system memory will fail. However, invocation cannot
270 |   retroactively deallocate existing used memory.
271 | */
272 | size_t dlmalloc_set_footprint_limit(size_t bytes);
273 | 
274 | /*
275 |   malloc_inspect_all(void(*handler)(void *start,
276 |                                     void *end,
277 |                                     size_t used_bytes,
278 |                                     void* callback_arg),
279 |                       void* arg);
280 |   Traverses the heap and calls the given handler for each managed
281 |   region, skipping all bytes that are (or may be) used for bookkeeping
282 |   purposes.  Traversal does not include chunks that have been
283 |   directly memory mapped. Each reported region begins at the start
284 |   address, and continues up to but not including the end address.  The
285 |   first used_bytes of the region contain allocated data. If
286 |   used_bytes is zero, the region is unallocated. The handler is
287 |   invoked with the given callback argument. If locks are defined, they
288 |   are held during the entire traversal. It is a bad idea to invoke
289 |   other malloc functions from within the handler.
290 | 
291 |   For example, to count the number of in-use chunks with size greater
292 |   than 1000, you could write:
293 |   static int count = 0;
294 |   void count_chunks(void* start, void* end, size_t used, void* arg) {
295 |     if (used >= 1000) ++count;
296 |   }
297 |   then:
298 |     malloc_inspect_all(count_chunks, NULL);
299 | 
300 |   malloc_inspect_all is compiled only if MALLOC_INSPECT_ALL is defined.
301 | */
302 | void dlmalloc_inspect_all(void(*handler)(void*, void *, size_t, void*),
303 |                            void* arg);
304 | 
305 | #if !NO_MALLINFO
306 | /*
307 |   mallinfo()
308 |   Returns (by copy) a struct containing various summary statistics:
309 | 
310 |   arena:     current total non-mmapped bytes allocated from system
311 |   ordblks:   the number of free chunks
312 |   smblks:    always zero.
313 |   hblks:     current number of mmapped regions
314 |   hblkhd:    total bytes held in mmapped regions
315 |   usmblks:   the maximum total allocated space. This will be greater
316 |                 than current total if trimming has occurred.
317 |   fsmblks:   always zero
318 |   uordblks:  current total allocated space (normal or mmapped)
319 |   fordblks:  total free space
320 |   keepcost:  the maximum number of bytes that could ideally be released
321 |                back to system via malloc_trim. ("ideally" means that
322 |                it ignores page restrictions etc.)
323 | 
324 |   Because these fields are ints, but internal bookkeeping may
325 |   be kept as longs, the reported values may wrap around zero and
326 |   thus be inaccurate.
327 | */
328 | 
329 | struct mallinfo dlmallinfo(void);
330 | #endif  /* NO_MALLINFO */
331 | 
332 | /*
333 |   independent_calloc(size_t n_elements, size_t element_size, void* chunks[]);
334 | 
335 |   independent_calloc is similar to calloc, but instead of returning a
336 |   single cleared space, it returns an array of pointers to n_elements
337 |   independent elements that can hold contents of size elem_size, each
338 |   of which starts out cleared, and can be independently freed,
339 |   realloc'ed etc. The elements are guaranteed to be adjacently
340 |   allocated (this is not guaranteed to occur with multiple callocs or
341 |   mallocs), which may also improve cache locality in some
342 |   applications.
343 | 
344 |   The "chunks" argument is optional (i.e., may be null, which is
345 |   probably the most typical usage). If it is null, the returned array
346 |   is itself dynamically allocated and should also be freed when it is
347 |   no longer needed. Otherwise, the chunks array must be of at least
348 |   n_elements in length. It is filled in with the pointers to the
349 |   chunks.
350 | 
351 |   In either case, independent_calloc returns this pointer array, or
352 |   null if the allocation failed.  If n_elements is zero and "chunks"
353 |   is null, it returns a chunk representing an array with zero elements
354 |   (which should be freed if not wanted).
355 | 
356 |   Each element must be freed when it is no longer needed. This can be
357 |   done all at once using bulk_free.
358 | 
359 |   independent_calloc simplifies and speeds up implementations of many
360 |   kinds of pools.  It may also be useful when constructing large data
361 |   structures that initially have a fixed number of fixed-sized nodes,
362 |   but the number is not known at compile time, and some of the nodes
363 |   may later need to be freed. For example:
364 | 
365 |   struct Node { int item; struct Node* next; };
366 | 
367 |   struct Node* build_list() {
368 |     struct Node** pool;
369 |     int n = read_number_of_nodes_needed();
370 |     if (n <= 0) return 0;
371 |     pool = (struct Node**)independent_calloc(n, sizeof(struct Node), 0);
372 |     if (pool == 0) die();
373 |     // organize into a linked list...
374 |     struct Node* first = pool[0];
375 |     for (i = 0; i < n-1; ++i)
376 |       pool[i]->next = pool[i+1];
377 |     free(pool);     // Can now free the array (or not, if it is needed later)
378 |     return first;
379 |   }
380 | */
381 | void** dlindependent_calloc(size_t, size_t, void**);
382 | 
383 | /*
384 |   independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]);
385 | 
386 |   independent_comalloc allocates, all at once, a set of n_elements
387 |   chunks with sizes indicated in the "sizes" array.    It returns
388 |   an array of pointers to these elements, each of which can be
389 |   independently freed, realloc'ed etc. The elements are guaranteed to
390 |   be adjacently allocated (this is not guaranteed to occur with
391 |   multiple callocs or mallocs), which may also improve cache locality
392 |   in some applications.
393 | 
394 |   The "chunks" argument is optional (i.e., may be null). If it is null
395 |   the returned array is itself dynamically allocated and should also
396 |   be freed when it is no longer needed. Otherwise, the chunks array
397 |   must be of at least n_elements in length. It is filled in with the
398 |   pointers to the chunks.
399 | 
400 |   In either case, independent_comalloc returns this pointer array, or
401 |   null if the allocation failed.  If n_elements is zero and chunks is
402 |   null, it returns a chunk representing an array with zero elements
403 |   (which should be freed if not wanted).
404 | 
405 |   Each element must be freed when it is no longer needed. This can be
406 |   done all at once using bulk_free.
407 | 
408 |   independent_comalloc differs from independent_calloc in that each
409 |   element may have a different size, and also that it does not
410 |   automatically clear elements.
411 | 
412 |   independent_comalloc can be used to speed up allocation in cases
413 |   where several structs or objects must always be allocated at the
414 |   same time.  For example:
415 | 
416 |   struct Head { ... }
417 |   struct Foot { ... }
418 | 
419 |   void send_message(char* msg) {
420 |     int msglen = strlen(msg);
421 |     size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) };
422 |     void* chunks[3];
423 |     if (independent_comalloc(3, sizes, chunks) == 0)
424 |       die();
425 |     struct Head* head = (struct Head*)(chunks[0]);
426 |     char*        body = (char*)(chunks[1]);
427 |     struct Foot* foot = (struct Foot*)(chunks[2]);
428 |     // ...
429 |   }
430 | 
431 |   In general though, independent_comalloc is worth using only for
432 |   larger values of n_elements. For small values, you probably won't
433 |   detect enough difference from series of malloc calls to bother.
434 | 
435 |   Overuse of independent_comalloc can increase overall memory usage,
436 |   since it cannot reuse existing noncontiguous small chunks that
437 |   might be available for some of the elements.
438 | */
439 | void** dlindependent_comalloc(size_t, size_t*, void**);
440 | 
441 | /*
442 |   bulk_free(void* array[], size_t n_elements)
443 |   Frees and clears (sets to null) each non-null pointer in the given
444 |   array.  This is likely to be faster than freeing them one-by-one.
445 |   If footers are used, pointers that have been allocated in different
446 |   mspaces are not freed or cleared, and the count of all such pointers
447 |   is returned.  For large arrays of pointers with poor locality, it
448 |   may be worthwhile to sort this array before calling bulk_free.
449 | */
450 | size_t  dlbulk_free(void**, size_t n_elements);
451 | 
452 | /*
453 |   pvalloc(size_t n);
454 |   Equivalent to valloc(minimum-page-that-holds(n)), that is,
455 |   round up n to nearest pagesize.
456 |  */
457 | void*  dlpvalloc(size_t);
458 | 
459 | /*
460 |   malloc_trim(size_t pad);
461 | 
462 |   If possible, gives memory back to the system (via negative arguments
463 |   to sbrk) if there is unused memory at the `high' end of the malloc
464 |   pool or in unused MMAP segments. You can call this after freeing
465 |   large blocks of memory to potentially reduce the system-level memory
466 |   requirements of a program. However, it cannot guarantee to reduce
467 |   memory. Under some allocation patterns, some large free blocks of
468 |   memory will be locked between two used chunks, so they cannot be
469 |   given back to the system.
470 | 
471 |   The `pad' argument to malloc_trim represents the amount of free
472 |   trailing space to leave untrimmed. If this argument is zero, only
473 |   the minimum amount of memory to maintain internal data structures
474 |   will be left. Non-zero arguments can be supplied to maintain enough
475 |   trailing space to service future expected allocations without having
476 |   to re-obtain memory from the system.
477 | 
478 |   Malloc_trim returns 1 if it actually released any memory, else 0.
479 | */
480 | int  dlmalloc_trim(size_t);
481 | 
482 | /*
483 |   malloc_stats();
484 |   Prints on stderr the amount of space obtained from the system (both
485 |   via sbrk and mmap), the maximum amount (which may be more than
486 |   current if malloc_trim and/or munmap got called), and the current
487 |   number of bytes allocated via malloc (or realloc, etc) but not yet
488 |   freed. Note that this is the number of bytes allocated, not the
489 |   number requested. It will be larger than the number requested
490 |   because of alignment and bookkeeping overhead. Because it includes
491 |   alignment wastage as being in use, this figure may be greater than
492 |   zero even when no user-level chunks are allocated.
493 | 
494 |   The reported current and maximum system memory can be inaccurate if
495 |   a program makes other calls to system memory allocation functions
496 |   (normally sbrk) outside of malloc.
497 | 
498 |   malloc_stats prints only the most commonly interesting statistics.
499 |   More information can be obtained by calling mallinfo.
500 |   
501 |   malloc_stats is not compiled if NO_MALLOC_STATS is defined.
502 | */
503 | void  dlmalloc_stats(void);
504 | 
505 | #endif /* !ONLY_MSPACES */
506 | 
507 | /*
508 |   malloc_usable_size(const void* p);
509 | 
510 |   Returns the number of bytes you can actually use in
511 |   an allocated chunk, which may be more than you requested (although
512 |   often not) due to alignment and minimum size constraints.
513 |   You can use this many bytes without worrying about
514 |   overwriting other allocated objects. This is not a particularly great
515 |   programming practice. malloc_usable_size can be more useful in
516 |   debugging and assertions, for example:
517 | 
518 |   p = malloc(n);
519 |   assert(malloc_usable_size(p) >= 256);
520 | */
521 | size_t dlmalloc_usable_size(const void*);
522 | 
523 | #if MSPACES
524 | 
525 | /*
526 |   mspace is an opaque type representing an independent
527 |   region of space that supports mspace_malloc, etc.
528 | */
529 | typedef void* mspace;
530 | 
531 | /*
532 |   create_mspace creates and returns a new independent space with the
533 |   given initial capacity, or, if 0, the default granularity size.  It
534 |   returns null if there is no system memory available to create the
535 |   space.  If argument locked is non-zero, the space uses a separate
536 |   lock to control access. The capacity of the space will grow
537 |   dynamically as needed to service mspace_malloc requests.  You can
538 |   control the sizes of incremental increases of this space by
539 |   compiling with a different DEFAULT_GRANULARITY or dynamically
540 |   setting with mallopt(M_GRANULARITY, value).
541 | */
542 | mspace create_mspace(size_t capacity, int locked);
543 | 
544 | /*
545 |   destroy_mspace destroys the given space, and attempts to return all
546 |   of its memory back to the system, returning the total number of
547 |   bytes freed. After destruction, the results of access to all memory
548 |   used by the space become undefined.
549 | */
550 | size_t destroy_mspace(mspace msp);
551 | 
552 | /*
553 |   create_mspace_with_base uses the memory supplied as the initial base
554 |   of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this
555 |   space is used for bookkeeping, so the capacity must be at least this
556 |   large. (Otherwise 0 is returned.) When this initial space is
557 |   exhausted, additional memory will be obtained from the system.
558 |   Destroying this space will deallocate all additionally allocated
559 |   space (if possible) but not the initial base.
560 | */
561 | mspace create_mspace_with_base(void* base, size_t capacity, int locked);
562 | 
563 | /*
564 |   mspace_track_large_chunks controls whether requests for large chunks
565 |   are allocated in their own untracked mmapped regions, separate from
566 |   others in this mspace. By default large chunks are not tracked,
567 |   which reduces fragmentation. However, such chunks are not
568 |   necessarily released to the system upon destroy_mspace.  Enabling
569 |   tracking by setting to true may increase fragmentation, but avoids
570 |   leakage when relying on destroy_mspace to release all memory
571 |   allocated using this space.  The function returns the previous
572 |   setting.
573 | */
574 | int mspace_track_large_chunks(mspace msp, int enable);
575 | 
576 | #if !NO_MALLINFO
577 | /*
578 |   mspace_mallinfo behaves as mallinfo, but reports properties of
579 |   the given space.
580 | */
581 | struct mallinfo mspace_mallinfo(mspace msp);
582 | #endif /* !NO_MALLINFO */
583 | 
584 | /*
585 |   An alias for mallopt.
586 | */
587 | int mspace_mallopt(int, int);
588 | 
589 | /*
590 |   The following operate identically to their malloc counterparts,
591 |   but only on the given mspace argument.
592 | */
593 | void* mspace_malloc(mspace msp, size_t bytes);
594 | void mspace_free(mspace msp, void* mem);
595 | void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size);
596 | void* mspace_realloc(mspace msp, void* mem, size_t newsize);
597 | void* mspace_realloc_in_place(mspace msp, void* mem, size_t newsize);
598 | void* mspace_memalign(mspace msp, size_t alignment, size_t bytes);
599 | void** mspace_independent_calloc(mspace msp, size_t n_elements,
600 |                                  size_t elem_size, void* chunks[]);
601 | void** mspace_independent_comalloc(mspace msp, size_t n_elements,
602 |                                    size_t sizes[], void* chunks[]);
603 | size_t mspace_bulk_free(mspace msp, void**, size_t n_elements);
604 | size_t mspace_usable_size(const void* mem);
605 | void mspace_malloc_stats(mspace msp);
606 | int mspace_trim(mspace msp, size_t pad);
607 | size_t mspace_footprint(mspace msp);
608 | size_t mspace_max_footprint(mspace msp);
609 | size_t mspace_footprint_limit(mspace msp);
610 | size_t mspace_set_footprint_limit(mspace msp, size_t bytes);
611 | void mspace_inspect_all(mspace msp, 
612 |                         void(*handler)(void *, void *, size_t, void*),
613 |                         void* arg);
614 | #endif  /* MSPACES */
615 | 
616 | #ifdef __cplusplus
617 | };  /* end of extern "C" */
618 | #endif
619 | 
620 | #endif /* MALLOC_280_H */
621 | 


--------------------------------------------------------------------------------