├── FV2ES
│   ├── System
│   │   ├── app.py
│   │   ├── static
│   │   │   ├── Catalina_8.jpg
│   │   │   ├── css
│   │   │   │   └── main.css
│   │   │   └── js
│   │   │       └── main.js
│   │   └── templates
│   │       ├── base.html
│   │       └── index1.html
│   └── V2EM_prediction
│       ├── main.py
│       └── src
│           ├── attention_block.py
│           ├── c_e2e.py
│           ├── cli.py
│           ├── datasets.py
│           ├── e2e_t.py
│           ├── evaluate.py
│           ├── nestnet
│           │   ├── __init__.py
│           │   ├── features.py
│           │   ├── fx_features.py
│           │   ├── helpers.py
│           │   ├── hub.py
│           │   ├── layers
│           │   │   ├── __init__.py
│           │   │   ├── activations.py
│           │   │   ├── activations_jit.py
│           │   │   ├── activations_me.py
│           │   │   ├── adaptive_avgmax_pool.py
│           │   │   ├── attention_pool2d.py
│           │   │   ├── blur_pool.py
│           │   │   ├── bottleneck_attn.py
│           │   │   ├── cbam.py
│           │   │   ├── classifier.py
│           │   │   ├── cond_conv2d.py
│           │   │   ├── config.py
│           │   │   ├── conv2d_same.py
│           │   │   ├── conv_bn_act.py
│           │   │   ├── create_act.py
│           │   │   ├── create_attn.py
│           │   │   ├── create_conv2d.py
│           │   │   ├── create_norm_act.py
│           │   │   ├── drop.py
│           │   │   ├── eca.py
│           │   │   ├── evo_norm.py
│           │   │   ├── gather_excite.py
│           │   │   ├── global_context.py
│           │   │   ├── halo_attn.py
│           │   │   ├── helpers.py
│           │   │   ├── inplace_abn.py
│           │   │   ├── lambda_layer.py
│           │   │   ├── linear.py
│           │   │   ├── median_pool.py
│           │   │   ├── mixed_conv2d.py
│           │   │   ├── mlp.py
│           │   │   ├── non_local_attn.py
│           │   │   ├── norm.py
│           │   │   ├── norm_act.py
│           │   │   ├── padding.py
│           │   │   ├── patch_embed.py
│           │   │   ├── pool2d_same.py
│           │   │   ├── selective_kernel.py
│           │   │   ├── separable_conv.py
│           │   │   ├── space_to_depth.py
│           │   │   ├── split_attn.py
│           │   │   ├── split_batchnorm.py
│           │   │   ├── squeeze_excite.py
│           │   │   ├── std_conv.py
│           │   │   ├── test_time_pool.py
│           │   │   ├── trace_utils.py
│           │   │   └── weight_init.py
│           │   ├── nest.py
│           │   ├── registry.py
│           │   └── visualizer.py
│           ├── se_block.py
│           ├── trainers
│           │   ├── basetrainer.py
│           │   └── r_emotiontrainer.py
│           ├── transformer_encoder.py
│           ├── utils.py
│           └── vgg_block.py
├── README.md
├── V2EM
│   ├── main.py
│   └── src
│       ├── cli.py
│       ├── datasets.py
│       ├── evaluate.py
│       ├── model
│       │   ├── attention_block.py
│       │   ├── baselines
│       │   │   ├── lf_rnn.py
│       │   │   └── lf_transformer.py
│       │   ├── c_e2e.py
│       │   ├── e2e_t.py
│       │   ├── nestnet
│       │   │   ├── features.py
│       │   │   ├── fx_features.py
│       │   │   ├── helpers.py
│       │   │   ├── hub.py
│       │   │   ├── layers
│       │   │   │   ├── __init__.py
│       │   │   │   ├── activations.py
│       │   │   │   ├── activations_jit.py
│       │   │   │   ├── activations_me.py
│       │   │   │   ├── adaptive_avgmax_pool.py
│       │   │   │   ├── attention_pool2d.py
│       │   │   │   ├── blur_pool.py
│       │   │   │   ├── bottleneck_attn.py
│       │   │   │   ├── cbam.py
│       │   │   │   ├── classifier.py
│       │   │   │   ├── cond_conv2d.py
│       │   │   │   ├── config.py
│       │   │   │   ├── conv2d_same.py
│       │   │   │   ├── conv_bn_act.py
│       │   │   │   ├── create_act.py
│       │   │   │   ├── create_attn.py
│       │   │   │   ├── create_conv2d.py
│       │   │   │   ├── create_norm_act.py
│       │   │   │   ├── drop.py
│       │   │   │   ├── eca.py
│       │   │   │   ├── evo_norm.py
│       │   │   │   ├── gather_excite.py
│       │   │   │   ├── global_context.py
│       │   │   │   ├── halo_attn.py
│       │   │   │   ├── helpers.py
│       │   │   │   ├── inplace_abn.py
│       │   │   │   ├── lambda_layer.py
│       │   │   │   ├── linear.py
│       │   │   │   ├── median_pool.py
│       │   │   │   ├── mixed_conv2d.py
│       │   │   │   ├── mlp.py
│       │   │   │   ├── non_local_attn.py
│       │   │   │   ├── norm.py
│       │   │   │   ├── norm_act.py
│       │   │   │   ├── padding.py
│       │   │   │   ├── patch_embed.py
│       │   │   │   ├── pool2d_same.py
│       │   │   │   ├── selective_kernel.py
│       │   │   │   ├── separable_conv.py
│       │   │   │   ├── space_to_depth.py
│       │   │   │   ├── split_attn.py
│       │   │   │   ├── split_batchnorm.py
│       │   │   │   ├── squeeze_excite.py
│       │   │   │   ├── std_conv.py
│       │   │   │   ├── test_time_pool.py
│       │   │   │   ├── trace_utils.py
│       │   │   │   └── weight_init.py
│       │   │   ├── nest.py
│       │   │   └── registry.py
│       │   ├── se_block.py
│       │   ├── transformer_encoder.py
│       │   └── vgg_block.py
│       ├── trainers
│       │   ├── basetrainer.py
│       │   └── r_emotiontrainer.py
│       └── utils.py
└── dataset_demo
    ├── Readme.md
    ├── Ses01F_impro01.avi
    ├── Ses01F_impro01.wav
    ├── Ses01F_impro01_label.txt
    └── Ses01F_impro01_text.txt

/FV2ES/System/app.py:
--------------------------------------------------------------------------------
import os
import json
import subprocess
import numpy as np
import sys
sys.path.append('../base_iemocap_onetest')  # add the module directory (not a .py file) to the import path ## by ling
# Import Flask
from flask import Flask, render_template, request, redirect, jsonify, send_from_directory
from werkzeug.utils import secure_filename  # sanitize the uploaded filename

# Create Flask instance
app = Flask(__name__)

app.config['UPLOAD_FOLDER'] = '../video'  # upload directory


# The decorator implements route mapping and establishes the association between URL rules and handler functions
# Tell Flask what kind of URL can trigger our function
@app.route('/', methods=['GET'])
def index():
    return render_template('index1.html')

# POST trigger (POST only, so this rule does not shadow the GET route above)
@app.route('/', methods=['POST'])
def upload_function():
    # save file
    if request.method == 'POST':
        # f = request.files['file']
        print(request.files.getlist('file'))
        # save
        for f in request.files.getlist('file'):
            f.save(os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(f.filename)))
        return ('', 204)
        # TODO: return data


@app.route('/predict', methods=['POST'])
def predict_function():
    # call the prediction module and return the prediction result
    cmd = ['python', '../V2EM_prediction/main.py', '--test']
    cmd_result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8')
    if cmd_result.returncode == 0:
        print('success')
        # if successful, read result.txt
        emotion_list = []
        with open('../V2EM_prediction/result.txt', 'r') as f:  # path matches the V2EM_prediction module invoked above
            for line in f.readlines():
                line = line.strip('\n')
                emotion_list.append(line)
        print(emotion_list)

        emotion_vector = {'angry': emotion_list[0], 'excited': emotion_list[1], 'frustrated': emotion_list[2],
                          'happy': emotion_list[3], 'neural': emotion_list[4], 'sad': emotion_list[5], }  # sic: 'neural' is this repo's spelling of the neutral class
        data = jsonify(emotion_vector)
        return data, 201, {"ContentType": "application/json"}


if __name__ == '__main__':
    app.run(debug=True, host='127.0.0.1', port=int(os.environ.get('PORT', 7890)))
--------------------------------------------------------------------------------
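For quick testing outside the browser, the two routes above can be driven with an HTTP client. A minimal sketch, assuming the app is running on its default port 7890 and that a local sample.mp4 exists; the multipart field name 'file' and the response codes come from the handlers above, the rest is illustrative:

import requests

BASE = 'http://127.0.0.1:7890'

# upload a file under the multipart field name 'file' (upload_function saves it to ../video)
with open('sample.mp4', 'rb') as fh:
    r = requests.post(BASE + '/', files={'file': fh})
print(r.status_code)  # 204 on success

# trigger inference; predict_function shells out to V2EM_prediction/main.py
r = requests.post(BASE + '/predict')
if r.status_code == 201:
    print(r.json())  # {'angry': ..., 'excited': ..., 'frustrated': ..., 'happy': ..., 'neural': ..., 'sad': ...}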
/FV2ES/System/static/Catalina_8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/FV2ES/System/static/Catalina_8.jpg
--------------------------------------------------------------------------------
/FV2ES/System/static/css/main.css:
--------------------------------------------------------------------------------
body
{
    background-image:url('../Catalina_8.jpg');
    background-size: cover;
}

.model_name_box{
    width: 30px;
    height: 160px;
    padding: 5px;
    font-size: 20px;
    color:black;
    word-wrap: break-word;
    -webkit-user-select:text;
    float:left;
    display:inline;
}

div.img
{
    margin:3px;
    height:auto;
    width:auto;
    float:left;
    display:inline;
    text-align:center;
}
div.img img
{
    display:inline;
    margin:3px;
    border:1px solid #bebebe;
}


.img-preview, .img-predict{
    width: 100%;
    height: 50%;
    position: relative;
    border: 5px solid #F8F8F8;
    box-shadow: 0px 2px 4px 0px rgba(0, 0, 0, 0.1);
    margin-top: 1em;
    margin-bottom: 1em;
}

.img-preview>div, .img-predict>div, .img-predict>img {
    width: 100%;
    height: 100%;
    background-size: 100%;
    background-repeat: no-repeat;
    background-position: center;
}

#select_parent{
    align: center;  /* note: 'align' is not a standard CSS property */
    width: 300px;
    height: 40px;
    border-radius: 5px;
    box-shadow: 0 0 5px #ccc;
    position: relative;
}

#select_parent:after{
    content: "";
    width: 14px;
    height: 8px;
    position: absolute;
    right: 20px;
    top: 45%;
    pointer-events: none;
}


#select_model{
    border: none;
    outline: none;
    width: 100%;
    height: 40px;
    line-height: 40px;
    appearance: none;
    -webkit-appearance: none;
    -moz-appearance: none;
    padding-left: 60px;
}

.img-preview{
    float:left;
}

.img-predict{
    float: right;
}

input[type="file"] {
    display: none;
}


input[id="submit-button"] {
    display: none;
}


.upload-label{
    display: inline-block;
    padding: 12px 30px;
    background: #ffc107;
    color: #fff;
    font-size: 1em;
    transition: all .4s;
    cursor: pointer;
}

.upload-label:hover{
    background: #34495E;
    color: #39D2B4;
}



.loader {
    border: 8px solid #f3f3f3; /* Light grey */
    border-top: 8px solid #3498db; /* Blue */
    border-radius: 50%;
    width: 50px;
    height: 50px;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* progress bar frame */
.barcontainer{
    width:600px;
    height:25px;
    border:1px solid #708090;
}
#angrybar{
    background:#FFA500;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color:#ffffff;
}
#excitedbar{
    background:#40E0D0;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#frustratedbar{
    background:#98FB98;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#happybar{
    background:#FF69B4;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#neuralbar{  /* 'neural' is this repo's spelling of the neutral class */
    background:#808000;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#sadbar{
    background:#FFCC33;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
--------------------------------------------------------------------------------
/FV2ES/System/templates/base.html:
--------------------------------------------------------------------------------
[The HTML markup of this template was lost when the page was extracted to text. What survives
shows a standard page skeleton whose title is "Emotion Analysis" and which declares the Jinja
block that child templates fill in:]

{% block content %}{% endblock %}
--------------------------------------------------------------------------------
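The next template, index1.html, fills the content block that base.html declares; Flask resolves the {% extends %} through its template loader. A self-contained sketch of that mechanism using Jinja directly (the template strings here are illustrative stand-ins for the real files):

from jinja2 import DictLoader, Environment

env = Environment(loader=DictLoader({
    'base.html': '<title>Emotion Analysis</title> {% block content %}{% endblock %}',
    'index1.html': '{% extends "base.html" %}{% block content %}<h1>Upload</h1>{% endblock %}',
}))
# the child's block body is substituted into the parent's skeleton
print(env.get_template('index1.html').render())
# -> '<title>Emotion Analysis</title> <h1>Upload</h1>'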
/FV2ES/System/templates/index1.html:
--------------------------------------------------------------------------------
{% extends "base.html" %} {% block content %}

[The body of this template was lost in extraction — evidently the upload form, the preview and
result panes, the per-emotion progress bars styled in main.css, and the inline script that
drives the handlers in app.py. The recoverable text is the heading and upload hint:]

Upload Your Video, Audio, Text and Label here
Tip: You need to submit video (mp4 and avi), audio (wav), text (txt) and divided files (txt)

{% endblock %}
--------------------------------------------------------------------------------
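The result pane this template once rendered is driven by result.txt: app.py reads it back as six lines in a fixed label order and serves them as JSON. A minimal sketch of that file convention, assuming one score per line; the writer function is hypothetical (the real write happens in the trainer code, which this dump does not show):

EMOTIONS = ['angry', 'excited', 'frustrated', 'happy', 'neural', 'sad']  # order assumed by app.py

def write_result(scores, path='result.txt'):
    # hypothetical writer: one score per line, same order as EMOTIONS
    with open(path, 'w') as f:
        f.writelines(f'{s}\n' for s in scores)

def read_result(path='result.txt'):
    # mirrors the parsing loop in System/app.py
    with open(path) as f:
        return dict(zip(EMOTIONS, (line.strip('\n') for line in f)))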
/FV2ES/V2EM_prediction/main.py:
--------------------------------------------------------------------------------
import os
import time
import torch
import numpy as np
from torch.utils.data import DataLoader
from src.cli import get_args
from src.datasets import get_dataset_iemocap, collate_fn

from src.models.c_e2e import MME2E

from src.trainers.r_emotiontrainer import IemocapTrainer

import sys

if __name__ == "__main__":
    start = time.time()

    # Hard-coded CLI so the system can invoke this script with a bare '--test';
    # get_args() below parses this list instead of the real command line.
    sys.argv = ['main.py', '-lr=4.5e-6', '-ep=40', '-mod=tav', '-bs=1', '--img-interval=500',
                '--early-stop=6', '--loss=bce', '--cuda=0', '--model=mme2e', '--num-emotions=6',
                '--trans-dim=64', '--trans-nlayers=4', '--trans-nheads=4', '--text-lr-factor=10',
                '--text-model-size=base', '--text-max-len=100', '--test',
                '--datapath=../IEMOCAP_PREPROCESS_10']

    args = get_args()

    # Fix seed for reproducibility
    seed = args['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set device
    os.environ["CUDA_VISIBLE_DEVICES"] = args['cuda']
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # input data
    test_dataset = get_dataset_iemocap(data_folder=args['datapath'], phase='test',
                                       img_interval=args['img_interval'], hand_crafted_features=args['hand_crafted'])
    test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False,
                             num_workers=0, collate_fn=collate_fn)

    dataloaders = {
        'test': test_loader
    }

    lr = args['learning_rate']
    if args['model'] == 'mme2e':
        model = MME2E(args=args, device=device)
        model = model.to(device=device)

        # When using a pre-trained text model, text_lr_factor gives a smaller learning rate to the textual model parts
        if args['text_lr_factor'] == 1:
            optimizer = torch.optim.Adam(model.parameters(), lr=args['learning_rate'], weight_decay=args['weight_decay'])
        else:
            optimizer = torch.optim.Adam([
                {'params': model.T.parameters(), 'lr': lr / args['text_lr_factor']},
                {'params': model.t_out.parameters(), 'lr': lr / args['text_lr_factor']},
                {'params': model.V.parameters()},
                {'params': model.v_flatten.parameters()},
                {'params': model.v_transformer.parameters()},
                {'params': model.v_out.parameters()},
                {'params': model.A.parameters()},
                {'params': model.a_flatten.parameters()},
                {'params': model.a_transformer.parameters()},
                {'params': model.a_out.parameters()},
                {'params': model.weighted_fusion.parameters()},
            ], lr=lr, weight_decay=args['weight_decay'])

    checkpoint = torch.load("./savings/models/mme2e_tav_Acc_0.8477_F1_0.5857_AUC_0.8747_imginvl500_seed0.pt", map_location='cuda:0')
    model.load_state_dict(checkpoint, False)  # load the best checkpoint non-strictly for validation/testing; comment this line out when training from scratch

    scheduler = None
    criterion = None

    trainer = IemocapTrainer(args, model, criterion, optimizer, scheduler, device, dataloaders)

    trainer.test()


    end = time.time()

    print(f'Total time usage = {(end - start) :.2f} seconds.')
--------------------------------------------------------------------------------
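A note on the load_state_dict(checkpoint, False) call above: the second positional argument is strict, so the checkpoint is loaded non-strictly. A small self-contained illustration of what that buys (the toy module is ours; the behavior is standard PyTorch):

import torch
from torch import nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
partial_ckpt = {'0.weight': torch.zeros(8, 4), '0.bias': torch.zeros(8)}  # covers only the first layer

# strict=False tolerates missing/unexpected keys instead of raising,
# and returns them so the caller can verify what was actually loaded
result = net.load_state_dict(partial_ckpt, strict=False)
print(result.missing_keys)     # ['2.weight', '2.bias']
print(result.unexpected_keys)  # []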
/FV2ES/V2EM_prediction/src/attention_block.py:
--------------------------------------------------------------------------------
import torch
from torch import nn
import torch.nn.functional as F
from typing import List


class CrossModalAttentionLayer(nn.Module):
    # y attends x
    def __init__(self, k, x_channels: int, y_size: int, spatial=True):
        super(CrossModalAttentionLayer, self).__init__()
        self.k = k
        self.spatial = spatial

        if spatial:
            self.channel_affine = nn.Linear(x_channels, k)

        self.y_affine = nn.Linear(y_size, k, bias=False)
        self.attn_weight_affine = nn.Linear(k, 1)

    def forward(self, x: List[torch.Tensor], x_lens: List[int], y: torch.Tensor):
        # x -> [(S, C, H, W)], len(x) = bs
        # y -> (bs, D)

        bs = y.size(0)
        x = x.split(x_lens, dim=0)
        y_k = self.y_affine(y)  # (bs, k)

        all_spatial_attn_weights_softmax = []

        for i in range(bs):
            if self.spatial:
                x_tensor = x[i].permute(0, 2, 3, 1)  # (S_v, H_v, W_v, C_v)
                x_k = self.channel_affine(x_tensor)  # (S_v, H_v, W_v, k)
                x_k += y_k[i]
                x_k = torch.tanh(x_k)
                x_attn_weights = self.attn_weight_affine(x_k).squeeze(-1)  # (S_v, H_v, W_v)

                all_spatial_attn_weights_softmax.append(
                    F.softmax(
                        x_attn_weights.reshape(x_tensor.size(0), -1),
                        dim=-1
                    ).reshape(x_tensor.size(0), x_tensor.size(1), x_tensor.size(2))  # (S_v, H_v, W_v)
                )

        return torch.cat(all_spatial_attn_weights_softmax, dim=0)

class SparseCrossModalAttentionLayer(nn.Module):
    def __init__(self, k: int, x_channels: int, y_size: int, sparse_threshold: float):
        super(SparseCrossModalAttentionLayer, self).__init__()
        self.k = k
        self.sparse_threshold = sparse_threshold
        self.channel_affine = nn.Linear(x_channels, k)
        self.y_affine = nn.Linear(y_size, k, bias=False)
        self.attn_weight_affine = nn.Linear(k, 1)

    def forward(self, x: List[torch.Tensor], x_lens: List[int], locations: List[torch.Tensor], y: torch.Tensor):
        # x -> (N, C)
        # locations -> (N, 3)
        # y -> (bs, D)
        bs = y.size(0)
        y_k = self.y_affine(y)  # (bs, k)
        x_k = self.channel_affine(x)  # (N, k)

        sample_points_lens = []
        for i in range(sum(x_lens)):
            sample_points_lens.append(len(locations[locations[:, 2] == i]))

        # how many points are left in each batch
        batch_points_lens = []
        pointer = 0
        for l in x_lens:
            batch_points_lens.append(sum(sample_points_lens[pointer:(pointer + l)]))
            pointer += l

        x_ks = x_k.split(batch_points_lens, dim=0)

        attn_weights = []
        for i in range(bs):
            this_weights = self.attn_weight_affine(torch.tanh(x_ks[i] + y_k[i])).squeeze(-1)
            attn_weights.append(this_weights)

        attn_weights = torch.cat(attn_weights, dim=0)
        attn_weights_split = list(attn_weights.split(sample_points_lens, dim=0))
        attn_weights_split = [F.softmax(a, dim=-1) for a in attn_weights_split]
        attn_weights = torch.cat(attn_weights_split, dim=0)

        attn_weights_sparse = to_sparse_by_cdf(attn_weights, sample_points_lens, self.sparse_threshold)

        select_indices = attn_weights_sparse == 1
        new_x = x[select_indices, :]
        new_locations = locations[select_indices, :]

        return new_x, new_locations, None

def to_sparse_by_cdf(t: torch.Tensor, lens, cdf: float):
    _t = t.clone().detach()
    _t = list(_t.split(lens, dim=0))

    for i, this_t in enumerate(_t):
        this_t_sorted, indices = torch.sort(this_t, descending=True)
        mask = torch.cumsum(this_t_sorted, dim=-1) < cdf
        mask[torch.sum(mask)] = True  # also keep the element that crosses the CDF threshold
        _t[i][indices[mask]] = 1
        _t[i][indices[~mask]] = 0

    return torch.cat(_t, dim=0).long()
--------------------------------------------------------------------------------
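to_sparse_by_cdf above keeps, within each segment of lens, the highest attention weights whose cumulative sum first reaches cdf, and zeroes the rest; this is what makes the attention sparse. A quick worked example (values chosen for illustration):

import torch
from src.attention_block import to_sparse_by_cdf  # import path as laid out in this repo

w = torch.tensor([0.5, 0.3, 0.1, 0.1])
print(to_sparse_by_cdf(w, [4], cdf=0.7))
# tensor([1, 1, 0, 0]) -- 0.5 alone is below 0.7, so the 0.3 that crosses the
# threshold is also kept (that is the `mask[torch.sum(mask)] = True` line)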
/FV2ES/V2EM_prediction/src/e2e_t.py:
--------------------------------------------------------------------------------
from torch import nn
from transformers import AlbertModel

class MME2E_T(nn.Module):
    def __init__(self, feature_dim, num_classes=4, size='base'):
        super(MME2E_T, self).__init__()
        self.albert = AlbertModel.from_pretrained(f'albert-{size}-v2')
        # self.albert = AlbertModel.from_pretrained('./src/models/albert-base-v2')

    def forward(self, text, get_cls=False):
        last_hidden_state = self.albert(**text).last_hidden_state
        if get_cls:
            cls_feature = last_hidden_state[:,0]
            return cls_feature

        # NOTE: self.text_feature_affine is never defined in __init__, so this
        # fallback path would raise AttributeError; callers must pass get_cls=True
        text_features = self.text_feature_affine(last_hidden_state).sum(1)
        return text_features
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/nestnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/FV2ES/V2EM_prediction/src/nestnet/__init__.py
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/nestnet/fx_features.py:
-------------------------------------------------------------------------------- 1 | """ PyTorch FX Based Feature Extraction Helpers 2 | Using https://pytorch.org/vision/stable/feature_extraction.html 3 | """ 4 | from typing import Callable 5 | from torch import nn 6 | 7 | from .features import _get_feature_info 8 | 9 | try: 10 | from torchvision.models.feature_extraction import create_feature_extractor 11 | has_fx_feature_extraction = True 12 | except ImportError: 13 | has_fx_feature_extraction = False 14 | 15 | # Layers we want to treat as leaf modules 16 | from .layers import Conv2dSame, ScaledStdConv2dSame, BatchNormAct2d, BlurPool2d, CondConv2d, StdConv2dSame, DropPath 17 | from .layers.non_local_attn import BilinearAttnTransform 18 | from .layers.pool2d_same import MaxPool2dSame, AvgPool2dSame 19 | 20 | # NOTE: By default, any modules from timm.models.layers that we want to treat as leaf modules go here 21 | # BUT modules from timm.models should use the registration mechanism below 22 | _leaf_modules = { 23 | BatchNormAct2d, # reason: flow control for jit scripting 24 | BilinearAttnTransform, # reason: flow control t <= 1 25 | BlurPool2d, # reason: TypeError: F.conv2d received Proxy in groups=x.shape[1] 26 | # Reason: get_same_padding has a max which raises a control flow error 27 | Conv2dSame, MaxPool2dSame, ScaledStdConv2dSame, StdConv2dSame, AvgPool2dSame, 28 | CondConv2d, # reason: TypeError: F.conv2d received Proxy in groups=self.groups * B (because B = x.shape[0]) 29 | DropPath, # reason: TypeError: rand received Proxy in `size` argument 30 | } 31 | 32 | try: 33 | from .layers import InplaceAbn 34 | _leaf_modules.add(InplaceAbn) 35 | except ImportError: 36 | pass 37 | 38 | 39 | def register_notrace_module(module: nn.Module): 40 | """ 41 | Any module not under timm.models.layers should get this decorator if we don't want to trace through it.
42 | """ 43 | _leaf_modules.add(module) 44 | return module 45 | 46 | 47 | # Functions we want to autowrap (treat them as leaves) 48 | _autowrap_functions = set() 49 | 50 | 51 | def register_notrace_function(func: Callable): 52 | """ 53 | Decorator for functions which ought not to be traced through 54 | """ 55 | _autowrap_functions.add(func) 56 | return func 57 | 58 | 59 | class FeatureGraphNet(nn.Module): 60 | def __init__(self, model, out_indices, out_map=None): 61 | super().__init__() 62 | assert has_fx_feature_extraction, 'Please update to PyTorch 1.10+, torchvision 0.11+ for FX feature extraction' 63 | self.feature_info = _get_feature_info(model, out_indices) 64 | if out_map is not None: 65 | assert len(out_map) == len(out_indices) 66 | return_nodes = {info['module']: out_map[i] if out_map is not None else info['module'] 67 | for i, info in enumerate(self.feature_info) if i in out_indices} 68 | self.graph_module = create_feature_extractor( 69 | model, return_nodes, 70 | tracer_kwargs={'leaf_modules': list(_leaf_modules), 'autowrap_functions': list(_autowrap_functions)}) 71 | 72 | def forward(self, x): 73 | return list(self.graph_module(x).values()) -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .activations import * 2 | from .adaptive_avgmax_pool import \ 3 | adaptive_avgmax_pool2d, select_adaptive_pool2d, AdaptiveAvgMaxPool2d, SelectAdaptivePool2d 4 | from .blur_pool import BlurPool2d 5 | from .classifier import ClassifierHead, create_classifier 6 | from .cond_conv2d import CondConv2d, get_condconv_initializer 7 | from .config import is_exportable, is_scriptable, is_no_jit, set_exportable, set_scriptable, set_no_jit,\ 8 | set_layer_config 9 | from .conv2d_same import Conv2dSame, conv2d_same 10 | from .conv_bn_act import ConvBnAct 11 | from .create_act import create_act_layer, get_act_layer, get_act_fn 12 | from .create_attn import get_attn, create_attn 13 | from .create_conv2d import create_conv2d 14 | from .create_norm_act import get_norm_act_layer, create_norm_act, convert_norm_act 15 | from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path 16 | from .eca import EcaModule, CecaModule, EfficientChannelAttn, CircularEfficientChannelAttn 17 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 18 | from .gather_excite import GatherExcite 19 | from .global_context import GlobalContext 20 | from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible 21 | from .inplace_abn import InplaceAbn 22 | from .linear import Linear 23 | from .mixed_conv2d import MixedConv2d 24 | from .mlp import Mlp, GluMlp, GatedMlp, ConvMlp 25 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 26 | from .norm import GroupNorm, LayerNorm2d 27 | from .norm_act import BatchNormAct2d, GroupNormAct 28 | from .padding import get_padding, get_same_padding, pad_same 29 | from .patch_embed import PatchEmbed 30 | from .pool2d_same import AvgPool2dSame, create_pool2d 31 | from .squeeze_excite import SEModule, SqueezeExcite, EffectiveSEModule, EffectiveSqueezeExcite 32 | from .selective_kernel import SelectiveKernel 33 | from .separable_conv import SeparableConv2d, SeparableConvBnAct 34 | from .space_to_depth import SpaceToDepthModule 35 | from .split_attn import SplitAttn 36 | from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model 37 | from .std_conv import StdConv2d, StdConv2dSame, 
ScaledStdConv2d, ScaledStdConv2dSame 38 | from .test_time_pool import TestTimePoolHead, apply_test_time_pool 39 | from .trace_utils import _assert, _float_to_int 40 | from .weight_init import trunc_normal_, variance_scaling_, lecun_normal_ 41 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/activations_jit.py: -------------------------------------------------------------------------------- 1 | """ Activations 2 | 3 | A collection of jit-scripted activations fn and modules with a common interface so that they can 4 | easily be swapped. All have an `inplace` arg even if not used. 5 | 6 | All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not 7 | currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted 8 | versions if they contain in-place ops. 9 | 10 | Hacked together by / Copyright 2020 Ross Wightman 11 | """ 12 | 13 | import torch 14 | from torch import nn as nn 15 | from torch.nn import functional as F 16 | 17 | 18 | @torch.jit.script 19 | def swish_jit(x, inplace: bool = False): 20 | """Swish - Described in: https://arxiv.org/abs/1710.05941 21 | """ 22 | return x.mul(x.sigmoid()) 23 | 24 | 25 | @torch.jit.script 26 | def mish_jit(x, _inplace: bool = False): 27 | """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 28 | """ 29 | return x.mul(F.softplus(x).tanh()) 30 | 31 | 32 | class SwishJit(nn.Module): 33 | def __init__(self, inplace: bool = False): 34 | super(SwishJit, self).__init__() 35 | 36 | def forward(self, x): 37 | return swish_jit(x) 38 | 39 | 40 | class MishJit(nn.Module): 41 | def __init__(self, inplace: bool = False): 42 | super(MishJit, self).__init__() 43 | 44 | def forward(self, x): 45 | return mish_jit(x) 46 | 47 | 48 | @torch.jit.script 49 | def hard_sigmoid_jit(x, inplace: bool = False): 50 | # return F.relu6(x + 3.) / 6. 51 | return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 52 | 53 | 54 | class HardSigmoidJit(nn.Module): 55 | def __init__(self, inplace: bool = False): 56 | super(HardSigmoidJit, self).__init__() 57 | 58 | def forward(self, x): 59 | return hard_sigmoid_jit(x) 60 | 61 | 62 | @torch.jit.script 63 | def hard_swish_jit(x, inplace: bool = False): 64 | # return x * (F.relu6(x + 3.) / 6) 65 | return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 
66 | 67 | 68 | class HardSwishJit(nn.Module): 69 | def __init__(self, inplace: bool = False): 70 | super(HardSwishJit, self).__init__() 71 | 72 | def forward(self, x): 73 | return hard_swish_jit(x) 74 | 75 | 76 | @torch.jit.script 77 | def hard_mish_jit(x, inplace: bool = False): 78 | """ Hard Mish 79 | Experimental, based on notes by Mish author Diganta Misra at 80 | https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md 81 | """ 82 | return 0.5 * x * (x + 2).clamp(min=0, max=2) 83 | 84 | 85 | class HardMishJit(nn.Module): 86 | def __init__(self, inplace: bool = False): 87 | super(HardMishJit, self).__init__() 88 | 89 | def forward(self, x): 90 | return hard_mish_jit(x) 91 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/blur_pool.py: -------------------------------------------------------------------------------- 1 | """ 2 | BlurPool layer inspired by 3 | - Kornia's Max_BlurPool2d 4 | - Making Convolutional Networks Shift-Invariant Again :cite:`zhang2019shiftinvar` 5 | 6 | Hacked together by Chris Ha and Ross Wightman 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import numpy as np 13 | from .padding import get_padding 14 | 15 | 16 | class BlurPool2d(nn.Module): 17 | r"""Creates a module that computes blurs and downsample a given feature map. 18 | See :cite:`zhang2019shiftinvar` for more details. 19 | Corresponds to the Downsample class, which does blurring and subsampling 20 | 21 | Args: 22 | channels = Number of input channels 23 | filt_size (int): binomial filter size for blurring. currently supports 3 (default) and 5. 24 | stride (int): downsampling filter stride 25 | 26 | Returns: 27 | torch.Tensor: the transformed tensor. 
28 | """ 29 | def __init__(self, channels, filt_size=3, stride=2) -> None: 30 | super(BlurPool2d, self).__init__() 31 | assert filt_size > 1 32 | self.channels = channels 33 | self.filt_size = filt_size 34 | self.stride = stride 35 | self.padding = [get_padding(filt_size, stride, dilation=1)] * 4 36 | coeffs = torch.tensor((np.poly1d((0.5, 0.5)) ** (self.filt_size - 1)).coeffs.astype(np.float32)) 37 | blur_filter = (coeffs[:, None] * coeffs[None, :])[None, None, :, :].repeat(self.channels, 1, 1, 1) 38 | self.register_buffer('filt', blur_filter, persistent=False) 39 | 40 | def forward(self, x: torch.Tensor) -> torch.Tensor: 41 | x = F.pad(x, self.padding, 'reflect') 42 | return F.conv2d(x, self.filt, stride=self.stride, groups=x.shape[1]) 43 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/classifier.py: -------------------------------------------------------------------------------- 1 | """ Classifier head and layer factory 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | from torch.nn import functional as F 7 | 8 | from .adaptive_avgmax_pool import SelectAdaptivePool2d 9 | 10 | 11 | def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False): 12 | flatten_in_pool = not use_conv # flatten when we use a Linear layer after pooling 13 | if not pool_type: 14 | assert num_classes == 0 or use_conv,\ 15 | 'Pooling can only be disabled if classifier is also removed or conv classifier is used' 16 | flatten_in_pool = False # disable flattening if pooling is pass-through (no pooling) 17 | global_pool = SelectAdaptivePool2d(pool_type=pool_type, flatten=flatten_in_pool) 18 | num_pooled_features = num_features * global_pool.feat_mult() 19 | return global_pool, num_pooled_features 20 | 21 | 22 | def _create_fc(num_features, num_classes, use_conv=False): 23 | if num_classes <= 0: 24 | fc = nn.Identity() # pass-through (no classifier) 25 | elif use_conv: 26 | fc = nn.Conv2d(num_features, num_classes, 1, bias=True) 27 | else: 28 | fc = nn.Linear(num_features, num_classes, bias=True) 29 | return fc 30 | 31 | 32 | def create_classifier(num_features, num_classes, pool_type='avg', use_conv=False): 33 | global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv) 34 | fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 35 | return global_pool, fc 36 | 37 | 38 | class ClassifierHead(nn.Module): 39 | """Classifier head w/ configurable global pooling and dropout.""" 40 | 41 | def __init__(self, in_chs, num_classes, pool_type='avg', drop_rate=0., use_conv=False): 42 | super(ClassifierHead, self).__init__() 43 | self.drop_rate = drop_rate 44 | self.global_pool, num_pooled_features = _create_pool(in_chs, num_classes, pool_type, use_conv=use_conv) 45 | self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 46 | self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity() 47 | 48 | def forward(self, x): 49 | x = self.global_pool(x) 50 | if self.drop_rate: 51 | x = F.dropout(x, p=float(self.drop_rate), training=self.training) 52 | x = self.fc(x) 53 | x = self.flatten(x) 54 | return x 55 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/config.py: -------------------------------------------------------------------------------- 1 | """ Model / Layer Config singleton state 2 | """ 3 | from typing import Any, Optional 
4 | 5 | __all__ = [ 6 | 'is_exportable', 'is_scriptable', 'is_no_jit', 7 | 'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config' 8 | ] 9 | 10 | # Set to True if prefer to have layers with no jit optimization (includes activations) 11 | _NO_JIT = False 12 | 13 | # Set to True if prefer to have activation layers with no jit optimization 14 | # NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying 15 | # the jit flags so far are activations. This will change as more layers are updated and/or added. 16 | _NO_ACTIVATION_JIT = False 17 | 18 | # Set to True if exporting a model with Same padding via ONNX 19 | _EXPORTABLE = False 20 | 21 | # Set to True if wanting to use torch.jit.script on a model 22 | _SCRIPTABLE = False 23 | 24 | 25 | def is_no_jit(): 26 | return _NO_JIT 27 | 28 | 29 | class set_no_jit: 30 | def __init__(self, mode: bool) -> None: 31 | global _NO_JIT 32 | self.prev = _NO_JIT 33 | _NO_JIT = mode 34 | 35 | def __enter__(self) -> None: 36 | pass 37 | 38 | def __exit__(self, *args: Any) -> bool: 39 | global _NO_JIT 40 | _NO_JIT = self.prev 41 | return False 42 | 43 | 44 | def is_exportable(): 45 | return _EXPORTABLE 46 | 47 | 48 | class set_exportable: 49 | def __init__(self, mode: bool) -> None: 50 | global _EXPORTABLE 51 | self.prev = _EXPORTABLE 52 | _EXPORTABLE = mode 53 | 54 | def __enter__(self) -> None: 55 | pass 56 | 57 | def __exit__(self, *args: Any) -> bool: 58 | global _EXPORTABLE 59 | _EXPORTABLE = self.prev 60 | return False 61 | 62 | 63 | def is_scriptable(): 64 | return _SCRIPTABLE 65 | 66 | 67 | class set_scriptable: 68 | def __init__(self, mode: bool) -> None: 69 | global _SCRIPTABLE 70 | self.prev = _SCRIPTABLE 71 | _SCRIPTABLE = mode 72 | 73 | def __enter__(self) -> None: 74 | pass 75 | 76 | def __exit__(self, *args: Any) -> bool: 77 | global _SCRIPTABLE 78 | _SCRIPTABLE = self.prev 79 | return False 80 | 81 | 82 | class set_layer_config: 83 | """ Layer config context manager that allows setting all layer config flags at once. 84 | If a flag arg is None, it will not change the current value. 
85 | """ 86 | def __init__( 87 | self, 88 | scriptable: Optional[bool] = None, 89 | exportable: Optional[bool] = None, 90 | no_jit: Optional[bool] = None, 91 | no_activation_jit: Optional[bool] = None): 92 | global _SCRIPTABLE 93 | global _EXPORTABLE 94 | global _NO_JIT 95 | global _NO_ACTIVATION_JIT 96 | self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT 97 | if scriptable is not None: 98 | _SCRIPTABLE = scriptable 99 | if exportable is not None: 100 | _EXPORTABLE = exportable 101 | if no_jit is not None: 102 | _NO_JIT = no_jit 103 | if no_activation_jit is not None: 104 | _NO_ACTIVATION_JIT = no_activation_jit 105 | 106 | def __enter__(self) -> None: 107 | pass 108 | 109 | def __exit__(self, *args: Any) -> bool: 110 | global _SCRIPTABLE 111 | global _EXPORTABLE 112 | global _NO_JIT 113 | global _NO_ACTIVATION_JIT 114 | _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev 115 | return False 116 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/conv2d_same.py: -------------------------------------------------------------------------------- 1 | """ Conv2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import Tuple, Optional 9 | 10 | from .padding import pad_same, get_padding_value 11 | 12 | 13 | def conv2d_same( 14 | x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1), 15 | padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1): 16 | x = pad_same(x, weight.shape[-2:], stride, dilation) 17 | return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups) 18 | 19 | 20 | class Conv2dSame(nn.Conv2d): 21 | """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions 22 | """ 23 | 24 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 25 | padding=0, dilation=1, groups=1, bias=True): 26 | super(Conv2dSame, self).__init__( 27 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 28 | 29 | def forward(self, x): 30 | return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 31 | 32 | 33 | def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): 34 | padding = kwargs.pop('padding', '') 35 | kwargs.setdefault('bias', False) 36 | padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs) 37 | if is_dynamic: 38 | return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs) 39 | else: 40 | return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) 41 | 42 | 43 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/conv_bn_act.py: -------------------------------------------------------------------------------- 1 | """ Conv2d + BN + Act 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | 7 | from .create_conv2d import create_conv2d 8 | from .create_norm_act import convert_norm_act 9 | 10 | 11 | class ConvBnAct(nn.Module): 12 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding='', dilation=1, groups=1, 13 | bias=False, apply_act=True, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, aa_layer=None, 14 | drop_block=None): 15 | super(ConvBnAct, self).__init__() 16 | use_aa = aa_layer is not None 17 | 18 | self.conv = create_conv2d( 19 | 
in_channels, out_channels, kernel_size, stride=1 if use_aa else stride, 20 | padding=padding, dilation=dilation, groups=groups, bias=bias) 21 | 22 | # NOTE for backwards compatibility with models that use separate norm and act layer definitions 23 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 24 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 25 | self.aa = aa_layer(channels=out_channels) if stride == 2 and use_aa else None 26 | 27 | @property 28 | def in_channels(self): 29 | return self.conv.in_channels 30 | 31 | @property 32 | def out_channels(self): 33 | return self.conv.out_channels 34 | 35 | def forward(self, x): 36 | x = self.conv(x) 37 | x = self.bn(x) 38 | if self.aa is not None: 39 | x = self.aa(x) 40 | return x 41 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/create_attn.py: -------------------------------------------------------------------------------- 1 | """ Attention Factory 2 | 3 | Hacked together by / Copyright 2021 Ross Wightman 4 | """ 5 | import torch 6 | from functools import partial 7 | 8 | from .bottleneck_attn import BottleneckAttn 9 | from .cbam import CbamModule, LightCbamModule 10 | from .eca import EcaModule, CecaModule 11 | from .gather_excite import GatherExcite 12 | from .global_context import GlobalContext 13 | from .halo_attn import HaloAttn 14 | from .lambda_layer import LambdaLayer 15 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 16 | from .selective_kernel import SelectiveKernel 17 | from .split_attn import SplitAttn 18 | from .squeeze_excite import SEModule, EffectiveSEModule 19 | 20 | 21 | def get_attn(attn_type): 22 | if isinstance(attn_type, torch.nn.Module): 23 | return attn_type 24 | module_cls = None 25 | if attn_type is not None: 26 | if isinstance(attn_type, str): 27 | attn_type = attn_type.lower() 28 | # Lightweight attention modules (channel and/or coarse spatial). 29 | # Typically added to existing network architecture blocks in addition to existing convolutions. 30 | if attn_type == 'se': 31 | module_cls = SEModule 32 | elif attn_type == 'ese': 33 | module_cls = EffectiveSEModule 34 | elif attn_type == 'eca': 35 | module_cls = EcaModule 36 | elif attn_type == 'ecam': 37 | module_cls = partial(EcaModule, use_mlp=True) 38 | elif attn_type == 'ceca': 39 | module_cls = CecaModule 40 | elif attn_type == 'ge': 41 | module_cls = GatherExcite 42 | elif attn_type == 'gc': 43 | module_cls = GlobalContext 44 | elif attn_type == 'gca': 45 | module_cls = partial(GlobalContext, fuse_add=True, fuse_scale=False) 46 | elif attn_type == 'cbam': 47 | module_cls = CbamModule 48 | elif attn_type == 'lcbam': 49 | module_cls = LightCbamModule 50 | 51 | # Attention / attention-like modules w/ significant params 52 | # Typically replace some of the existing workhorse convs in a network architecture. 53 | # All of these accept a stride argument and can spatially downsample the input. 54 | elif attn_type == 'sk': 55 | module_cls = SelectiveKernel 56 | elif attn_type == 'splat': 57 | module_cls = SplitAttn 58 | 59 | # Self-attention / attention-like modules w/ significant compute and/or params 60 | # Typically replace some of the existing workhorse convs in a network architecture. 61 | # All of these accept a stride argument and can spatially downsample the input. 
62 | elif attn_type == 'lambda': 63 | return LambdaLayer 64 | elif attn_type == 'bottleneck': 65 | return BottleneckAttn 66 | elif attn_type == 'halo': 67 | return HaloAttn 68 | elif attn_type == 'nl': 69 | module_cls = NonLocalAttn 70 | elif attn_type == 'bat': 71 | module_cls = BatNonLocalAttn 72 | 73 | # Woops! 74 | else: 75 | assert False, "Invalid attn module (%s)" % attn_type 76 | elif isinstance(attn_type, bool): 77 | if attn_type: 78 | module_cls = SEModule 79 | else: 80 | module_cls = attn_type 81 | return module_cls 82 | 83 | 84 | def create_attn(attn_type, channels, **kwargs): 85 | module_cls = get_attn(attn_type) 86 | if module_cls is not None: 87 | # NOTE: it's expected the first (positional) argument of all attention layers is the # input channels 88 | return module_cls(channels, **kwargs) 89 | return None 90 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/create_conv2d.py: -------------------------------------------------------------------------------- 1 | """ Create Conv2d Factory Method 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | from .mixed_conv2d import MixedConv2d 7 | from .cond_conv2d import CondConv2d 8 | from .conv2d_same import create_conv2d_pad 9 | 10 | 11 | def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): 12 | """ Select a 2d convolution implementation based on arguments 13 | Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d. 14 | 15 | Used extensively by EfficientNet, MobileNetv3 and related networks. 16 | """ 17 | if isinstance(kernel_size, list): 18 | assert 'num_experts' not in kwargs # MixNet + CondConv combo not supported currently 19 | assert 'groups' not in kwargs # MixedConv groups are defined by kernel list 20 | # We're going to use only lists for defining the MixedConv2d kernel groups, 21 | # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. 22 | m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs) 23 | else: 24 | depthwise = kwargs.pop('depthwise', False) 25 | # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0 26 | groups = in_channels if depthwise else kwargs.pop('groups', 1) 27 | if 'num_experts' in kwargs and kwargs['num_experts'] > 0: 28 | m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 29 | else: 30 | m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 31 | return m 32 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/create_norm_act.py: -------------------------------------------------------------------------------- 1 | """ NormAct (Normalizaiton + Activation Layer) Factory 2 | 3 | Create norm + act combo modules that attempt to be backwards compatible with separate norm + act 4 | isntances in models. Where these are used it will be possible to swap separate BN + act layers with 5 | combined modules like IABN or EvoNorms. 
6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | import types 10 | import functools 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 16 | from .norm_act import BatchNormAct2d, GroupNormAct 17 | from .inplace_abn import InplaceAbn 18 | 19 | _NORM_ACT_TYPES = {BatchNormAct2d, GroupNormAct, EvoNormBatch2d, EvoNormSample2d, InplaceAbn} 20 | _NORM_ACT_REQUIRES_ARG = {BatchNormAct2d, GroupNormAct, InplaceAbn} # requires act_layer arg to define act type 21 | 22 | 23 | def get_norm_act_layer(layer_class): 24 | layer_class = layer_class.replace('_', '').lower() 25 | if layer_class.startswith("batchnorm"): 26 | layer = BatchNormAct2d 27 | elif layer_class.startswith("groupnorm"): 28 | layer = GroupNormAct 29 | elif layer_class == "evonormbatch": 30 | layer = EvoNormBatch2d 31 | elif layer_class == "evonormsample": 32 | layer = EvoNormSample2d 33 | elif layer_class == "iabn" or layer_class == "inplaceabn": 34 | layer = InplaceAbn 35 | else: 36 | assert False, "Invalid norm_act layer (%s)" % layer_class 37 | return layer 38 | 39 | 40 | def create_norm_act(layer_type, num_features, apply_act=True, jit=False, **kwargs): 41 | layer_parts = layer_type.split('-') # e.g. batchnorm-leaky_relu 42 | assert len(layer_parts) in (1, 2) 43 | layer = get_norm_act_layer(layer_parts[0]) 44 | #activation_class = layer_parts[1].lower() if len(layer_parts) > 1 else '' # FIXME support string act selection? 45 | layer_instance = layer(num_features, apply_act=apply_act, **kwargs) 46 | if jit: 47 | layer_instance = torch.jit.script(layer_instance) 48 | return layer_instance 49 | 50 | 51 | def convert_norm_act(norm_layer, act_layer): 52 | assert isinstance(norm_layer, (type, str, types.FunctionType, functools.partial)) 53 | assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, functools.partial)) 54 | norm_act_kwargs = {} 55 | 56 | # unbind partial fn, so args can be rebound later 57 | if isinstance(norm_layer, functools.partial): 58 | norm_act_kwargs.update(norm_layer.keywords) 59 | norm_layer = norm_layer.func 60 | 61 | if isinstance(norm_layer, str): 62 | norm_act_layer = get_norm_act_layer(norm_layer) 63 | elif norm_layer in _NORM_ACT_TYPES: 64 | norm_act_layer = norm_layer 65 | elif isinstance(norm_layer, types.FunctionType): 66 | # if function type, must be a lambda/fn that creates a norm_act layer 67 | norm_act_layer = norm_layer 68 | else: 69 | type_name = norm_layer.__name__.lower() 70 | if type_name.startswith('batchnorm'): 71 | norm_act_layer = BatchNormAct2d 72 | elif type_name.startswith('groupnorm'): 73 | norm_act_layer = GroupNormAct 74 | else: 75 | assert False, f"No equivalent norm_act layer for {type_name}" 76 | 77 | if norm_act_layer in _NORM_ACT_REQUIRES_ARG: 78 | # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. 
79 | # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types 80 | norm_act_kwargs.setdefault('act_layer', act_layer) 81 | if norm_act_kwargs: 82 | norm_act_layer = functools.partial(norm_act_layer, **norm_act_kwargs) # bind/rebind args 83 | return norm_act_layer 84 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/evo_norm.py: -------------------------------------------------------------------------------- 1 | """EvoNormB0 (Batched) and EvoNormS0 (Sample) in PyTorch 2 | 3 | An attempt at getting decent performing EvoNorms running in PyTorch. 4 | While currently faster than other impl, still quite a ways off the built-in BN 5 | in terms of memory usage and throughput (roughly 5x mem, 1/2 - 1/3x speed). 6 | 7 | Still very much a WIP, fiddling with buffer usage, in-place/jit optimizations, and layouts. 8 | 9 | Hacked together by / Copyright 2020 Ross Wightman 10 | """ 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .trace_utils import _assert 16 | 17 | 18 | class EvoNormBatch2d(nn.Module): 19 | def __init__(self, num_features, apply_act=True, momentum=0.1, eps=1e-5, drop_block=None): 20 | super(EvoNormBatch2d, self).__init__() 21 | self.apply_act = apply_act # apply activation (non-linearity) 22 | self.momentum = momentum 23 | self.eps = eps 24 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 25 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 26 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 27 | self.register_buffer('running_var', torch.ones(num_features)) 28 | self.reset_parameters() 29 | 30 | def reset_parameters(self): 31 | nn.init.ones_(self.weight) 32 | nn.init.zeros_(self.bias) 33 | if self.apply_act: 34 | nn.init.ones_(self.v) 35 | 36 | def forward(self, x): 37 | _assert(x.dim() == 4, 'expected 4D input') 38 | x_type = x.dtype 39 | if self.v is not None: 40 | running_var = self.running_var.view(1, -1, 1, 1) 41 | if self.training: 42 | var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) 43 | n = x.numel() / x.shape[1] 44 | running_var = var.detach() * self.momentum * (n / (n - 1)) + running_var * (1 - self.momentum) 45 | self.running_var.copy_(running_var.view(self.running_var.shape)) 46 | else: 47 | var = running_var 48 | v = self.v.to(dtype=x_type).reshape(1, -1, 1, 1) 49 | d = x * v + (x.var(dim=(2, 3), unbiased=False, keepdim=True) + self.eps).sqrt().to(dtype=x_type) 50 | d = d.max((var + self.eps).sqrt().to(dtype=x_type)) 51 | x = x / d 52 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 53 | 54 | 55 | class EvoNormSample2d(nn.Module): 56 | def __init__(self, num_features, apply_act=True, groups=32, eps=1e-5, drop_block=None): 57 | super(EvoNormSample2d, self).__init__() 58 | self.apply_act = apply_act # apply activation (non-linearity) 59 | self.groups = groups 60 | self.eps = eps 61 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 62 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 63 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 64 | self.reset_parameters() 65 | 66 | def reset_parameters(self): 67 | nn.init.ones_(self.weight) 68 | nn.init.zeros_(self.bias) 69 | if self.apply_act: 70 | nn.init.ones_(self.v) 71 | 72 | def forward(self, x): 73 | _assert(x.dim() == 4, 'expected 4D input') 74 | B, C, H, W = x.shape 75 | _assert(C 
% self.groups == 0, '') 76 | if self.v is not None: 77 | n = x * (x * self.v.view(1, -1, 1, 1)).sigmoid() 78 | x = x.reshape(B, self.groups, -1) 79 | x = n.reshape(B, self.groups, -1) / (x.var(dim=-1, unbiased=False, keepdim=True) + self.eps).sqrt() 80 | x = x.reshape(B, C, H, W) 81 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 82 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/gather_excite.py: -------------------------------------------------------------------------------- 1 | """ Gather-Excite Attention Block 2 | 3 | Paper: `Gather-Excite: Exploiting Feature Context in CNNs` - https://arxiv.org/abs/1810.12348 4 | 5 | Official code here, but it's only partial impl in Caffe: https://github.com/hujie-frank/GENet 6 | 7 | I've tried to support all of the extent both w/ and w/o params. I don't believe I've seen another 8 | impl that covers all of the cases. 9 | 10 | NOTE: extent=0 + extra_params=False is equivalent to Squeeze-and-Excitation 11 | 12 | Hacked together by / Copyright 2021 Ross Wightman 13 | """ 14 | import math 15 | 16 | from torch import nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .create_act import create_act_layer, get_act_layer 20 | from .create_conv2d import create_conv2d 21 | from .helpers import make_divisible 22 | from .mlp import ConvMlp 23 | 24 | 25 | class GatherExcite(nn.Module): 26 | """ Gather-Excite Attention Module 27 | """ 28 | def __init__( 29 | self, channels, feat_size=None, extra_params=False, extent=0, use_mlp=True, 30 | rd_ratio=1./16, rd_channels=None, rd_divisor=1, add_maxpool=False, 31 | act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, gate_layer='sigmoid'): 32 | super(GatherExcite, self).__init__() 33 | self.add_maxpool = add_maxpool 34 | act_layer = get_act_layer(act_layer) 35 | self.extent = extent 36 | if extra_params: 37 | self.gather = nn.Sequential() 38 | if extent == 0: 39 | assert feat_size is not None, 'spatial feature size must be specified for global extent w/ params' 40 | self.gather.add_module( 41 | 'conv1', create_conv2d(channels, channels, kernel_size=feat_size, stride=1, depthwise=True)) 42 | if norm_layer: 43 | self.gather.add_module(f'norm1', nn.BatchNorm2d(channels)) 44 | else: 45 | assert extent % 2 == 0 46 | num_conv = int(math.log2(extent)) 47 | for i in range(num_conv): 48 | self.gather.add_module( 49 | f'conv{i + 1}', 50 | create_conv2d(channels, channels, kernel_size=3, stride=2, depthwise=True)) 51 | if norm_layer: 52 | self.gather.add_module(f'norm{i + 1}', nn.BatchNorm2d(channels)) 53 | if i != num_conv - 1: 54 | self.gather.add_module(f'act{i + 1}', act_layer(inplace=True)) 55 | else: 56 | self.gather = None 57 | if self.extent == 0: 58 | self.gk = 0 59 | self.gs = 0 60 | else: 61 | assert extent % 2 == 0 62 | self.gk = self.extent * 2 - 1 63 | self.gs = self.extent 64 | 65 | if not rd_channels: 66 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
67 | self.mlp = ConvMlp(channels, rd_channels, act_layer=act_layer) if use_mlp else nn.Identity() 68 | self.gate = create_act_layer(gate_layer) 69 | 70 | def forward(self, x): 71 | size = x.shape[-2:] 72 | if self.gather is not None: 73 | x_ge = self.gather(x) 74 | else: 75 | if self.extent == 0: 76 | # global extent 77 | x_ge = x.mean(dim=(2, 3), keepdims=True) 78 | if self.add_maxpool: 79 | # experimental codepath, may remove or change 80 | x_ge = 0.5 * x_ge + 0.5 * x.amax((2, 3), keepdim=True) 81 | else: 82 | x_ge = F.avg_pool2d( 83 | x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2, count_include_pad=False) 84 | if self.add_maxpool: 85 | # experimental codepath, may remove or change 86 | x_ge = 0.5 * x_ge + 0.5 * F.max_pool2d(x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2) 87 | x_ge = self.mlp(x_ge) 88 | if x_ge.shape[-1] != 1 or x_ge.shape[-2] != 1: 89 | x_ge = F.interpolate(x_ge, size=size) 90 | return x * self.gate(x_ge) 91 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/global_context.py: -------------------------------------------------------------------------------- 1 | """ Global Context Attention Block 2 | 3 | Paper: `GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond` 4 | - https://arxiv.org/abs/1904.11492 5 | 6 | Official code consulted as reference: https://github.com/xvjiarui/GCNet 7 | 8 | Hacked together by / Copyright 2021 Ross Wightman 9 | """ 10 | from torch import nn as nn 11 | import torch.nn.functional as F 12 | 13 | from .create_act import create_act_layer, get_act_layer 14 | from .helpers import make_divisible 15 | from .mlp import ConvMlp 16 | from .norm import LayerNorm2d 17 | 18 | 19 | class GlobalContext(nn.Module): 20 | 21 | def __init__(self, channels, use_attn=True, fuse_add=False, fuse_scale=True, init_last_zero=False, 22 | rd_ratio=1./8, rd_channels=None, rd_divisor=1, act_layer=nn.ReLU, gate_layer='sigmoid'): 23 | super(GlobalContext, self).__init__() 24 | act_layer = get_act_layer(act_layer) 25 | 26 | self.conv_attn = nn.Conv2d(channels, 1, kernel_size=1, bias=True) if use_attn else None 27 | 28 | if rd_channels is None: 29 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
30 | if fuse_add: 31 | self.mlp_add = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 32 | else: 33 | self.mlp_add = None 34 | if fuse_scale: 35 | self.mlp_scale = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 36 | else: 37 | self.mlp_scale = None 38 | 39 | self.gate = create_act_layer(gate_layer) 40 | self.init_last_zero = init_last_zero 41 | self.reset_parameters() 42 | 43 | def reset_parameters(self): 44 | if self.conv_attn is not None: 45 | nn.init.kaiming_normal_(self.conv_attn.weight, mode='fan_in', nonlinearity='relu') 46 | if self.mlp_add is not None: 47 | nn.init.zeros_(self.mlp_add.fc2.weight) 48 | 49 | def forward(self, x): 50 | B, C, H, W = x.shape 51 | 52 | if self.conv_attn is not None: 53 | attn = self.conv_attn(x).reshape(B, 1, H * W) # (B, 1, H * W) 54 | attn = F.softmax(attn, dim=-1).unsqueeze(3) # (B, 1, H * W, 1) 55 | context = x.reshape(B, C, H * W).unsqueeze(1) @ attn 56 | context = context.view(B, C, 1, 1) 57 | else: 58 | context = x.mean(dim=(2, 3), keepdim=True) 59 | 60 | if self.mlp_scale is not None: 61 | mlp_x = self.mlp_scale(context) 62 | x = x * self.gate(mlp_x) 63 | if self.mlp_add is not None: 64 | mlp_x = self.mlp_add(context) 65 | x = x + mlp_x 66 | 67 | return x 68 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from itertools import repeat 6 | import collections.abc 7 | 8 | 9 | # From PyTorch internals 10 | def _ntuple(n): 11 | def parse(x): 12 | if isinstance(x, collections.abc.Iterable): 13 | return x 14 | return tuple(repeat(x, n)) 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/inplace_abn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn as nn 3 | 4 | try: 5 | from inplace_abn.functions import inplace_abn, inplace_abn_sync 6 | has_iabn = True 7 | except ImportError: 8 | has_iabn = False 9 | 10 | def inplace_abn(x, weight, bias, running_mean, running_var, 11 | training=True, momentum=0.1, eps=1e-05, activation="leaky_relu", activation_param=0.01): 12 | raise ImportError( 13 | "Please install InplaceABN:'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.12'") 14 | 15 | def inplace_abn_sync(**kwargs): 16 | inplace_abn(**kwargs) 17 | 18 | 19 | class InplaceAbn(nn.Module): 20 | """Activated Batch Normalization 21 | 22 | This gathers a BatchNorm and an activation function in a single module 23 | 24 | Parameters 25 | ---------- 26 | num_features : int 27 | Number of feature channels in the input and output. 28 | eps : float 29 | Small constant to prevent numerical issues. 30 | momentum : float 31 | Momentum factor applied to compute running statistics. 
32 | affine : bool 33 | If `True` apply learned scale and shift transformation after normalization. 34 | act_layer : str or nn.Module type 35 | Name or type of the activation functions, one of: `leaky_relu`, `elu` 36 | act_param : float 37 | Negative slope for the `leaky_relu` activation. 38 | """ 39 | 40 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, apply_act=True, 41 | act_layer="leaky_relu", act_param=0.01, drop_block=None): 42 | super(InplaceAbn, self).__init__() 43 | self.num_features = num_features 44 | self.affine = affine 45 | self.eps = eps 46 | self.momentum = momentum 47 | if apply_act: 48 | if isinstance(act_layer, str): 49 | assert act_layer in ('leaky_relu', 'elu', 'identity', '') 50 | self.act_name = act_layer if act_layer else 'identity' 51 | else: 52 | # convert act layer passed as type to string 53 | if act_layer == nn.ELU: 54 | self.act_name = 'elu' 55 | elif act_layer == nn.LeakyReLU: 56 | self.act_name = 'leaky_relu' 57 | elif act_layer == nn.Identity: 58 | self.act_name = 'identity' 59 | else: 60 | assert False, f'Invalid act layer {act_layer.__name__} for IABN' 61 | else: 62 | self.act_name = 'identity' 63 | self.act_param = act_param 64 | if self.affine: 65 | self.weight = nn.Parameter(torch.ones(num_features)) 66 | self.bias = nn.Parameter(torch.zeros(num_features)) 67 | else: 68 | self.register_parameter('weight', None) 69 | self.register_parameter('bias', None) 70 | self.register_buffer('running_mean', torch.zeros(num_features)) 71 | self.register_buffer('running_var', torch.ones(num_features)) 72 | self.reset_parameters() 73 | 74 | def reset_parameters(self): 75 | nn.init.constant_(self.running_mean, 0) 76 | nn.init.constant_(self.running_var, 1) 77 | if self.affine: 78 | nn.init.constant_(self.weight, 1) 79 | nn.init.constant_(self.bias, 0) 80 | 81 | def forward(self, x): 82 | output = inplace_abn( 83 | x, self.weight, self.bias, self.running_mean, self.running_var, 84 | self.training, self.momentum, self.eps, self.act_name, self.act_param) 85 | if isinstance(output, tuple): 86 | output = output[0] 87 | return output 88 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/linear.py: -------------------------------------------------------------------------------- 1 | """ Linear layer (alternate definition) 2 | """ 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn as nn 6 | 7 | 8 | class Linear(nn.Linear): 9 | r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b` 10 | 11 | Wraps torch.nn.Linear to support AMP + torchscript usage by manually casting 12 | weight & bias to input.dtype to work around an issue w/ torch.addmm in this use case. 
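    Note that the cast only happens under torch.jit.is_scripting(); the eager path
    below falls through to a plain F.linear call.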
13 | """ 14 | def forward(self, input: torch.Tensor) -> torch.Tensor: 15 | if torch.jit.is_scripting(): 16 | bias = self.bias.to(dtype=input.dtype) if self.bias is not None else None 17 | return F.linear(input, self.weight.to(dtype=input.dtype), bias=bias) 18 | else: 19 | return F.linear(input, self.weight, self.bias) 20 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/median_pool.py: -------------------------------------------------------------------------------- 1 | """ Median Pool 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from .helpers import to_2tuple, to_4tuple 7 | 8 | 9 | class MedianPool2d(nn.Module): 10 | """ Median pool (usable as median filter when stride=1) module. 11 | 12 | Args: 13 | kernel_size: size of pooling kernel, int or 2-tuple 14 | stride: pool stride, int or 2-tuple 15 | padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad 16 | same: override padding and enforce same padding, boolean 17 | """ 18 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 19 | super(MedianPool2d, self).__init__() 20 | self.k = to_2tuple(kernel_size) 21 | self.stride = to_2tuple(stride) 22 | self.padding = to_4tuple(padding) # convert to l, r, t, b 23 | self.same = same 24 | 25 | def _padding(self, x): 26 | if self.same: 27 | ih, iw = x.size()[2:] 28 | if ih % self.stride[0] == 0: 29 | ph = max(self.k[0] - self.stride[0], 0) 30 | else: 31 | ph = max(self.k[0] - (ih % self.stride[0]), 0) 32 | if iw % self.stride[1] == 0: 33 | pw = max(self.k[1] - self.stride[1], 0) 34 | else: 35 | pw = max(self.k[1] - (iw % self.stride[1]), 0) 36 | pl = pw // 2 37 | pr = pw - pl 38 | pt = ph // 2 39 | pb = ph - pt 40 | padding = (pl, pr, pt, pb) 41 | else: 42 | padding = self.padding 43 | return padding 44 | 45 | def forward(self, x): 46 | x = F.pad(x, self._padding(x), mode='reflect') 47 | x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) 48 | x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] 49 | return x 50 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/mixed_conv2d.py: -------------------------------------------------------------------------------- 1 | """ PyTorch Mixed Convolution 2 | 3 | Paper: MixConv: Mixed Depthwise Convolutional Kernels (https://arxiv.org/abs/1907.09595) 4 | 5 | Hacked together by / Copyright 2020 Ross Wightman 6 | """ 7 | 8 | import torch 9 | from torch import nn as nn 10 | 11 | from .conv2d_same import create_conv2d_pad 12 | 13 | 14 | def _split_channels(num_chan, num_groups): 15 | split = [num_chan // num_groups for _ in range(num_groups)] 16 | split[0] += num_chan - sum(split) 17 | return split 18 | 19 | 20 | class MixedConv2d(nn.ModuleDict): 21 | """ Mixed Grouped Convolution 22 | 23 | Based on MDConv and GroupedConv in MixNet impl: 24 | https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py 25 | """ 26 | def __init__(self, in_channels, out_channels, kernel_size=3, 27 | stride=1, padding='', dilation=1, depthwise=False, **kwargs): 28 | super(MixedConv2d, self).__init__() 29 | 30 | kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size] 31 | num_groups = len(kernel_size) 32 | in_splits = _split_channels(in_channels, num_groups) 33 | out_splits = _split_channels(out_channels, num_groups) 34 | self.in_channels 
= sum(in_splits) 35 | self.out_channels = sum(out_splits) 36 | for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)): 37 | conv_groups = in_ch if depthwise else 1 38 | # use add_module to keep key space clean 39 | self.add_module( 40 | str(idx), 41 | create_conv2d_pad( 42 | in_ch, out_ch, k, stride=stride, 43 | padding=padding, dilation=dilation, groups=conv_groups, **kwargs) 44 | ) 45 | self.splits = in_splits 46 | 47 | def forward(self, x): 48 | x_split = torch.split(x, self.splits, 1) 49 | x_out = [c(x_split[i]) for i, c in enumerate(self.values())] 50 | x = torch.cat(x_out, 1) 51 | return x 52 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/norm.py: -------------------------------------------------------------------------------- 1 | """ Normalization layers and wrappers 2 | """ 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class GroupNorm(nn.GroupNorm): 9 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True): 10 | # NOTE num_channels is swapped to first arg for consistency in swapping norm layers with BN 11 | super().__init__(num_groups, num_channels, eps=eps, affine=affine) 12 | 13 | def forward(self, x): 14 | return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 15 | 16 | 17 | class LayerNorm2d(nn.LayerNorm): 18 | """ LayerNorm for channels of '2D' spatial BCHW tensors """ 19 | def __init__(self, num_channels): 20 | super().__init__(num_channels) 21 | 22 | def forward(self, x: torch.Tensor) -> torch.Tensor: 23 | return F.layer_norm( 24 | x.permute(0, 2, 3, 1), self.normalized_shape, self.weight, self.bias, self.eps).permute(0, 3, 1, 2) 25 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/norm_act.py: -------------------------------------------------------------------------------- 1 | """ Normalization + Activation Layers 2 | """ 3 | import torch 4 | from torch import nn as nn 5 | from torch.nn import functional as F 6 | 7 | from .create_act import get_act_layer 8 | 9 | 10 | class BatchNormAct2d(nn.BatchNorm2d): 11 | """BatchNorm + Activation 12 | 13 | This module performs BatchNorm + Activation in a manner that will remain backwards 14 | compatible with weights trained with separate bn, act. This is why we inherit from BN 15 | instead of composing it as a .bn member. 16 | """ 17 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, 18 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 19 | super(BatchNormAct2d, self).__init__( 20 | num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats) 21 | if isinstance(act_layer, str): 22 | act_layer = get_act_layer(act_layer) 23 | if act_layer is not None and apply_act: 24 | act_args = dict(inplace=True) if inplace else {} 25 | self.act = act_layer(**act_args) 26 | else: 27 | self.act = nn.Identity() 28 | 29 | def _forward_jit(self, x): 30 | """ A cut & paste of the contents of the PyTorch BatchNorm2d forward function 31 | """ 32 | # exponential_average_factor is self.momentum set to 33 | # (when it is available) only so that if gets updated 34 | # in ONNX graph when this node is exported to ONNX. 
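        # in other words: when self.momentum is set it is used directly as the factor,
        # and when it is None a cumulative average with factor 1/num_batches_tracked is used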
35 | if self.momentum is None: 36 | exponential_average_factor = 0.0 37 | else: 38 | exponential_average_factor = self.momentum 39 | 40 | if self.training and self.track_running_stats: 41 | # TODO: if statement only here to tell the jit to skip emitting this when it is None 42 | if self.num_batches_tracked is not None: 43 | self.num_batches_tracked += 1 44 | if self.momentum is None: # use cumulative moving average 45 | exponential_average_factor = 1.0 / float(self.num_batches_tracked) 46 | else: # use exponential moving average 47 | exponential_average_factor = self.momentum 48 | 49 | x = F.batch_norm( 50 | x, self.running_mean, self.running_var, self.weight, self.bias, 51 | self.training or not self.track_running_stats, 52 | exponential_average_factor, self.eps) 53 | return x 54 | 55 | @torch.jit.ignore 56 | def _forward_python(self, x): 57 | return super(BatchNormAct2d, self).forward(x) 58 | 59 | def forward(self, x): 60 | # FIXME cannot call parent forward() and maintain jit.script compatibility? 61 | if torch.jit.is_scripting(): 62 | x = self._forward_jit(x) 63 | else: 64 | x = self._forward_python(x) 65 | x = self.act(x) 66 | return x 67 | 68 | 69 | class GroupNormAct(nn.GroupNorm): 70 | # NOTE num_channel and num_groups order flipped for easier layer swaps / binding of fixed args 71 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True, 72 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 73 | super(GroupNormAct, self).__init__(num_groups, num_channels, eps=eps, affine=affine) 74 | if isinstance(act_layer, str): 75 | act_layer = get_act_layer(act_layer) 76 | if act_layer is not None and apply_act: 77 | act_args = dict(inplace=True) if inplace else {} 78 | self.act = act_layer(**act_args) 79 | else: 80 | self.act = nn.Identity() 81 | 82 | def forward(self, x): 83 | x = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 84 | x = self.act(x) 85 | return x 86 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/padding.py: -------------------------------------------------------------------------------- 1 | """ Padding Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import math 6 | from typing import List, Tuple 7 | 8 | import torch.nn.functional as F 9 | 10 | 11 | # Calculate symmetric padding for a convolution 12 | def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **_) -> int: 13 | padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 14 | return padding 15 | 16 | 17 | # Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution 18 | def get_same_padding(x: int, k: int, s: int, d: int): 19 | return max((math.ceil(x / s) - 1) * s + (k - 1) * d + 1 - x, 0) 20 | 21 | 22 | # Can SAME padding for given args be done statically? 
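# (it can whenever the total pad does not depend on the input size and splits evenly:
#  stride == 1 and dilation * (kernel_size - 1) even, e.g. k=3, s=1, d=1 -> 1 per side)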
23 | def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_): 24 | return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0 25 | 26 | 27 | # Dynamically pad input x with 'SAME' padding for conv with specified args 28 | def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0): 29 | ih, iw = x.size()[-2:] 30 | pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding(iw, k[1], s[1], d[1]) 31 | if pad_h > 0 or pad_w > 0: 32 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value) 33 | return x 34 | 35 | 36 | def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]: 37 | dynamic = False 38 | if isinstance(padding, str): 39 | # for any string padding, the padding will be calculated for you, one of three ways 40 | padding = padding.lower() 41 | if padding == 'same': 42 | # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact 43 | if is_static_pad(kernel_size, **kwargs): 44 | # static case, no extra overhead 45 | padding = get_padding(kernel_size, **kwargs) 46 | else: 47 | # dynamic 'SAME' padding, has runtime/GPU memory overhead 48 | padding = 0 49 | dynamic = True 50 | elif padding == 'valid': 51 | # 'VALID' padding, same as padding=0 52 | padding = 0 53 | else: 54 | # Default to PyTorch style 'same'-ish symmetric padding 55 | padding = get_padding(kernel_size, **kwargs) 56 | return padding, dynamic 57 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | """ Image to Patch Embedding using Conv2d 2 | 3 | A convolution based approach to patchifying a 2D image w/ embedding projection. 
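(For the defaults below, img_size=224 and patch_size=16 give grid_size = (14, 14),
i.e. num_patches = 196.)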
4 | 5 | Based on the impl in https://github.com/google-research/vision_transformer 6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | from torch import nn as nn 10 | 11 | from .helpers import to_2tuple 12 | from .trace_utils import _assert 13 | 14 | 15 | class PatchEmbed(nn.Module): 16 | """ 2D Image to Patch Embedding 17 | """ 18 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None, flatten=True): 19 | super().__init__() 20 | img_size = to_2tuple(img_size) 21 | patch_size = to_2tuple(patch_size) 22 | self.img_size = img_size 23 | self.patch_size = patch_size 24 | self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) 25 | self.num_patches = self.grid_size[0] * self.grid_size[1] 26 | self.flatten = flatten 27 | 28 | # self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 29 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=3, stride=1,padding=1) # Modify convolution 30 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 31 | 32 | def forward(self, x): 33 | B, C, H, W = x.shape 34 | _assert(H == self.img_size[0], f"Input image height ({H}) doesn't match model ({self.img_size[0]}).") 35 | _assert(W == self.img_size[1], f"Input image width ({W}) doesn't match model ({self.img_size[1]}).") 36 | x = self.proj(x) 37 | if self.flatten: 38 | x = x.flatten(2).transpose(1, 2) # BCHW -> BNC 39 | x = self.norm(x) 40 | return x 41 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/pool2d_same.py: -------------------------------------------------------------------------------- 1 | """ AvgPool2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import List, Tuple, Optional 9 | 10 | from .helpers import to_2tuple 11 | from .padding import pad_same, get_padding_value 12 | 13 | 14 | def avg_pool2d_same(x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 15 | ceil_mode: bool = False, count_include_pad: bool = True): 16 | # FIXME how to deal with count_include_pad vs not for external padding? 
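    # strategy: explicitly pad the input to TF 'SAME' geometry first, then pool with zero
    # internal padding, so the output spatial size is ceil(input_size / stride)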
17 | x = pad_same(x, kernel_size, stride) 18 | return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 19 | 20 | 21 | class AvgPool2dSame(nn.AvgPool2d): 22 | """ Tensorflow like 'SAME' wrapper for 2D average pooling 23 | """ 24 | def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True): 25 | kernel_size = to_2tuple(kernel_size) 26 | stride = to_2tuple(stride) 27 | super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 28 | 29 | def forward(self, x): 30 | x = pad_same(x, self.kernel_size, self.stride) 31 | return F.avg_pool2d( 32 | x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad) 33 | 34 | 35 | def max_pool2d_same( 36 | x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 37 | dilation: List[int] = (1, 1), ceil_mode: bool = False): 38 | x = pad_same(x, kernel_size, stride, value=-float('inf')) 39 | return F.max_pool2d(x, kernel_size, stride, (0, 0), dilation, ceil_mode) 40 | 41 | 42 | class MaxPool2dSame(nn.MaxPool2d): 43 | """ Tensorflow like 'SAME' wrapper for 2D max pooling 44 | """ 45 | def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False): 46 | kernel_size = to_2tuple(kernel_size) 47 | stride = to_2tuple(stride) 48 | dilation = to_2tuple(dilation) 49 | super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, ceil_mode) 50 | 51 | def forward(self, x): 52 | x = pad_same(x, self.kernel_size, self.stride, value=-float('inf')) 53 | return F.max_pool2d(x, self.kernel_size, self.stride, (0, 0), self.dilation, self.ceil_mode) 54 | 55 | 56 | def create_pool2d(pool_type, kernel_size, stride=None, **kwargs): 57 | stride = stride or kernel_size 58 | padding = kwargs.pop('padding', '') 59 | padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, **kwargs) 60 | if is_dynamic: 61 | if pool_type == 'avg': 62 | return AvgPool2dSame(kernel_size, stride=stride, **kwargs) 63 | elif pool_type == 'max': 64 | return MaxPool2dSame(kernel_size, stride=stride, **kwargs) 65 | else: 66 | assert False, f'Unsupported pool type {pool_type}' 67 | else: 68 | if pool_type == 'avg': 69 | return nn.AvgPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 70 | elif pool_type == 'max': 71 | return nn.MaxPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 72 | else: 73 | assert False, f'Unsupported pool type {pool_type}' 74 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/separable_conv.py: -------------------------------------------------------------------------------- 1 | """ Depthwise Separable Conv Modules 2 | 3 | Basic DWS convs. Other variations of DWS exist with batch norm or activations between the 4 | DW and PW convs such as the Depthwise modules in MobileNetV2 / EfficientNet and Xception. 
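A k x k depthwise conv plus a 1 x 1 pointwise conv needs roughly C_in * k^2 + C_in * C_out
weights, versus C_in * C_out * k^2 for the equivalent full convolution.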
5 | 6 | Hacked together by / Copyright 2020 Ross Wightman 7 | """ 8 | from torch import nn as nn 9 | 10 | from .create_conv2d import create_conv2d 11 | from .create_norm_act import convert_norm_act 12 | 13 | 14 | class SeparableConvBnAct(nn.Module): 15 | """ Separable Conv w/ trailing Norm and Activation 16 | """ 17 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 18 | channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, 19 | apply_act=True, drop_block=None): 20 | super(SeparableConvBnAct, self).__init__() 21 | 22 | self.conv_dw = create_conv2d( 23 | in_channels, int(in_channels * channel_multiplier), kernel_size, 24 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 25 | 26 | self.conv_pw = create_conv2d( 27 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 28 | 29 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 30 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 31 | 32 | @property 33 | def in_channels(self): 34 | return self.conv_dw.in_channels 35 | 36 | @property 37 | def out_channels(self): 38 | return self.conv_pw.out_channels 39 | 40 | def forward(self, x): 41 | x = self.conv_dw(x) 42 | x = self.conv_pw(x) 43 | if self.bn is not None: 44 | x = self.bn(x) 45 | return x 46 | 47 | 48 | class SeparableConv2d(nn.Module): 49 | """ Separable Conv 50 | """ 51 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 52 | channel_multiplier=1.0, pw_kernel_size=1): 53 | super(SeparableConv2d, self).__init__() 54 | 55 | self.conv_dw = create_conv2d( 56 | in_channels, int(in_channels * channel_multiplier), kernel_size, 57 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 58 | 59 | self.conv_pw = create_conv2d( 60 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 61 | 62 | @property 63 | def in_channels(self): 64 | return self.conv_dw.in_channels 65 | 66 | @property 67 | def out_channels(self): 68 | return self.conv_pw.out_channels 69 | 70 | def forward(self, x): 71 | x = self.conv_dw(x) 72 | x = self.conv_pw(x) 73 | return x 74 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/space_to_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SpaceToDepth(nn.Module): 6 | def __init__(self, block_size=4): 7 | super().__init__() 8 | assert block_size == 4 9 | self.bs = block_size 10 | 11 | def forward(self, x): 12 | N, C, H, W = x.size() 13 | x = x.view(N, C, H // self.bs, self.bs, W // self.bs, self.bs) # (N, C, H//bs, bs, W//bs, bs) 14 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 15 | x = x.view(N, C * (self.bs ** 2), H // self.bs, W // self.bs) # (N, C*bs^2, H//bs, W//bs) 16 | return x 17 | 18 | 19 | @torch.jit.script 20 | class SpaceToDepthJit(object): 21 | def __call__(self, x: torch.Tensor): 22 | # assuming hard-coded that block_size==4 for acceleration 23 | N, C, H, W = x.size() 24 | x = x.view(N, C, H // 4, 4, W // 4, 4) # (N, C, H//bs, bs, W//bs, bs) 25 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 26 | x = x.view(N, C * 16, H // 4, W // 4) # (N, C*bs^2, H//bs, W//bs) 27 | return x 28 | 29 | 30 | class SpaceToDepthModule(nn.Module): 31 
| def __init__(self, no_jit=False): 32 | super().__init__() 33 | if not no_jit: 34 | self.op = SpaceToDepthJit() 35 | else: 36 | self.op = SpaceToDepth() 37 | 38 | def forward(self, x): 39 | return self.op(x) 40 | 41 | 42 | class DepthToSpace(nn.Module): 43 | 44 | def __init__(self, block_size): 45 | super().__init__() 46 | self.bs = block_size 47 | 48 | def forward(self, x): 49 | N, C, H, W = x.size() 50 | x = x.view(N, self.bs, self.bs, C // (self.bs ** 2), H, W) # (N, bs, bs, C//bs^2, H, W) 51 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # (N, C//bs^2, H, bs, W, bs) 52 | x = x.view(N, C // (self.bs ** 2), H * self.bs, W * self.bs) # (N, C//bs^2, H * bs, W * bs) 53 | return x 54 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/split_attn.py: -------------------------------------------------------------------------------- 1 | """ Split Attention Conv2d (for ResNeSt Models) 2 | 3 | Paper: `ResNeSt: Split-Attention Networks` - /https://arxiv.org/abs/2004.08955 4 | 5 | Adapted from original PyTorch impl at https://github.com/zhanghang1989/ResNeSt 6 | 7 | Modified for torchscript compat, performance, and consistency with timm by Ross Wightman 8 | """ 9 | import torch 10 | import torch.nn.functional as F 11 | from torch import nn 12 | 13 | from .helpers import make_divisible 14 | 15 | 16 | class RadixSoftmax(nn.Module): 17 | def __init__(self, radix, cardinality): 18 | super(RadixSoftmax, self).__init__() 19 | self.radix = radix 20 | self.cardinality = cardinality 21 | 22 | def forward(self, x): 23 | batch = x.size(0) 24 | if self.radix > 1: 25 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 26 | x = F.softmax(x, dim=1) 27 | x = x.reshape(batch, -1) 28 | else: 29 | x = torch.sigmoid(x) 30 | return x 31 | 32 | 33 | class SplitAttn(nn.Module): 34 | """Split-Attention (aka Splat) 35 | """ 36 | def __init__(self, in_channels, out_channels=None, kernel_size=3, stride=1, padding=None, 37 | dilation=1, groups=1, bias=False, radix=2, rd_ratio=0.25, rd_channels=None, rd_divisor=8, 38 | act_layer=nn.ReLU, norm_layer=None, drop_block=None, **kwargs): 39 | super(SplitAttn, self).__init__() 40 | out_channels = out_channels or in_channels 41 | self.radix = radix 42 | self.drop_block = drop_block 43 | mid_chs = out_channels * radix 44 | if rd_channels is None: 45 | attn_chs = make_divisible(in_channels * radix * rd_ratio, min_value=32, divisor=rd_divisor) 46 | else: 47 | attn_chs = rd_channels * radix 48 | 49 | padding = kernel_size // 2 if padding is None else padding 50 | self.conv = nn.Conv2d( 51 | in_channels, mid_chs, kernel_size, stride, padding, dilation, 52 | groups=groups * radix, bias=bias, **kwargs) 53 | self.bn0 = norm_layer(mid_chs) if norm_layer else nn.Identity() 54 | self.act0 = act_layer(inplace=True) 55 | self.fc1 = nn.Conv2d(out_channels, attn_chs, 1, groups=groups) 56 | self.bn1 = norm_layer(attn_chs) if norm_layer else nn.Identity() 57 | self.act1 = act_layer(inplace=True) 58 | self.fc2 = nn.Conv2d(attn_chs, mid_chs, 1, groups=groups) 59 | self.rsoftmax = RadixSoftmax(radix, groups) 60 | 61 | def forward(self, x): 62 | x = self.conv(x) 63 | x = self.bn0(x) 64 | if self.drop_block is not None: 65 | x = self.drop_block(x) 66 | x = self.act0(x) 67 | 68 | B, RC, H, W = x.shape 69 | if self.radix > 1: 70 | x = x.reshape((B, self.radix, RC // self.radix, H, W)) 71 | x_gap = x.sum(dim=1) 72 | else: 73 | x_gap = x 74 | x_gap = x_gap.mean((2, 3), keepdim=True) 75 | x_gap = self.fc1(x_gap) 76 | 
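        # x_gap is now (B, attn_chs, 1, 1); bn1/act1/fc2 below expand this squeezed
        # descriptor back into radix * out_channels attention logits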
x_gap = self.bn1(x_gap) 77 | x_gap = self.act1(x_gap) 78 | x_attn = self.fc2(x_gap) 79 | 80 | x_attn = self.rsoftmax(x_attn).view(B, -1, 1, 1) 81 | if self.radix > 1: 82 | out = (x * x_attn.reshape((B, self.radix, RC // self.radix, 1, 1))).sum(dim=1) 83 | else: 84 | out = x * x_attn 85 | return out.contiguous() 86 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/split_batchnorm.py: -------------------------------------------------------------------------------- 1 | """ Split BatchNorm 2 | 3 | A PyTorch BatchNorm layer that splits input batch into N equal parts and passes each through 4 | a separate BN layer. The first split is passed through the parent BN layers with weight/bias 5 | keys the same as the original BN. All other splits pass through BN sub-layers under the '.aux_bn' 6 | namespace. 7 | 8 | This allows easily removing the auxiliary BN layers after training to efficiently 9 | achieve the 'Auxiliary BatchNorm' as described in the AdvProp Paper, section 4.2, 10 | 'Disentangled Learning via An Auxiliary BN' 11 | 12 | Hacked together by / Copyright 2020 Ross Wightman 13 | """ 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | class SplitBatchNorm2d(torch.nn.BatchNorm2d): 19 | 20 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, 21 | track_running_stats=True, num_splits=2): 22 | super().__init__(num_features, eps, momentum, affine, track_running_stats) 23 | assert num_splits > 1, 'Should have at least one aux BN layer (num_splits at least 2)' 24 | self.num_splits = num_splits 25 | self.aux_bn = nn.ModuleList([ 26 | nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats) for _ in range(num_splits - 1)]) 27 | 28 | def forward(self, input: torch.Tensor): 29 | if self.training: # aux BN only relevant while training 30 | split_size = input.shape[0] // self.num_splits 31 | assert input.shape[0] == split_size * self.num_splits, "batch size must be evenly divisible by num_splits" 32 | split_input = input.split(split_size) 33 | x = [super().forward(split_input[0])] 34 | for i, a in enumerate(self.aux_bn): 35 | x.append(a(split_input[i + 1])) 36 | return torch.cat(x, dim=0) 37 | else: 38 | return super().forward(input) 39 | 40 | 41 | def convert_splitbn_model(module, num_splits=2): 42 | """ 43 | Recursively traverse module and its children to replace all instances of 44 | ``torch.nn.modules.batchnorm._BatchNorm`` with `SplitBatchnorm2d`. 
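    The returned parent BN shares the original running statistics, while each auxiliary
    BN receives its own clone of them (see the loop below).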
45 | Args: 46 | module (torch.nn.Module): input module 47 | num_splits: number of separate batchnorm layers to split input across 48 | Example:: 49 | >>> # model is an instance of torch.nn.Module 50 | >>> model = timm.models.convert_splitbn_model(model, num_splits=2) 51 | """ 52 | mod = module 53 | if isinstance(module, torch.nn.modules.instancenorm._InstanceNorm): 54 | return module 55 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): 56 | mod = SplitBatchNorm2d( 57 | module.num_features, module.eps, module.momentum, module.affine, 58 | module.track_running_stats, num_splits=num_splits) 59 | mod.running_mean = module.running_mean 60 | mod.running_var = module.running_var 61 | mod.num_batches_tracked = module.num_batches_tracked 62 | if module.affine: 63 | mod.weight.data = module.weight.data.clone().detach() 64 | mod.bias.data = module.bias.data.clone().detach() 65 | for aux in mod.aux_bn: 66 | aux.running_mean = module.running_mean.clone() 67 | aux.running_var = module.running_var.clone() 68 | aux.num_batches_tracked = module.num_batches_tracked.clone() 69 | if module.affine: 70 | aux.weight.data = module.weight.data.clone().detach() 71 | aux.bias.data = module.bias.data.clone().detach() 72 | for name, child in module.named_children(): 73 | mod.add_module(name, convert_splitbn_model(child, num_splits=num_splits)) 74 | del module 75 | return mod 76 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/squeeze_excite.py: -------------------------------------------------------------------------------- 1 | """ Squeeze-and-Excitation Channel Attention 2 | 3 | An SE implementation originally based on PyTorch SE-Net impl. 4 | Has since evolved with additional functionality / configuration. 5 | 6 | Paper: `Squeeze-and-Excitation Networks` - https://arxiv.org/abs/1709.01507 7 | 8 | Also included is Effective Squeeze-Excitation (ESE). 9 | Paper: `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 10 | 11 | Hacked together by / Copyright 2021 Ross Wightman 12 | """ 13 | from torch import nn as nn 14 | 15 | from .create_act import create_act_layer 16 | from .helpers import make_divisible 17 | 18 | 19 | class SEModule(nn.Module): 20 | """ SE Module as defined in original SE-Nets with a few additions 21 | Additions include: 22 | * divisor can be specified to keep channels % div == 0 (default: 8) 23 | * reduction channels can be specified directly by arg (if rd_channels is set) 24 | * reduction channels can be specified by float rd_ratio (default: 1/16) 25 | * global max pooling can be added to the squeeze aggregation 26 | * customizable activation, normalization, and gate layer 27 | """ 28 | def __init__( 29 | self, channels, rd_ratio=1. / 16, rd_channels=None, rd_divisor=8, add_maxpool=False, 30 | act_layer=nn.ReLU, norm_layer=None, gate_layer='sigmoid'): 31 | super(SEModule, self).__init__() 32 | self.add_maxpool = add_maxpool 33 | if not rd_channels: 34 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
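        # e.g. channels=512 with the default rd_ratio=1./16 and rd_divisor=8 gives
        # rd_channels=32, i.e. the classic 512 -> 32 -> 512 squeeze-excite bottleneck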
35 | self.fc1 = nn.Conv2d(channels, rd_channels, kernel_size=1, bias=True) 36 | self.bn = norm_layer(rd_channels) if norm_layer else nn.Identity() 37 | self.act = create_act_layer(act_layer, inplace=True) 38 | self.fc2 = nn.Conv2d(rd_channels, channels, kernel_size=1, bias=True) 39 | self.gate = create_act_layer(gate_layer) 40 | 41 | def forward(self, x): 42 | x_se = x.mean((2, 3), keepdim=True) 43 | if self.add_maxpool: 44 | # experimental codepath, may remove or change 45 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 46 | x_se = self.fc1(x_se) 47 | x_se = self.act(self.bn(x_se)) 48 | x_se = self.fc2(x_se) 49 | return x * self.gate(x_se) 50 | 51 | 52 | SqueezeExcite = SEModule # alias 53 | 54 | 55 | class EffectiveSEModule(nn.Module): 56 | """ 'Effective Squeeze-Excitation 57 | From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 58 | """ 59 | def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmoid', **_): 60 | super(EffectiveSEModule, self).__init__() 61 | self.add_maxpool = add_maxpool 62 | self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0) 63 | self.gate = create_act_layer(gate_layer) 64 | 65 | def forward(self, x): 66 | x_se = x.mean((2, 3), keepdim=True) 67 | if self.add_maxpool: 68 | # experimental codepath, may remove or change 69 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 70 | x_se = self.fc(x_se) 71 | return x * self.gate(x_se) 72 | 73 | 74 | EffectiveSqueezeExcite = EffectiveSEModule # alias 75 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/test_time_pool.py: -------------------------------------------------------------------------------- 1 | """ Test Time Pooling (Average-Max Pool) 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | import logging 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | from .adaptive_avgmax_pool import adaptive_avgmax_pool2d 11 | 12 | 13 | _logger = logging.getLogger(__name__) 14 | 15 | 16 | class TestTimePoolHead(nn.Module): 17 | def __init__(self, base, original_pool=7): 18 | super(TestTimePoolHead, self).__init__() 19 | self.base = base 20 | self.original_pool = original_pool 21 | base_fc = self.base.get_classifier() 22 | if isinstance(base_fc, nn.Conv2d): 23 | self.fc = base_fc 24 | else: 25 | self.fc = nn.Conv2d( 26 | self.base.num_features, self.base.num_classes, kernel_size=1, bias=True) 27 | self.fc.weight.data.copy_(base_fc.weight.data.view(self.fc.weight.size())) 28 | self.fc.bias.data.copy_(base_fc.bias.data.view(self.fc.bias.size())) 29 | self.base.reset_classifier(0) # delete original fc layer 30 | 31 | def forward(self, x): 32 | x = self.base.forward_features(x) 33 | x = F.avg_pool2d(x, kernel_size=self.original_pool, stride=1) 34 | x = self.fc(x) 35 | x = adaptive_avgmax_pool2d(x, 1) 36 | return x.view(x.size(0), -1) 37 | 38 | 39 | def apply_test_time_pool(model, config, use_test_size=True): 40 | test_time_pool = False 41 | if not hasattr(model, 'default_cfg') or not model.default_cfg: 42 | return model, False 43 | if use_test_size and 'test_input_size' in model.default_cfg: 44 | df_input_size = model.default_cfg['test_input_size'] 45 | else: 46 | df_input_size = model.default_cfg['input_size'] 47 | if config['input_size'][-1] > df_input_size[-1] and config['input_size'][-2] > df_input_size[-2]: 48 | _logger.info('Target input size %s > pretrained default %s, using test time pooling' % 49 | 
(str(config['input_size'][-2:]), str(df_input_size[-2:]))) 50 | model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size']) 51 | test_time_pool = True 52 | return model, test_time_pool 53 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/trace_utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch import _assert 3 | except ImportError: 4 | def _assert(condition: bool, message: str): 5 | assert condition, message 6 | 7 | 8 | def _float_to_int(x: float) -> int: 9 | """ 10 | Symbolic tracing helper to substitute for inbuilt `int`. 11 | Hint: Inbuilt `int` can't accept an argument of type `Proxy` 12 | """ 13 | return int(x) 14 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/weight_init.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 
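    (Sampling draws uniformly between the normal CDF values of the two cutoffs and then
    applies the inverse CDF, as implemented in _no_grad_trunc_normal_ above.)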
52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | 65 | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='normal'): 66 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 67 | if mode == 'fan_in': 68 | denom = fan_in 69 | elif mode == 'fan_out': 70 | denom = fan_out 71 | elif mode == 'fan_avg': 72 | denom = (fan_in + fan_out) / 2 73 | 74 | variance = scale / denom 75 | 76 | if distribution == "truncated_normal": 77 | # constant is stddev of standard normal truncated to (-2, 2) 78 | trunc_normal_(tensor, std=math.sqrt(variance) / .87962566103423978) 79 | elif distribution == "normal": 80 | tensor.normal_(std=math.sqrt(variance)) 81 | elif distribution == "uniform": 82 | bound = math.sqrt(3 * variance) 83 | tensor.uniform_(-bound, bound) 84 | else: 85 | raise ValueError(f"invalid distribution {distribution}") 86 | 87 | 88 | def lecun_normal_(tensor): 89 | variance_scaling_(tensor, mode='fan_in', distribution='truncated_normal') 90 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/visualizer.py: -------------------------------------------------------------------------------- 1 | from bytecode import Bytecode, Instr 2 | 3 | class get_local(object): 4 | cache = {} 5 | is_activate = False 6 | 7 | def __init__(self, varname): 8 | self.varname = varname 9 | 10 | def __call__(self, func): 11 | if not type(self).is_activate: 12 | return func 13 | 14 | type(self).cache[func.__qualname__] = [] 15 | c = Bytecode.from_code(func.__code__) 16 | extra_code = [ 17 | Instr('STORE_FAST', '_res'), 18 | Instr('LOAD_FAST', self.varname), 19 | Instr('STORE_FAST', '_value'), 20 | Instr('LOAD_FAST', '_res'), 21 | Instr('LOAD_FAST', '_value'), 22 | Instr('BUILD_TUPLE', 2), 23 | Instr('STORE_FAST', '_result_tuple'), 24 | Instr('LOAD_FAST', '_result_tuple'), 25 | ] 26 | c[-1:-1] = extra_code 27 | func.__code__ = c.to_code() 28 | 29 | def wrapper(*args, **kwargs): 30 | res, values = func(*args, **kwargs) 31 | type(self).cache[func.__qualname__].append(values.detach().cpu().numpy()) 32 | return res 33 | return wrapper 34 | 35 | @classmethod 36 | def clear(cls): 37 | for key in cls.cache.keys(): 38 | cls.cache[key] = [] 39 | 40 | @classmethod 41 | def activate(cls): 42 | cls.is_activate = True 43 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/se_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # https://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html 6 | 7 | class SEBlock(nn.Module): 8 | 9 | def __init__(self, input_channels, internal_neurons): 10 | super(SEBlock, self).__init__() 11 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 12 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 13 | self.input_channels = input_channels 14 | 15 | def forward(self, inputs): 16 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 17 | 
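        # global average pool: (B, C, H, W) -> (B, C, 1, 1); only inputs.size(3) sets the
        # kernel, so square feature maps are implicitly assumed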
x = self.down(x)
18 |         x = F.relu(x)
19 |         x = self.up(x)
20 |         x = torch.sigmoid(x)
21 |         x = x.view(-1, self.input_channels, 1, 1)
22 |         return inputs * x
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/trainers/basetrainer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | import torch
4 | from src.utils import save
5 | 
6 | 
7 | class TrainerBase():
8 |     def __init__(self, args, model, criterion, optimizer, scheduler, device, dataloaders):
9 |         self.args = args
10 |         self.model = model
11 |         self.best_model = copy.deepcopy(model.state_dict())
12 |         self.device = device
13 |         self.criterion = criterion
14 |         self.optimizer = optimizer
15 |         self.dataloaders = dataloaders
16 |         self.scheduler = scheduler
17 |         self.earlyStop = args['early_stop']
18 | 
19 |         # default saving root for checkpoints and stats; point this at an absolute
20 |         # path if savings should live outside the working directory,
21 |         # e.g. self.saving_path = '/path/to/your/savings/'
22 |         self.saving_path = './savings/'
23 | 
24 | 
25 |     def make_stat(self, prev, curr):
26 |         new_stats = []
27 |         for i in range(len(prev)):
28 |             if curr[i] > prev[i]:
29 |                 new_stats.append(f'{curr[i]:.4f} \u2191')
30 |             elif curr[i] < prev[i]:
31 |                 new_stats.append(f'{curr[i]:.4f} \u2193')
32 |             else:
33 |                 new_stats.append(f'{curr[i]:.4f} -')
34 |         return new_stats
35 | 
36 |     def get_saving_file_name(self):
37 |         best_test_stats = self.all_test_stats[self.best_epoch - 1]
38 | 
39 |         name = f'{self.args["model"]}_{self.args["modalities"]}_'
40 | 
41 |         if self.args['loss'] == 'bce':
42 |             name += f'Acc_{best_test_stats[0][-1]:.4f}_'
43 |             name += f'F1_{best_test_stats[3][-1]:.4f}_'
44 |             name += f'AUC_{best_test_stats[4][-1]:.4f}_'
45 |         else:
46 |             name += f'{best_test_stats[0]:.4f}_'
47 |             name += f'{best_test_stats[1]:.4f}_'
48 |             name += f'{best_test_stats[2]:.4f}_'
49 |             name += f'{best_test_stats[3]:.4f}_'
50 | 
51 |         name += f'imginvl{self.args["img_interval"]}_'
52 | 
53 |         if self.args['model'] == 'mme2e_sparse':
54 |             name += f'st_{self.args["sparse_threshold"]}_'
55 | 
56 |         name += f'seed{self.args["seed"]}'
57 |         name += '.pt'
58 | 
59 |         return name
60 | 
61 |     def save_stats(self):
62 |         stats = {
63 |             'args': self.args,
64 |             'train_stats': self.all_train_stats,
65 |             'valid_stats': self.all_valid_stats,
66 |             'test_stats': self.all_test_stats,
67 |             'best_valid_stats': self.best_valid_stats,
68 |             'best_epoch': self.best_epoch
69 |         }
70 | 
71 |         save(stats, os.path.join(self.saving_path, 'stats', self.get_saving_file_name()))
72 | 
73 |         # csv_path = os.path.join(self.saving_path, 'csv', self.get_saving_file_name()).replace('.pt', '.csv')
74 |         # dirname = os.path.dirname(csv_path)
75 |         # if not os.path.exists(dirname):
76 |         #     os.makedirs(dirname)
77 |         # with open(csv_path, 'w') as f:
78 |         #     for stat in self.all_test_stats[self.best_epoch - 1]:
79 |         #         for n in stat:
80 |         #             f.write(f'{n:.4f},')
81 |         #         f.write('\n')
82 |         #     f.write(str(self.args))
83 |         #     f.write('\n')
84 | 
85 |     def save_model(self):
86 |         torch.save(self.best_model, os.path.join(self.saving_path, 'models', self.get_saving_file_name()))
87 | 
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/trainers/r_emotiontrainer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from tqdm import tqdm
3 | from src.trainers.basetrainer import TrainerBase
4 | from transformers import AlbertTokenizer
5 | import numpy as np
6 | 
7 | class IemocapTrainer(TrainerBase):
8 |     def
__init__(self, args, model, criterion, optimizer, scheduler, device, dataloaders):
9 |         super(IemocapTrainer, self).__init__(args, model, criterion, optimizer, scheduler, device, dataloaders)
10 | 
11 |         self.args = args
12 |         self.text_max_len = args['text_max_len']
13 |         # self.tokenizer = AlbertTokenizer.from_pretrained(f'albert-{args["text_model_size"]}-v2')
14 |         self.tokenizer = AlbertTokenizer.from_pretrained('./src/models/albert-base-v2')
15 |         self.all_test_stats = []
16 |         annotations = dataloaders['test'].dataset.get_annotations()
17 |         self.best_epoch = -1
18 | 
19 |     def test(self):
20 |         return self.eval_one_epoch('test')
21 | 
22 |     def eval_one_epoch(self, phase='valid', thresholds=None):
23 | 
24 |         for m in self.model.modules():
25 |             if hasattr(m, 'switch_to_deploy'):
26 |                 m.switch_to_deploy()  # switch every module to deploy mode
27 |         self.model.eval()
28 |         dataloader = self.dataloaders[phase]
29 | 
30 |         data_size = 0
31 |         total_logits = []
32 |         total_Y = []
33 |         pbar = tqdm(dataloader, desc=phase)
34 | 
35 |         for utteranceId, imgs, imgLens, specgrams, specgramLens, text, Y in pbar:
36 |             text = self.tokenizer(text, return_tensors='pt', max_length=self.text_max_len, padding='max_length', truncation=True)
37 | 
38 |             # imgs = imgs.to(device=self.device)
39 |             specgrams = specgrams.to(device=self.device)
40 |             text = text.to(device=self.device)
41 |             Y = Y.to(device=self.device)
42 | 
43 |             with torch.set_grad_enabled(False):
44 |                 logits = self.model(imgs, imgLens, specgrams, specgramLens, text)  # (batch_size, num_classes)
45 |                 data_size += Y.size(0)
46 | 
47 |             total_logits.append(logits.cpu())
48 |             total_Y.append(Y.cpu())
49 | 
50 | 
51 |         total_logits = torch.cat(total_logits, dim=0)
52 |         total_Y = torch.cat(total_Y, dim=0)
53 |         preds = torch.sigmoid(total_logits)
54 |         mean_preds = torch.mean(preds, dim=0)
55 |         print('six emotional values for one video:\n')
56 |         print(mean_preds)
57 | 
58 |         with open("result.txt", 'w') as f:  # overwrite, so the file always holds exactly six values
59 |             mean = np.array(mean_preds)
60 |             for i in range(len(mean)):
61 |                 f.write(str(mean[i]))
62 |                 f.write('\n')
63 |         print('finished writing result.txt')  # result.txt is read back by the serving app
64 | 
65 |         return total_logits, total_Y
66 | 
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/transformer_encoder.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import Optional, List
3 | import torch
4 | from torch import nn
5 | from src.utils import padTensor
6 | 
7 | class WrappedTransformerEncoder(nn.Module):
8 |     def __init__(self, dim, num_layers, num_heads):
9 |         super(WrappedTransformerEncoder, self).__init__()
10 |         self.dim = dim
11 |         encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=num_heads)
12 |         self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
13 |         self.cls_emb = nn.Embedding(num_embeddings=1, embedding_dim=dim)
14 | 
15 |     def prepend_cls(self, inputs):
16 |         index = torch.LongTensor([0]).to(device=inputs.device)
17 |         cls_emb = self.cls_emb(index)
18 |         cls_emb = cls_emb.expand(inputs.size(0), 1, self.dim)
19 |         outputs = torch.cat((cls_emb, inputs), dim=1)
20 |         return outputs
21 | 
22 |     def forward(self, inputs: torch.Tensor, lens: Optional[List[int]] = None, get_cls: Optional[bool] = False):
23 |         if lens is not None:
24 |             max_len = max(lens)
25 | 
26 |             mask = [([False] * (l + int(get_cls)) + [True] * (max_len - l)) for l in lens]
27 |             mask = torch.tensor(mask).to(device=inputs.device)
28 | 
29 |             inputs = list(inputs.split(lens, dim=0))
30 |             inputs =
[padTensor(inp, max_len) for inp in inputs] 31 | inputs = torch.stack(inputs, dim=0) 32 | else: 33 | mask = None 34 | 35 | if get_cls: 36 | inputs = self.prepend_cls(inputs) 37 | 38 | inputs = inputs.permute(1, 0, 2) 39 | # inputs = self.pos_encoder(inputs) 40 | inputs = self.encoder(src=inputs, src_key_padding_mask=mask) # (seq_len, bs, dim) 41 | 42 | if get_cls: 43 | return inputs[0] 44 | 45 | return inputs[1:].permute(1, 0, 2) 46 | 47 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | from torchvision import transforms 7 | 8 | def save(toBeSaved, filename, mode='wb'): 9 | dirname = os.path.dirname(filename) 10 | if not os.path.exists(dirname): 11 | os.makedirs(dirname) 12 | file = open(filename, mode) 13 | pickle.dump(toBeSaved, file, protocol=4) 14 | file.close() 15 | 16 | def load(filename, mode='rb'): 17 | file = open(filename, mode) 18 | loaded = pickle.load(file) 19 | file.close() 20 | return loaded 21 | 22 | # For python2 23 | def load2(path): 24 | with open(path, 'rb') as f: 25 | u = pickle._Unpickler(f) 26 | u.encoding = 'latin1' 27 | p = u.load() 28 | return p 29 | 30 | def pad_sents(sents, pad_token): 31 | sents_padded = [] 32 | lens = get_lens(sents) 33 | max_len = max(lens) 34 | sents_padded = [sents[i] + [pad_token] * (max_len - l) for i, l in enumerate(lens)] 35 | return sents_padded, lens 36 | 37 | def sort_sents(sents, reverse=True): 38 | sents.sort(key=(lambda s: len(s)), reverse=reverse) 39 | return sents 40 | 41 | def get_mask(sents, unmask_idx=1, mask_idx=0): 42 | lens = get_lens(sents) 43 | max_len = max(lens) 44 | mask = [([unmask_idx] * l + [mask_idx] * (max_len - l)) for l in lens] 45 | return mask 46 | 47 | def get_lens(sents): 48 | return [len(sent) for sent in sents] 49 | 50 | def get_max_len(sents): 51 | max_len = max([len(sent) for sent in sents]) 52 | return max_len 53 | 54 | def truncate_sents(sents, length): 55 | sents = [sent[:length] for sent in sents] 56 | return sents 57 | 58 | def get_loss_weight(labels, label_order): 59 | nums = [np.sum(labels == lo) for lo in label_order] 60 | loss_weight = torch.tensor([n / len(labels) for n in nums]) 61 | return loss_weight 62 | 63 | def capitalize_first_letter(data): 64 | return [word.capitalize() for word in data] 65 | 66 | def cmumosei_round(a): 67 | if a < -2: 68 | res = -3 69 | if -2 <= a and a < -1: 70 | res = -2 71 | if -1 <= a and a < 0: 72 | res = -1 73 | if 0 <= a and a <= 0: 74 | res = 0 75 | if 0 < a and a <= 1: 76 | res = 1 77 | if 1 < a and a <= 2: 78 | res = 2 79 | if a > 2: 80 | res = 3 81 | return res 82 | 83 | # From MTCNN 84 | def fixed_image_standardization(image_tensor: torch.tensor) -> torch.tensor: 85 | processed_tensor = (image_tensor - 127.5) / 128.0 86 | return processed_tensor 87 | 88 | def padTensor(t: torch.tensor, targetLen: int) -> torch.tensor: 89 | oriLen, dim = t.size() 90 | return torch.cat((t, torch.zeros(targetLen - oriLen, dim).to(t.device)), dim=0) 91 | 92 | def calc_percent(x: torch.tensor): 93 | total = np.prod(np.array(x.size())) 94 | positive = x.sum().item() 95 | return positive / total 96 | -------------------------------------------------------------------------------- /V2EM/src/model/attention_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch 
import nn 3 | import torch.nn.functional as F 4 | from typing import List 5 | 6 | 7 | class CrossModalAttentionLayer(nn.Module): 8 | # y attends x 9 | def __init__(self, k, x_channels: int, y_size: int, spatial=True): 10 | super(CrossModalAttentionLayer, self).__init__() 11 | self.k = k 12 | self.spatial = spatial 13 | 14 | if spatial: 15 | self.channel_affine = nn.Linear(x_channels, k) 16 | 17 | self.y_affine = nn.Linear(y_size, k, bias=False) 18 | self.attn_weight_affine = nn.Linear(k, 1) 19 | 20 | def forward(self, x: List[torch.Tensor], x_lens: List[int], y: torch.Tensor): 21 | # x -> [(S, C, H, W)], len(x) = bs 22 | # y -> (bs, D) 23 | 24 | bs = y.size(0) 25 | x = x.split(x_lens, dim=0) 26 | y_k = self.y_affine(y) # (bs, k) 27 | 28 | all_spatial_attn_weights_softmax = [] 29 | 30 | for i in range(bs): 31 | if self.spatial: 32 | x_tensor = x[i].permute(0, 2, 3, 1) # (S_v, H_v, W_v, C_v) 33 | x_k = self.channel_affine(x_tensor) # (S_v, H_v, W_v, k) 34 | x_k += y_k[i] 35 | x_k = torch.tanh(x_k) 36 | x_attn_weights = self.attn_weight_affine(x_k).squeeze(-1) # (S_v, H_v, W_v) 37 | 38 | all_spatial_attn_weights_softmax.append( 39 | F.softmax( 40 | x_attn_weights.reshape(x_tensor.size(0), -1), 41 | dim=-1 42 | ).reshape(x_tensor.size(0), x_tensor.size(1), x_tensor.size(2)) # (S_v, H_v, W_v) 43 | ) 44 | 45 | return torch.cat(all_spatial_attn_weights_softmax, dim=0) 46 | 47 | class SparseCrossModalAttentionLayer(nn.Module): 48 | def __init__(self, k: int, x_channels: int, y_size: int, sparse_threshold: float): 49 | super(SparseCrossModalAttentionLayer, self).__init__() 50 | self.k = k 51 | self.sparse_threshold = sparse_threshold 52 | self.channel_affine = nn.Linear(x_channels, k) 53 | self.y_affine = nn.Linear(y_size, k, bias=False) 54 | self.attn_weight_affine = nn.Linear(k, 1) 55 | 56 | def forward(self, x: List[torch.Tensor], x_lens: List[int], locations: List[torch.Tensor], y: torch.Tensor): 57 | # x -> (N, C) 58 | # locations -> (N, 3) 59 | # y -> (bs, D) 60 | bs = y.size(0) 61 | y_k = self.y_affine(y) # (bs, k) 62 | x_k = self.channel_affine(x) # (N, k) 63 | 64 | sample_points_lens = [] 65 | for i in range(sum(x_lens)): 66 | sample_points_lens.append(len(locations[locations[:, 2] == i])) 67 | 68 | # how much points are left in each batch 69 | batch_points_lens = [] 70 | pointer = 0 71 | for l in x_lens: 72 | batch_points_lens.append(sum(sample_points_lens[pointer:(pointer + l)])) 73 | pointer += l 74 | 75 | x_ks = x_k.split(batch_points_lens, dim=0) 76 | 77 | attn_weights = [] 78 | for i in range(bs): 79 | this_weights = self.attn_weight_affine(torch.tanh(x_ks[i] + y_k[i])).squeeze(-1) 80 | attn_weights.append(this_weights) 81 | 82 | attn_weights = torch.cat(attn_weights, dim=0) 83 | attn_weights_split = list(attn_weights.split(sample_points_lens, dim=0)) 84 | attn_weights_split = [F.softmax(a, dim=-1) for a in attn_weights_split] 85 | attn_weights = torch.cat(attn_weights_split, dim=0) 86 | 87 | attn_weights_sparse = to_sparse_by_cdf(attn_weights, sample_points_lens, self.sparse_threshold) 88 | 89 | select_indices = attn_weights_sparse == 1 90 | new_x = x[select_indices, :] 91 | new_locations = locations[select_indices, :] 92 | 93 | return new_x, new_locations, None 94 | 95 | def to_sparse_by_cdf(t: torch.tensor, lens, cdf: float): 96 | _t = t.clone().detach() 97 | _t = list(_t.split(lens, dim=0)) 98 | 99 | for i, this_t in enumerate(_t): 100 | this_t_sorted, indices = torch.sort(this_t, descending=True) 101 | mask = torch.cumsum(this_t_sorted, dim=-1) < cdf 102 | mask[torch.sum(mask)] 
= True 103 | _t[i][indices[mask]] = 1 104 | _t[i][indices[~mask]] = 0 105 | 106 | return torch.cat(_t, dim=0).long() 107 | -------------------------------------------------------------------------------- /V2EM/src/model/baselines/lf_rnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | # from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 5 | from typing import List 6 | from src.utils import padTensor 7 | 8 | pad_token_id = 0 9 | unk_token_id = 1 10 | 11 | class LF_RNN(nn.Module): 12 | def __init__(self, args, num_layers=1, dropout=0.1, bi=True): 13 | super(LF_RNN, self).__init__() 14 | feature_sizes = args['hfc_sizes'] 15 | num_classes = args['num_emotions'] 16 | self.mods = args['modalities'] 17 | 18 | feature_sizes = np.array(feature_sizes) 19 | 20 | self.rnns = nn.ModuleDict({ 21 | 't': nn.LSTM( 22 | input_size=feature_sizes[0], 23 | hidden_size=feature_sizes[0], 24 | num_layers=num_layers, 25 | dropout=(dropout if num_layers > 1 else 0), 26 | bidirectional=bi 27 | ), 28 | 'a': nn.LSTM( 29 | input_size=feature_sizes[1], 30 | hidden_size=feature_sizes[1], 31 | num_layers=num_layers, 32 | dropout=(dropout if num_layers > 1 else 0), 33 | bidirectional=bi 34 | ), 35 | 'v': nn.LSTM( 36 | input_size=feature_sizes[2], 37 | hidden_size=feature_sizes[2], 38 | num_layers=num_layers, 39 | dropout=(dropout if num_layers > 1 else 0), 40 | bidirectional=bi 41 | ) 42 | }) 43 | 44 | linear_in_sizes = feature_sizes if not bi else feature_sizes * 2 45 | 46 | self.affines = nn.ModuleDict({ 47 | 't': nn.Sequential( 48 | nn.Linear(linear_in_sizes[0], linear_in_sizes[0] // 2), 49 | nn.ReLU(), 50 | nn.Linear(linear_in_sizes[0] // 2, num_classes) 51 | ), 52 | 'a': nn.Sequential( 53 | nn.Linear(linear_in_sizes[1], linear_in_sizes[1] // 2), 54 | nn.ReLU(), 55 | nn.Linear(linear_in_sizes[1] // 2, num_classes) 56 | ), 57 | 'v': nn.Sequential( 58 | nn.Linear(linear_in_sizes[2], linear_in_sizes[2] // 2), 59 | nn.ReLU(), 60 | nn.Linear(linear_in_sizes[2] // 2, num_classes) 61 | ) 62 | }) 63 | 64 | self.weighted_fusion = nn.Linear(len(self.mods), 1, bias=False) 65 | 66 | def forward(self, img_features, img_features_lens, audio_features, audio_features_lens, texts): 67 | all_logits = [] 68 | 69 | if 't' in self.mods: 70 | output_t, _ = self.rnns['t'](texts.transpose(0, 1)) 71 | output_t = output_t[-1, :, :] 72 | output_t = self.affines['t'](output_t) 73 | all_logits.append(output_t) 74 | 75 | if 'a' in self.mods: 76 | max_len = max(audio_features_lens) 77 | audio_features = audio_features.split(audio_features_lens, dim=0) 78 | audio_features = [padTensor(s, max_len) for s in audio_features] 79 | audio_features = torch.stack(audio_features, dim=1) # (seq_len, batch, dim) 80 | _, (audio_hn, _) = self.rnns['a'](audio_features) 81 | audio_hn = audio_hn.transpose(0, 1).flatten(start_dim=1) # (batch, hid_dim * 2) 82 | audio_hn = self.affines['a'](audio_hn) 83 | all_logits.append(audio_hn) 84 | 85 | if 'v' in self.mods: 86 | max_len = max(img_features_lens) 87 | img_features = img_features.split(img_features_lens, dim=0) 88 | img_features = [padTensor(s, max_len) for s in img_features] 89 | img_features = torch.stack(img_features, dim=1) # (seq_len, batch, dim) 90 | _, (img_hn, _) = self.rnns['v'](img_features) 91 | img_hn = img_hn.transpose(0, 1).flatten(start_dim=1) # (batch, hid_dim * 2) 92 | img_hn = self.affines['v'](img_hn) 93 | all_logits.append(img_hn) 94 | 95 | if len(self.mods) == 1: 96 | 
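            # single-modality case: no learned fusion is needed, return that head's logits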
return all_logits[0] 97 | 98 | return self.weighted_fusion(torch.stack(all_logits, dim=-1)).squeeze(-1) 99 | -------------------------------------------------------------------------------- /V2EM/src/model/baselines/lf_transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from typing import List 5 | from src.model.transformer_encoder import WrappedTransformerEncoder 6 | 7 | 8 | class LF_Transformer(nn.Module): 9 | def __init__(self, args): 10 | super(LF_Transformer, self).__init__() 11 | num_classes = args['num_emotions'] 12 | self.mods = args['modalities'] 13 | feature_sizes = np.array(args['hfc_sizes']) 14 | nlayers = args['trans_nlayers'] 15 | # nheads = args['trans_nheads'] 16 | # trans_dim = args['trans_dim'] 17 | 18 | self.transformers = nn.ModuleDict({ 19 | 't': WrappedTransformerEncoder( 20 | dim=feature_sizes[0], # 300 21 | num_layers=nlayers, 22 | num_heads=4 23 | ), 24 | 'a': WrappedTransformerEncoder( 25 | dim=feature_sizes[1], # 2 padding features are added to bring the size to 144, so it divides evenly by the number of heads 26 | num_layers=nlayers, 27 | num_heads=2 28 | ), 29 | 'v': WrappedTransformerEncoder( 30 | dim=feature_sizes[2], # 35 31 | num_layers=nlayers, 32 | num_heads=5 33 | ) 34 | }) 35 | 36 | self.affines = nn.ModuleDict({ 37 | 't': nn.Sequential( 38 | nn.Linear(feature_sizes[0], feature_sizes[0] // 2), 39 | nn.ReLU(), 40 | nn.Linear(feature_sizes[0] // 2, num_classes) 41 | ), 42 | 'a': nn.Sequential( 43 | nn.Linear(feature_sizes[1], feature_sizes[1] // 2), 44 | nn.ReLU(), 45 | nn.Linear(feature_sizes[1] // 2, num_classes) 46 | ), 47 | 'v': nn.Sequential( 48 | nn.Linear(feature_sizes[2], feature_sizes[2] // 2), 49 | nn.ReLU(), 50 | nn.Linear(feature_sizes[2] // 2, num_classes) 51 | ) 52 | }) 53 | 54 | self.weighted_fusion = nn.Linear(len(self.mods), 1, bias=False) 55 | 56 | def forward(self, img_features, img_features_lens, audio_features, audio_features_lens, texts): 57 | all_logits = [] 58 | 59 | if 't' in self.mods: 60 | texts = self.transformers['t'](texts, get_cls=True) 61 | texts = self.affines['t'](texts) 62 | all_logits.append(texts) 63 | 64 | if 'a' in self.mods: 65 | audio_features = self.transformers['a'](audio_features, audio_features_lens, get_cls=True) 66 | audio_features = self.affines['a'](audio_features) 67 | all_logits.append(audio_features) 68 | 69 | if 'v' in self.mods: 70 | img_features = self.transformers['v'](img_features, img_features_lens, get_cls=True) 71 | img_features = self.affines['v'](img_features) 72 | all_logits.append(img_features) 73 | 74 | if len(self.mods) == 1: 75 | return all_logits[0] 76 | 77 | return self.weighted_fusion(torch.stack(all_logits, dim=-1)).squeeze(-1) 78 | -------------------------------------------------------------------------------- /V2EM/src/model/e2e_t.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from transformers import AlbertModel 3 | 4 | class MME2E_T(nn.Module): 5 | def __init__(self, feature_dim, num_classes=4, size='base'): 6 | super(MME2E_T, self).__init__() 7 | self.albert = AlbertModel.from_pretrained(f'albert-{size}-v2') 8 | # self.albert = AlbertModel.from_pretrained('./src/models/albert-base-v2') 9 | 10 | 11 | def forward(self, text, get_cls=False): 12 | last_hidden_state = self.albert(**text).last_hidden_state 13 | # print(last_hidden_state) 14 | if get_cls: 15 | cls_feature = last_hidden_state[:, 0] 16 | return cls_feature 17 | 18 | 
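# NOTE: `text_feature_affine` is used below but never defined in __init__ above,
# so this branch (get_cls=False) would raise AttributeError as written. A minimal
# sketch of a fix, assuming the otherwise-unused `feature_dim` argument is the
# intended output size (this is an assumption, not the authors' definition):
#   self.text_feature_affine = nn.Linear(self.albert.config.hidden_size, feature_dim)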
text_features = self.text_feature_affine(last_hidden_state).sum(1) 19 | return text_features 20 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/fx_features.py: -------------------------------------------------------------------------------- 1 | """ PyTorch FX Based Feature Extraction Helpers 2 | Using https://pytorch.org/vision/stable/feature_extraction.html 3 | """ 4 | from typing import Callable 5 | from torch import nn 6 | 7 | from .features import _get_feature_info 8 | 9 | try: 10 | from torchvision.models.feature_extraction import create_feature_extractor 11 | has_fx_feature_extraction = True 12 | except ImportError: 13 | has_fx_feature_extraction = False 14 | 15 | # Layers we want to treat as leaf modules 16 | from .layers import Conv2dSame, ScaledStdConv2dSame, BatchNormAct2d, BlurPool2d, CondConv2d, StdConv2dSame, DropPath 17 | from .layers.non_local_attn import BilinearAttnTransform 18 | from .layers.pool2d_same import MaxPool2dSame, AvgPool2dSame 19 | 20 | # NOTE: By default, any modules from timm.models.layers that we want to treat as leaf modules go here 21 | # BUT modules from timm.models should use the registration mechanism below 22 | _leaf_modules = { 23 | BatchNormAct2d, # reason: flow control for jit scripting 24 | BilinearAttnTransform, # reason: flow control t <= 1 25 | BlurPool2d, # reason: TypeError: F.conv2d received Proxy in groups=x.shape[1] 26 | # Reason: get_same_padding has a max which raises a control flow error 27 | Conv2dSame, MaxPool2dSame, ScaledStdConv2dSame, StdConv2dSame, AvgPool2dSame, 28 | CondConv2d, # reason: TypeError: F.conv2d received Proxy in groups=self.groups * B (because B = x.shape[0]) 29 | DropPath, # reason: TypeError: rand received Proxy in `size` argument 30 | } 31 | 32 | try: 33 | from .layers import InplaceAbn 34 | _leaf_modules.add(InplaceAbn) 35 | except ImportError: 36 | pass 37 | 38 | 39 | def register_notrace_module(module: nn.Module): 40 | """ 41 | Any module not under timm.models.layers should get this decorator if we don't want to trace through it.
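    A usage sketch (MyDynamicModule stands in for any module with untraceable,
    data-dependent control flow):

        @register_notrace_module
        class MyDynamicModule(nn.Module):
            ...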
42 | """ 43 | _leaf_modules.add(module) 44 | return module 45 | 46 | 47 | # Functions we want to autowrap (treat them as leaves) 48 | _autowrap_functions = set() 49 | 50 | 51 | def register_notrace_function(func: Callable): 52 | """ 53 | Decorator for functions which ought not to be traced through 54 | """ 55 | _autowrap_functions.add(func) 56 | return func 57 | 58 | 59 | class FeatureGraphNet(nn.Module): 60 | def __init__(self, model, out_indices, out_map=None): 61 | super().__init__() 62 | assert has_fx_feature_extraction, 'Please update to PyTorch 1.10+, torchvision 0.11+ for FX feature extraction' 63 | self.feature_info = _get_feature_info(model, out_indices) 64 | if out_map is not None: 65 | assert len(out_map) == len(out_indices) 66 | return_nodes = {info['module']: out_map[i] if out_map is not None else info['module'] 67 | for i, info in enumerate(self.feature_info) if i in out_indices} 68 | self.graph_module = create_feature_extractor( 69 | model, return_nodes, 70 | tracer_kwargs={'leaf_modules': list(_leaf_modules), 'autowrap_functions': list(_autowrap_functions)}) 71 | 72 | def forward(self, x): 73 | return list(self.graph_module(x).values()) -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .activations import * 2 | from .adaptive_avgmax_pool import \ 3 | adaptive_avgmax_pool2d, select_adaptive_pool2d, AdaptiveAvgMaxPool2d, SelectAdaptivePool2d 4 | from .blur_pool import BlurPool2d 5 | from .classifier import ClassifierHead, create_classifier 6 | from .cond_conv2d import CondConv2d, get_condconv_initializer 7 | from .config import is_exportable, is_scriptable, is_no_jit, set_exportable, set_scriptable, set_no_jit,\ 8 | set_layer_config 9 | from .conv2d_same import Conv2dSame, conv2d_same 10 | from .conv_bn_act import ConvBnAct 11 | from .create_act import create_act_layer, get_act_layer, get_act_fn 12 | from .create_attn import get_attn, create_attn 13 | from .create_conv2d import create_conv2d 14 | from .create_norm_act import get_norm_act_layer, create_norm_act, convert_norm_act 15 | from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path 16 | from .eca import EcaModule, CecaModule, EfficientChannelAttn, CircularEfficientChannelAttn 17 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 18 | from .gather_excite import GatherExcite 19 | from .global_context import GlobalContext 20 | from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible 21 | from .inplace_abn import InplaceAbn 22 | from .linear import Linear 23 | from .mixed_conv2d import MixedConv2d 24 | from .mlp import Mlp, GluMlp, GatedMlp, ConvMlp 25 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 26 | from .norm import GroupNorm, LayerNorm2d 27 | from .norm_act import BatchNormAct2d, GroupNormAct 28 | from .padding import get_padding, get_same_padding, pad_same 29 | from .patch_embed import PatchEmbed 30 | from .pool2d_same import AvgPool2dSame, create_pool2d 31 | from .squeeze_excite import SEModule, SqueezeExcite, EffectiveSEModule, EffectiveSqueezeExcite 32 | from .selective_kernel import SelectiveKernel 33 | from .separable_conv import SeparableConv2d, SeparableConvBnAct 34 | from .space_to_depth import SpaceToDepthModule 35 | from .split_attn import SplitAttn 36 | from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model 37 | from .std_conv import StdConv2d, StdConv2dSame, ScaledStdConv2d, 
ScaledStdConv2dSame 38 | from .test_time_pool import TestTimePoolHead, apply_test_time_pool 39 | from .trace_utils import _assert, _float_to_int 40 | from .weight_init import trunc_normal_, variance_scaling_, lecun_normal_ 41 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/activations_jit.py: -------------------------------------------------------------------------------- 1 | """ Activations 2 | 3 | A collection of jit-scripted activations fn and modules with a common interface so that they can 4 | easily be swapped. All have an `inplace` arg even if not used. 5 | 6 | All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not 7 | currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted 8 | versions if they contain in-place ops. 9 | 10 | Hacked together by / Copyright 2020 Ross Wightman 11 | """ 12 | 13 | import torch 14 | from torch import nn as nn 15 | from torch.nn import functional as F 16 | 17 | 18 | @torch.jit.script 19 | def swish_jit(x, inplace: bool = False): 20 | """Swish - Described in: https://arxiv.org/abs/1710.05941 21 | """ 22 | return x.mul(x.sigmoid()) 23 | 24 | 25 | @torch.jit.script 26 | def mish_jit(x, _inplace: bool = False): 27 | """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 28 | """ 29 | return x.mul(F.softplus(x).tanh()) 30 | 31 | 32 | class SwishJit(nn.Module): 33 | def __init__(self, inplace: bool = False): 34 | super(SwishJit, self).__init__() 35 | 36 | def forward(self, x): 37 | return swish_jit(x) 38 | 39 | 40 | class MishJit(nn.Module): 41 | def __init__(self, inplace: bool = False): 42 | super(MishJit, self).__init__() 43 | 44 | def forward(self, x): 45 | return mish_jit(x) 46 | 47 | 48 | @torch.jit.script 49 | def hard_sigmoid_jit(x, inplace: bool = False): 50 | # return F.relu6(x + 3.) / 6. 51 | return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 52 | 53 | 54 | class HardSigmoidJit(nn.Module): 55 | def __init__(self, inplace: bool = False): 56 | super(HardSigmoidJit, self).__init__() 57 | 58 | def forward(self, x): 59 | return hard_sigmoid_jit(x) 60 | 61 | 62 | @torch.jit.script 63 | def hard_swish_jit(x, inplace: bool = False): 64 | # return x * (F.relu6(x + 3.) / 6) 65 | return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 
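# A quick equivalence sketch (assuming a recent PyTorch where F.hardswish exists):
# hard_swish_jit computes x * clamp(x + 3, 0, 6) / 6, which matches the built-in
# hard-swish, so the following should hold:
#   >>> x = torch.randn(8)
#   >>> torch.allclose(hard_swish_jit(x), torch.nn.functional.hardswish(x))
#   True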
66 | 67 | 68 | class HardSwishJit(nn.Module): 69 | def __init__(self, inplace: bool = False): 70 | super(HardSwishJit, self).__init__() 71 | 72 | def forward(self, x): 73 | return hard_swish_jit(x) 74 | 75 | 76 | @torch.jit.script 77 | def hard_mish_jit(x, inplace: bool = False): 78 | """ Hard Mish 79 | Experimental, based on notes by Mish author Diganta Misra at 80 | https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md 81 | """ 82 | return 0.5 * x * (x + 2).clamp(min=0, max=2) 83 | 84 | 85 | class HardMishJit(nn.Module): 86 | def __init__(self, inplace: bool = False): 87 | super(HardMishJit, self).__init__() 88 | 89 | def forward(self, x): 90 | return hard_mish_jit(x) 91 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/blur_pool.py: -------------------------------------------------------------------------------- 1 | """ 2 | BlurPool layer inspired by 3 | - Kornia's Max_BlurPool2d 4 | - Making Convolutional Networks Shift-Invariant Again :cite:`zhang2019shiftinvar` 5 | 6 | Hacked together by Chris Ha and Ross Wightman 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import numpy as np 13 | from .padding import get_padding 14 | 15 | 16 | class BlurPool2d(nn.Module): 17 | r"""Creates a module that blurs and downsamples a given feature map. 18 | See :cite:`zhang2019shiftinvar` for more details. 19 | Corresponds to the Downsample class, which does blurring and subsampling 20 | 21 | Args: 22 | channels (int): number of input channels 23 | filt_size (int): binomial filter size for blurring. currently supports 3 (default) and 5. 24 | stride (int): downsampling filter stride 25 | 26 | Returns: 27 | torch.Tensor: the transformed tensor.
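    Shape sketch (with the default 3-tap filter, stride 2, and symmetric padding 1):
        >>> pool = BlurPool2d(channels=8)
        >>> pool(torch.randn(1, 8, 32, 32)).shape
        torch.Size([1, 8, 16, 16])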
28 | """ 29 | def __init__(self, channels, filt_size=3, stride=2) -> None: 30 | super(BlurPool2d, self).__init__() 31 | assert filt_size > 1 32 | self.channels = channels 33 | self.filt_size = filt_size 34 | self.stride = stride 35 | self.padding = [get_padding(filt_size, stride, dilation=1)] * 4 36 | coeffs = torch.tensor((np.poly1d((0.5, 0.5)) ** (self.filt_size - 1)).coeffs.astype(np.float32)) 37 | blur_filter = (coeffs[:, None] * coeffs[None, :])[None, None, :, :].repeat(self.channels, 1, 1, 1) 38 | self.register_buffer('filt', blur_filter, persistent=False) 39 | 40 | def forward(self, x: torch.Tensor) -> torch.Tensor: 41 | x = F.pad(x, self.padding, 'reflect') 42 | return F.conv2d(x, self.filt, stride=self.stride, groups=x.shape[1]) 43 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/classifier.py: -------------------------------------------------------------------------------- 1 | """ Classifier head and layer factory 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | from torch.nn import functional as F 7 | 8 | from .adaptive_avgmax_pool import SelectAdaptivePool2d 9 | 10 | 11 | def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False): 12 | flatten_in_pool = not use_conv # flatten when we use a Linear layer after pooling 13 | if not pool_type: 14 | assert num_classes == 0 or use_conv,\ 15 | 'Pooling can only be disabled if classifier is also removed or conv classifier is used' 16 | flatten_in_pool = False # disable flattening if pooling is pass-through (no pooling) 17 | global_pool = SelectAdaptivePool2d(pool_type=pool_type, flatten=flatten_in_pool) 18 | num_pooled_features = num_features * global_pool.feat_mult() 19 | return global_pool, num_pooled_features 20 | 21 | 22 | def _create_fc(num_features, num_classes, use_conv=False): 23 | if num_classes <= 0: 24 | fc = nn.Identity() # pass-through (no classifier) 25 | elif use_conv: 26 | fc = nn.Conv2d(num_features, num_classes, 1, bias=True) 27 | else: 28 | fc = nn.Linear(num_features, num_classes, bias=True) 29 | return fc 30 | 31 | 32 | def create_classifier(num_features, num_classes, pool_type='avg', use_conv=False): 33 | global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv) 34 | fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 35 | return global_pool, fc 36 | 37 | 38 | class ClassifierHead(nn.Module): 39 | """Classifier head w/ configurable global pooling and dropout.""" 40 | 41 | def __init__(self, in_chs, num_classes, pool_type='avg', drop_rate=0., use_conv=False): 42 | super(ClassifierHead, self).__init__() 43 | self.drop_rate = drop_rate 44 | self.global_pool, num_pooled_features = _create_pool(in_chs, num_classes, pool_type, use_conv=use_conv) 45 | self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 46 | self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity() 47 | 48 | def forward(self, x): 49 | x = self.global_pool(x) 50 | if self.drop_rate: 51 | x = F.dropout(x, p=float(self.drop_rate), training=self.training) 52 | x = self.fc(x) 53 | x = self.flatten(x) 54 | return x 55 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/config.py: -------------------------------------------------------------------------------- 1 | """ Model / Layer Config singleton state 2 | """ 3 | from typing import Any, Optional 4 | 5 | __all__ = [ 6 
| 'is_exportable', 'is_scriptable', 'is_no_jit', 7 | 'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config' 8 | ] 9 | 10 | # Set to True if prefer to have layers with no jit optimization (includes activations) 11 | _NO_JIT = False 12 | 13 | # Set to True if prefer to have activation layers with no jit optimization 14 | # NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying 15 | # the jit flags so far are activations. This will change as more layers are updated and/or added. 16 | _NO_ACTIVATION_JIT = False 17 | 18 | # Set to True if exporting a model with Same padding via ONNX 19 | _EXPORTABLE = False 20 | 21 | # Set to True if wanting to use torch.jit.script on a model 22 | _SCRIPTABLE = False 23 | 24 | 25 | def is_no_jit(): 26 | return _NO_JIT 27 | 28 | 29 | class set_no_jit: 30 | def __init__(self, mode: bool) -> None: 31 | global _NO_JIT 32 | self.prev = _NO_JIT 33 | _NO_JIT = mode 34 | 35 | def __enter__(self) -> None: 36 | pass 37 | 38 | def __exit__(self, *args: Any) -> bool: 39 | global _NO_JIT 40 | _NO_JIT = self.prev 41 | return False 42 | 43 | 44 | def is_exportable(): 45 | return _EXPORTABLE 46 | 47 | 48 | class set_exportable: 49 | def __init__(self, mode: bool) -> None: 50 | global _EXPORTABLE 51 | self.prev = _EXPORTABLE 52 | _EXPORTABLE = mode 53 | 54 | def __enter__(self) -> None: 55 | pass 56 | 57 | def __exit__(self, *args: Any) -> bool: 58 | global _EXPORTABLE 59 | _EXPORTABLE = self.prev 60 | return False 61 | 62 | 63 | def is_scriptable(): 64 | return _SCRIPTABLE 65 | 66 | 67 | class set_scriptable: 68 | def __init__(self, mode: bool) -> None: 69 | global _SCRIPTABLE 70 | self.prev = _SCRIPTABLE 71 | _SCRIPTABLE = mode 72 | 73 | def __enter__(self) -> None: 74 | pass 75 | 76 | def __exit__(self, *args: Any) -> bool: 77 | global _SCRIPTABLE 78 | _SCRIPTABLE = self.prev 79 | return False 80 | 81 | 82 | class set_layer_config: 83 | """ Layer config context manager that allows setting all layer config flags at once. 84 | If a flag arg is None, it will not change the current value. 
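    Usage sketch (build_model is a stand-in for any model constructor, not a
    function from this codebase):

        with set_layer_config(scriptable=True, no_jit=True):
            model = build_model()  # layers created here see the temporary flags
        # on exit the previous flag values are restored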
85 | """ 86 | def __init__( 87 | self, 88 | scriptable: Optional[bool] = None, 89 | exportable: Optional[bool] = None, 90 | no_jit: Optional[bool] = None, 91 | no_activation_jit: Optional[bool] = None): 92 | global _SCRIPTABLE 93 | global _EXPORTABLE 94 | global _NO_JIT 95 | global _NO_ACTIVATION_JIT 96 | self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT 97 | if scriptable is not None: 98 | _SCRIPTABLE = scriptable 99 | if exportable is not None: 100 | _EXPORTABLE = exportable 101 | if no_jit is not None: 102 | _NO_JIT = no_jit 103 | if no_activation_jit is not None: 104 | _NO_ACTIVATION_JIT = no_activation_jit 105 | 106 | def __enter__(self) -> None: 107 | pass 108 | 109 | def __exit__(self, *args: Any) -> bool: 110 | global _SCRIPTABLE 111 | global _EXPORTABLE 112 | global _NO_JIT 113 | global _NO_ACTIVATION_JIT 114 | _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev 115 | return False 116 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/conv2d_same.py: -------------------------------------------------------------------------------- 1 | """ Conv2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import Tuple, Optional 9 | 10 | from .padding import pad_same, get_padding_value 11 | 12 | 13 | def conv2d_same( 14 | x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1), 15 | padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1): 16 | x = pad_same(x, weight.shape[-2:], stride, dilation) 17 | return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups) 18 | 19 | 20 | class Conv2dSame(nn.Conv2d): 21 | """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions 22 | """ 23 | 24 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 25 | padding=0, dilation=1, groups=1, bias=True): 26 | super(Conv2dSame, self).__init__( 27 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 28 | 29 | def forward(self, x): 30 | return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 31 | 32 | 33 | def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): 34 | padding = kwargs.pop('padding', '') 35 | kwargs.setdefault('bias', False) 36 | padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs) 37 | if is_dynamic: 38 | return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs) 39 | else: 40 | return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) 41 | 42 | 43 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/conv_bn_act.py: -------------------------------------------------------------------------------- 1 | """ Conv2d + BN + Act 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | 7 | from .create_conv2d import create_conv2d 8 | from .create_norm_act import convert_norm_act 9 | 10 | 11 | class ConvBnAct(nn.Module): 12 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding='', dilation=1, groups=1, 13 | bias=False, apply_act=True, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, aa_layer=None, 14 | drop_block=None): 15 | super(ConvBnAct, self).__init__() 16 | use_aa = aa_layer is not None 17 | 18 | self.conv = create_conv2d( 19 | in_channels, out_channels, 
kernel_size, stride=1 if use_aa else stride, 20 | padding=padding, dilation=dilation, groups=groups, bias=bias) 21 | 22 | # NOTE for backwards compatibility with models that use separate norm and act layer definitions 23 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 24 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 25 | self.aa = aa_layer(channels=out_channels) if stride == 2 and use_aa else None 26 | 27 | @property 28 | def in_channels(self): 29 | return self.conv.in_channels 30 | 31 | @property 32 | def out_channels(self): 33 | return self.conv.out_channels 34 | 35 | def forward(self, x): 36 | x = self.conv(x) 37 | x = self.bn(x) 38 | if self.aa is not None: 39 | x = self.aa(x) 40 | return x 41 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/create_attn.py: -------------------------------------------------------------------------------- 1 | """ Attention Factory 2 | 3 | Hacked together by / Copyright 2021 Ross Wightman 4 | """ 5 | import torch 6 | from functools import partial 7 | 8 | from .bottleneck_attn import BottleneckAttn 9 | from .cbam import CbamModule, LightCbamModule 10 | from .eca import EcaModule, CecaModule 11 | from .gather_excite import GatherExcite 12 | from .global_context import GlobalContext 13 | from .halo_attn import HaloAttn 14 | from .lambda_layer import LambdaLayer 15 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 16 | from .selective_kernel import SelectiveKernel 17 | from .split_attn import SplitAttn 18 | from .squeeze_excite import SEModule, EffectiveSEModule 19 | 20 | 21 | def get_attn(attn_type): 22 | if isinstance(attn_type, torch.nn.Module): 23 | return attn_type 24 | module_cls = None 25 | if attn_type is not None: 26 | if isinstance(attn_type, str): 27 | attn_type = attn_type.lower() 28 | # Lightweight attention modules (channel and/or coarse spatial). 29 | # Typically added to existing network architecture blocks in addition to existing convolutions. 30 | if attn_type == 'se': 31 | module_cls = SEModule 32 | elif attn_type == 'ese': 33 | module_cls = EffectiveSEModule 34 | elif attn_type == 'eca': 35 | module_cls = EcaModule 36 | elif attn_type == 'ecam': 37 | module_cls = partial(EcaModule, use_mlp=True) 38 | elif attn_type == 'ceca': 39 | module_cls = CecaModule 40 | elif attn_type == 'ge': 41 | module_cls = GatherExcite 42 | elif attn_type == 'gc': 43 | module_cls = GlobalContext 44 | elif attn_type == 'gca': 45 | module_cls = partial(GlobalContext, fuse_add=True, fuse_scale=False) 46 | elif attn_type == 'cbam': 47 | module_cls = CbamModule 48 | elif attn_type == 'lcbam': 49 | module_cls = LightCbamModule 50 | 51 | # Attention / attention-like modules w/ significant params 52 | # Typically replace some of the existing workhorse convs in a network architecture. 53 | # All of these accept a stride argument and can spatially downsample the input. 54 | elif attn_type == 'sk': 55 | module_cls = SelectiveKernel 56 | elif attn_type == 'splat': 57 | module_cls = SplitAttn 58 | 59 | # Self-attention / attention-like modules w/ significant compute and/or params 60 | # Typically replace some of the existing workhorse convs in a network architecture. 61 | # All of these accept a stride argument and can spatially downsample the input. 
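            # A factory sketch for orientation: get_attn('se') returns the SEModule
            # class (not an instance); create_attn('se', 64) at the bottom of this
            # file then instantiates it for 64 input channels.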
62 | elif attn_type == 'lambda': 63 | return LambdaLayer 64 | elif attn_type == 'bottleneck': 65 | return BottleneckAttn 66 | elif attn_type == 'halo': 67 | return HaloAttn 68 | elif attn_type == 'nl': 69 | module_cls = NonLocalAttn 70 | elif attn_type == 'bat': 71 | module_cls = BatNonLocalAttn 72 | 73 | # Woops! 74 | else: 75 | assert False, "Invalid attn module (%s)" % attn_type 76 | elif isinstance(attn_type, bool): 77 | if attn_type: 78 | module_cls = SEModule 79 | else: 80 | module_cls = attn_type 81 | return module_cls 82 | 83 | 84 | def create_attn(attn_type, channels, **kwargs): 85 | module_cls = get_attn(attn_type) 86 | if module_cls is not None: 87 | # NOTE: it's expected the first (positional) argument of all attention layers is the # input channels 88 | return module_cls(channels, **kwargs) 89 | return None 90 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/create_conv2d.py: -------------------------------------------------------------------------------- 1 | """ Create Conv2d Factory Method 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | from .mixed_conv2d import MixedConv2d 7 | from .cond_conv2d import CondConv2d 8 | from .conv2d_same import create_conv2d_pad 9 | 10 | 11 | def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): 12 | """ Select a 2d convolution implementation based on arguments 13 | Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d. 14 | 15 | Used extensively by EfficientNet, MobileNetv3 and related networks. 16 | """ 17 | if isinstance(kernel_size, list): 18 | assert 'num_experts' not in kwargs # MixNet + CondConv combo not supported currently 19 | assert 'groups' not in kwargs # MixedConv groups are defined by kernel list 20 | # We're going to use only lists for defining the MixedConv2d kernel groups, 21 | # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. 22 | m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs) 23 | else: 24 | depthwise = kwargs.pop('depthwise', False) 25 | # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0 26 | groups = in_channels if depthwise else kwargs.pop('groups', 1) 27 | if 'num_experts' in kwargs and kwargs['num_experts'] > 0: 28 | m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 29 | else: 30 | m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 31 | return m 32 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/create_norm_act.py: -------------------------------------------------------------------------------- 1 | """ NormAct (Normalization + Activation Layer) Factory 2 | 3 | Create norm + act combo modules that attempt to be backwards compatible with separate norm + act 4 | instances in models. Where these are used it will be possible to swap separate BN + act layers with 5 | combined modules like IABN or EvoNorms.
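A usage sketch:

    norm_act = create_norm_act('batchnorm', 64)  # BatchNormAct2d(64); ReLU applied after the norm by default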
6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | import types 10 | import functools 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 16 | from .norm_act import BatchNormAct2d, GroupNormAct 17 | from .inplace_abn import InplaceAbn 18 | 19 | _NORM_ACT_TYPES = {BatchNormAct2d, GroupNormAct, EvoNormBatch2d, EvoNormSample2d, InplaceAbn} 20 | _NORM_ACT_REQUIRES_ARG = {BatchNormAct2d, GroupNormAct, InplaceAbn} # requires act_layer arg to define act type 21 | 22 | 23 | def get_norm_act_layer(layer_class): 24 | layer_class = layer_class.replace('_', '').lower() 25 | if layer_class.startswith("batchnorm"): 26 | layer = BatchNormAct2d 27 | elif layer_class.startswith("groupnorm"): 28 | layer = GroupNormAct 29 | elif layer_class == "evonormbatch": 30 | layer = EvoNormBatch2d 31 | elif layer_class == "evonormsample": 32 | layer = EvoNormSample2d 33 | elif layer_class == "iabn" or layer_class == "inplaceabn": 34 | layer = InplaceAbn 35 | else: 36 | assert False, "Invalid norm_act layer (%s)" % layer_class 37 | return layer 38 | 39 | 40 | def create_norm_act(layer_type, num_features, apply_act=True, jit=False, **kwargs): 41 | layer_parts = layer_type.split('-') # e.g. batchnorm-leaky_relu 42 | assert len(layer_parts) in (1, 2) 43 | layer = get_norm_act_layer(layer_parts[0]) 44 | #activation_class = layer_parts[1].lower() if len(layer_parts) > 1 else '' # FIXME support string act selection? 45 | layer_instance = layer(num_features, apply_act=apply_act, **kwargs) 46 | if jit: 47 | layer_instance = torch.jit.script(layer_instance) 48 | return layer_instance 49 | 50 | 51 | def convert_norm_act(norm_layer, act_layer): 52 | assert isinstance(norm_layer, (type, str, types.FunctionType, functools.partial)) 53 | assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, functools.partial)) 54 | norm_act_kwargs = {} 55 | 56 | # unbind partial fn, so args can be rebound later 57 | if isinstance(norm_layer, functools.partial): 58 | norm_act_kwargs.update(norm_layer.keywords) 59 | norm_layer = norm_layer.func 60 | 61 | if isinstance(norm_layer, str): 62 | norm_act_layer = get_norm_act_layer(norm_layer) 63 | elif norm_layer in _NORM_ACT_TYPES: 64 | norm_act_layer = norm_layer 65 | elif isinstance(norm_layer, types.FunctionType): 66 | # if function type, must be a lambda/fn that creates a norm_act layer 67 | norm_act_layer = norm_layer 68 | else: 69 | type_name = norm_layer.__name__.lower() 70 | if type_name.startswith('batchnorm'): 71 | norm_act_layer = BatchNormAct2d 72 | elif type_name.startswith('groupnorm'): 73 | norm_act_layer = GroupNormAct 74 | else: 75 | assert False, f"No equivalent norm_act layer for {type_name}" 76 | 77 | if norm_act_layer in _NORM_ACT_REQUIRES_ARG: 78 | # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. 
79 | # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types 80 | norm_act_kwargs.setdefault('act_layer', act_layer) 81 | if norm_act_kwargs: 82 | norm_act_layer = functools.partial(norm_act_layer, **norm_act_kwargs) # bind/rebind args 83 | return norm_act_layer 84 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/evo_norm.py: -------------------------------------------------------------------------------- 1 | """EvoNormB0 (Batched) and EvoNormS0 (Sample) in PyTorch 2 | 3 | An attempt at getting decent performing EvoNorms running in PyTorch. 4 | While currently faster than other impl, still quite a ways off the built-in BN 5 | in terms of memory usage and throughput (roughly 5x mem, 1/2 - 1/3x speed). 6 | 7 | Still very much a WIP, fiddling with buffer usage, in-place/jit optimizations, and layouts. 8 | 9 | Hacked together by / Copyright 2020 Ross Wightman 10 | """ 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .trace_utils import _assert 16 | 17 | 18 | class EvoNormBatch2d(nn.Module): 19 | def __init__(self, num_features, apply_act=True, momentum=0.1, eps=1e-5, drop_block=None): 20 | super(EvoNormBatch2d, self).__init__() 21 | self.apply_act = apply_act # apply activation (non-linearity) 22 | self.momentum = momentum 23 | self.eps = eps 24 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 25 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 26 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 27 | self.register_buffer('running_var', torch.ones(num_features)) 28 | self.reset_parameters() 29 | 30 | def reset_parameters(self): 31 | nn.init.ones_(self.weight) 32 | nn.init.zeros_(self.bias) 33 | if self.apply_act: 34 | nn.init.ones_(self.v) 35 | 36 | def forward(self, x): 37 | _assert(x.dim() == 4, 'expected 4D input') 38 | x_type = x.dtype 39 | if self.v is not None: 40 | running_var = self.running_var.view(1, -1, 1, 1) 41 | if self.training: 42 | var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) 43 | n = x.numel() / x.shape[1] 44 | running_var = var.detach() * self.momentum * (n / (n - 1)) + running_var * (1 - self.momentum) 45 | self.running_var.copy_(running_var.view(self.running_var.shape)) 46 | else: 47 | var = running_var 48 | v = self.v.to(dtype=x_type).reshape(1, -1, 1, 1) 49 | d = x * v + (x.var(dim=(2, 3), unbiased=False, keepdim=True) + self.eps).sqrt().to(dtype=x_type) 50 | d = d.max((var + self.eps).sqrt().to(dtype=x_type)) 51 | x = x / d 52 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 53 | 54 | 55 | class EvoNormSample2d(nn.Module): 56 | def __init__(self, num_features, apply_act=True, groups=32, eps=1e-5, drop_block=None): 57 | super(EvoNormSample2d, self).__init__() 58 | self.apply_act = apply_act # apply activation (non-linearity) 59 | self.groups = groups 60 | self.eps = eps 61 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 62 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 63 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 64 | self.reset_parameters() 65 | 66 | def reset_parameters(self): 67 | nn.init.ones_(self.weight) 68 | nn.init.zeros_(self.bias) 69 | if self.apply_act: 70 | nn.init.ones_(self.v) 71 | 72 | def forward(self, x): 73 | _assert(x.dim() == 4, 'expected 4D input') 74 | B, C, H, W = x.shape 75 | _assert(C % 
self.groups == 0, '') 76 | if self.v is not None: 77 | n = x * (x * self.v.view(1, -1, 1, 1)).sigmoid() 78 | x = x.reshape(B, self.groups, -1) 79 | x = n.reshape(B, self.groups, -1) / (x.var(dim=-1, unbiased=False, keepdim=True) + self.eps).sqrt() 80 | x = x.reshape(B, C, H, W) 81 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 82 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/gather_excite.py: -------------------------------------------------------------------------------- 1 | """ Gather-Excite Attention Block 2 | 3 | Paper: `Gather-Excite: Exploiting Feature Context in CNNs` - https://arxiv.org/abs/1810.12348 4 | 5 | Official code here, but it's only partial impl in Caffe: https://github.com/hujie-frank/GENet 6 | 7 | I've tried to support all of the extent both w/ and w/o params. I don't believe I've seen another 8 | impl that covers all of the cases. 9 | 10 | NOTE: extent=0 + extra_params=False is equivalent to Squeeze-and-Excitation 11 | 12 | Hacked together by / Copyright 2021 Ross Wightman 13 | """ 14 | import math 15 | 16 | from torch import nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .create_act import create_act_layer, get_act_layer 20 | from .create_conv2d import create_conv2d 21 | from .helpers import make_divisible 22 | from .mlp import ConvMlp 23 | 24 | 25 | class GatherExcite(nn.Module): 26 | """ Gather-Excite Attention Module 27 | """ 28 | def __init__( 29 | self, channels, feat_size=None, extra_params=False, extent=0, use_mlp=True, 30 | rd_ratio=1./16, rd_channels=None, rd_divisor=1, add_maxpool=False, 31 | act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, gate_layer='sigmoid'): 32 | super(GatherExcite, self).__init__() 33 | self.add_maxpool = add_maxpool 34 | act_layer = get_act_layer(act_layer) 35 | self.extent = extent 36 | if extra_params: 37 | self.gather = nn.Sequential() 38 | if extent == 0: 39 | assert feat_size is not None, 'spatial feature size must be specified for global extent w/ params' 40 | self.gather.add_module( 41 | 'conv1', create_conv2d(channels, channels, kernel_size=feat_size, stride=1, depthwise=True)) 42 | if norm_layer: 43 | self.gather.add_module(f'norm1', nn.BatchNorm2d(channels)) 44 | else: 45 | assert extent % 2 == 0 46 | num_conv = int(math.log2(extent)) 47 | for i in range(num_conv): 48 | self.gather.add_module( 49 | f'conv{i + 1}', 50 | create_conv2d(channels, channels, kernel_size=3, stride=2, depthwise=True)) 51 | if norm_layer: 52 | self.gather.add_module(f'norm{i + 1}', nn.BatchNorm2d(channels)) 53 | if i != num_conv - 1: 54 | self.gather.add_module(f'act{i + 1}', act_layer(inplace=True)) 55 | else: 56 | self.gather = None 57 | if self.extent == 0: 58 | self.gk = 0 59 | self.gs = 0 60 | else: 61 | assert extent % 2 == 0 62 | self.gk = self.extent * 2 - 1 63 | self.gs = self.extent 64 | 65 | if not rd_channels: 66 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
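        # Worked example of the reduction arithmetic above (a sketch): with
        # channels=512 and rd_ratio=1/16, make_divisible(32.0, 1, round_limit=0.)
        # returns 32, so the excite MLP below bottlenecks 512 -> 32 -> 512.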
67 | self.mlp = ConvMlp(channels, rd_channels, act_layer=act_layer) if use_mlp else nn.Identity() 68 | self.gate = create_act_layer(gate_layer) 69 | 70 | def forward(self, x): 71 | size = x.shape[-2:] 72 | if self.gather is not None: 73 | x_ge = self.gather(x) 74 | else: 75 | if self.extent == 0: 76 | # global extent 77 | x_ge = x.mean(dim=(2, 3), keepdims=True) 78 | if self.add_maxpool: 79 | # experimental codepath, may remove or change 80 | x_ge = 0.5 * x_ge + 0.5 * x.amax((2, 3), keepdim=True) 81 | else: 82 | x_ge = F.avg_pool2d( 83 | x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2, count_include_pad=False) 84 | if self.add_maxpool: 85 | # experimental codepath, may remove or change 86 | x_ge = 0.5 * x_ge + 0.5 * F.max_pool2d(x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2) 87 | x_ge = self.mlp(x_ge) 88 | if x_ge.shape[-1] != 1 or x_ge.shape[-2] != 1: 89 | x_ge = F.interpolate(x_ge, size=size) 90 | return x * self.gate(x_ge) 91 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/global_context.py: -------------------------------------------------------------------------------- 1 | """ Global Context Attention Block 2 | 3 | Paper: `GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond` 4 | - https://arxiv.org/abs/1904.11492 5 | 6 | Official code consulted as reference: https://github.com/xvjiarui/GCNet 7 | 8 | Hacked together by / Copyright 2021 Ross Wightman 9 | """ 10 | from torch import nn as nn 11 | import torch.nn.functional as F 12 | 13 | from .create_act import create_act_layer, get_act_layer 14 | from .helpers import make_divisible 15 | from .mlp import ConvMlp 16 | from .norm import LayerNorm2d 17 | 18 | 19 | class GlobalContext(nn.Module): 20 | 21 | def __init__(self, channels, use_attn=True, fuse_add=False, fuse_scale=True, init_last_zero=False, 22 | rd_ratio=1./8, rd_channels=None, rd_divisor=1, act_layer=nn.ReLU, gate_layer='sigmoid'): 23 | super(GlobalContext, self).__init__() 24 | act_layer = get_act_layer(act_layer) 25 | 26 | self.conv_attn = nn.Conv2d(channels, 1, kernel_size=1, bias=True) if use_attn else None 27 | 28 | if rd_channels is None: 29 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
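        # Shape sketch: the block is shape-preserving, e.g. with the defaults
        #   >>> gc = GlobalContext(64)
        #   >>> gc(torch.randn(2, 64, 8, 8)).shape
        #   torch.Size([2, 64, 8, 8])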
30 | if fuse_add: 31 | self.mlp_add = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 32 | else: 33 | self.mlp_add = None 34 | if fuse_scale: 35 | self.mlp_scale = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 36 | else: 37 | self.mlp_scale = None 38 | 39 | self.gate = create_act_layer(gate_layer) 40 | self.init_last_zero = init_last_zero 41 | self.reset_parameters() 42 | 43 | def reset_parameters(self): 44 | if self.conv_attn is not None: 45 | nn.init.kaiming_normal_(self.conv_attn.weight, mode='fan_in', nonlinearity='relu') 46 | if self.mlp_add is not None: 47 | nn.init.zeros_(self.mlp_add.fc2.weight) 48 | 49 | def forward(self, x): 50 | B, C, H, W = x.shape 51 | 52 | if self.conv_attn is not None: 53 | attn = self.conv_attn(x).reshape(B, 1, H * W) # (B, 1, H * W) 54 | attn = F.softmax(attn, dim=-1).unsqueeze(3) # (B, 1, H * W, 1) 55 | context = x.reshape(B, C, H * W).unsqueeze(1) @ attn 56 | context = context.view(B, C, 1, 1) 57 | else: 58 | context = x.mean(dim=(2, 3), keepdim=True) 59 | 60 | if self.mlp_scale is not None: 61 | mlp_x = self.mlp_scale(context) 62 | x = x * self.gate(mlp_x) 63 | if self.mlp_add is not None: 64 | mlp_x = self.mlp_add(context) 65 | x = x + mlp_x 66 | 67 | return x 68 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from itertools import repeat 6 | import collections.abc 7 | 8 | 9 | # From PyTorch internals 10 | def _ntuple(n): 11 | def parse(x): 12 | if isinstance(x, collections.abc.Iterable): 13 | return x 14 | return tuple(repeat(x, n)) 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/inplace_abn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn as nn 3 | 4 | try: 5 | from inplace_abn.functions import inplace_abn, inplace_abn_sync 6 | has_iabn = True 7 | except ImportError: 8 | has_iabn = False 9 | 10 | def inplace_abn(x, weight, bias, running_mean, running_var, 11 | training=True, momentum=0.1, eps=1e-05, activation="leaky_relu", activation_param=0.01): 12 | raise ImportError( 13 | "Please install InplaceABN:'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.12'") 14 | 15 | def inplace_abn_sync(**kwargs): 16 | inplace_abn(**kwargs) 17 | 18 | 19 | class InplaceAbn(nn.Module): 20 | """Activated Batch Normalization 21 | 22 | This gathers a BatchNorm and an activation function in a single module 23 | 24 | Parameters 25 | ---------- 26 | num_features : int 27 | Number of feature channels in the input and output. 28 | eps : float 29 | Small constant to prevent numerical issues. 30 | momentum : float 31 | Momentum factor applied to compute running statistics. 
32 | affine : bool 33 | If `True` apply learned scale and shift transformation after normalization. 34 | act_layer : str or nn.Module type 35 | Name or type of the activation functions, one of: `leaky_relu`, `elu` 36 | act_param : float 37 | Negative slope for the `leaky_relu` activation. 38 | """ 39 | 40 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, apply_act=True, 41 | act_layer="leaky_relu", act_param=0.01, drop_block=None): 42 | super(InplaceAbn, self).__init__() 43 | self.num_features = num_features 44 | self.affine = affine 45 | self.eps = eps 46 | self.momentum = momentum 47 | if apply_act: 48 | if isinstance(act_layer, str): 49 | assert act_layer in ('leaky_relu', 'elu', 'identity', '') 50 | self.act_name = act_layer if act_layer else 'identity' 51 | else: 52 | # convert act layer passed as type to string 53 | if act_layer == nn.ELU: 54 | self.act_name = 'elu' 55 | elif act_layer == nn.LeakyReLU: 56 | self.act_name = 'leaky_relu' 57 | elif act_layer == nn.Identity: 58 | self.act_name = 'identity' 59 | else: 60 | assert False, f'Invalid act layer {act_layer.__name__} for IABN' 61 | else: 62 | self.act_name = 'identity' 63 | self.act_param = act_param 64 | if self.affine: 65 | self.weight = nn.Parameter(torch.ones(num_features)) 66 | self.bias = nn.Parameter(torch.zeros(num_features)) 67 | else: 68 | self.register_parameter('weight', None) 69 | self.register_parameter('bias', None) 70 | self.register_buffer('running_mean', torch.zeros(num_features)) 71 | self.register_buffer('running_var', torch.ones(num_features)) 72 | self.reset_parameters() 73 | 74 | def reset_parameters(self): 75 | nn.init.constant_(self.running_mean, 0) 76 | nn.init.constant_(self.running_var, 1) 77 | if self.affine: 78 | nn.init.constant_(self.weight, 1) 79 | nn.init.constant_(self.bias, 0) 80 | 81 | def forward(self, x): 82 | output = inplace_abn( 83 | x, self.weight, self.bias, self.running_mean, self.running_var, 84 | self.training, self.momentum, self.eps, self.act_name, self.act_param) 85 | if isinstance(output, tuple): 86 | output = output[0] 87 | return output 88 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/linear.py: -------------------------------------------------------------------------------- 1 | """ Linear layer (alternate definition) 2 | """ 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn as nn 6 | 7 | 8 | class Linear(nn.Linear): 9 | r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b` 10 | 11 | Wraps torch.nn.Linear to support AMP + torchscript usage by manually casting 12 | weight & bias to input.dtype to work around an issue w/ torch.addmm in this use case. 
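    Behaviour sketch: outside of scripting this is exactly nn.Linear; under
    torch.jit.script the weight/bias are first cast to the input dtype (e.g.
    fp16 activations under AMP) before calling F.linear.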
13 | """ 14 | def forward(self, input: torch.Tensor) -> torch.Tensor: 15 | if torch.jit.is_scripting(): 16 | bias = self.bias.to(dtype=input.dtype) if self.bias is not None else None 17 | return F.linear(input, self.weight.to(dtype=input.dtype), bias=bias) 18 | else: 19 | return F.linear(input, self.weight, self.bias) 20 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/median_pool.py: -------------------------------------------------------------------------------- 1 | """ Median Pool 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from .helpers import to_2tuple, to_4tuple 7 | 8 | 9 | class MedianPool2d(nn.Module): 10 | """ Median pool (usable as median filter when stride=1) module. 11 | 12 | Args: 13 | kernel_size: size of pooling kernel, int or 2-tuple 14 | stride: pool stride, int or 2-tuple 15 | padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad 16 | same: override padding and enforce same padding, boolean 17 | """ 18 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 19 | super(MedianPool2d, self).__init__() 20 | self.k = to_2tuple(kernel_size) 21 | self.stride = to_2tuple(stride) 22 | self.padding = to_4tuple(padding) # convert to l, r, t, b 23 | self.same = same 24 | 25 | def _padding(self, x): 26 | if self.same: 27 | ih, iw = x.size()[2:] 28 | if ih % self.stride[0] == 0: 29 | ph = max(self.k[0] - self.stride[0], 0) 30 | else: 31 | ph = max(self.k[0] - (ih % self.stride[0]), 0) 32 | if iw % self.stride[1] == 0: 33 | pw = max(self.k[1] - self.stride[1], 0) 34 | else: 35 | pw = max(self.k[1] - (iw % self.stride[1]), 0) 36 | pl = pw // 2 37 | pr = pw - pl 38 | pt = ph // 2 39 | pb = ph - pt 40 | padding = (pl, pr, pt, pb) 41 | else: 42 | padding = self.padding 43 | return padding 44 | 45 | def forward(self, x): 46 | x = F.pad(x, self._padding(x), mode='reflect') 47 | x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) 48 | x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] 49 | return x 50 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/mixed_conv2d.py: -------------------------------------------------------------------------------- 1 | """ PyTorch Mixed Convolution 2 | 3 | Paper: MixConv: Mixed Depthwise Convolutional Kernels (https://arxiv.org/abs/1907.09595) 4 | 5 | Hacked together by / Copyright 2020 Ross Wightman 6 | """ 7 | 8 | import torch 9 | from torch import nn as nn 10 | 11 | from .conv2d_same import create_conv2d_pad 12 | 13 | 14 | def _split_channels(num_chan, num_groups): 15 | split = [num_chan // num_groups for _ in range(num_groups)] 16 | split[0] += num_chan - sum(split) 17 | return split 18 | 19 | 20 | class MixedConv2d(nn.ModuleDict): 21 | """ Mixed Grouped Convolution 22 | 23 | Based on MDConv and GroupedConv in MixNet impl: 24 | https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py 25 | """ 26 | def __init__(self, in_channels, out_channels, kernel_size=3, 27 | stride=1, padding='', dilation=1, depthwise=False, **kwargs): 28 | super(MixedConv2d, self).__init__() 29 | 30 | kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size] 31 | num_groups = len(kernel_size) 32 | in_splits = _split_channels(in_channels, num_groups) 33 | out_splits = _split_channels(out_channels, num_groups) 34 | self.in_channels = sum(in_splits) 35 | 
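        # Worked example of _split_channels above (a sketch):
        #   _split_channels(13, 3) -> [5, 4, 4]  (the first group absorbs the remainder)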
self.out_channels = sum(out_splits) 36 | for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)): 37 | conv_groups = in_ch if depthwise else 1 38 | # use add_module to keep key space clean 39 | self.add_module( 40 | str(idx), 41 | create_conv2d_pad( 42 | in_ch, out_ch, k, stride=stride, 43 | padding=padding, dilation=dilation, groups=conv_groups, **kwargs) 44 | ) 45 | self.splits = in_splits 46 | 47 | def forward(self, x): 48 | x_split = torch.split(x, self.splits, 1) 49 | x_out = [c(x_split[i]) for i, c in enumerate(self.values())] 50 | x = torch.cat(x_out, 1) 51 | return x 52 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/norm.py: -------------------------------------------------------------------------------- 1 | """ Normalization layers and wrappers 2 | """ 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class GroupNorm(nn.GroupNorm): 9 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True): 10 | # NOTE num_channels is swapped to first arg for consistency in swapping norm layers with BN 11 | super().__init__(num_groups, num_channels, eps=eps, affine=affine) 12 | 13 | def forward(self, x): 14 | return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 15 | 16 | 17 | class LayerNorm2d(nn.LayerNorm): 18 | """ LayerNorm for channels of '2D' spatial BCHW tensors """ 19 | def __init__(self, num_channels): 20 | super().__init__(num_channels) 21 | 22 | def forward(self, x: torch.Tensor) -> torch.Tensor: 23 | return F.layer_norm( 24 | x.permute(0, 2, 3, 1), self.normalized_shape, self.weight, self.bias, self.eps).permute(0, 3, 1, 2) 25 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/norm_act.py: -------------------------------------------------------------------------------- 1 | """ Normalization + Activation Layers 2 | """ 3 | import torch 4 | from torch import nn as nn 5 | from torch.nn import functional as F 6 | 7 | from .create_act import get_act_layer 8 | 9 | 10 | class BatchNormAct2d(nn.BatchNorm2d): 11 | """BatchNorm + Activation 12 | 13 | This module performs BatchNorm + Activation in a manner that will remain backwards 14 | compatible with weights trained with separate bn, act. This is why we inherit from BN 15 | instead of composing it as a .bn member. 16 | """ 17 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, 18 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 19 | super(BatchNormAct2d, self).__init__( 20 | num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats) 21 | if isinstance(act_layer, str): 22 | act_layer = get_act_layer(act_layer) 23 | if act_layer is not None and apply_act: 24 | act_args = dict(inplace=True) if inplace else {} 25 | self.act = act_layer(**act_args) 26 | else: 27 | self.act = nn.Identity() 28 | 29 | def _forward_jit(self, x): 30 | """ A cut & paste of the contents of the PyTorch BatchNorm2d forward function 31 | """ 32 | # exponential_average_factor is set to self.momentum 33 | # (when it is available) only so that it gets updated 34 | # in ONNX graph when this node is exported to ONNX.
35 | if self.momentum is None: 36 | exponential_average_factor = 0.0 37 | else: 38 | exponential_average_factor = self.momentum 39 | 40 | if self.training and self.track_running_stats: 41 | # TODO: if statement only here to tell the jit to skip emitting this when it is None 42 | if self.num_batches_tracked is not None: 43 | self.num_batches_tracked += 1 44 | if self.momentum is None: # use cumulative moving average 45 | exponential_average_factor = 1.0 / float(self.num_batches_tracked) 46 | else: # use exponential moving average 47 | exponential_average_factor = self.momentum 48 | 49 | x = F.batch_norm( 50 | x, self.running_mean, self.running_var, self.weight, self.bias, 51 | self.training or not self.track_running_stats, 52 | exponential_average_factor, self.eps) 53 | return x 54 | 55 | @torch.jit.ignore 56 | def _forward_python(self, x): 57 | return super(BatchNormAct2d, self).forward(x) 58 | 59 | def forward(self, x): 60 | # FIXME cannot call parent forward() and maintain jit.script compatibility? 61 | if torch.jit.is_scripting(): 62 | x = self._forward_jit(x) 63 | else: 64 | x = self._forward_python(x) 65 | x = self.act(x) 66 | return x 67 | 68 | 69 | class GroupNormAct(nn.GroupNorm): 70 | # NOTE num_channel and num_groups order flipped for easier layer swaps / binding of fixed args 71 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True, 72 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 73 | super(GroupNormAct, self).__init__(num_groups, num_channels, eps=eps, affine=affine) 74 | if isinstance(act_layer, str): 75 | act_layer = get_act_layer(act_layer) 76 | if act_layer is not None and apply_act: 77 | act_args = dict(inplace=True) if inplace else {} 78 | self.act = act_layer(**act_args) 79 | else: 80 | self.act = nn.Identity() 81 | 82 | def forward(self, x): 83 | x = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 84 | x = self.act(x) 85 | return x 86 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/padding.py: -------------------------------------------------------------------------------- 1 | """ Padding Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import math 6 | from typing import List, Tuple 7 | 8 | import torch.nn.functional as F 9 | 10 | 11 | # Calculate symmetric padding for a convolution 12 | def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **_) -> int: 13 | padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 14 | return padding 15 | 16 | 17 | # Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution 18 | def get_same_padding(x: int, k: int, s: int, d: int): 19 | return max((math.ceil(x / s) - 1) * s + (k - 1) * d + 1 - x, 0) 20 | 21 | 22 | # Can SAME padding for given args be done statically? 
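# Worked example (a sketch): for k=3, s=1, d=1 the total SAME pad from
# get_same_padding above is (x - 1) + 2 + 1 - x = 2 regardless of input size,
# so the padding is static; with s=2 it depends on x (e.g.
# get_same_padding(7, 3, 2, 1) == 2 but get_same_padding(8, 3, 2, 1) == 1),
# and dynamic padding is required.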
23 | def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_): 24 | return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0 25 | 26 | 27 | # Dynamically pad input x with 'SAME' padding for conv with specified args 28 | def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0): 29 | ih, iw = x.size()[-2:] 30 | pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding(iw, k[1], s[1], d[1]) 31 | if pad_h > 0 or pad_w > 0: 32 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value) 33 | return x 34 | 35 | 36 | def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]: 37 | dynamic = False 38 | if isinstance(padding, str): 39 | # for any string padding, the padding will be calculated for you, one of three ways 40 | padding = padding.lower() 41 | if padding == 'same': 42 | # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact 43 | if is_static_pad(kernel_size, **kwargs): 44 | # static case, no extra overhead 45 | padding = get_padding(kernel_size, **kwargs) 46 | else: 47 | # dynamic 'SAME' padding, has runtime/GPU memory overhead 48 | padding = 0 49 | dynamic = True 50 | elif padding == 'valid': 51 | # 'VALID' padding, same as padding=0 52 | padding = 0 53 | else: 54 | # Default to PyTorch style 'same'-ish symmetric padding 55 | padding = get_padding(kernel_size, **kwargs) 56 | return padding, dynamic 57 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | """ Image to Patch Embedding using Conv2d 2 | 3 | A convolution based approach to patchifying a 2D image w/ embedding projection. 
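NOTE: this copy replaces the usual kernel_size=patch_size, stride=patch_size
projection with a 3x3 stride-1 conv (the original is kept below as a comment),
so the spatial grid is not downsampled here and self.num_patches, still derived
from patch_size, no longer reflects the actual token count.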
4 | 5 | Based on the impl in https://github.com/google-research/vision_transformer 6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | from torch import nn as nn 10 | 11 | from .helpers import to_2tuple 12 | from .trace_utils import _assert 13 | 14 | 15 | class PatchEmbed(nn.Module): 16 | """ 2D Image to Patch Embedding 17 | """ 18 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None, flatten=True): 19 | super().__init__() 20 | img_size = to_2tuple(img_size) 21 | patch_size = to_2tuple(patch_size) 22 | self.img_size = img_size 23 | self.patch_size = patch_size 24 | self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) 25 | self.num_patches = self.grid_size[0] * self.grid_size[1] 26 | self.flatten = flatten 27 | 28 | # self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 29 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=3, stride=1,padding=1) 30 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 31 | 32 | def forward(self, x): 33 | B, C, H, W = x.shape 34 | _assert(H == self.img_size[0], f"Input image height ({H}) doesn't match model ({self.img_size[0]}).") 35 | _assert(W == self.img_size[1], f"Input image width ({W}) doesn't match model ({self.img_size[1]}).") 36 | x = self.proj(x) 37 | if self.flatten: 38 | x = x.flatten(2).transpose(1, 2) # BCHW -> BNC 39 | x = self.norm(x) 40 | return x 41 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/pool2d_same.py: -------------------------------------------------------------------------------- 1 | """ AvgPool2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import List, Tuple, Optional 9 | 10 | from .helpers import to_2tuple 11 | from .padding import pad_same, get_padding_value 12 | 13 | 14 | def avg_pool2d_same(x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 15 | ceil_mode: bool = False, count_include_pad: bool = True): 16 | # FIXME how to deal with count_include_pad vs not for external padding? 
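    # Added note (not in the upstream file): pad_same derives the pad amounts
    # from the runtime input size. E.g. for H = 7, k = 3, s = 2 the output has
    # ceil(7/2) = 4 rows, requiring (4-1)*2 + 3 - 7 = 2 padded rows, split as
    # 1 top / 1 bottom, after which a plain unpadded avg_pool2d is applied.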
17 | x = pad_same(x, kernel_size, stride) 18 | return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 19 | 20 | 21 | class AvgPool2dSame(nn.AvgPool2d): 22 | """ Tensorflow like 'SAME' wrapper for 2D average pooling 23 | """ 24 | def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True): 25 | kernel_size = to_2tuple(kernel_size) 26 | stride = to_2tuple(stride) 27 | super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 28 | 29 | def forward(self, x): 30 | x = pad_same(x, self.kernel_size, self.stride) 31 | return F.avg_pool2d( 32 | x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad) 33 | 34 | 35 | def max_pool2d_same( 36 | x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 37 | dilation: List[int] = (1, 1), ceil_mode: bool = False): 38 | x = pad_same(x, kernel_size, stride, value=-float('inf')) 39 | return F.max_pool2d(x, kernel_size, stride, (0, 0), dilation, ceil_mode) 40 | 41 | 42 | class MaxPool2dSame(nn.MaxPool2d): 43 | """ Tensorflow like 'SAME' wrapper for 2D max pooling 44 | """ 45 | def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False): 46 | kernel_size = to_2tuple(kernel_size) 47 | stride = to_2tuple(stride) 48 | dilation = to_2tuple(dilation) 49 | super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, ceil_mode) 50 | 51 | def forward(self, x): 52 | x = pad_same(x, self.kernel_size, self.stride, value=-float('inf')) 53 | return F.max_pool2d(x, self.kernel_size, self.stride, (0, 0), self.dilation, self.ceil_mode) 54 | 55 | 56 | def create_pool2d(pool_type, kernel_size, stride=None, **kwargs): 57 | stride = stride or kernel_size 58 | padding = kwargs.pop('padding', '') 59 | padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, **kwargs) 60 | if is_dynamic: 61 | if pool_type == 'avg': 62 | return AvgPool2dSame(kernel_size, stride=stride, **kwargs) 63 | elif pool_type == 'max': 64 | return MaxPool2dSame(kernel_size, stride=stride, **kwargs) 65 | else: 66 | assert False, f'Unsupported pool type {pool_type}' 67 | else: 68 | if pool_type == 'avg': 69 | return nn.AvgPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 70 | elif pool_type == 'max': 71 | return nn.MaxPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 72 | else: 73 | assert False, f'Unsupported pool type {pool_type}' 74 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/separable_conv.py: -------------------------------------------------------------------------------- 1 | """ Depthwise Separable Conv Modules 2 | 3 | Basic DWS convs. Other variations of DWS exist with batch norm or activations between the 4 | DW and PW convs such as the Depthwise modules in MobileNetV2 / EfficientNet and Xception. 
5 | 6 | Hacked together by / Copyright 2020 Ross Wightman 7 | """ 8 | from torch import nn as nn 9 | 10 | from .create_conv2d import create_conv2d 11 | from .create_norm_act import convert_norm_act 12 | 13 | 14 | class SeparableConvBnAct(nn.Module): 15 | """ Separable Conv w/ trailing Norm and Activation 16 | """ 17 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 18 | channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, 19 | apply_act=True, drop_block=None): 20 | super(SeparableConvBnAct, self).__init__() 21 | 22 | self.conv_dw = create_conv2d( 23 | in_channels, int(in_channels * channel_multiplier), kernel_size, 24 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 25 | 26 | self.conv_pw = create_conv2d( 27 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 28 | 29 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 30 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 31 | 32 | @property 33 | def in_channels(self): 34 | return self.conv_dw.in_channels 35 | 36 | @property 37 | def out_channels(self): 38 | return self.conv_pw.out_channels 39 | 40 | def forward(self, x): 41 | x = self.conv_dw(x) 42 | x = self.conv_pw(x) 43 | if self.bn is not None: 44 | x = self.bn(x) 45 | return x 46 | 47 | 48 | class SeparableConv2d(nn.Module): 49 | """ Separable Conv 50 | """ 51 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 52 | channel_multiplier=1.0, pw_kernel_size=1): 53 | super(SeparableConv2d, self).__init__() 54 | 55 | self.conv_dw = create_conv2d( 56 | in_channels, int(in_channels * channel_multiplier), kernel_size, 57 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 58 | 59 | self.conv_pw = create_conv2d( 60 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 61 | 62 | @property 63 | def in_channels(self): 64 | return self.conv_dw.in_channels 65 | 66 | @property 67 | def out_channels(self): 68 | return self.conv_pw.out_channels 69 | 70 | def forward(self, x): 71 | x = self.conv_dw(x) 72 | x = self.conv_pw(x) 73 | return x 74 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/space_to_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SpaceToDepth(nn.Module): 6 | def __init__(self, block_size=4): 7 | super().__init__() 8 | assert block_size == 4 9 | self.bs = block_size 10 | 11 | def forward(self, x): 12 | N, C, H, W = x.size() 13 | x = x.view(N, C, H // self.bs, self.bs, W // self.bs, self.bs) # (N, C, H//bs, bs, W//bs, bs) 14 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 15 | x = x.view(N, C * (self.bs ** 2), H // self.bs, W // self.bs) # (N, C*bs^2, H//bs, W//bs) 16 | return x 17 | 18 | 19 | @torch.jit.script 20 | class SpaceToDepthJit(object): 21 | def __call__(self, x: torch.Tensor): 22 | # assuming hard-coded that block_size==4 for acceleration 23 | N, C, H, W = x.size() 24 | x = x.view(N, C, H // 4, 4, W // 4, 4) # (N, C, H//bs, bs, W//bs, bs) 25 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 26 | x = x.view(N, C * 16, H // 4, W // 4) # (N, C*bs^2, H//bs, W//bs) 27 | return x 28 | 29 | 30 | class SpaceToDepthModule(nn.Module): 31 | def 
__init__(self, no_jit=False): 32 | super().__init__() 33 | if not no_jit: 34 | self.op = SpaceToDepthJit() 35 | else: 36 | self.op = SpaceToDepth() 37 | 38 | def forward(self, x): 39 | return self.op(x) 40 | 41 | 42 | class DepthToSpace(nn.Module): 43 | 44 | def __init__(self, block_size): 45 | super().__init__() 46 | self.bs = block_size 47 | 48 | def forward(self, x): 49 | N, C, H, W = x.size() 50 | x = x.view(N, self.bs, self.bs, C // (self.bs ** 2), H, W) # (N, bs, bs, C//bs^2, H, W) 51 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # (N, C//bs^2, H, bs, W, bs) 52 | x = x.view(N, C // (self.bs ** 2), H * self.bs, W * self.bs) # (N, C//bs^2, H * bs, W * bs) 53 | return x 54 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/split_attn.py: -------------------------------------------------------------------------------- 1 | """ Split Attention Conv2d (for ResNeSt Models) 2 | 3 | Paper: `ResNeSt: Split-Attention Networks` - https://arxiv.org/abs/2004.08955 4 | 5 | Adapted from original PyTorch impl at https://github.com/zhanghang1989/ResNeSt 6 | 7 | Modified for torchscript compat, performance, and consistency with timm by Ross Wightman 8 | """ 9 | import torch 10 | import torch.nn.functional as F 11 | from torch import nn 12 | 13 | from .helpers import make_divisible 14 | 15 | 16 | class RadixSoftmax(nn.Module): 17 | def __init__(self, radix, cardinality): 18 | super(RadixSoftmax, self).__init__() 19 | self.radix = radix 20 | self.cardinality = cardinality 21 | 22 | def forward(self, x): 23 | batch = x.size(0) 24 | if self.radix > 1: 25 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 26 | x = F.softmax(x, dim=1) 27 | x = x.reshape(batch, -1) 28 | else: 29 | x = torch.sigmoid(x) 30 | return x 31 | 32 | 33 | class SplitAttn(nn.Module): 34 | """Split-Attention (aka Splat) 35 | """ 36 | def __init__(self, in_channels, out_channels=None, kernel_size=3, stride=1, padding=None, 37 | dilation=1, groups=1, bias=False, radix=2, rd_ratio=0.25, rd_channels=None, rd_divisor=8, 38 | act_layer=nn.ReLU, norm_layer=None, drop_block=None, **kwargs): 39 | super(SplitAttn, self).__init__() 40 | out_channels = out_channels or in_channels 41 | self.radix = radix 42 | self.drop_block = drop_block 43 | mid_chs = out_channels * radix 44 | if rd_channels is None: 45 | attn_chs = make_divisible(in_channels * radix * rd_ratio, min_value=32, divisor=rd_divisor) 46 | else: 47 | attn_chs = rd_channels * radix 48 | 49 | padding = kernel_size // 2 if padding is None else padding 50 | self.conv = nn.Conv2d( 51 | in_channels, mid_chs, kernel_size, stride, padding, dilation, 52 | groups=groups * radix, bias=bias, **kwargs) 53 | self.bn0 = norm_layer(mid_chs) if norm_layer else nn.Identity() 54 | self.act0 = act_layer(inplace=True) 55 | self.fc1 = nn.Conv2d(out_channels, attn_chs, 1, groups=groups) 56 | self.bn1 = norm_layer(attn_chs) if norm_layer else nn.Identity() 57 | self.act1 = act_layer(inplace=True) 58 | self.fc2 = nn.Conv2d(attn_chs, mid_chs, 1, groups=groups) 59 | self.rsoftmax = RadixSoftmax(radix, groups) 60 | 61 | def forward(self, x): 62 | x = self.conv(x) 63 | x = self.bn0(x) 64 | if self.drop_block is not None: 65 | x = self.drop_block(x) 66 | x = self.act0(x) 67 | 68 | B, RC, H, W = x.shape 69 | if self.radix > 1: 70 | x = x.reshape((B, self.radix, RC // self.radix, H, W)) 71 | x_gap = x.sum(dim=1) 72 | else: 73 | x_gap = x 74 | x_gap = x_gap.mean((2, 3), keepdim=True) 75 | x_gap = self.fc1(x_gap) 76 | x_gap = 
self.bn1(x_gap) 77 | x_gap = self.act1(x_gap) 78 | x_attn = self.fc2(x_gap) 79 | 80 | x_attn = self.rsoftmax(x_attn).view(B, -1, 1, 1) 81 | if self.radix > 1: 82 | out = (x * x_attn.reshape((B, self.radix, RC // self.radix, 1, 1))).sum(dim=1) 83 | else: 84 | out = x * x_attn 85 | return out.contiguous() 86 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/split_batchnorm.py: -------------------------------------------------------------------------------- 1 | """ Split BatchNorm 2 | 3 | A PyTorch BatchNorm layer that splits input batch into N equal parts and passes each through 4 | a separate BN layer. The first split is passed through the parent BN layers with weight/bias 5 | keys the same as the original BN. All other splits pass through BN sub-layers under the '.aux_bn' 6 | namespace. 7 | 8 | This allows easily removing the auxiliary BN layers after training to efficiently 9 | achieve the 'Auxiliary BatchNorm' as described in the AdvProp Paper, section 4.2, 10 | 'Disentangled Learning via An Auxiliary BN' 11 | 12 | Hacked together by / Copyright 2020 Ross Wightman 13 | """ 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | class SplitBatchNorm2d(torch.nn.BatchNorm2d): 19 | 20 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, 21 | track_running_stats=True, num_splits=2): 22 | super().__init__(num_features, eps, momentum, affine, track_running_stats) 23 | assert num_splits > 1, 'Should have at least one aux BN layer (num_splits at least 2)' 24 | self.num_splits = num_splits 25 | self.aux_bn = nn.ModuleList([ 26 | nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats) for _ in range(num_splits - 1)]) 27 | 28 | def forward(self, input: torch.Tensor): 29 | if self.training: # aux BN only relevant while training 30 | split_size = input.shape[0] // self.num_splits 31 | assert input.shape[0] == split_size * self.num_splits, "batch size must be evenly divisible by num_splits" 32 | split_input = input.split(split_size) 33 | x = [super().forward(split_input[0])] 34 | for i, a in enumerate(self.aux_bn): 35 | x.append(a(split_input[i + 1])) 36 | return torch.cat(x, dim=0) 37 | else: 38 | return super().forward(input) 39 | 40 | 41 | def convert_splitbn_model(module, num_splits=2): 42 | """ 43 | Recursively traverse module and its children to replace all instances of 44 | ``torch.nn.modules.batchnorm._BatchNorm`` with `SplitBatchnorm2d`. 
45 | Args: 46 | module (torch.nn.Module): input module 47 | num_splits: number of separate batchnorm layers to split input across 48 | Example:: 49 | >>> # model is an instance of torch.nn.Module 50 | >>> model = timm.models.convert_splitbn_model(model, num_splits=2) 51 | """ 52 | mod = module 53 | if isinstance(module, torch.nn.modules.instancenorm._InstanceNorm): 54 | return module 55 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): 56 | mod = SplitBatchNorm2d( 57 | module.num_features, module.eps, module.momentum, module.affine, 58 | module.track_running_stats, num_splits=num_splits) 59 | mod.running_mean = module.running_mean 60 | mod.running_var = module.running_var 61 | mod.num_batches_tracked = module.num_batches_tracked 62 | if module.affine: 63 | mod.weight.data = module.weight.data.clone().detach() 64 | mod.bias.data = module.bias.data.clone().detach() 65 | for aux in mod.aux_bn: 66 | aux.running_mean = module.running_mean.clone() 67 | aux.running_var = module.running_var.clone() 68 | aux.num_batches_tracked = module.num_batches_tracked.clone() 69 | if module.affine: 70 | aux.weight.data = module.weight.data.clone().detach() 71 | aux.bias.data = module.bias.data.clone().detach() 72 | for name, child in module.named_children(): 73 | mod.add_module(name, convert_splitbn_model(child, num_splits=num_splits)) 74 | del module 75 | return mod 76 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/squeeze_excite.py: -------------------------------------------------------------------------------- 1 | """ Squeeze-and-Excitation Channel Attention 2 | 3 | An SE implementation originally based on PyTorch SE-Net impl. 4 | Has since evolved with additional functionality / configuration. 5 | 6 | Paper: `Squeeze-and-Excitation Networks` - https://arxiv.org/abs/1709.01507 7 | 8 | Also included is Effective Squeeze-Excitation (ESE). 9 | Paper: `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 10 | 11 | Hacked together by / Copyright 2021 Ross Wightman 12 | """ 13 | from torch import nn as nn 14 | 15 | from .create_act import create_act_layer 16 | from .helpers import make_divisible 17 | 18 | 19 | class SEModule(nn.Module): 20 | """ SE Module as defined in original SE-Nets with a few additions 21 | Additions include: 22 | * divisor can be specified to keep channels % div == 0 (default: 8) 23 | * reduction channels can be specified directly by arg (if rd_channels is set) 24 | * reduction channels can be specified by float rd_ratio (default: 1/16) 25 | * global max pooling can be added to the squeeze aggregation 26 | * customizable activation, normalization, and gate layer 27 | """ 28 | def __init__( 29 | self, channels, rd_ratio=1. / 16, rd_channels=None, rd_divisor=8, add_maxpool=False, 30 | act_layer=nn.ReLU, norm_layer=None, gate_layer='sigmoid'): 31 | super(SEModule, self).__init__() 32 | self.add_maxpool = add_maxpool 33 | if not rd_channels: 34 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
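        # Worked example (added for clarity; not in the upstream file): with
        # channels=256 and the default rd_ratio=1/16, rd_channels becomes
        # make_divisible(256 / 16, 8) = 16, so fc1 squeezes 256 -> 16 and fc2
        # expands 16 -> 256 before the sigmoid gate rescales each channel.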
35 | self.fc1 = nn.Conv2d(channels, rd_channels, kernel_size=1, bias=True) 36 | self.bn = norm_layer(rd_channels) if norm_layer else nn.Identity() 37 | self.act = create_act_layer(act_layer, inplace=True) 38 | self.fc2 = nn.Conv2d(rd_channels, channels, kernel_size=1, bias=True) 39 | self.gate = create_act_layer(gate_layer) 40 | 41 | def forward(self, x): 42 | x_se = x.mean((2, 3), keepdim=True) 43 | if self.add_maxpool: 44 | # experimental codepath, may remove or change 45 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 46 | x_se = self.fc1(x_se) 47 | x_se = self.act(self.bn(x_se)) 48 | x_se = self.fc2(x_se) 49 | return x * self.gate(x_se) 50 | 51 | 52 | SqueezeExcite = SEModule # alias 53 | 54 | 55 | class EffectiveSEModule(nn.Module): 56 | """ 'Effective Squeeze-Excitation 57 | From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 58 | """ 59 | def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmoid', **_): 60 | super(EffectiveSEModule, self).__init__() 61 | self.add_maxpool = add_maxpool 62 | self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0) 63 | self.gate = create_act_layer(gate_layer) 64 | 65 | def forward(self, x): 66 | x_se = x.mean((2, 3), keepdim=True) 67 | if self.add_maxpool: 68 | # experimental codepath, may remove or change 69 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 70 | x_se = self.fc(x_se) 71 | return x * self.gate(x_se) 72 | 73 | 74 | EffectiveSqueezeExcite = EffectiveSEModule # alias 75 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/test_time_pool.py: -------------------------------------------------------------------------------- 1 | """ Test Time Pooling (Average-Max Pool) 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | import logging 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | from .adaptive_avgmax_pool import adaptive_avgmax_pool2d 11 | 12 | 13 | _logger = logging.getLogger(__name__) 14 | 15 | 16 | class TestTimePoolHead(nn.Module): 17 | def __init__(self, base, original_pool=7): 18 | super(TestTimePoolHead, self).__init__() 19 | self.base = base 20 | self.original_pool = original_pool 21 | base_fc = self.base.get_classifier() 22 | if isinstance(base_fc, nn.Conv2d): 23 | self.fc = base_fc 24 | else: 25 | self.fc = nn.Conv2d( 26 | self.base.num_features, self.base.num_classes, kernel_size=1, bias=True) 27 | self.fc.weight.data.copy_(base_fc.weight.data.view(self.fc.weight.size())) 28 | self.fc.bias.data.copy_(base_fc.bias.data.view(self.fc.bias.size())) 29 | self.base.reset_classifier(0) # delete original fc layer 30 | 31 | def forward(self, x): 32 | x = self.base.forward_features(x) 33 | x = F.avg_pool2d(x, kernel_size=self.original_pool, stride=1) 34 | x = self.fc(x) 35 | x = adaptive_avgmax_pool2d(x, 1) 36 | return x.view(x.size(0), -1) 37 | 38 | 39 | def apply_test_time_pool(model, config, use_test_size=True): 40 | test_time_pool = False 41 | if not hasattr(model, 'default_cfg') or not model.default_cfg: 42 | return model, False 43 | if use_test_size and 'test_input_size' in model.default_cfg: 44 | df_input_size = model.default_cfg['test_input_size'] 45 | else: 46 | df_input_size = model.default_cfg['input_size'] 47 | if config['input_size'][-1] > df_input_size[-1] and config['input_size'][-2] > df_input_size[-2]: 48 | _logger.info('Target input size %s > pretrained default %s, using test time pooling' % 49 | (str(config['input_size'][-2:]), 
str(df_input_size[-2:]))) 50 | model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size']) 51 | test_time_pool = True 52 | return model, test_time_pool 53 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/trace_utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch import _assert 3 | except ImportError: 4 | def _assert(condition: bool, message: str): 5 | assert condition, message 6 | 7 | 8 | def _float_to_int(x: float) -> int: 9 | """ 10 | Symbolic tracing helper to substitute for inbuilt `int`. 11 | Hint: Inbuilt `int` can't accept an argument of type `Proxy` 12 | """ 13 | return int(x) 14 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/weight_init.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 
52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | 65 | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='normal'): 66 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 67 | if mode == 'fan_in': 68 | denom = fan_in 69 | elif mode == 'fan_out': 70 | denom = fan_out 71 | elif mode == 'fan_avg': 72 | denom = (fan_in + fan_out) / 2 73 | 74 | variance = scale / denom 75 | 76 | if distribution == "truncated_normal": 77 | # constant is stddev of standard normal truncated to (-2, 2) 78 | trunc_normal_(tensor, std=math.sqrt(variance) / .87962566103423978) 79 | elif distribution == "normal": 80 | tensor.normal_(std=math.sqrt(variance)) 81 | elif distribution == "uniform": 82 | bound = math.sqrt(3 * variance) 83 | tensor.uniform_(-bound, bound) 84 | else: 85 | raise ValueError(f"invalid distribution {distribution}") 86 | 87 | 88 | def lecun_normal_(tensor): 89 | variance_scaling_(tensor, mode='fan_in', distribution='truncated_normal') 90 | -------------------------------------------------------------------------------- /V2EM/src/model/se_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # https://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html 6 | 7 | class SEBlock(nn.Module): 8 | 9 | def __init__(self, input_channels, internal_neurons): 10 | super(SEBlock, self).__init__() 11 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 12 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 13 | self.input_channels = input_channels 14 | 15 | def forward(self, inputs): 16 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 17 | x = self.down(x) 18 | x = F.relu(x) 19 | x = self.up(x) 20 | x = torch.sigmoid(x) 21 | x = x.view(-1, self.input_channels, 1, 1) 22 | return inputs * x -------------------------------------------------------------------------------- /V2EM/src/model/transformer_encoder.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Optional, List 3 | import torch 4 | from torch import nn 5 | from src.utils import padTensor 6 | 7 | class WrappedTransformerEncoder(nn.Module): 8 | def __init__(self, dim, num_layers, num_heads): 9 | super(WrappedTransformerEncoder, self).__init__() 10 | self.dim = dim 11 | encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=num_heads) 12 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers) 13 | self.cls_emb = nn.Embedding(num_embeddings=1, embedding_dim=dim) 14 | 15 | def prepend_cls(self, inputs): 16 | # print(f"inputs before prepending CLS: {inputs}") 17 | index = torch.LongTensor([0]).to(device=inputs.device) 18 | # print(f"index after LongTensor: {index}") 19 | cls_emb = self.cls_emb(index) 20 | # print(f"cls_emb after embedding the index: {cls_emb}") 21 | cls_emb = cls_emb.expand(inputs.size(0), 1, self.dim) 22 | # print(f"cls_emb after expanding: {cls_emb.shape}, values: {cls_emb}") 23 | outputs = torch.cat((cls_emb, inputs), dim=1) 24 | # print(f"outputs after concatenating cls and inputs: {outputs.shape}, values: {outputs}")
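        # Shape note (added; not in the original source): for inputs of shape
        # (batch, seq_len, dim), cls_emb is expanded to (batch, 1, dim) and
        # concatenated on dim=1, so outputs is (batch, seq_len + 1, dim) with
        # the learnable CLS vector at position 0 of every sequence.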
print(f"合并cls和input之后{outputs.shape},具体{outputs}") 25 | return outputs 26 | 27 | def forward(self, inputs: torch.Tensor, lens: Optional[List[int]] = None, get_cls: Optional[bool] = False): 28 | if lens is not None: 29 | max_len = max(lens) 30 | 31 | mask = [([False] * (l + int(get_cls)) + [True] * (max_len - l)) for l in lens] 32 | mask = torch.tensor(mask).to(device=inputs.device) 33 | 34 | inputs = list(inputs.split(lens, dim=0)) 35 | inputs = [padTensor(inp, max_len) for inp in inputs] 36 | inputs = torch.stack(inputs, dim=0) 37 | # print(inputs.shape) 38 | else: 39 | mask = None 40 | 41 | if get_cls: 42 | inputs = self.prepend_cls(inputs) 43 | # print(inputs) 44 | 45 | inputs = inputs.permute(1, 0, 2) 46 | # inputs = self.pos_encoder(inputs) 47 | # print("input shape") ## 48 | # print(inputs.shape) ## 49 | inputs = self.encoder(src=inputs, src_key_padding_mask=mask) # (seq_len, bs, dim) 50 | 51 | if get_cls: 52 | return inputs[0] 53 | 54 | return inputs[1:].permute(1, 0, 2) 55 | 56 | -------------------------------------------------------------------------------- /V2EM/src/trainers/basetrainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import torch 4 | from src.utils import save 5 | 6 | 7 | class TrainerBase(): 8 | def __init__(self, args, model, criterion, optimizer, scheduler, device, dataloaders): 9 | self.args = args 10 | self.model = model 11 | self.best_model = copy.deepcopy(model.state_dict()) 12 | self.device = device 13 | self.criterion = criterion 14 | self.optimizer = optimizer 15 | self.dataloaders = dataloaders 16 | self.scheduler = scheduler 17 | self.earlyStop = args['early_stop'] 18 | 19 | self.saving_path = f"./savings/" 20 | 21 | def make_stat(self, prev, curr): 22 | new_stats = [] 23 | for i in range(len(prev)): 24 | if curr[i] > prev[i]: 25 | new_stats.append(f'{curr[i]:.4f} \u2191') 26 | elif curr[i] < prev[i]: 27 | new_stats.append(f'{curr[i]:.4f} \u2193') 28 | else: 29 | new_stats.append(f'{curr[i]:.4f} -') 30 | return new_stats 31 | 32 | def get_saving_file_name(self): 33 | best_test_stats = self.all_test_stats[self.best_epoch - 1] 34 | 35 | name = f'{self.args["model"]}_{self.args["modalities"]}_' 36 | 37 | if self.args['loss'] == 'bce': 38 | name += f'Acc_{best_test_stats[0][-1]:.4f}_' 39 | name += f'F1_{best_test_stats[3][-1]:.4f}_' 40 | name += f'AUC_{best_test_stats[4][-1]:.4f}_' 41 | else: 42 | name += f'{best_test_stats[0]:.4f}_' 43 | name += f'{best_test_stats[1]:.4f}_' 44 | name += f'{best_test_stats[2]:.4f}_' 45 | name += f'{best_test_stats[3]:.4f}_' 46 | 47 | name += f'imginvl{self.args["img_interval"]}_' 48 | 49 | if self.args['model'] == 'mme2e_sparse': 50 | name += f'st_{self.args["sparse_threshold"]}_' 51 | 52 | name += f'seed{self.args["seed"]}' 53 | name += '.pt' 54 | 55 | return name 56 | 57 | def save_stats(self): 58 | stats = { 59 | 'args': self.args, 60 | 'train_stats': self.all_train_stats, 61 | 'valid_stats': self.all_valid_stats, 62 | 'test_stats': self.all_test_stats, 63 | 'best_valid_stats': self.best_valid_stats, 64 | 'best_epoch': self.best_epoch 65 | } 66 | 67 | save(stats, os.path.join(self.saving_path, 'stats', self.get_saving_file_name())) 68 | 69 | # csv_path = os.path.join(self.saving_path, 'csv', self.get_saving_file_name()).replace('.pt', '.csv') 70 | # dirname = os.path.dirname(csv_path) 71 | # if not os.path.exists(dirname): 72 | # os.makedirs(dirname) 73 | # with open(csv_path, 'w') as f: 74 | # for stat in self.all_test_stats[self.best_epoch - 1]: 
75 | # for n in stat: 76 | # f.write(f'{n:.4f},') 77 | # f.write('\n') 78 | # f.write(str(self.args)) 79 | # f.write('\n') 80 | 81 | def save_model(self): 82 | torch.save(self.best_model, os.path.join(self.saving_path, 'models', self.get_saving_file_name())) 83 | -------------------------------------------------------------------------------- /V2EM/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | from torchvision import transforms 7 | 8 | def save(toBeSaved, filename, mode='wb'): 9 | dirname = os.path.dirname(filename) 10 | if not os.path.exists(dirname): 11 | os.makedirs(dirname) 12 | file = open(filename, mode) 13 | pickle.dump(toBeSaved, file, protocol=4) 14 | file.close() 15 | 16 | def load(filename, mode='rb'): 17 | file = open(filename, mode) 18 | loaded = pickle.load(file) 19 | file.close() 20 | return loaded 21 | 22 | # For python2 23 | def load2(path): 24 | with open(path, 'rb') as f: 25 | u = pickle._Unpickler(f) 26 | u.encoding = 'latin1' 27 | p = u.load() 28 | return p 29 | 30 | def pad_sents(sents, pad_token): 31 | sents_padded = [] 32 | lens = get_lens(sents) 33 | max_len = max(lens) 34 | sents_padded = [sents[i] + [pad_token] * (max_len - l) for i, l in enumerate(lens)] 35 | return sents_padded, lens 36 | 37 | def sort_sents(sents, reverse=True): 38 | sents.sort(key=(lambda s: len(s)), reverse=reverse) 39 | return sents 40 | 41 | def get_mask(sents, unmask_idx=1, mask_idx=0): 42 | lens = get_lens(sents) 43 | max_len = max(lens) 44 | mask = [([unmask_idx] * l + [mask_idx] * (max_len - l)) for l in lens] 45 | return mask 46 | 47 | def get_lens(sents): 48 | return [len(sent) for sent in sents] 49 | 50 | def get_max_len(sents): 51 | max_len = max([len(sent) for sent in sents]) 52 | return max_len 53 | 54 | def truncate_sents(sents, length): 55 | sents = [sent[:length] for sent in sents] 56 | return sents 57 | 58 | def get_loss_weight(labels, label_order): 59 | nums = [np.sum(labels == lo) for lo in label_order] 60 | loss_weight = torch.tensor([n / len(labels) for n in nums]) 61 | return loss_weight 62 | 63 | def capitalize_first_letter(data): 64 | return [word.capitalize() for word in data] 65 | 66 | def cmumosei_round(a): 67 | if a < -2: 68 | res = -3 69 | if -2 <= a and a < -1: 70 | res = -2 71 | if -1 <= a and a < 0: 72 | res = -1 73 | if 0 <= a and a <= 0: 74 | res = 0 75 | if 0 < a and a <= 1: 76 | res = 1 77 | if 1 < a and a <= 2: 78 | res = 2 79 | if a > 2: 80 | res = 3 81 | return res 82 | 83 | # From MTCNN 84 | def fixed_image_standardization(image_tensor: torch.tensor) -> torch.tensor: 85 | processed_tensor = (image_tensor - 127.5) / 128.0 86 | return processed_tensor 87 | 88 | def padTensor(t: torch.tensor, targetLen: int) -> torch.tensor: 89 | oriLen, dim = t.size() 90 | return torch.cat((t, torch.zeros(targetLen - oriLen, dim).to(t.device)), dim=0) 91 | 92 | def calc_percent(x: torch.tensor): 93 | total = np.prod(np.array(x.size())) 94 | positive = x.sum().item() 95 | return positive / total 96 | -------------------------------------------------------------------------------- /dataset_demo/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dataset_demo/Ses01F_impro01.avi: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/dataset_demo/Ses01F_impro01.avi -------------------------------------------------------------------------------- /dataset_demo/Ses01F_impro01.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/dataset_demo/Ses01F_impro01.wav -------------------------------------------------------------------------------- /dataset_demo/Ses01F_impro01_text.txt: -------------------------------------------------------------------------------- 1 | Ses01F_impro01_F000 [006.2901-008.2357]: Excuse me. 2 | Ses01F_impro01_M000 [007.5712-010.4750]: Do you have your forms? 3 | Ses01F_impro01_F001 [010.0100-011.3925]: Yeah. 4 | Ses01F_impro01_M001 [010.9266-014.6649]: Let me see them. 5 | Ses01F_impro01_F002 [014.8872-018.0175]: Is there a problem? 6 | Ses01F_impro01_M002 [016.8352-019.7175]: Who told you to get in this line? 7 | Ses01F_impro01_F003 [019.2900-020.7875]: You did. 8 | Ses01F_impro01_F004 [021.3257-024.7400]: You were standing at the beginning and you directed me. 9 | Ses01F_impro01_M003 [023.4700-028.0300]: Okay. But I didn't tell you to get in this line if you are filling out this particular form. 10 | Ses01F_impro01_F005 [027.4600-031.4900]: Well what's the problem? Let me change it. 11 | Ses01F_impro01_M004 [028.3950-031.2117]: This form is a Z.X.four. 12 | Ses01F_impro01_M005 [031.2660-039.3875]: You can't-- This is not the line for Z.X.four. If you're going to fill out the Z.X.four, you need to have a different form of ID. 13 | Ses01F_impro01_F006 [038.9650-043.5900]: What? I'm getting an ID. This is why I'm here. My wallet was stolen. 14 | Ses01F_impro01_M006 [041.2300-046.9800]: No. I need another set of ID to prove this is actually you. 15 | Ses01F_impro01_F007 [046.5800-052.1900]: How am I supposed to get an ID without an ID? How does a person get an ID in the first place? 16 | Ses01F_impro01_M007 [051.4000-057.6400]: I don't know. But I need an ID to pass this form along. I can't just send it along without an ID. 17 | Ses01F_impro01_F008 [056.1600-058.8225]: I'm here to get an ID. 18 | Ses01F_impro01_M008 [058.1800-062.5900]: No. I need another ID, a separate one. 19 | Ses01F_impro01_F009 [061.8700-065.9700]: Like what? Like a birth certificate? 20 | Ses01F_impro01_M009 [065.5100-073.0000]: A birth certificate, a passport...a student ID; didn't you go to school? Anything? 21 | Ses01F_impro01_F010 [066.4200-069.3400]: Who the hell has a birth certificate? 22 | Ses01F_impro01_F011 [072.4500-082.2600]: Yes but my wallet was stolen, I don't have anything. I don't have any credit cards, I don't have my ID. Don't you have things on file here? 23 | Ses01F_impro01_M010 [081.5900-086.0300]: Yeah. We keep it on file, but we need an ID to access that file. 24 | Ses01F_impro01_F012 [085.2700-088.0200]: That's out of control. 25 | Ses01F_impro01_M011 [087.1500-094.3900]: I don't understand why this is so complicated for people when they get here. It's just a simple form. I just need an ID. 26 | Ses01F_impro01_F013 [093.6700-097.0218]: How long have you been working here? 27 | Ses01F_impro01_M012 [095.8600-098.6800]: Actually too long. 28 | Ses01F_impro01_F014 [097.8900-102.9600]: Clearly. You know, do you have like a supervisor or something? 29 | Ses01F_impro01_M013 [101.8400-107.8700]: Yeah. Do you want to see my supervisor? Huh? Yeah. 
Do you want to see my supervisor? Fine. I'll be right back. 30 | Ses01F_impro01_F015 [103.9700-106.7100]: That would - I would appreciate that. Yeah. 31 | --------------------------------------------------------------------------------
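Usage sketch (illustrative only, not a file from this repository): the pieces above compose roughly as follows — `padTensor` (V2EM/src/utils.py) right-pads each clip's variable-length feature sequence, and `WrappedTransformerEncoder` (V2EM/src/model/transformer_encoder.py) batches them, masks the padding, and returns one CLS embedding per clip. The shapes below are assumed from the code as shown.

    import torch
    from src.model.transformer_encoder import WrappedTransformerEncoder

    # Two clips with 5 and 3 feature vectors of dim 64, stacked along dim 0,
    # which is the layout WrappedTransformerEncoder.forward expects with `lens`.
    feats = torch.randn(5 + 3, 64)
    lens = [5, 3]

    enc = WrappedTransformerEncoder(dim=64, num_layers=2, num_heads=4)
    cls_per_clip = enc(feats, lens=lens, get_cls=True)
    print(cls_per_clip.shape)  # torch.Size([2, 64]) -- one CLS vector per clip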