├── FV2ES
│   ├── System
│   │   ├── app.py
│   │   ├── static
│   │   │   ├── Catalina_8.jpg
│   │   │   ├── css
│   │   │   │   └── main.css
│   │   │   └── js
│   │   │       └── main.js
│   │   └── templates
│   │       ├── base.html
│   │       └── index1.html
│   └── V2EM_prediction
│       ├── main.py
│       └── src
│           ├── attention_block.py
│           ├── c_e2e.py
│           ├── cli.py
│           ├── datasets.py
│           ├── e2e_t.py
│           ├── evaluate.py
│           ├── nestnet
│           │   ├── __init__.py
│           │   ├── features.py
│           │   ├── fx_features.py
│           │   ├── helpers.py
│           │   ├── hub.py
│           │   ├── layers
│           │   │   ├── __init__.py
│           │   │   ├── activations.py
│           │   │   ├── activations_jit.py
│           │   │   ├── activations_me.py
│           │   │   ├── adaptive_avgmax_pool.py
│           │   │   ├── attention_pool2d.py
│           │   │   ├── blur_pool.py
│           │   │   ├── bottleneck_attn.py
│           │   │   ├── cbam.py
│           │   │   ├── classifier.py
│           │   │   ├── cond_conv2d.py
│           │   │   ├── config.py
│           │   │   ├── conv2d_same.py
│           │   │   ├── conv_bn_act.py
│           │   │   ├── create_act.py
│           │   │   ├── create_attn.py
│           │   │   ├── create_conv2d.py
│           │   │   ├── create_norm_act.py
│           │   │   ├── drop.py
│           │   │   ├── eca.py
│           │   │   ├── evo_norm.py
│           │   │   ├── gather_excite.py
│           │   │   ├── global_context.py
│           │   │   ├── halo_attn.py
│           │   │   ├── helpers.py
│           │   │   ├── inplace_abn.py
│           │   │   ├── lambda_layer.py
│           │   │   ├── linear.py
│           │   │   ├── median_pool.py
│           │   │   ├── mixed_conv2d.py
│           │   │   ├── mlp.py
│           │   │   ├── non_local_attn.py
│           │   │   ├── norm.py
│           │   │   ├── norm_act.py
│           │   │   ├── padding.py
│           │   │   ├── patch_embed.py
│           │   │   ├── pool2d_same.py
│           │   │   ├── selective_kernel.py
│           │   │   ├── separable_conv.py
│           │   │   ├── space_to_depth.py
│           │   │   ├── split_attn.py
│           │   │   ├── split_batchnorm.py
│           │   │   ├── squeeze_excite.py
│           │   │   ├── std_conv.py
│           │   │   ├── test_time_pool.py
│           │   │   ├── trace_utils.py
│           │   │   └── weight_init.py
│           │   ├── nest.py
│           │   ├── registry.py
│           │   └── visualizer.py
│           ├── se_block.py
│           ├── trainers
│           │   ├── basetrainer.py
│           │   └── r_emotiontrainer.py
│           ├── transformer_encoder.py
│           ├── utils.py
│           └── vgg_block.py
├── README.md
├── V2EM
│   ├── main.py
│   └── src
│       ├── cli.py
│       ├── datasets.py
│       ├── evaluate.py
│       ├── model
│       │   ├── attention_block.py
│       │   ├── baselines
│       │   │   ├── lf_rnn.py
│       │   │   └── lf_transformer.py
│       │   ├── c_e2e.py
│       │   ├── e2e_t.py
│       │   ├── nestnet
│       │   │   ├── features.py
│       │   │   ├── fx_features.py
│       │   │   ├── helpers.py
│       │   │   ├── hub.py
│       │   │   ├── layers
│       │   │   │   ├── __init__.py
│       │   │   │   ├── activations.py
│       │   │   │   ├── activations_jit.py
│       │   │   │   ├── activations_me.py
│       │   │   │   ├── adaptive_avgmax_pool.py
│       │   │   │   ├── attention_pool2d.py
│       │   │   │   ├── blur_pool.py
│       │   │   │   ├── bottleneck_attn.py
│       │   │   │   ├── cbam.py
│       │   │   │   ├── classifier.py
│       │   │   │   ├── cond_conv2d.py
│       │   │   │   ├── config.py
│       │   │   │   ├── conv2d_same.py
│       │   │   │   ├── conv_bn_act.py
│       │   │   │   ├── create_act.py
│       │   │   │   ├── create_attn.py
│       │   │   │   ├── create_conv2d.py
│       │   │   │   ├── create_norm_act.py
│       │   │   │   ├── drop.py
│       │   │   │   ├── eca.py
│       │   │   │   ├── evo_norm.py
│       │   │   │   ├── gather_excite.py
│       │   │   │   ├── global_context.py
│       │   │   │   ├── halo_attn.py
│       │   │   │   ├── helpers.py
│       │   │   │   ├── inplace_abn.py
│       │   │   │   ├── lambda_layer.py
│       │   │   │   ├── linear.py
│       │   │   │   ├── median_pool.py
│       │   │   │   ├── mixed_conv2d.py
│       │   │   │   ├── mlp.py
│       │   │   │   ├── non_local_attn.py
│       │   │   │   ├── norm.py
│       │   │   │   ├── norm_act.py
│       │   │   │   ├── padding.py
│       │   │   │   ├── patch_embed.py
│       │   │   │   ├── pool2d_same.py
│       │   │   │   ├── selective_kernel.py
│       │   │   │   ├── separable_conv.py
│       │   │   │   ├── space_to_depth.py
│       │   │   │   ├── split_attn.py
│       │   │   │   ├── split_batchnorm.py
│       │   │   │   ├── squeeze_excite.py
│       │   │   │   ├── std_conv.py
│       │   │   │   ├── test_time_pool.py
│       │   │   │   ├── trace_utils.py
│       │   │   │   └── weight_init.py
│       │   │   ├── nest.py
│       │   │   └── registry.py
│       │   ├── se_block.py
│       │   ├── transformer_encoder.py
│       │   └── vgg_block.py
│       ├── trainers
│       │   ├── basetrainer.py
│       │   └── r_emotiontrainer.py
│       └── utils.py
└── dataset_demo
    ├── Readme.md
    ├── Ses01F_impro01.avi
    ├── Ses01F_impro01.wav
    ├── Ses01F_impro01_label.txt
    └── Ses01F_impro01_text.txt

/FV2ES/System/app.py:
--------------------------------------------------------------------------------
import os
import json
import subprocess
import numpy as np
import sys
sys.path.append('../base_iemocap_onetest')  # add the module directory (not a .py file) to the import path ## by ling
# Import Flask
from flask import Flask, render_template, request, redirect, jsonify, send_from_directory
from werkzeug.utils import secure_filename  # sanitize the uploaded filename

# Create Flask instance
app = Flask(__name__)

app.config['UPLOAD_FOLDER'] = '../video'  # upload directory


# The decorator implements route mapping and establishes the association between URL rules and handler functions
# Tell Flask what kind of URL can trigger our function
@app.route('/', methods=['GET'])
def index():
    return render_template('index1.html')

# POST trigger (POST only, so this rule does not shadow the GET route above)
@app.route('/', methods=['POST'])
def upload_function():
    # save file
    if request.method == 'POST':
        # f = request.files['file']
        print(request.files.getlist('file'))
        # save
        for f in request.files.getlist('file'):
            f.save(os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(f.filename)))
        return ('', 204)
        # TODO: return data


@app.route('/predict', methods=['POST'])
def predict_function():
    # call the prediction module and return the prediction result
    cmd = ['python', '../V2EM_prediction/main.py', '--test']
    cmd_result = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, encoding='utf8')
    if cmd_result.returncode == 0:
        print('success')
        # if successful, read result.txt
        emotion_list = []
        with open('../V2EM_prediction/result.txt', 'r') as f:  # path matches the V2EM_prediction module invoked above
            for line in f.readlines():
                line = line.strip('\n')
                emotion_list.append(line)
        print(emotion_list)

        emotion_vector = {'angry': emotion_list[0], 'excited': emotion_list[1], 'frustrated': emotion_list[2],
                          'happy': emotion_list[3], 'neural': emotion_list[4], 'sad': emotion_list[5], }  # sic: 'neural' is this repo's spelling of the neutral class
        data = jsonify(emotion_vector)
        return data, 201, {"ContentType": "application/json"}


if __name__ == '__main__':
    app.run(debug=True, host='127.0.0.1', port=int(os.environ.get('PORT', 7890)))
--------------------------------------------------------------------------------
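For quick testing outside the browser, the two routes above can be driven with an HTTP client. A minimal sketch, assuming the app is running on its default port 7890 and that a local sample.mp4 exists; the multipart field name 'file' and the response codes come from the handlers above, the rest is illustrative:

import requests

BASE = 'http://127.0.0.1:7890'

# upload a file under the multipart field name 'file' (upload_function saves it to ../video)
with open('sample.mp4', 'rb') as fh:
    r = requests.post(BASE + '/', files={'file': fh})
print(r.status_code)  # 204 on success

# trigger inference; predict_function shells out to V2EM_prediction/main.py
r = requests.post(BASE + '/predict')
if r.status_code == 201:
    print(r.json())  # {'angry': ..., 'excited': ..., 'frustrated': ..., 'happy': ..., 'neural': ..., 'sad': ...}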
/FV2ES/System/static/Catalina_8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/FV2ES/System/static/Catalina_8.jpg
--------------------------------------------------------------------------------
/FV2ES/System/static/css/main.css:
--------------------------------------------------------------------------------
body
{
    background-image:url('../Catalina_8.jpg');
    background-size: cover;
}

.model_name_box{
    width: 30px;
    height: 160px;
    padding: 5px;
    font-size: 20px;
    color:black;
    word-wrap: break-word;
    -webkit-user-select:text;
    float:left;
    display:inline;
}

div.img
{
    margin:3px;
    height:auto;
    width:auto;
    float:left;
    display:inline;
    text-align:center;
}
div.img img
{
    display:inline;
    margin:3px;
    border:1px solid #bebebe;
}


.img-preview, .img-predict{
    width: 100%;
    height: 50%;
    position: relative;
    border: 5px solid #F8F8F8;
    box-shadow: 0px 2px 4px 0px rgba(0, 0, 0, 0.1);
    margin-top: 1em;
    margin-bottom: 1em;
}

.img-preview>div, .img-predict>div, .img-predict>img {
    width: 100%;
    height: 100%;
    background-size: 100%;
    background-repeat: no-repeat;
    background-position: center;
}

#select_parent{
    align: center;  /* note: 'align' is not a standard CSS property */
    width: 300px;
    height: 40px;
    border-radius: 5px;
    box-shadow: 0 0 5px #ccc;
    position: relative;
}

#select_parent:after{
    content: "";
    width: 14px;
    height: 8px;
    position: absolute;
    right: 20px;
    top: 45%;
    pointer-events: none;
}


#select_model{
    border: none;
    outline: none;
    width: 100%;
    height: 40px;
    line-height: 40px;
    appearance: none;
    -webkit-appearance: none;
    -moz-appearance: none;
    padding-left: 60px;
}

.img-preview{
    float:left;
}

.img-predict{
    float: right;
}

input[type="file"] {
    display: none;
}


input[id="submit-button"] {
    display: none;
}


.upload-label{
    display: inline-block;
    padding: 12px 30px;
    background: #ffc107;
    color: #fff;
    font-size: 1em;
    transition: all .4s;
    cursor: pointer;
}

.upload-label:hover{
    background: #34495E;
    color: #39D2B4;
}



.loader {
    border: 8px solid #f3f3f3; /* Light grey */
    border-top: 8px solid #3498db; /* Blue */
    border-radius: 50%;
    width: 50px;
    height: 50px;
    animation: spin 1s linear infinite;
}

@keyframes spin {
    0% { transform: rotate(0deg); }
    100% { transform: rotate(360deg); }
}

/* progress bar frame */
.barcontainer{
    width:600px;
    height:25px;
    border:1px solid #708090;
}
#angrybar{
    background:#FFA500;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color:#ffffff;
}
#excitedbar{
    background:#40E0D0;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#frustratedbar{
    background:#98FB98;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#happybar{
    background:#FF69B4;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#neuralbar{  /* 'neural' is this repo's spelling of the neutral class */
    background:#808000;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
#sadbar{
    background:#FFCC33;
    float:left;
    height:100%;
    text-align:center;
    line-height:150%;
    color: #ffffff;
}
--------------------------------------------------------------------------------
/FV2ES/System/templates/base.html:
--------------------------------------------------------------------------------
[The HTML markup of this template was lost when the page was extracted to text. What survives
shows a standard page skeleton whose title is "Emotion Analysis" and which declares the Jinja
block that child templates fill in:]

{% block content %}{% endblock %}
--------------------------------------------------------------------------------
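The next template, index1.html, fills the content block that base.html declares; Flask resolves the {% extends %} through its template loader. A self-contained sketch of that mechanism using Jinja directly (the template strings here are illustrative stand-ins for the real files):

from jinja2 import DictLoader, Environment

env = Environment(loader=DictLoader({
    'base.html': '<title>Emotion Analysis</title> {% block content %}{% endblock %}',
    'index1.html': '{% extends "base.html" %}{% block content %}<h1>Upload</h1>{% endblock %}',
}))
# the child's block body is substituted into the parent's skeleton
print(env.get_template('index1.html').render())
# -> '<title>Emotion Analysis</title> <h1>Upload</h1>'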
/FV2ES/System/templates/index1.html:
--------------------------------------------------------------------------------
{% extends "base.html" %} {% block content %}

[The body of this template was lost in extraction — evidently the upload form, the preview and
result panes, the per-emotion progress bars styled in main.css, and the inline script that
drives the handlers in app.py. The recoverable text is the heading and upload hint:]

Upload Your Video, Audio, Text and Label here
Tip: You need to submit video (mp4 and avi), audio (wav), text (txt) and divided files (txt)

{% endblock %}
--------------------------------------------------------------------------------
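The result pane this template once rendered is driven by result.txt: app.py reads it back as six lines in a fixed label order and serves them as JSON. A minimal sketch of that file convention, assuming one score per line; the writer function is hypothetical (the real write happens in the trainer code, which this dump does not show):

EMOTIONS = ['angry', 'excited', 'frustrated', 'happy', 'neural', 'sad']  # order assumed by app.py

def write_result(scores, path='result.txt'):
    # hypothetical writer: one score per line, same order as EMOTIONS
    with open(path, 'w') as f:
        f.writelines(f'{s}\n' for s in scores)

def read_result(path='result.txt'):
    # mirrors the parsing loop in System/app.py
    with open(path) as f:
        return dict(zip(EMOTIONS, (line.strip('\n') for line in f)))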
/FV2ES/V2EM_prediction/main.py:
--------------------------------------------------------------------------------
import os
import time
import torch
import numpy as np
from torch.utils.data import DataLoader
from src.cli import get_args
from src.datasets import get_dataset_iemocap, collate_fn

from src.models.c_e2e import MME2E

from src.trainers.r_emotiontrainer import IemocapTrainer

import sys

if __name__ == "__main__":
    start = time.time()

    # Hard-coded CLI so the system can invoke this script with a bare '--test';
    # get_args() below parses this list instead of the real command line.
    sys.argv = ['main.py', '-lr=4.5e-6', '-ep=40', '-mod=tav', '-bs=1', '--img-interval=500',
                '--early-stop=6', '--loss=bce', '--cuda=0', '--model=mme2e', '--num-emotions=6',
                '--trans-dim=64', '--trans-nlayers=4', '--trans-nheads=4', '--text-lr-factor=10',
                '--text-model-size=base', '--text-max-len=100', '--test',
                '--datapath=../IEMOCAP_PREPROCESS_10']

    args = get_args()

    # Fix seed for reproducibility
    seed = args['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Set device
    os.environ["CUDA_VISIBLE_DEVICES"] = args['cuda']
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # input data
    test_dataset = get_dataset_iemocap(data_folder=args['datapath'], phase='test',
                                       img_interval=args['img_interval'], hand_crafted_features=args['hand_crafted'])
    test_loader = DataLoader(test_dataset, batch_size=args['batch_size'], shuffle=False,
                             num_workers=0, collate_fn=collate_fn)

    dataloaders = {
        'test': test_loader
    }

    lr = args['learning_rate']
    if args['model'] == 'mme2e':
        model = MME2E(args=args, device=device)
        model = model.to(device=device)

        # When using a pre-trained text model, text_lr_factor gives a smaller learning rate to the textual model parts
        if args['text_lr_factor'] == 1:
            optimizer = torch.optim.Adam(model.parameters(), lr=args['learning_rate'], weight_decay=args['weight_decay'])
        else:
            optimizer = torch.optim.Adam([
                {'params': model.T.parameters(), 'lr': lr / args['text_lr_factor']},
                {'params': model.t_out.parameters(), 'lr': lr / args['text_lr_factor']},
                {'params': model.V.parameters()},
                {'params': model.v_flatten.parameters()},
                {'params': model.v_transformer.parameters()},
                {'params': model.v_out.parameters()},
                {'params': model.A.parameters()},
                {'params': model.a_flatten.parameters()},
                {'params': model.a_transformer.parameters()},
                {'params': model.a_out.parameters()},
                {'params': model.weighted_fusion.parameters()},
            ], lr=lr, weight_decay=args['weight_decay'])

    checkpoint = torch.load("./savings/models/mme2e_tav_Acc_0.8477_F1_0.5857_AUC_0.8747_imginvl500_seed0.pt", map_location='cuda:0')
    model.load_state_dict(checkpoint, False)  # load the best checkpoint non-strictly for validation/testing; comment this line out when training from scratch

    scheduler = None
    criterion = None

    trainer = IemocapTrainer(args, model, criterion, optimizer, scheduler, device, dataloaders)

    trainer.test()


    end = time.time()

    print(f'Total time usage = {(end - start) :.2f} seconds.')
--------------------------------------------------------------------------------
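A note on the load_state_dict(checkpoint, False) call above: the second positional argument is strict, so the checkpoint is loaded non-strictly. A small self-contained illustration of what that buys (the toy module is ours; the behavior is standard PyTorch):

import torch
from torch import nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
partial_ckpt = {'0.weight': torch.zeros(8, 4), '0.bias': torch.zeros(8)}  # covers only the first layer

# strict=False tolerates missing/unexpected keys instead of raising,
# and returns them so the caller can verify what was actually loaded
result = net.load_state_dict(partial_ckpt, strict=False)
print(result.missing_keys)     # ['2.weight', '2.bias']
print(result.unexpected_keys)  # []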
/FV2ES/V2EM_prediction/src/attention_block.py:
--------------------------------------------------------------------------------
import torch
from torch import nn
import torch.nn.functional as F
from typing import List


class CrossModalAttentionLayer(nn.Module):
    # y attends x
    def __init__(self, k, x_channels: int, y_size: int, spatial=True):
        super(CrossModalAttentionLayer, self).__init__()
        self.k = k
        self.spatial = spatial

        if spatial:
            self.channel_affine = nn.Linear(x_channels, k)

        self.y_affine = nn.Linear(y_size, k, bias=False)
        self.attn_weight_affine = nn.Linear(k, 1)

    def forward(self, x: List[torch.Tensor], x_lens: List[int], y: torch.Tensor):
        # x -> [(S, C, H, W)], len(x) = bs
        # y -> (bs, D)

        bs = y.size(0)
        x = x.split(x_lens, dim=0)
        y_k = self.y_affine(y)  # (bs, k)

        all_spatial_attn_weights_softmax = []

        for i in range(bs):
            if self.spatial:
                x_tensor = x[i].permute(0, 2, 3, 1)  # (S_v, H_v, W_v, C_v)
                x_k = self.channel_affine(x_tensor)  # (S_v, H_v, W_v, k)
                x_k += y_k[i]
                x_k = torch.tanh(x_k)
                x_attn_weights = self.attn_weight_affine(x_k).squeeze(-1)  # (S_v, H_v, W_v)

                all_spatial_attn_weights_softmax.append(
                    F.softmax(
                        x_attn_weights.reshape(x_tensor.size(0), -1),
                        dim=-1
                    ).reshape(x_tensor.size(0), x_tensor.size(1), x_tensor.size(2))  # (S_v, H_v, W_v)
                )

        return torch.cat(all_spatial_attn_weights_softmax, dim=0)

class SparseCrossModalAttentionLayer(nn.Module):
    def __init__(self, k: int, x_channels: int, y_size: int, sparse_threshold: float):
        super(SparseCrossModalAttentionLayer, self).__init__()
        self.k = k
        self.sparse_threshold = sparse_threshold
        self.channel_affine = nn.Linear(x_channels, k)
        self.y_affine = nn.Linear(y_size, k, bias=False)
        self.attn_weight_affine = nn.Linear(k, 1)

    def forward(self, x: List[torch.Tensor], x_lens: List[int], locations: List[torch.Tensor], y: torch.Tensor):
        # x -> (N, C)
        # locations -> (N, 3)
        # y -> (bs, D)
        bs = y.size(0)
        y_k = self.y_affine(y)  # (bs, k)
        x_k = self.channel_affine(x)  # (N, k)

        sample_points_lens = []
        for i in range(sum(x_lens)):
            sample_points_lens.append(len(locations[locations[:, 2] == i]))

        # how many points are left in each batch
        batch_points_lens = []
        pointer = 0
        for l in x_lens:
            batch_points_lens.append(sum(sample_points_lens[pointer:(pointer + l)]))
            pointer += l

        x_ks = x_k.split(batch_points_lens, dim=0)

        attn_weights = []
        for i in range(bs):
            this_weights = self.attn_weight_affine(torch.tanh(x_ks[i] + y_k[i])).squeeze(-1)
            attn_weights.append(this_weights)

        attn_weights = torch.cat(attn_weights, dim=0)
        attn_weights_split = list(attn_weights.split(sample_points_lens, dim=0))
        attn_weights_split = [F.softmax(a, dim=-1) for a in attn_weights_split]
        attn_weights = torch.cat(attn_weights_split, dim=0)

        attn_weights_sparse = to_sparse_by_cdf(attn_weights, sample_points_lens, self.sparse_threshold)

        select_indices = attn_weights_sparse == 1
        new_x = x[select_indices, :]
        new_locations = locations[select_indices, :]

        return new_x, new_locations, None

def to_sparse_by_cdf(t: torch.Tensor, lens, cdf: float):
    _t = t.clone().detach()
    _t = list(_t.split(lens, dim=0))

    for i, this_t in enumerate(_t):
        this_t_sorted, indices = torch.sort(this_t, descending=True)
        mask = torch.cumsum(this_t_sorted, dim=-1) < cdf
        mask[torch.sum(mask)] = True  # also keep the element that crosses the CDF threshold
        _t[i][indices[mask]] = 1
        _t[i][indices[~mask]] = 0

    return torch.cat(_t, dim=0).long()
--------------------------------------------------------------------------------
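to_sparse_by_cdf above keeps, within each segment of lens, the highest attention weights whose cumulative sum first reaches cdf, and zeroes the rest; this is what makes the attention sparse. A quick worked example (values chosen for illustration):

import torch
from src.attention_block import to_sparse_by_cdf  # import path as laid out in this repo

w = torch.tensor([0.5, 0.3, 0.1, 0.1])
print(to_sparse_by_cdf(w, [4], cdf=0.7))
# tensor([1, 1, 0, 0]) -- 0.5 alone is below 0.7, so the 0.3 that crosses the
# threshold is also kept (that is the `mask[torch.sum(mask)] = True` line)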
/FV2ES/V2EM_prediction/src/e2e_t.py:
--------------------------------------------------------------------------------
from torch import nn
from transformers import AlbertModel

class MME2E_T(nn.Module):
    def __init__(self, feature_dim, num_classes=4, size='base'):
        super(MME2E_T, self).__init__()
        self.albert = AlbertModel.from_pretrained(f'albert-{size}-v2')
        # self.albert = AlbertModel.from_pretrained('./src/models/albert-base-v2')

    def forward(self, text, get_cls=False):
        last_hidden_state = self.albert(**text).last_hidden_state
        if get_cls:
            cls_feature = last_hidden_state[:,0]
            return cls_feature

        # NOTE: self.text_feature_affine is never defined in __init__, so this
        # fallback path would raise AttributeError; callers must pass get_cls=True
        text_features = self.text_feature_affine(last_hidden_state).sum(1)
        return text_features
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/nestnet/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/FV2ES/V2EM_prediction/src/nestnet/__init__.py
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/nestnet/fx_features.py:
-------------------------------------------------------------------------------- 1 | """ PyTorch FX Based Feature Extraction Helpers 2 | Using https://pytorch.org/vision/stable/feature_extraction.html 3 | """ 4 | from typing import Callable 5 | from torch import nn 6 | 7 | from .features import _get_feature_info 8 | 9 | try: 10 | from torchvision.models.feature_extraction import create_feature_extractor 11 | has_fx_feature_extraction = True 12 | except ImportError: 13 | has_fx_feature_extraction = False 14 | 15 | # Layers we want to treat as leaf modules 16 | from .layers import Conv2dSame, ScaledStdConv2dSame, BatchNormAct2d, BlurPool2d, CondConv2d, StdConv2dSame, DropPath 17 | from .layers.non_local_attn import BilinearAttnTransform 18 | from .layers.pool2d_same import MaxPool2dSame, AvgPool2dSame 19 | 20 | # NOTE: By default, any modules from timm.models.layers that we want to treat as leaf modules go here 21 | # BUT modules from timm.models should use the registration mechanism below 22 | _leaf_modules = { 23 | BatchNormAct2d, # reason: flow control for jit scripting 24 | BilinearAttnTransform, # reason: flow control t <= 1 25 | BlurPool2d, # reason: TypeError: F.conv2d received Proxy in groups=x.shape[1] 26 | # Reason: get_same_padding has a max which raises a control flow error 27 | Conv2dSame, MaxPool2dSame, ScaledStdConv2dSame, StdConv2dSame, AvgPool2dSame, 28 | CondConv2d, # reason: TypeError: F.conv2d received Proxy in groups=self.groups * B (because B = x.shape[0]) 29 | DropPath, # reason: TypeError: rand received Proxy in `size` argument 30 | } 31 | 32 | try: 33 | from .layers import InplaceAbn 34 | _leaf_modules.add(InplaceAbn) 35 | except ImportError: 36 | pass 37 | 38 | 39 | def register_notrace_module(module: nn.Module): 40 | """ 41 | Any module not under timm.models.layers should get this decorator if we don't want to trace through it.
42 | """ 43 | _leaf_modules.add(module) 44 | return module 45 | 46 | 47 | # Functions we want to autowrap (treat them as leaves) 48 | _autowrap_functions = set() 49 | 50 | 51 | def register_notrace_function(func: Callable): 52 | """ 53 | Decorator for functions which ought not to be traced through 54 | """ 55 | _autowrap_functions.add(func) 56 | return func 57 | 58 | 59 | class FeatureGraphNet(nn.Module): 60 | def __init__(self, model, out_indices, out_map=None): 61 | super().__init__() 62 | assert has_fx_feature_extraction, 'Please update to PyTorch 1.10+, torchvision 0.11+ for FX feature extraction' 63 | self.feature_info = _get_feature_info(model, out_indices) 64 | if out_map is not None: 65 | assert len(out_map) == len(out_indices) 66 | return_nodes = {info['module']: out_map[i] if out_map is not None else info['module'] 67 | for i, info in enumerate(self.feature_info) if i in out_indices} 68 | self.graph_module = create_feature_extractor( 69 | model, return_nodes, 70 | tracer_kwargs={'leaf_modules': list(_leaf_modules), 'autowrap_functions': list(_autowrap_functions)}) 71 | 72 | def forward(self, x): 73 | return list(self.graph_module(x).values()) -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .activations import * 2 | from .adaptive_avgmax_pool import \ 3 | adaptive_avgmax_pool2d, select_adaptive_pool2d, AdaptiveAvgMaxPool2d, SelectAdaptivePool2d 4 | from .blur_pool import BlurPool2d 5 | from .classifier import ClassifierHead, create_classifier 6 | from .cond_conv2d import CondConv2d, get_condconv_initializer 7 | from .config import is_exportable, is_scriptable, is_no_jit, set_exportable, set_scriptable, set_no_jit,\ 8 | set_layer_config 9 | from .conv2d_same import Conv2dSame, conv2d_same 10 | from .conv_bn_act import ConvBnAct 11 | from .create_act import create_act_layer, get_act_layer, get_act_fn 12 | from .create_attn import get_attn, create_attn 13 | from .create_conv2d import create_conv2d 14 | from .create_norm_act import get_norm_act_layer, create_norm_act, convert_norm_act 15 | from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path 16 | from .eca import EcaModule, CecaModule, EfficientChannelAttn, CircularEfficientChannelAttn 17 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 18 | from .gather_excite import GatherExcite 19 | from .global_context import GlobalContext 20 | from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible 21 | from .inplace_abn import InplaceAbn 22 | from .linear import Linear 23 | from .mixed_conv2d import MixedConv2d 24 | from .mlp import Mlp, GluMlp, GatedMlp, ConvMlp 25 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 26 | from .norm import GroupNorm, LayerNorm2d 27 | from .norm_act import BatchNormAct2d, GroupNormAct 28 | from .padding import get_padding, get_same_padding, pad_same 29 | from .patch_embed import PatchEmbed 30 | from .pool2d_same import AvgPool2dSame, create_pool2d 31 | from .squeeze_excite import SEModule, SqueezeExcite, EffectiveSEModule, EffectiveSqueezeExcite 32 | from .selective_kernel import SelectiveKernel 33 | from .separable_conv import SeparableConv2d, SeparableConvBnAct 34 | from .space_to_depth import SpaceToDepthModule 35 | from .split_attn import SplitAttn 36 | from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model 37 | from .std_conv import StdConv2d, StdConv2dSame, 
ScaledStdConv2d, ScaledStdConv2dSame 38 | from .test_time_pool import TestTimePoolHead, apply_test_time_pool 39 | from .trace_utils import _assert, _float_to_int 40 | from .weight_init import trunc_normal_, variance_scaling_, lecun_normal_ 41 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/activations_jit.py: -------------------------------------------------------------------------------- 1 | """ Activations 2 | 3 | A collection of jit-scripted activations fn and modules with a common interface so that they can 4 | easily be swapped. All have an `inplace` arg even if not used. 5 | 6 | All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not 7 | currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted 8 | versions if they contain in-place ops. 9 | 10 | Hacked together by / Copyright 2020 Ross Wightman 11 | """ 12 | 13 | import torch 14 | from torch import nn as nn 15 | from torch.nn import functional as F 16 | 17 | 18 | @torch.jit.script 19 | def swish_jit(x, inplace: bool = False): 20 | """Swish - Described in: https://arxiv.org/abs/1710.05941 21 | """ 22 | return x.mul(x.sigmoid()) 23 | 24 | 25 | @torch.jit.script 26 | def mish_jit(x, _inplace: bool = False): 27 | """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 28 | """ 29 | return x.mul(F.softplus(x).tanh()) 30 | 31 | 32 | class SwishJit(nn.Module): 33 | def __init__(self, inplace: bool = False): 34 | super(SwishJit, self).__init__() 35 | 36 | def forward(self, x): 37 | return swish_jit(x) 38 | 39 | 40 | class MishJit(nn.Module): 41 | def __init__(self, inplace: bool = False): 42 | super(MishJit, self).__init__() 43 | 44 | def forward(self, x): 45 | return mish_jit(x) 46 | 47 | 48 | @torch.jit.script 49 | def hard_sigmoid_jit(x, inplace: bool = False): 50 | # return F.relu6(x + 3.) / 6. 51 | return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 52 | 53 | 54 | class HardSigmoidJit(nn.Module): 55 | def __init__(self, inplace: bool = False): 56 | super(HardSigmoidJit, self).__init__() 57 | 58 | def forward(self, x): 59 | return hard_sigmoid_jit(x) 60 | 61 | 62 | @torch.jit.script 63 | def hard_swish_jit(x, inplace: bool = False): 64 | # return x * (F.relu6(x + 3.) / 6) 65 | return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 
66 | 67 | 68 | class HardSwishJit(nn.Module): 69 | def __init__(self, inplace: bool = False): 70 | super(HardSwishJit, self).__init__() 71 | 72 | def forward(self, x): 73 | return hard_swish_jit(x) 74 | 75 | 76 | @torch.jit.script 77 | def hard_mish_jit(x, inplace: bool = False): 78 | """ Hard Mish 79 | Experimental, based on notes by Mish author Diganta Misra at 80 | https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md 81 | """ 82 | return 0.5 * x * (x + 2).clamp(min=0, max=2) 83 | 84 | 85 | class HardMishJit(nn.Module): 86 | def __init__(self, inplace: bool = False): 87 | super(HardMishJit, self).__init__() 88 | 89 | def forward(self, x): 90 | return hard_mish_jit(x) 91 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/blur_pool.py: -------------------------------------------------------------------------------- 1 | """ 2 | BlurPool layer inspired by 3 | - Kornia's Max_BlurPool2d 4 | - Making Convolutional Networks Shift-Invariant Again :cite:`zhang2019shiftinvar` 5 | 6 | Hacked together by Chris Ha and Ross Wightman 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import numpy as np 13 | from .padding import get_padding 14 | 15 | 16 | class BlurPool2d(nn.Module): 17 | r"""Creates a module that computes blurs and downsample a given feature map. 18 | See :cite:`zhang2019shiftinvar` for more details. 19 | Corresponds to the Downsample class, which does blurring and subsampling 20 | 21 | Args: 22 | channels = Number of input channels 23 | filt_size (int): binomial filter size for blurring. currently supports 3 (default) and 5. 24 | stride (int): downsampling filter stride 25 | 26 | Returns: 27 | torch.Tensor: the transformed tensor. 
28 | """ 29 | def __init__(self, channels, filt_size=3, stride=2) -> None: 30 | super(BlurPool2d, self).__init__() 31 | assert filt_size > 1 32 | self.channels = channels 33 | self.filt_size = filt_size 34 | self.stride = stride 35 | self.padding = [get_padding(filt_size, stride, dilation=1)] * 4 36 | coeffs = torch.tensor((np.poly1d((0.5, 0.5)) ** (self.filt_size - 1)).coeffs.astype(np.float32)) 37 | blur_filter = (coeffs[:, None] * coeffs[None, :])[None, None, :, :].repeat(self.channels, 1, 1, 1) 38 | self.register_buffer('filt', blur_filter, persistent=False) 39 | 40 | def forward(self, x: torch.Tensor) -> torch.Tensor: 41 | x = F.pad(x, self.padding, 'reflect') 42 | return F.conv2d(x, self.filt, stride=self.stride, groups=x.shape[1]) 43 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/classifier.py: -------------------------------------------------------------------------------- 1 | """ Classifier head and layer factory 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | from torch.nn import functional as F 7 | 8 | from .adaptive_avgmax_pool import SelectAdaptivePool2d 9 | 10 | 11 | def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False): 12 | flatten_in_pool = not use_conv # flatten when we use a Linear layer after pooling 13 | if not pool_type: 14 | assert num_classes == 0 or use_conv,\ 15 | 'Pooling can only be disabled if classifier is also removed or conv classifier is used' 16 | flatten_in_pool = False # disable flattening if pooling is pass-through (no pooling) 17 | global_pool = SelectAdaptivePool2d(pool_type=pool_type, flatten=flatten_in_pool) 18 | num_pooled_features = num_features * global_pool.feat_mult() 19 | return global_pool, num_pooled_features 20 | 21 | 22 | def _create_fc(num_features, num_classes, use_conv=False): 23 | if num_classes <= 0: 24 | fc = nn.Identity() # pass-through (no classifier) 25 | elif use_conv: 26 | fc = nn.Conv2d(num_features, num_classes, 1, bias=True) 27 | else: 28 | fc = nn.Linear(num_features, num_classes, bias=True) 29 | return fc 30 | 31 | 32 | def create_classifier(num_features, num_classes, pool_type='avg', use_conv=False): 33 | global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv) 34 | fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 35 | return global_pool, fc 36 | 37 | 38 | class ClassifierHead(nn.Module): 39 | """Classifier head w/ configurable global pooling and dropout.""" 40 | 41 | def __init__(self, in_chs, num_classes, pool_type='avg', drop_rate=0., use_conv=False): 42 | super(ClassifierHead, self).__init__() 43 | self.drop_rate = drop_rate 44 | self.global_pool, num_pooled_features = _create_pool(in_chs, num_classes, pool_type, use_conv=use_conv) 45 | self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 46 | self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity() 47 | 48 | def forward(self, x): 49 | x = self.global_pool(x) 50 | if self.drop_rate: 51 | x = F.dropout(x, p=float(self.drop_rate), training=self.training) 52 | x = self.fc(x) 53 | x = self.flatten(x) 54 | return x 55 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/config.py: -------------------------------------------------------------------------------- 1 | """ Model / Layer Config singleton state 2 | """ 3 | from typing import Any, Optional 
4 | 5 | __all__ = [ 6 | 'is_exportable', 'is_scriptable', 'is_no_jit', 7 | 'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config' 8 | ] 9 | 10 | # Set to True if prefer to have layers with no jit optimization (includes activations) 11 | _NO_JIT = False 12 | 13 | # Set to True if prefer to have activation layers with no jit optimization 14 | # NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying 15 | # the jit flags so far are activations. This will change as more layers are updated and/or added. 16 | _NO_ACTIVATION_JIT = False 17 | 18 | # Set to True if exporting a model with Same padding via ONNX 19 | _EXPORTABLE = False 20 | 21 | # Set to True if wanting to use torch.jit.script on a model 22 | _SCRIPTABLE = False 23 | 24 | 25 | def is_no_jit(): 26 | return _NO_JIT 27 | 28 | 29 | class set_no_jit: 30 | def __init__(self, mode: bool) -> None: 31 | global _NO_JIT 32 | self.prev = _NO_JIT 33 | _NO_JIT = mode 34 | 35 | def __enter__(self) -> None: 36 | pass 37 | 38 | def __exit__(self, *args: Any) -> bool: 39 | global _NO_JIT 40 | _NO_JIT = self.prev 41 | return False 42 | 43 | 44 | def is_exportable(): 45 | return _EXPORTABLE 46 | 47 | 48 | class set_exportable: 49 | def __init__(self, mode: bool) -> None: 50 | global _EXPORTABLE 51 | self.prev = _EXPORTABLE 52 | _EXPORTABLE = mode 53 | 54 | def __enter__(self) -> None: 55 | pass 56 | 57 | def __exit__(self, *args: Any) -> bool: 58 | global _EXPORTABLE 59 | _EXPORTABLE = self.prev 60 | return False 61 | 62 | 63 | def is_scriptable(): 64 | return _SCRIPTABLE 65 | 66 | 67 | class set_scriptable: 68 | def __init__(self, mode: bool) -> None: 69 | global _SCRIPTABLE 70 | self.prev = _SCRIPTABLE 71 | _SCRIPTABLE = mode 72 | 73 | def __enter__(self) -> None: 74 | pass 75 | 76 | def __exit__(self, *args: Any) -> bool: 77 | global _SCRIPTABLE 78 | _SCRIPTABLE = self.prev 79 | return False 80 | 81 | 82 | class set_layer_config: 83 | """ Layer config context manager that allows setting all layer config flags at once. 84 | If a flag arg is None, it will not change the current value. 
85 | """ 86 | def __init__( 87 | self, 88 | scriptable: Optional[bool] = None, 89 | exportable: Optional[bool] = None, 90 | no_jit: Optional[bool] = None, 91 | no_activation_jit: Optional[bool] = None): 92 | global _SCRIPTABLE 93 | global _EXPORTABLE 94 | global _NO_JIT 95 | global _NO_ACTIVATION_JIT 96 | self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT 97 | if scriptable is not None: 98 | _SCRIPTABLE = scriptable 99 | if exportable is not None: 100 | _EXPORTABLE = exportable 101 | if no_jit is not None: 102 | _NO_JIT = no_jit 103 | if no_activation_jit is not None: 104 | _NO_ACTIVATION_JIT = no_activation_jit 105 | 106 | def __enter__(self) -> None: 107 | pass 108 | 109 | def __exit__(self, *args: Any) -> bool: 110 | global _SCRIPTABLE 111 | global _EXPORTABLE 112 | global _NO_JIT 113 | global _NO_ACTIVATION_JIT 114 | _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev 115 | return False 116 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/conv2d_same.py: -------------------------------------------------------------------------------- 1 | """ Conv2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import Tuple, Optional 9 | 10 | from .padding import pad_same, get_padding_value 11 | 12 | 13 | def conv2d_same( 14 | x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1), 15 | padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1): 16 | x = pad_same(x, weight.shape[-2:], stride, dilation) 17 | return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups) 18 | 19 | 20 | class Conv2dSame(nn.Conv2d): 21 | """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions 22 | """ 23 | 24 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 25 | padding=0, dilation=1, groups=1, bias=True): 26 | super(Conv2dSame, self).__init__( 27 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 28 | 29 | def forward(self, x): 30 | return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 31 | 32 | 33 | def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): 34 | padding = kwargs.pop('padding', '') 35 | kwargs.setdefault('bias', False) 36 | padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs) 37 | if is_dynamic: 38 | return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs) 39 | else: 40 | return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) 41 | 42 | 43 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/conv_bn_act.py: -------------------------------------------------------------------------------- 1 | """ Conv2d + BN + Act 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | 7 | from .create_conv2d import create_conv2d 8 | from .create_norm_act import convert_norm_act 9 | 10 | 11 | class ConvBnAct(nn.Module): 12 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding='', dilation=1, groups=1, 13 | bias=False, apply_act=True, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, aa_layer=None, 14 | drop_block=None): 15 | super(ConvBnAct, self).__init__() 16 | use_aa = aa_layer is not None 17 | 18 | self.conv = create_conv2d( 19 | 
in_channels, out_channels, kernel_size, stride=1 if use_aa else stride, 20 | padding=padding, dilation=dilation, groups=groups, bias=bias) 21 | 22 | # NOTE for backwards compatibility with models that use separate norm and act layer definitions 23 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 24 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 25 | self.aa = aa_layer(channels=out_channels) if stride == 2 and use_aa else None 26 | 27 | @property 28 | def in_channels(self): 29 | return self.conv.in_channels 30 | 31 | @property 32 | def out_channels(self): 33 | return self.conv.out_channels 34 | 35 | def forward(self, x): 36 | x = self.conv(x) 37 | x = self.bn(x) 38 | if self.aa is not None: 39 | x = self.aa(x) 40 | return x 41 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/create_attn.py: -------------------------------------------------------------------------------- 1 | """ Attention Factory 2 | 3 | Hacked together by / Copyright 2021 Ross Wightman 4 | """ 5 | import torch 6 | from functools import partial 7 | 8 | from .bottleneck_attn import BottleneckAttn 9 | from .cbam import CbamModule, LightCbamModule 10 | from .eca import EcaModule, CecaModule 11 | from .gather_excite import GatherExcite 12 | from .global_context import GlobalContext 13 | from .halo_attn import HaloAttn 14 | from .lambda_layer import LambdaLayer 15 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 16 | from .selective_kernel import SelectiveKernel 17 | from .split_attn import SplitAttn 18 | from .squeeze_excite import SEModule, EffectiveSEModule 19 | 20 | 21 | def get_attn(attn_type): 22 | if isinstance(attn_type, torch.nn.Module): 23 | return attn_type 24 | module_cls = None 25 | if attn_type is not None: 26 | if isinstance(attn_type, str): 27 | attn_type = attn_type.lower() 28 | # Lightweight attention modules (channel and/or coarse spatial). 29 | # Typically added to existing network architecture blocks in addition to existing convolutions. 30 | if attn_type == 'se': 31 | module_cls = SEModule 32 | elif attn_type == 'ese': 33 | module_cls = EffectiveSEModule 34 | elif attn_type == 'eca': 35 | module_cls = EcaModule 36 | elif attn_type == 'ecam': 37 | module_cls = partial(EcaModule, use_mlp=True) 38 | elif attn_type == 'ceca': 39 | module_cls = CecaModule 40 | elif attn_type == 'ge': 41 | module_cls = GatherExcite 42 | elif attn_type == 'gc': 43 | module_cls = GlobalContext 44 | elif attn_type == 'gca': 45 | module_cls = partial(GlobalContext, fuse_add=True, fuse_scale=False) 46 | elif attn_type == 'cbam': 47 | module_cls = CbamModule 48 | elif attn_type == 'lcbam': 49 | module_cls = LightCbamModule 50 | 51 | # Attention / attention-like modules w/ significant params 52 | # Typically replace some of the existing workhorse convs in a network architecture. 53 | # All of these accept a stride argument and can spatially downsample the input. 54 | elif attn_type == 'sk': 55 | module_cls = SelectiveKernel 56 | elif attn_type == 'splat': 57 | module_cls = SplitAttn 58 | 59 | # Self-attention / attention-like modules w/ significant compute and/or params 60 | # Typically replace some of the existing workhorse convs in a network architecture. 61 | # All of these accept a stride argument and can spatially downsample the input. 
62 | elif attn_type == 'lambda': 63 | return LambdaLayer 64 | elif attn_type == 'bottleneck': 65 | return BottleneckAttn 66 | elif attn_type == 'halo': 67 | return HaloAttn 68 | elif attn_type == 'nl': 69 | module_cls = NonLocalAttn 70 | elif attn_type == 'bat': 71 | module_cls = BatNonLocalAttn 72 | 73 | # Woops! 74 | else: 75 | assert False, "Invalid attn module (%s)" % attn_type 76 | elif isinstance(attn_type, bool): 77 | if attn_type: 78 | module_cls = SEModule 79 | else: 80 | module_cls = attn_type 81 | return module_cls 82 | 83 | 84 | def create_attn(attn_type, channels, **kwargs): 85 | module_cls = get_attn(attn_type) 86 | if module_cls is not None: 87 | # NOTE: it's expected the first (positional) argument of all attention layers is the # input channels 88 | return module_cls(channels, **kwargs) 89 | return None 90 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/create_conv2d.py: -------------------------------------------------------------------------------- 1 | """ Create Conv2d Factory Method 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | from .mixed_conv2d import MixedConv2d 7 | from .cond_conv2d import CondConv2d 8 | from .conv2d_same import create_conv2d_pad 9 | 10 | 11 | def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): 12 | """ Select a 2d convolution implementation based on arguments 13 | Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d. 14 | 15 | Used extensively by EfficientNet, MobileNetv3 and related networks. 16 | """ 17 | if isinstance(kernel_size, list): 18 | assert 'num_experts' not in kwargs # MixNet + CondConv combo not supported currently 19 | assert 'groups' not in kwargs # MixedConv groups are defined by kernel list 20 | # We're going to use only lists for defining the MixedConv2d kernel groups, 21 | # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. 22 | m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs) 23 | else: 24 | depthwise = kwargs.pop('depthwise', False) 25 | # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0 26 | groups = in_channels if depthwise else kwargs.pop('groups', 1) 27 | if 'num_experts' in kwargs and kwargs['num_experts'] > 0: 28 | m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 29 | else: 30 | m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 31 | return m 32 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/create_norm_act.py: -------------------------------------------------------------------------------- 1 | """ NormAct (Normalizaiton + Activation Layer) Factory 2 | 3 | Create norm + act combo modules that attempt to be backwards compatible with separate norm + act 4 | isntances in models. Where these are used it will be possible to swap separate BN + act layers with 5 | combined modules like IABN or EvoNorms. 
6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | import types 10 | import functools 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 16 | from .norm_act import BatchNormAct2d, GroupNormAct 17 | from .inplace_abn import InplaceAbn 18 | 19 | _NORM_ACT_TYPES = {BatchNormAct2d, GroupNormAct, EvoNormBatch2d, EvoNormSample2d, InplaceAbn} 20 | _NORM_ACT_REQUIRES_ARG = {BatchNormAct2d, GroupNormAct, InplaceAbn} # requires act_layer arg to define act type 21 | 22 | 23 | def get_norm_act_layer(layer_class): 24 | layer_class = layer_class.replace('_', '').lower() 25 | if layer_class.startswith("batchnorm"): 26 | layer = BatchNormAct2d 27 | elif layer_class.startswith("groupnorm"): 28 | layer = GroupNormAct 29 | elif layer_class == "evonormbatch": 30 | layer = EvoNormBatch2d 31 | elif layer_class == "evonormsample": 32 | layer = EvoNormSample2d 33 | elif layer_class == "iabn" or layer_class == "inplaceabn": 34 | layer = InplaceAbn 35 | else: 36 | assert False, "Invalid norm_act layer (%s)" % layer_class 37 | return layer 38 | 39 | 40 | def create_norm_act(layer_type, num_features, apply_act=True, jit=False, **kwargs): 41 | layer_parts = layer_type.split('-') # e.g. batchnorm-leaky_relu 42 | assert len(layer_parts) in (1, 2) 43 | layer = get_norm_act_layer(layer_parts[0]) 44 | #activation_class = layer_parts[1].lower() if len(layer_parts) > 1 else '' # FIXME support string act selection? 45 | layer_instance = layer(num_features, apply_act=apply_act, **kwargs) 46 | if jit: 47 | layer_instance = torch.jit.script(layer_instance) 48 | return layer_instance 49 | 50 | 51 | def convert_norm_act(norm_layer, act_layer): 52 | assert isinstance(norm_layer, (type, str, types.FunctionType, functools.partial)) 53 | assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, functools.partial)) 54 | norm_act_kwargs = {} 55 | 56 | # unbind partial fn, so args can be rebound later 57 | if isinstance(norm_layer, functools.partial): 58 | norm_act_kwargs.update(norm_layer.keywords) 59 | norm_layer = norm_layer.func 60 | 61 | if isinstance(norm_layer, str): 62 | norm_act_layer = get_norm_act_layer(norm_layer) 63 | elif norm_layer in _NORM_ACT_TYPES: 64 | norm_act_layer = norm_layer 65 | elif isinstance(norm_layer, types.FunctionType): 66 | # if function type, must be a lambda/fn that creates a norm_act layer 67 | norm_act_layer = norm_layer 68 | else: 69 | type_name = norm_layer.__name__.lower() 70 | if type_name.startswith('batchnorm'): 71 | norm_act_layer = BatchNormAct2d 72 | elif type_name.startswith('groupnorm'): 73 | norm_act_layer = GroupNormAct 74 | else: 75 | assert False, f"No equivalent norm_act layer for {type_name}" 76 | 77 | if norm_act_layer in _NORM_ACT_REQUIRES_ARG: 78 | # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. 
79 | # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types 80 | norm_act_kwargs.setdefault('act_layer', act_layer) 81 | if norm_act_kwargs: 82 | norm_act_layer = functools.partial(norm_act_layer, **norm_act_kwargs) # bind/rebind args 83 | return norm_act_layer 84 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/evo_norm.py: -------------------------------------------------------------------------------- 1 | """EvoNormB0 (Batched) and EvoNormS0 (Sample) in PyTorch 2 | 3 | An attempt at getting decent performing EvoNorms running in PyTorch. 4 | While currently faster than other impl, still quite a ways off the built-in BN 5 | in terms of memory usage and throughput (roughly 5x mem, 1/2 - 1/3x speed). 6 | 7 | Still very much a WIP, fiddling with buffer usage, in-place/jit optimizations, and layouts. 8 | 9 | Hacked together by / Copyright 2020 Ross Wightman 10 | """ 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .trace_utils import _assert 16 | 17 | 18 | class EvoNormBatch2d(nn.Module): 19 | def __init__(self, num_features, apply_act=True, momentum=0.1, eps=1e-5, drop_block=None): 20 | super(EvoNormBatch2d, self).__init__() 21 | self.apply_act = apply_act # apply activation (non-linearity) 22 | self.momentum = momentum 23 | self.eps = eps 24 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 25 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 26 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 27 | self.register_buffer('running_var', torch.ones(num_features)) 28 | self.reset_parameters() 29 | 30 | def reset_parameters(self): 31 | nn.init.ones_(self.weight) 32 | nn.init.zeros_(self.bias) 33 | if self.apply_act: 34 | nn.init.ones_(self.v) 35 | 36 | def forward(self, x): 37 | _assert(x.dim() == 4, 'expected 4D input') 38 | x_type = x.dtype 39 | if self.v is not None: 40 | running_var = self.running_var.view(1, -1, 1, 1) 41 | if self.training: 42 | var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) 43 | n = x.numel() / x.shape[1] 44 | running_var = var.detach() * self.momentum * (n / (n - 1)) + running_var * (1 - self.momentum) 45 | self.running_var.copy_(running_var.view(self.running_var.shape)) 46 | else: 47 | var = running_var 48 | v = self.v.to(dtype=x_type).reshape(1, -1, 1, 1) 49 | d = x * v + (x.var(dim=(2, 3), unbiased=False, keepdim=True) + self.eps).sqrt().to(dtype=x_type) 50 | d = d.max((var + self.eps).sqrt().to(dtype=x_type)) 51 | x = x / d 52 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 53 | 54 | 55 | class EvoNormSample2d(nn.Module): 56 | def __init__(self, num_features, apply_act=True, groups=32, eps=1e-5, drop_block=None): 57 | super(EvoNormSample2d, self).__init__() 58 | self.apply_act = apply_act # apply activation (non-linearity) 59 | self.groups = groups 60 | self.eps = eps 61 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 62 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 63 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 64 | self.reset_parameters() 65 | 66 | def reset_parameters(self): 67 | nn.init.ones_(self.weight) 68 | nn.init.zeros_(self.bias) 69 | if self.apply_act: 70 | nn.init.ones_(self.v) 71 | 72 | def forward(self, x): 73 | _assert(x.dim() == 4, 'expected 4D input') 74 | B, C, H, W = x.shape 75 | _assert(C 
% self.groups == 0, '') 76 | if self.v is not None: 77 | n = x * (x * self.v.view(1, -1, 1, 1)).sigmoid() 78 | x = x.reshape(B, self.groups, -1) 79 | x = n.reshape(B, self.groups, -1) / (x.var(dim=-1, unbiased=False, keepdim=True) + self.eps).sqrt() 80 | x = x.reshape(B, C, H, W) 81 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 82 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/gather_excite.py: -------------------------------------------------------------------------------- 1 | """ Gather-Excite Attention Block 2 | 3 | Paper: `Gather-Excite: Exploiting Feature Context in CNNs` - https://arxiv.org/abs/1810.12348 4 | 5 | Official code here, but it's only partial impl in Caffe: https://github.com/hujie-frank/GENet 6 | 7 | I've tried to support all of the extent both w/ and w/o params. I don't believe I've seen another 8 | impl that covers all of the cases. 9 | 10 | NOTE: extent=0 + extra_params=False is equivalent to Squeeze-and-Excitation 11 | 12 | Hacked together by / Copyright 2021 Ross Wightman 13 | """ 14 | import math 15 | 16 | from torch import nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .create_act import create_act_layer, get_act_layer 20 | from .create_conv2d import create_conv2d 21 | from .helpers import make_divisible 22 | from .mlp import ConvMlp 23 | 24 | 25 | class GatherExcite(nn.Module): 26 | """ Gather-Excite Attention Module 27 | """ 28 | def __init__( 29 | self, channels, feat_size=None, extra_params=False, extent=0, use_mlp=True, 30 | rd_ratio=1./16, rd_channels=None, rd_divisor=1, add_maxpool=False, 31 | act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, gate_layer='sigmoid'): 32 | super(GatherExcite, self).__init__() 33 | self.add_maxpool = add_maxpool 34 | act_layer = get_act_layer(act_layer) 35 | self.extent = extent 36 | if extra_params: 37 | self.gather = nn.Sequential() 38 | if extent == 0: 39 | assert feat_size is not None, 'spatial feature size must be specified for global extent w/ params' 40 | self.gather.add_module( 41 | 'conv1', create_conv2d(channels, channels, kernel_size=feat_size, stride=1, depthwise=True)) 42 | if norm_layer: 43 | self.gather.add_module(f'norm1', nn.BatchNorm2d(channels)) 44 | else: 45 | assert extent % 2 == 0 46 | num_conv = int(math.log2(extent)) 47 | for i in range(num_conv): 48 | self.gather.add_module( 49 | f'conv{i + 1}', 50 | create_conv2d(channels, channels, kernel_size=3, stride=2, depthwise=True)) 51 | if norm_layer: 52 | self.gather.add_module(f'norm{i + 1}', nn.BatchNorm2d(channels)) 53 | if i != num_conv - 1: 54 | self.gather.add_module(f'act{i + 1}', act_layer(inplace=True)) 55 | else: 56 | self.gather = None 57 | if self.extent == 0: 58 | self.gk = 0 59 | self.gs = 0 60 | else: 61 | assert extent % 2 == 0 62 | self.gk = self.extent * 2 - 1 63 | self.gs = self.extent 64 | 65 | if not rd_channels: 66 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
67 | self.mlp = ConvMlp(channels, rd_channels, act_layer=act_layer) if use_mlp else nn.Identity() 68 | self.gate = create_act_layer(gate_layer) 69 | 70 | def forward(self, x): 71 | size = x.shape[-2:] 72 | if self.gather is not None: 73 | x_ge = self.gather(x) 74 | else: 75 | if self.extent == 0: 76 | # global extent 77 | x_ge = x.mean(dim=(2, 3), keepdims=True) 78 | if self.add_maxpool: 79 | # experimental codepath, may remove or change 80 | x_ge = 0.5 * x_ge + 0.5 * x.amax((2, 3), keepdim=True) 81 | else: 82 | x_ge = F.avg_pool2d( 83 | x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2, count_include_pad=False) 84 | if self.add_maxpool: 85 | # experimental codepath, may remove or change 86 | x_ge = 0.5 * x_ge + 0.5 * F.max_pool2d(x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2) 87 | x_ge = self.mlp(x_ge) 88 | if x_ge.shape[-1] != 1 or x_ge.shape[-2] != 1: 89 | x_ge = F.interpolate(x_ge, size=size) 90 | return x * self.gate(x_ge) 91 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/global_context.py: -------------------------------------------------------------------------------- 1 | """ Global Context Attention Block 2 | 3 | Paper: `GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond` 4 | - https://arxiv.org/abs/1904.11492 5 | 6 | Official code consulted as reference: https://github.com/xvjiarui/GCNet 7 | 8 | Hacked together by / Copyright 2021 Ross Wightman 9 | """ 10 | from torch import nn as nn 11 | import torch.nn.functional as F 12 | 13 | from .create_act import create_act_layer, get_act_layer 14 | from .helpers import make_divisible 15 | from .mlp import ConvMlp 16 | from .norm import LayerNorm2d 17 | 18 | 19 | class GlobalContext(nn.Module): 20 | 21 | def __init__(self, channels, use_attn=True, fuse_add=False, fuse_scale=True, init_last_zero=False, 22 | rd_ratio=1./8, rd_channels=None, rd_divisor=1, act_layer=nn.ReLU, gate_layer='sigmoid'): 23 | super(GlobalContext, self).__init__() 24 | act_layer = get_act_layer(act_layer) 25 | 26 | self.conv_attn = nn.Conv2d(channels, 1, kernel_size=1, bias=True) if use_attn else None 27 | 28 | if rd_channels is None: 29 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
30 | if fuse_add: 31 | self.mlp_add = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 32 | else: 33 | self.mlp_add = None 34 | if fuse_scale: 35 | self.mlp_scale = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 36 | else: 37 | self.mlp_scale = None 38 | 39 | self.gate = create_act_layer(gate_layer) 40 | self.init_last_zero = init_last_zero 41 | self.reset_parameters() 42 | 43 | def reset_parameters(self): 44 | if self.conv_attn is not None: 45 | nn.init.kaiming_normal_(self.conv_attn.weight, mode='fan_in', nonlinearity='relu') 46 | if self.mlp_add is not None: 47 | nn.init.zeros_(self.mlp_add.fc2.weight) 48 | 49 | def forward(self, x): 50 | B, C, H, W = x.shape 51 | 52 | if self.conv_attn is not None: 53 | attn = self.conv_attn(x).reshape(B, 1, H * W) # (B, 1, H * W) 54 | attn = F.softmax(attn, dim=-1).unsqueeze(3) # (B, 1, H * W, 1) 55 | context = x.reshape(B, C, H * W).unsqueeze(1) @ attn 56 | context = context.view(B, C, 1, 1) 57 | else: 58 | context = x.mean(dim=(2, 3), keepdim=True) 59 | 60 | if self.mlp_scale is not None: 61 | mlp_x = self.mlp_scale(context) 62 | x = x * self.gate(mlp_x) 63 | if self.mlp_add is not None: 64 | mlp_x = self.mlp_add(context) 65 | x = x + mlp_x 66 | 67 | return x 68 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from itertools import repeat 6 | import collections.abc 7 | 8 | 9 | # From PyTorch internals 10 | def _ntuple(n): 11 | def parse(x): 12 | if isinstance(x, collections.abc.Iterable): 13 | return x 14 | return tuple(repeat(x, n)) 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/inplace_abn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn as nn 3 | 4 | try: 5 | from inplace_abn.functions import inplace_abn, inplace_abn_sync 6 | has_iabn = True 7 | except ImportError: 8 | has_iabn = False 9 | 10 | def inplace_abn(x, weight, bias, running_mean, running_var, 11 | training=True, momentum=0.1, eps=1e-05, activation="leaky_relu", activation_param=0.01): 12 | raise ImportError( 13 | "Please install InplaceABN:'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.12'") 14 | 15 | def inplace_abn_sync(**kwargs): 16 | inplace_abn(**kwargs) 17 | 18 | 19 | class InplaceAbn(nn.Module): 20 | """Activated Batch Normalization 21 | 22 | This gathers a BatchNorm and an activation function in a single module 23 | 24 | Parameters 25 | ---------- 26 | num_features : int 27 | Number of feature channels in the input and output. 28 | eps : float 29 | Small constant to prevent numerical issues. 30 | momentum : float 31 | Momentum factor applied to compute running statistics. 
32 | affine : bool 33 | If `True` apply learned scale and shift transformation after normalization. 34 | act_layer : str or nn.Module type 35 | Name or type of the activation functions, one of: `leaky_relu`, `elu` 36 | act_param : float 37 | Negative slope for the `leaky_relu` activation. 38 | """ 39 | 40 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, apply_act=True, 41 | act_layer="leaky_relu", act_param=0.01, drop_block=None): 42 | super(InplaceAbn, self).__init__() 43 | self.num_features = num_features 44 | self.affine = affine 45 | self.eps = eps 46 | self.momentum = momentum 47 | if apply_act: 48 | if isinstance(act_layer, str): 49 | assert act_layer in ('leaky_relu', 'elu', 'identity', '') 50 | self.act_name = act_layer if act_layer else 'identity' 51 | else: 52 | # convert act layer passed as type to string 53 | if act_layer == nn.ELU: 54 | self.act_name = 'elu' 55 | elif act_layer == nn.LeakyReLU: 56 | self.act_name = 'leaky_relu' 57 | elif act_layer == nn.Identity: 58 | self.act_name = 'identity' 59 | else: 60 | assert False, f'Invalid act layer {act_layer.__name__} for IABN' 61 | else: 62 | self.act_name = 'identity' 63 | self.act_param = act_param 64 | if self.affine: 65 | self.weight = nn.Parameter(torch.ones(num_features)) 66 | self.bias = nn.Parameter(torch.zeros(num_features)) 67 | else: 68 | self.register_parameter('weight', None) 69 | self.register_parameter('bias', None) 70 | self.register_buffer('running_mean', torch.zeros(num_features)) 71 | self.register_buffer('running_var', torch.ones(num_features)) 72 | self.reset_parameters() 73 | 74 | def reset_parameters(self): 75 | nn.init.constant_(self.running_mean, 0) 76 | nn.init.constant_(self.running_var, 1) 77 | if self.affine: 78 | nn.init.constant_(self.weight, 1) 79 | nn.init.constant_(self.bias, 0) 80 | 81 | def forward(self, x): 82 | output = inplace_abn( 83 | x, self.weight, self.bias, self.running_mean, self.running_var, 84 | self.training, self.momentum, self.eps, self.act_name, self.act_param) 85 | if isinstance(output, tuple): 86 | output = output[0] 87 | return output 88 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/linear.py: -------------------------------------------------------------------------------- 1 | """ Linear layer (alternate definition) 2 | """ 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn as nn 6 | 7 | 8 | class Linear(nn.Linear): 9 | r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b` 10 | 11 | Wraps torch.nn.Linear to support AMP + torchscript usage by manually casting 12 | weight & bias to input.dtype to work around an issue w/ torch.addmm in this use case. 
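    Note that the cast only happens under torch.jit.is_scripting(); the eager path
    below falls through to a plain F.linear call.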
13 | """ 14 | def forward(self, input: torch.Tensor) -> torch.Tensor: 15 | if torch.jit.is_scripting(): 16 | bias = self.bias.to(dtype=input.dtype) if self.bias is not None else None 17 | return F.linear(input, self.weight.to(dtype=input.dtype), bias=bias) 18 | else: 19 | return F.linear(input, self.weight, self.bias) 20 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/median_pool.py: -------------------------------------------------------------------------------- 1 | """ Median Pool 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from .helpers import to_2tuple, to_4tuple 7 | 8 | 9 | class MedianPool2d(nn.Module): 10 | """ Median pool (usable as median filter when stride=1) module. 11 | 12 | Args: 13 | kernel_size: size of pooling kernel, int or 2-tuple 14 | stride: pool stride, int or 2-tuple 15 | padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad 16 | same: override padding and enforce same padding, boolean 17 | """ 18 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 19 | super(MedianPool2d, self).__init__() 20 | self.k = to_2tuple(kernel_size) 21 | self.stride = to_2tuple(stride) 22 | self.padding = to_4tuple(padding) # convert to l, r, t, b 23 | self.same = same 24 | 25 | def _padding(self, x): 26 | if self.same: 27 | ih, iw = x.size()[2:] 28 | if ih % self.stride[0] == 0: 29 | ph = max(self.k[0] - self.stride[0], 0) 30 | else: 31 | ph = max(self.k[0] - (ih % self.stride[0]), 0) 32 | if iw % self.stride[1] == 0: 33 | pw = max(self.k[1] - self.stride[1], 0) 34 | else: 35 | pw = max(self.k[1] - (iw % self.stride[1]), 0) 36 | pl = pw // 2 37 | pr = pw - pl 38 | pt = ph // 2 39 | pb = ph - pt 40 | padding = (pl, pr, pt, pb) 41 | else: 42 | padding = self.padding 43 | return padding 44 | 45 | def forward(self, x): 46 | x = F.pad(x, self._padding(x), mode='reflect') 47 | x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) 48 | x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] 49 | return x 50 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/mixed_conv2d.py: -------------------------------------------------------------------------------- 1 | """ PyTorch Mixed Convolution 2 | 3 | Paper: MixConv: Mixed Depthwise Convolutional Kernels (https://arxiv.org/abs/1907.09595) 4 | 5 | Hacked together by / Copyright 2020 Ross Wightman 6 | """ 7 | 8 | import torch 9 | from torch import nn as nn 10 | 11 | from .conv2d_same import create_conv2d_pad 12 | 13 | 14 | def _split_channels(num_chan, num_groups): 15 | split = [num_chan // num_groups for _ in range(num_groups)] 16 | split[0] += num_chan - sum(split) 17 | return split 18 | 19 | 20 | class MixedConv2d(nn.ModuleDict): 21 | """ Mixed Grouped Convolution 22 | 23 | Based on MDConv and GroupedConv in MixNet impl: 24 | https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py 25 | """ 26 | def __init__(self, in_channels, out_channels, kernel_size=3, 27 | stride=1, padding='', dilation=1, depthwise=False, **kwargs): 28 | super(MixedConv2d, self).__init__() 29 | 30 | kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size] 31 | num_groups = len(kernel_size) 32 | in_splits = _split_channels(in_channels, num_groups) 33 | out_splits = _split_channels(out_channels, num_groups) 34 | self.in_channels 
= sum(in_splits) 35 | self.out_channels = sum(out_splits) 36 | for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)): 37 | conv_groups = in_ch if depthwise else 1 38 | # use add_module to keep key space clean 39 | self.add_module( 40 | str(idx), 41 | create_conv2d_pad( 42 | in_ch, out_ch, k, stride=stride, 43 | padding=padding, dilation=dilation, groups=conv_groups, **kwargs) 44 | ) 45 | self.splits = in_splits 46 | 47 | def forward(self, x): 48 | x_split = torch.split(x, self.splits, 1) 49 | x_out = [c(x_split[i]) for i, c in enumerate(self.values())] 50 | x = torch.cat(x_out, 1) 51 | return x 52 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/norm.py: -------------------------------------------------------------------------------- 1 | """ Normalization layers and wrappers 2 | """ 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class GroupNorm(nn.GroupNorm): 9 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True): 10 | # NOTE num_channels is swapped to first arg for consistency in swapping norm layers with BN 11 | super().__init__(num_groups, num_channels, eps=eps, affine=affine) 12 | 13 | def forward(self, x): 14 | return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 15 | 16 | 17 | class LayerNorm2d(nn.LayerNorm): 18 | """ LayerNorm for channels of '2D' spatial BCHW tensors """ 19 | def __init__(self, num_channels): 20 | super().__init__(num_channels) 21 | 22 | def forward(self, x: torch.Tensor) -> torch.Tensor: 23 | return F.layer_norm( 24 | x.permute(0, 2, 3, 1), self.normalized_shape, self.weight, self.bias, self.eps).permute(0, 3, 1, 2) 25 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/norm_act.py: -------------------------------------------------------------------------------- 1 | """ Normalization + Activation Layers 2 | """ 3 | import torch 4 | from torch import nn as nn 5 | from torch.nn import functional as F 6 | 7 | from .create_act import get_act_layer 8 | 9 | 10 | class BatchNormAct2d(nn.BatchNorm2d): 11 | """BatchNorm + Activation 12 | 13 | This module performs BatchNorm + Activation in a manner that will remain backwards 14 | compatible with weights trained with separate bn, act. This is why we inherit from BN 15 | instead of composing it as a .bn member. 16 | """ 17 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, 18 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 19 | super(BatchNormAct2d, self).__init__( 20 | num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats) 21 | if isinstance(act_layer, str): 22 | act_layer = get_act_layer(act_layer) 23 | if act_layer is not None and apply_act: 24 | act_args = dict(inplace=True) if inplace else {} 25 | self.act = act_layer(**act_args) 26 | else: 27 | self.act = nn.Identity() 28 | 29 | def _forward_jit(self, x): 30 | """ A cut & paste of the contents of the PyTorch BatchNorm2d forward function 31 | """ 32 | # exponential_average_factor is self.momentum set to 33 | # (when it is available) only so that if gets updated 34 | # in ONNX graph when this node is exported to ONNX. 
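        # in other words: when self.momentum is set it is used directly as the factor,
        # and when it is None a cumulative average with factor 1/num_batches_tracked is used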
35 | if self.momentum is None: 36 | exponential_average_factor = 0.0 37 | else: 38 | exponential_average_factor = self.momentum 39 | 40 | if self.training and self.track_running_stats: 41 | # TODO: if statement only here to tell the jit to skip emitting this when it is None 42 | if self.num_batches_tracked is not None: 43 | self.num_batches_tracked += 1 44 | if self.momentum is None: # use cumulative moving average 45 | exponential_average_factor = 1.0 / float(self.num_batches_tracked) 46 | else: # use exponential moving average 47 | exponential_average_factor = self.momentum 48 | 49 | x = F.batch_norm( 50 | x, self.running_mean, self.running_var, self.weight, self.bias, 51 | self.training or not self.track_running_stats, 52 | exponential_average_factor, self.eps) 53 | return x 54 | 55 | @torch.jit.ignore 56 | def _forward_python(self, x): 57 | return super(BatchNormAct2d, self).forward(x) 58 | 59 | def forward(self, x): 60 | # FIXME cannot call parent forward() and maintain jit.script compatibility? 61 | if torch.jit.is_scripting(): 62 | x = self._forward_jit(x) 63 | else: 64 | x = self._forward_python(x) 65 | x = self.act(x) 66 | return x 67 | 68 | 69 | class GroupNormAct(nn.GroupNorm): 70 | # NOTE num_channel and num_groups order flipped for easier layer swaps / binding of fixed args 71 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True, 72 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 73 | super(GroupNormAct, self).__init__(num_groups, num_channels, eps=eps, affine=affine) 74 | if isinstance(act_layer, str): 75 | act_layer = get_act_layer(act_layer) 76 | if act_layer is not None and apply_act: 77 | act_args = dict(inplace=True) if inplace else {} 78 | self.act = act_layer(**act_args) 79 | else: 80 | self.act = nn.Identity() 81 | 82 | def forward(self, x): 83 | x = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 84 | x = self.act(x) 85 | return x 86 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/padding.py: -------------------------------------------------------------------------------- 1 | """ Padding Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import math 6 | from typing import List, Tuple 7 | 8 | import torch.nn.functional as F 9 | 10 | 11 | # Calculate symmetric padding for a convolution 12 | def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **_) -> int: 13 | padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 14 | return padding 15 | 16 | 17 | # Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution 18 | def get_same_padding(x: int, k: int, s: int, d: int): 19 | return max((math.ceil(x / s) - 1) * s + (k - 1) * d + 1 - x, 0) 20 | 21 | 22 | # Can SAME padding for given args be done statically? 
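# (it can whenever the total pad does not depend on the input size and splits evenly:
#  stride == 1 and dilation * (kernel_size - 1) even, e.g. k=3, s=1, d=1 -> 1 per side)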
23 | def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_): 24 | return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0 25 | 26 | 27 | # Dynamically pad input x with 'SAME' padding for conv with specified args 28 | def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0): 29 | ih, iw = x.size()[-2:] 30 | pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding(iw, k[1], s[1], d[1]) 31 | if pad_h > 0 or pad_w > 0: 32 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value) 33 | return x 34 | 35 | 36 | def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]: 37 | dynamic = False 38 | if isinstance(padding, str): 39 | # for any string padding, the padding will be calculated for you, one of three ways 40 | padding = padding.lower() 41 | if padding == 'same': 42 | # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact 43 | if is_static_pad(kernel_size, **kwargs): 44 | # static case, no extra overhead 45 | padding = get_padding(kernel_size, **kwargs) 46 | else: 47 | # dynamic 'SAME' padding, has runtime/GPU memory overhead 48 | padding = 0 49 | dynamic = True 50 | elif padding == 'valid': 51 | # 'VALID' padding, same as padding=0 52 | padding = 0 53 | else: 54 | # Default to PyTorch style 'same'-ish symmetric padding 55 | padding = get_padding(kernel_size, **kwargs) 56 | return padding, dynamic 57 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | """ Image to Patch Embedding using Conv2d 2 | 3 | A convolution based approach to patchifying a 2D image w/ embedding projection. 
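(For the defaults below, img_size=224 and patch_size=16 give grid_size = (14, 14),
i.e. num_patches = 196.)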
4 | 5 | Based on the impl in https://github.com/google-research/vision_transformer 6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | from torch import nn as nn 10 | 11 | from .helpers import to_2tuple 12 | from .trace_utils import _assert 13 | 14 | 15 | class PatchEmbed(nn.Module): 16 | """ 2D Image to Patch Embedding 17 | """ 18 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None, flatten=True): 19 | super().__init__() 20 | img_size = to_2tuple(img_size) 21 | patch_size = to_2tuple(patch_size) 22 | self.img_size = img_size 23 | self.patch_size = patch_size 24 | self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) 25 | self.num_patches = self.grid_size[0] * self.grid_size[1] 26 | self.flatten = flatten 27 | 28 | # self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 29 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=3, stride=1,padding=1) # Modify convolution 30 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 31 | 32 | def forward(self, x): 33 | B, C, H, W = x.shape 34 | _assert(H == self.img_size[0], f"Input image height ({H}) doesn't match model ({self.img_size[0]}).") 35 | _assert(W == self.img_size[1], f"Input image width ({W}) doesn't match model ({self.img_size[1]}).") 36 | x = self.proj(x) 37 | if self.flatten: 38 | x = x.flatten(2).transpose(1, 2) # BCHW -> BNC 39 | x = self.norm(x) 40 | return x 41 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/pool2d_same.py: -------------------------------------------------------------------------------- 1 | """ AvgPool2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import List, Tuple, Optional 9 | 10 | from .helpers import to_2tuple 11 | from .padding import pad_same, get_padding_value 12 | 13 | 14 | def avg_pool2d_same(x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 15 | ceil_mode: bool = False, count_include_pad: bool = True): 16 | # FIXME how to deal with count_include_pad vs not for external padding? 
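    # strategy: explicitly pad the input to TF 'SAME' geometry first, then pool with zero
    # internal padding, so the output spatial size is ceil(input_size / stride)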
17 | x = pad_same(x, kernel_size, stride) 18 | return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 19 | 20 | 21 | class AvgPool2dSame(nn.AvgPool2d): 22 | """ Tensorflow like 'SAME' wrapper for 2D average pooling 23 | """ 24 | def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True): 25 | kernel_size = to_2tuple(kernel_size) 26 | stride = to_2tuple(stride) 27 | super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 28 | 29 | def forward(self, x): 30 | x = pad_same(x, self.kernel_size, self.stride) 31 | return F.avg_pool2d( 32 | x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad) 33 | 34 | 35 | def max_pool2d_same( 36 | x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 37 | dilation: List[int] = (1, 1), ceil_mode: bool = False): 38 | x = pad_same(x, kernel_size, stride, value=-float('inf')) 39 | return F.max_pool2d(x, kernel_size, stride, (0, 0), dilation, ceil_mode) 40 | 41 | 42 | class MaxPool2dSame(nn.MaxPool2d): 43 | """ Tensorflow like 'SAME' wrapper for 2D max pooling 44 | """ 45 | def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False): 46 | kernel_size = to_2tuple(kernel_size) 47 | stride = to_2tuple(stride) 48 | dilation = to_2tuple(dilation) 49 | super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, ceil_mode) 50 | 51 | def forward(self, x): 52 | x = pad_same(x, self.kernel_size, self.stride, value=-float('inf')) 53 | return F.max_pool2d(x, self.kernel_size, self.stride, (0, 0), self.dilation, self.ceil_mode) 54 | 55 | 56 | def create_pool2d(pool_type, kernel_size, stride=None, **kwargs): 57 | stride = stride or kernel_size 58 | padding = kwargs.pop('padding', '') 59 | padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, **kwargs) 60 | if is_dynamic: 61 | if pool_type == 'avg': 62 | return AvgPool2dSame(kernel_size, stride=stride, **kwargs) 63 | elif pool_type == 'max': 64 | return MaxPool2dSame(kernel_size, stride=stride, **kwargs) 65 | else: 66 | assert False, f'Unsupported pool type {pool_type}' 67 | else: 68 | if pool_type == 'avg': 69 | return nn.AvgPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 70 | elif pool_type == 'max': 71 | return nn.MaxPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 72 | else: 73 | assert False, f'Unsupported pool type {pool_type}' 74 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/separable_conv.py: -------------------------------------------------------------------------------- 1 | """ Depthwise Separable Conv Modules 2 | 3 | Basic DWS convs. Other variations of DWS exist with batch norm or activations between the 4 | DW and PW convs such as the Depthwise modules in MobileNetV2 / EfficientNet and Xception. 
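A k x k depthwise conv plus a 1 x 1 pointwise conv needs roughly C_in * k^2 + C_in * C_out
weights, versus C_in * C_out * k^2 for the equivalent full convolution.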
5 | 6 | Hacked together by / Copyright 2020 Ross Wightman 7 | """ 8 | from torch import nn as nn 9 | 10 | from .create_conv2d import create_conv2d 11 | from .create_norm_act import convert_norm_act 12 | 13 | 14 | class SeparableConvBnAct(nn.Module): 15 | """ Separable Conv w/ trailing Norm and Activation 16 | """ 17 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 18 | channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, 19 | apply_act=True, drop_block=None): 20 | super(SeparableConvBnAct, self).__init__() 21 | 22 | self.conv_dw = create_conv2d( 23 | in_channels, int(in_channels * channel_multiplier), kernel_size, 24 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 25 | 26 | self.conv_pw = create_conv2d( 27 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 28 | 29 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 30 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 31 | 32 | @property 33 | def in_channels(self): 34 | return self.conv_dw.in_channels 35 | 36 | @property 37 | def out_channels(self): 38 | return self.conv_pw.out_channels 39 | 40 | def forward(self, x): 41 | x = self.conv_dw(x) 42 | x = self.conv_pw(x) 43 | if self.bn is not None: 44 | x = self.bn(x) 45 | return x 46 | 47 | 48 | class SeparableConv2d(nn.Module): 49 | """ Separable Conv 50 | """ 51 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 52 | channel_multiplier=1.0, pw_kernel_size=1): 53 | super(SeparableConv2d, self).__init__() 54 | 55 | self.conv_dw = create_conv2d( 56 | in_channels, int(in_channels * channel_multiplier), kernel_size, 57 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 58 | 59 | self.conv_pw = create_conv2d( 60 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 61 | 62 | @property 63 | def in_channels(self): 64 | return self.conv_dw.in_channels 65 | 66 | @property 67 | def out_channels(self): 68 | return self.conv_pw.out_channels 69 | 70 | def forward(self, x): 71 | x = self.conv_dw(x) 72 | x = self.conv_pw(x) 73 | return x 74 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/space_to_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SpaceToDepth(nn.Module): 6 | def __init__(self, block_size=4): 7 | super().__init__() 8 | assert block_size == 4 9 | self.bs = block_size 10 | 11 | def forward(self, x): 12 | N, C, H, W = x.size() 13 | x = x.view(N, C, H // self.bs, self.bs, W // self.bs, self.bs) # (N, C, H//bs, bs, W//bs, bs) 14 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 15 | x = x.view(N, C * (self.bs ** 2), H // self.bs, W // self.bs) # (N, C*bs^2, H//bs, W//bs) 16 | return x 17 | 18 | 19 | @torch.jit.script 20 | class SpaceToDepthJit(object): 21 | def __call__(self, x: torch.Tensor): 22 | # assuming hard-coded that block_size==4 for acceleration 23 | N, C, H, W = x.size() 24 | x = x.view(N, C, H // 4, 4, W // 4, 4) # (N, C, H//bs, bs, W//bs, bs) 25 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 26 | x = x.view(N, C * 16, H // 4, W // 4) # (N, C*bs^2, H//bs, W//bs) 27 | return x 28 | 29 | 30 | class SpaceToDepthModule(nn.Module): 31 
| def __init__(self, no_jit=False): 32 | super().__init__() 33 | if not no_jit: 34 | self.op = SpaceToDepthJit() 35 | else: 36 | self.op = SpaceToDepth() 37 | 38 | def forward(self, x): 39 | return self.op(x) 40 | 41 | 42 | class DepthToSpace(nn.Module): 43 | 44 | def __init__(self, block_size): 45 | super().__init__() 46 | self.bs = block_size 47 | 48 | def forward(self, x): 49 | N, C, H, W = x.size() 50 | x = x.view(N, self.bs, self.bs, C // (self.bs ** 2), H, W) # (N, bs, bs, C//bs^2, H, W) 51 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # (N, C//bs^2, H, bs, W, bs) 52 | x = x.view(N, C // (self.bs ** 2), H * self.bs, W * self.bs) # (N, C//bs^2, H * bs, W * bs) 53 | return x 54 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/split_attn.py: -------------------------------------------------------------------------------- 1 | """ Split Attention Conv2d (for ResNeSt Models) 2 | 3 | Paper: `ResNeSt: Split-Attention Networks` - /https://arxiv.org/abs/2004.08955 4 | 5 | Adapted from original PyTorch impl at https://github.com/zhanghang1989/ResNeSt 6 | 7 | Modified for torchscript compat, performance, and consistency with timm by Ross Wightman 8 | """ 9 | import torch 10 | import torch.nn.functional as F 11 | from torch import nn 12 | 13 | from .helpers import make_divisible 14 | 15 | 16 | class RadixSoftmax(nn.Module): 17 | def __init__(self, radix, cardinality): 18 | super(RadixSoftmax, self).__init__() 19 | self.radix = radix 20 | self.cardinality = cardinality 21 | 22 | def forward(self, x): 23 | batch = x.size(0) 24 | if self.radix > 1: 25 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 26 | x = F.softmax(x, dim=1) 27 | x = x.reshape(batch, -1) 28 | else: 29 | x = torch.sigmoid(x) 30 | return x 31 | 32 | 33 | class SplitAttn(nn.Module): 34 | """Split-Attention (aka Splat) 35 | """ 36 | def __init__(self, in_channels, out_channels=None, kernel_size=3, stride=1, padding=None, 37 | dilation=1, groups=1, bias=False, radix=2, rd_ratio=0.25, rd_channels=None, rd_divisor=8, 38 | act_layer=nn.ReLU, norm_layer=None, drop_block=None, **kwargs): 39 | super(SplitAttn, self).__init__() 40 | out_channels = out_channels or in_channels 41 | self.radix = radix 42 | self.drop_block = drop_block 43 | mid_chs = out_channels * radix 44 | if rd_channels is None: 45 | attn_chs = make_divisible(in_channels * radix * rd_ratio, min_value=32, divisor=rd_divisor) 46 | else: 47 | attn_chs = rd_channels * radix 48 | 49 | padding = kernel_size // 2 if padding is None else padding 50 | self.conv = nn.Conv2d( 51 | in_channels, mid_chs, kernel_size, stride, padding, dilation, 52 | groups=groups * radix, bias=bias, **kwargs) 53 | self.bn0 = norm_layer(mid_chs) if norm_layer else nn.Identity() 54 | self.act0 = act_layer(inplace=True) 55 | self.fc1 = nn.Conv2d(out_channels, attn_chs, 1, groups=groups) 56 | self.bn1 = norm_layer(attn_chs) if norm_layer else nn.Identity() 57 | self.act1 = act_layer(inplace=True) 58 | self.fc2 = nn.Conv2d(attn_chs, mid_chs, 1, groups=groups) 59 | self.rsoftmax = RadixSoftmax(radix, groups) 60 | 61 | def forward(self, x): 62 | x = self.conv(x) 63 | x = self.bn0(x) 64 | if self.drop_block is not None: 65 | x = self.drop_block(x) 66 | x = self.act0(x) 67 | 68 | B, RC, H, W = x.shape 69 | if self.radix > 1: 70 | x = x.reshape((B, self.radix, RC // self.radix, H, W)) 71 | x_gap = x.sum(dim=1) 72 | else: 73 | x_gap = x 74 | x_gap = x_gap.mean((2, 3), keepdim=True) 75 | x_gap = self.fc1(x_gap) 76 | 
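        # x_gap is now (B, attn_chs, 1, 1); bn1/act1/fc2 below expand this squeezed
        # descriptor back into radix * out_channels attention logits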
x_gap = self.bn1(x_gap) 77 | x_gap = self.act1(x_gap) 78 | x_attn = self.fc2(x_gap) 79 | 80 | x_attn = self.rsoftmax(x_attn).view(B, -1, 1, 1) 81 | if self.radix > 1: 82 | out = (x * x_attn.reshape((B, self.radix, RC // self.radix, 1, 1))).sum(dim=1) 83 | else: 84 | out = x * x_attn 85 | return out.contiguous() 86 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/split_batchnorm.py: -------------------------------------------------------------------------------- 1 | """ Split BatchNorm 2 | 3 | A PyTorch BatchNorm layer that splits input batch into N equal parts and passes each through 4 | a separate BN layer. The first split is passed through the parent BN layers with weight/bias 5 | keys the same as the original BN. All other splits pass through BN sub-layers under the '.aux_bn' 6 | namespace. 7 | 8 | This allows easily removing the auxiliary BN layers after training to efficiently 9 | achieve the 'Auxiliary BatchNorm' as described in the AdvProp Paper, section 4.2, 10 | 'Disentangled Learning via An Auxiliary BN' 11 | 12 | Hacked together by / Copyright 2020 Ross Wightman 13 | """ 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | class SplitBatchNorm2d(torch.nn.BatchNorm2d): 19 | 20 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, 21 | track_running_stats=True, num_splits=2): 22 | super().__init__(num_features, eps, momentum, affine, track_running_stats) 23 | assert num_splits > 1, 'Should have at least one aux BN layer (num_splits at least 2)' 24 | self.num_splits = num_splits 25 | self.aux_bn = nn.ModuleList([ 26 | nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats) for _ in range(num_splits - 1)]) 27 | 28 | def forward(self, input: torch.Tensor): 29 | if self.training: # aux BN only relevant while training 30 | split_size = input.shape[0] // self.num_splits 31 | assert input.shape[0] == split_size * self.num_splits, "batch size must be evenly divisible by num_splits" 32 | split_input = input.split(split_size) 33 | x = [super().forward(split_input[0])] 34 | for i, a in enumerate(self.aux_bn): 35 | x.append(a(split_input[i + 1])) 36 | return torch.cat(x, dim=0) 37 | else: 38 | return super().forward(input) 39 | 40 | 41 | def convert_splitbn_model(module, num_splits=2): 42 | """ 43 | Recursively traverse module and its children to replace all instances of 44 | ``torch.nn.modules.batchnorm._BatchNorm`` with `SplitBatchnorm2d`. 
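    The returned parent BN shares the original running statistics, while each auxiliary
    BN receives its own clone of them (see the loop below).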
45 | Args: 46 | module (torch.nn.Module): input module 47 | num_splits: number of separate batchnorm layers to split input across 48 | Example:: 49 | >>> # model is an instance of torch.nn.Module 50 | >>> model = timm.models.convert_splitbn_model(model, num_splits=2) 51 | """ 52 | mod = module 53 | if isinstance(module, torch.nn.modules.instancenorm._InstanceNorm): 54 | return module 55 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): 56 | mod = SplitBatchNorm2d( 57 | module.num_features, module.eps, module.momentum, module.affine, 58 | module.track_running_stats, num_splits=num_splits) 59 | mod.running_mean = module.running_mean 60 | mod.running_var = module.running_var 61 | mod.num_batches_tracked = module.num_batches_tracked 62 | if module.affine: 63 | mod.weight.data = module.weight.data.clone().detach() 64 | mod.bias.data = module.bias.data.clone().detach() 65 | for aux in mod.aux_bn: 66 | aux.running_mean = module.running_mean.clone() 67 | aux.running_var = module.running_var.clone() 68 | aux.num_batches_tracked = module.num_batches_tracked.clone() 69 | if module.affine: 70 | aux.weight.data = module.weight.data.clone().detach() 71 | aux.bias.data = module.bias.data.clone().detach() 72 | for name, child in module.named_children(): 73 | mod.add_module(name, convert_splitbn_model(child, num_splits=num_splits)) 74 | del module 75 | return mod 76 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/squeeze_excite.py: -------------------------------------------------------------------------------- 1 | """ Squeeze-and-Excitation Channel Attention 2 | 3 | An SE implementation originally based on PyTorch SE-Net impl. 4 | Has since evolved with additional functionality / configuration. 5 | 6 | Paper: `Squeeze-and-Excitation Networks` - https://arxiv.org/abs/1709.01507 7 | 8 | Also included is Effective Squeeze-Excitation (ESE). 9 | Paper: `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 10 | 11 | Hacked together by / Copyright 2021 Ross Wightman 12 | """ 13 | from torch import nn as nn 14 | 15 | from .create_act import create_act_layer 16 | from .helpers import make_divisible 17 | 18 | 19 | class SEModule(nn.Module): 20 | """ SE Module as defined in original SE-Nets with a few additions 21 | Additions include: 22 | * divisor can be specified to keep channels % div == 0 (default: 8) 23 | * reduction channels can be specified directly by arg (if rd_channels is set) 24 | * reduction channels can be specified by float rd_ratio (default: 1/16) 25 | * global max pooling can be added to the squeeze aggregation 26 | * customizable activation, normalization, and gate layer 27 | """ 28 | def __init__( 29 | self, channels, rd_ratio=1. / 16, rd_channels=None, rd_divisor=8, add_maxpool=False, 30 | act_layer=nn.ReLU, norm_layer=None, gate_layer='sigmoid'): 31 | super(SEModule, self).__init__() 32 | self.add_maxpool = add_maxpool 33 | if not rd_channels: 34 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
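        # e.g. channels=512 with the default rd_ratio=1./16 and rd_divisor=8 gives
        # rd_channels=32, i.e. the classic 512 -> 32 -> 512 squeeze-excite bottleneck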
35 | self.fc1 = nn.Conv2d(channels, rd_channels, kernel_size=1, bias=True) 36 | self.bn = norm_layer(rd_channels) if norm_layer else nn.Identity() 37 | self.act = create_act_layer(act_layer, inplace=True) 38 | self.fc2 = nn.Conv2d(rd_channels, channels, kernel_size=1, bias=True) 39 | self.gate = create_act_layer(gate_layer) 40 | 41 | def forward(self, x): 42 | x_se = x.mean((2, 3), keepdim=True) 43 | if self.add_maxpool: 44 | # experimental codepath, may remove or change 45 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 46 | x_se = self.fc1(x_se) 47 | x_se = self.act(self.bn(x_se)) 48 | x_se = self.fc2(x_se) 49 | return x * self.gate(x_se) 50 | 51 | 52 | SqueezeExcite = SEModule # alias 53 | 54 | 55 | class EffectiveSEModule(nn.Module): 56 | """ 'Effective Squeeze-Excitation 57 | From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 58 | """ 59 | def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmoid', **_): 60 | super(EffectiveSEModule, self).__init__() 61 | self.add_maxpool = add_maxpool 62 | self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0) 63 | self.gate = create_act_layer(gate_layer) 64 | 65 | def forward(self, x): 66 | x_se = x.mean((2, 3), keepdim=True) 67 | if self.add_maxpool: 68 | # experimental codepath, may remove or change 69 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 70 | x_se = self.fc(x_se) 71 | return x * self.gate(x_se) 72 | 73 | 74 | EffectiveSqueezeExcite = EffectiveSEModule # alias 75 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/test_time_pool.py: -------------------------------------------------------------------------------- 1 | """ Test Time Pooling (Average-Max Pool) 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | import logging 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | from .adaptive_avgmax_pool import adaptive_avgmax_pool2d 11 | 12 | 13 | _logger = logging.getLogger(__name__) 14 | 15 | 16 | class TestTimePoolHead(nn.Module): 17 | def __init__(self, base, original_pool=7): 18 | super(TestTimePoolHead, self).__init__() 19 | self.base = base 20 | self.original_pool = original_pool 21 | base_fc = self.base.get_classifier() 22 | if isinstance(base_fc, nn.Conv2d): 23 | self.fc = base_fc 24 | else: 25 | self.fc = nn.Conv2d( 26 | self.base.num_features, self.base.num_classes, kernel_size=1, bias=True) 27 | self.fc.weight.data.copy_(base_fc.weight.data.view(self.fc.weight.size())) 28 | self.fc.bias.data.copy_(base_fc.bias.data.view(self.fc.bias.size())) 29 | self.base.reset_classifier(0) # delete original fc layer 30 | 31 | def forward(self, x): 32 | x = self.base.forward_features(x) 33 | x = F.avg_pool2d(x, kernel_size=self.original_pool, stride=1) 34 | x = self.fc(x) 35 | x = adaptive_avgmax_pool2d(x, 1) 36 | return x.view(x.size(0), -1) 37 | 38 | 39 | def apply_test_time_pool(model, config, use_test_size=True): 40 | test_time_pool = False 41 | if not hasattr(model, 'default_cfg') or not model.default_cfg: 42 | return model, False 43 | if use_test_size and 'test_input_size' in model.default_cfg: 44 | df_input_size = model.default_cfg['test_input_size'] 45 | else: 46 | df_input_size = model.default_cfg['input_size'] 47 | if config['input_size'][-1] > df_input_size[-1] and config['input_size'][-2] > df_input_size[-2]: 48 | _logger.info('Target input size %s > pretrained default %s, using test time pooling' % 49 | 
(str(config['input_size'][-2:]), str(df_input_size[-2:]))) 50 | model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size']) 51 | test_time_pool = True 52 | return model, test_time_pool 53 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/trace_utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch import _assert 3 | except ImportError: 4 | def _assert(condition: bool, message: str): 5 | assert condition, message 6 | 7 | 8 | def _float_to_int(x: float) -> int: 9 | """ 10 | Symbolic tracing helper to substitute for inbuilt `int`. 11 | Hint: Inbuilt `int` can't accept an argument of type `Proxy` 12 | """ 13 | return int(x) 14 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/layers/weight_init.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 
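    (Sampling draws uniformly between the normal CDF values of the two cutoffs and then
    applies the inverse CDF, as implemented in _no_grad_trunc_normal_ above.)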
52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | 65 | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='normal'): 66 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 67 | if mode == 'fan_in': 68 | denom = fan_in 69 | elif mode == 'fan_out': 70 | denom = fan_out 71 | elif mode == 'fan_avg': 72 | denom = (fan_in + fan_out) / 2 73 | 74 | variance = scale / denom 75 | 76 | if distribution == "truncated_normal": 77 | # constant is stddev of standard normal truncated to (-2, 2) 78 | trunc_normal_(tensor, std=math.sqrt(variance) / .87962566103423978) 79 | elif distribution == "normal": 80 | tensor.normal_(std=math.sqrt(variance)) 81 | elif distribution == "uniform": 82 | bound = math.sqrt(3 * variance) 83 | tensor.uniform_(-bound, bound) 84 | else: 85 | raise ValueError(f"invalid distribution {distribution}") 86 | 87 | 88 | def lecun_normal_(tensor): 89 | variance_scaling_(tensor, mode='fan_in', distribution='truncated_normal') 90 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/nestnet/visualizer.py: -------------------------------------------------------------------------------- 1 | from bytecode import Bytecode, Instr 2 | 3 | class get_local(object): 4 | cache = {} 5 | is_activate = False 6 | 7 | def __init__(self, varname): 8 | self.varname = varname 9 | 10 | def __call__(self, func): 11 | if not type(self).is_activate: 12 | return func 13 | 14 | type(self).cache[func.__qualname__] = [] 15 | c = Bytecode.from_code(func.__code__) 16 | extra_code = [ 17 | Instr('STORE_FAST', '_res'), 18 | Instr('LOAD_FAST', self.varname), 19 | Instr('STORE_FAST', '_value'), 20 | Instr('LOAD_FAST', '_res'), 21 | Instr('LOAD_FAST', '_value'), 22 | Instr('BUILD_TUPLE', 2), 23 | Instr('STORE_FAST', '_result_tuple'), 24 | Instr('LOAD_FAST', '_result_tuple'), 25 | ] 26 | c[-1:-1] = extra_code 27 | func.__code__ = c.to_code() 28 | 29 | def wrapper(*args, **kwargs): 30 | res, values = func(*args, **kwargs) 31 | type(self).cache[func.__qualname__].append(values.detach().cpu().numpy()) 32 | return res 33 | return wrapper 34 | 35 | @classmethod 36 | def clear(cls): 37 | for key in cls.cache.keys(): 38 | cls.cache[key] = [] 39 | 40 | @classmethod 41 | def activate(cls): 42 | cls.is_activate = True 43 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/se_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # https://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html 6 | 7 | class SEBlock(nn.Module): 8 | 9 | def __init__(self, input_channels, internal_neurons): 10 | super(SEBlock, self).__init__() 11 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 12 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 13 | self.input_channels = input_channels 14 | 15 | def forward(self, inputs): 16 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 17 | 
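        # global average pool: (B, C, H, W) -> (B, C, 1, 1); only inputs.size(3) sets the
        # kernel, so square feature maps are implicitly assumed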
x = self.down(x)
18 |         x = F.relu(x)
19 |         x = self.up(x)
20 |         x = torch.sigmoid(x)
21 |         x = x.view(-1, self.input_channels, 1, 1)
22 |         return inputs * x
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/trainers/basetrainer.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | import torch
4 | from src.utils import save
5 | 
6 | 
7 | class TrainerBase():
8 |     def __init__(self, args, model, criterion, optimizer, scheduler, device, dataloaders):
9 |         self.args = args
10 |         self.model = model
11 |         self.best_model = copy.deepcopy(model.state_dict())
12 |         self.device = device
13 |         self.criterion = criterion
14 |         self.optimizer = optimizer
15 |         self.dataloaders = dataloaders
16 |         self.scheduler = scheduler
17 |         self.earlyStop = args['early_stop']
18 | 
19 |         # default saving root for checkpoints and stats; point this at an absolute
20 |         # path if savings should live outside the working directory,
21 |         # e.g. self.saving_path = '/path/to/your/savings/'
22 |         self.saving_path = './savings/'
23 | 
24 | 
25 |     def make_stat(self, prev, curr):
26 |         new_stats = []
27 |         for i in range(len(prev)):
28 |             if curr[i] > prev[i]:
29 |                 new_stats.append(f'{curr[i]:.4f} \u2191')
30 |             elif curr[i] < prev[i]:
31 |                 new_stats.append(f'{curr[i]:.4f} \u2193')
32 |             else:
33 |                 new_stats.append(f'{curr[i]:.4f} -')
34 |         return new_stats
35 | 
36 |     def get_saving_file_name(self):
37 |         best_test_stats = self.all_test_stats[self.best_epoch - 1]
38 | 
39 |         name = f'{self.args["model"]}_{self.args["modalities"]}_'
40 | 
41 |         if self.args['loss'] == 'bce':
42 |             name += f'Acc_{best_test_stats[0][-1]:.4f}_'
43 |             name += f'F1_{best_test_stats[3][-1]:.4f}_'
44 |             name += f'AUC_{best_test_stats[4][-1]:.4f}_'
45 |         else:
46 |             name += f'{best_test_stats[0]:.4f}_'
47 |             name += f'{best_test_stats[1]:.4f}_'
48 |             name += f'{best_test_stats[2]:.4f}_'
49 |             name += f'{best_test_stats[3]:.4f}_'
50 | 
51 |         name += f'imginvl{self.args["img_interval"]}_'
52 | 
53 |         if self.args['model'] == 'mme2e_sparse':
54 |             name += f'st_{self.args["sparse_threshold"]}_'
55 | 
56 |         name += f'seed{self.args["seed"]}'
57 |         name += '.pt'
58 | 
59 |         return name
60 | 
61 |     def save_stats(self):
62 |         stats = {
63 |             'args': self.args,
64 |             'train_stats': self.all_train_stats,
65 |             'valid_stats': self.all_valid_stats,
66 |             'test_stats': self.all_test_stats,
67 |             'best_valid_stats': self.best_valid_stats,
68 |             'best_epoch': self.best_epoch
69 |         }
70 | 
71 |         save(stats, os.path.join(self.saving_path, 'stats', self.get_saving_file_name()))
72 | 
73 |         # csv_path = os.path.join(self.saving_path, 'csv', self.get_saving_file_name()).replace('.pt', '.csv')
74 |         # dirname = os.path.dirname(csv_path)
75 |         # if not os.path.exists(dirname):
76 |         #     os.makedirs(dirname)
77 |         # with open(csv_path, 'w') as f:
78 |         #     for stat in self.all_test_stats[self.best_epoch - 1]:
79 |         #         for n in stat:
80 |         #             f.write(f'{n:.4f},')
81 |         #         f.write('\n')
82 |         #     f.write(str(self.args))
83 |         #     f.write('\n')
84 | 
85 |     def save_model(self):
86 |         torch.save(self.best_model, os.path.join(self.saving_path, 'models', self.get_saving_file_name()))
87 | 
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/trainers/r_emotiontrainer.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from tqdm import tqdm
3 | from src.trainers.basetrainer import TrainerBase
4 | from transformers import AlbertTokenizer
5 | import numpy as np
6 | 
7 | class IemocapTrainer(TrainerBase):
8 |     def
__init__(self, args, model, criterion, optimizer, scheduler, device, dataloaders):
9 |         super(IemocapTrainer, self).__init__(args, model, criterion, optimizer, scheduler, device, dataloaders)
10 | 
11 |         self.args = args
12 |         self.text_max_len = args['text_max_len']
13 |         # self.tokenizer = AlbertTokenizer.from_pretrained(f'albert-{args["text_model_size"]}-v2')
14 |         self.tokenizer = AlbertTokenizer.from_pretrained('./src/models/albert-base-v2')
15 |         self.all_test_stats = []
16 |         annotations = dataloaders['test'].dataset.get_annotations()
17 |         self.best_epoch = -1
18 | 
19 |     def test(self):
20 |         return self.eval_one_epoch('test')
21 | 
22 |     def eval_one_epoch(self, phase='valid', thresholds=None):
23 | 
24 |         for m in self.model.modules():
25 |             if hasattr(m, 'switch_to_deploy'):
26 |                 m.switch_to_deploy()  # switch every module to deploy mode
27 |         self.model.eval()
28 |         dataloader = self.dataloaders[phase]
29 | 
30 |         data_size = 0
31 |         total_logits = []
32 |         total_Y = []
33 |         pbar = tqdm(dataloader, desc=phase)
34 | 
35 |         for utteranceId, imgs, imgLens, specgrams, specgramLens, text, Y in pbar:
36 |             text = self.tokenizer(text, return_tensors='pt', max_length=self.text_max_len, padding='max_length', truncation=True)
37 | 
38 |             # imgs = imgs.to(device=self.device)
39 |             specgrams = specgrams.to(device=self.device)
40 |             text = text.to(device=self.device)
41 |             Y = Y.to(device=self.device)
42 | 
43 |             with torch.set_grad_enabled(False):
44 |                 logits = self.model(imgs, imgLens, specgrams, specgramLens, text)  # (batch_size, num_classes)
45 |                 data_size += Y.size(0)
46 | 
47 |             total_logits.append(logits.cpu())
48 |             total_Y.append(Y.cpu())
49 | 
50 | 
51 |         total_logits = torch.cat(total_logits, dim=0)
52 |         total_Y = torch.cat(total_Y, dim=0)
53 |         preds = torch.sigmoid(total_logits)
54 |         mean_preds = torch.mean(preds, dim=0)
55 |         print('six emotional values for one video:\n')
56 |         print(mean_preds)
57 | 
58 |         with open("result.txt", 'w') as f:  # overwrite, so the file always holds exactly six values
59 |             mean = np.array(mean_preds)
60 |             for i in range(len(mean)):
61 |                 f.write(str(mean[i]))
62 |                 f.write('\n')
63 |         print('finished writing result.txt')  # result.txt is read back by the serving app
64 | 
65 |         return total_logits, total_Y
66 | 
--------------------------------------------------------------------------------
/FV2ES/V2EM_prediction/src/transformer_encoder.py:
--------------------------------------------------------------------------------
1 | import math
2 | from typing import Optional, List
3 | import torch
4 | from torch import nn
5 | from src.utils import padTensor
6 | 
7 | class WrappedTransformerEncoder(nn.Module):
8 |     def __init__(self, dim, num_layers, num_heads):
9 |         super(WrappedTransformerEncoder, self).__init__()
10 |         self.dim = dim
11 |         encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=num_heads)
12 |         self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)
13 |         self.cls_emb = nn.Embedding(num_embeddings=1, embedding_dim=dim)
14 | 
15 |     def prepend_cls(self, inputs):
16 |         index = torch.LongTensor([0]).to(device=inputs.device)
17 |         cls_emb = self.cls_emb(index)
18 |         cls_emb = cls_emb.expand(inputs.size(0), 1, self.dim)
19 |         outputs = torch.cat((cls_emb, inputs), dim=1)
20 |         return outputs
21 | 
22 |     def forward(self, inputs: torch.Tensor, lens: Optional[List[int]] = None, get_cls: Optional[bool] = False):
23 |         if lens is not None:
24 |             max_len = max(lens)
25 | 
26 |             mask = [([False] * (l + int(get_cls)) + [True] * (max_len - l)) for l in lens]
27 |             mask = torch.tensor(mask).to(device=inputs.device)
28 | 
29 |             inputs = list(inputs.split(lens, dim=0))
30 |             inputs =
[padTensor(inp, max_len) for inp in inputs] 31 | inputs = torch.stack(inputs, dim=0) 32 | else: 33 | mask = None 34 | 35 | if get_cls: 36 | inputs = self.prepend_cls(inputs) 37 | 38 | inputs = inputs.permute(1, 0, 2) 39 | # inputs = self.pos_encoder(inputs) 40 | inputs = self.encoder(src=inputs, src_key_padding_mask=mask) # (seq_len, bs, dim) 41 | 42 | if get_cls: 43 | return inputs[0] 44 | 45 | return inputs[1:].permute(1, 0, 2) 46 | 47 | -------------------------------------------------------------------------------- /FV2ES/V2EM_prediction/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | from torchvision import transforms 7 | 8 | def save(toBeSaved, filename, mode='wb'): 9 | dirname = os.path.dirname(filename) 10 | if not os.path.exists(dirname): 11 | os.makedirs(dirname) 12 | file = open(filename, mode) 13 | pickle.dump(toBeSaved, file, protocol=4) 14 | file.close() 15 | 16 | def load(filename, mode='rb'): 17 | file = open(filename, mode) 18 | loaded = pickle.load(file) 19 | file.close() 20 | return loaded 21 | 22 | # For python2 23 | def load2(path): 24 | with open(path, 'rb') as f: 25 | u = pickle._Unpickler(f) 26 | u.encoding = 'latin1' 27 | p = u.load() 28 | return p 29 | 30 | def pad_sents(sents, pad_token): 31 | sents_padded = [] 32 | lens = get_lens(sents) 33 | max_len = max(lens) 34 | sents_padded = [sents[i] + [pad_token] * (max_len - l) for i, l in enumerate(lens)] 35 | return sents_padded, lens 36 | 37 | def sort_sents(sents, reverse=True): 38 | sents.sort(key=(lambda s: len(s)), reverse=reverse) 39 | return sents 40 | 41 | def get_mask(sents, unmask_idx=1, mask_idx=0): 42 | lens = get_lens(sents) 43 | max_len = max(lens) 44 | mask = [([unmask_idx] * l + [mask_idx] * (max_len - l)) for l in lens] 45 | return mask 46 | 47 | def get_lens(sents): 48 | return [len(sent) for sent in sents] 49 | 50 | def get_max_len(sents): 51 | max_len = max([len(sent) for sent in sents]) 52 | return max_len 53 | 54 | def truncate_sents(sents, length): 55 | sents = [sent[:length] for sent in sents] 56 | return sents 57 | 58 | def get_loss_weight(labels, label_order): 59 | nums = [np.sum(labels == lo) for lo in label_order] 60 | loss_weight = torch.tensor([n / len(labels) for n in nums]) 61 | return loss_weight 62 | 63 | def capitalize_first_letter(data): 64 | return [word.capitalize() for word in data] 65 | 66 | def cmumosei_round(a): 67 | if a < -2: 68 | res = -3 69 | if -2 <= a and a < -1: 70 | res = -2 71 | if -1 <= a and a < 0: 72 | res = -1 73 | if 0 <= a and a <= 0: 74 | res = 0 75 | if 0 < a and a <= 1: 76 | res = 1 77 | if 1 < a and a <= 2: 78 | res = 2 79 | if a > 2: 80 | res = 3 81 | return res 82 | 83 | # From MTCNN 84 | def fixed_image_standardization(image_tensor: torch.tensor) -> torch.tensor: 85 | processed_tensor = (image_tensor - 127.5) / 128.0 86 | return processed_tensor 87 | 88 | def padTensor(t: torch.tensor, targetLen: int) -> torch.tensor: 89 | oriLen, dim = t.size() 90 | return torch.cat((t, torch.zeros(targetLen - oriLen, dim).to(t.device)), dim=0) 91 | 92 | def calc_percent(x: torch.tensor): 93 | total = np.prod(np.array(x.size())) 94 | positive = x.sum().item() 95 | return positive / total 96 | -------------------------------------------------------------------------------- /V2EM/src/model/attention_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch 
import nn 3 | import torch.nn.functional as F 4 | from typing import List 5 | 6 | 7 | class CrossModalAttentionLayer(nn.Module): 8 | # y attends x 9 | def __init__(self, k, x_channels: int, y_size: int, spatial=True): 10 | super(CrossModalAttentionLayer, self).__init__() 11 | self.k = k 12 | self.spatial = spatial 13 | 14 | if spatial: 15 | self.channel_affine = nn.Linear(x_channels, k) 16 | 17 | self.y_affine = nn.Linear(y_size, k, bias=False) 18 | self.attn_weight_affine = nn.Linear(k, 1) 19 | 20 | def forward(self, x: List[torch.Tensor], x_lens: List[int], y: torch.Tensor): 21 | # x -> [(S, C, H, W)], len(x) = bs 22 | # y -> (bs, D) 23 | 24 | bs = y.size(0) 25 | x = x.split(x_lens, dim=0) 26 | y_k = self.y_affine(y) # (bs, k) 27 | 28 | all_spatial_attn_weights_softmax = [] 29 | 30 | for i in range(bs): 31 | if self.spatial: 32 | x_tensor = x[i].permute(0, 2, 3, 1) # (S_v, H_v, W_v, C_v) 33 | x_k = self.channel_affine(x_tensor) # (S_v, H_v, W_v, k) 34 | x_k += y_k[i] 35 | x_k = torch.tanh(x_k) 36 | x_attn_weights = self.attn_weight_affine(x_k).squeeze(-1) # (S_v, H_v, W_v) 37 | 38 | all_spatial_attn_weights_softmax.append( 39 | F.softmax( 40 | x_attn_weights.reshape(x_tensor.size(0), -1), 41 | dim=-1 42 | ).reshape(x_tensor.size(0), x_tensor.size(1), x_tensor.size(2)) # (S_v, H_v, W_v) 43 | ) 44 | 45 | return torch.cat(all_spatial_attn_weights_softmax, dim=0) 46 | 47 | class SparseCrossModalAttentionLayer(nn.Module): 48 | def __init__(self, k: int, x_channels: int, y_size: int, sparse_threshold: float): 49 | super(SparseCrossModalAttentionLayer, self).__init__() 50 | self.k = k 51 | self.sparse_threshold = sparse_threshold 52 | self.channel_affine = nn.Linear(x_channels, k) 53 | self.y_affine = nn.Linear(y_size, k, bias=False) 54 | self.attn_weight_affine = nn.Linear(k, 1) 55 | 56 | def forward(self, x: List[torch.Tensor], x_lens: List[int], locations: List[torch.Tensor], y: torch.Tensor): 57 | # x -> (N, C) 58 | # locations -> (N, 3) 59 | # y -> (bs, D) 60 | bs = y.size(0) 61 | y_k = self.y_affine(y) # (bs, k) 62 | x_k = self.channel_affine(x) # (N, k) 63 | 64 | sample_points_lens = [] 65 | for i in range(sum(x_lens)): 66 | sample_points_lens.append(len(locations[locations[:, 2] == i])) 67 | 68 | # how much points are left in each batch 69 | batch_points_lens = [] 70 | pointer = 0 71 | for l in x_lens: 72 | batch_points_lens.append(sum(sample_points_lens[pointer:(pointer + l)])) 73 | pointer += l 74 | 75 | x_ks = x_k.split(batch_points_lens, dim=0) 76 | 77 | attn_weights = [] 78 | for i in range(bs): 79 | this_weights = self.attn_weight_affine(torch.tanh(x_ks[i] + y_k[i])).squeeze(-1) 80 | attn_weights.append(this_weights) 81 | 82 | attn_weights = torch.cat(attn_weights, dim=0) 83 | attn_weights_split = list(attn_weights.split(sample_points_lens, dim=0)) 84 | attn_weights_split = [F.softmax(a, dim=-1) for a in attn_weights_split] 85 | attn_weights = torch.cat(attn_weights_split, dim=0) 86 | 87 | attn_weights_sparse = to_sparse_by_cdf(attn_weights, sample_points_lens, self.sparse_threshold) 88 | 89 | select_indices = attn_weights_sparse == 1 90 | new_x = x[select_indices, :] 91 | new_locations = locations[select_indices, :] 92 | 93 | return new_x, new_locations, None 94 | 95 | def to_sparse_by_cdf(t: torch.tensor, lens, cdf: float): 96 | _t = t.clone().detach() 97 | _t = list(_t.split(lens, dim=0)) 98 | 99 | for i, this_t in enumerate(_t): 100 | this_t_sorted, indices = torch.sort(this_t, descending=True) 101 | mask = torch.cumsum(this_t_sorted, dim=-1) < cdf 102 | mask[torch.sum(mask)] 
= True 103 | _t[i][indices[mask]] = 1 104 | _t[i][indices[~mask]] = 0 105 | 106 | return torch.cat(_t, dim=0).long() 107 | -------------------------------------------------------------------------------- /V2EM/src/model/baselines/lf_rnn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | # from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence 5 | from typing import List 6 | from src.utils import padTensor 7 | 8 | pad_token_id = 0 9 | unk_token_id = 1 10 | 11 | class LF_RNN(nn.Module): 12 | def __init__(self, args, num_layers=1, dropout=0.1, bi=True): 13 | super(LF_RNN, self).__init__() 14 | feature_sizes = args['hfc_sizes'] 15 | num_classes = args['num_emotions'] 16 | self.mods = args['modalities'] 17 | 18 | feature_sizes = np.array(feature_sizes) 19 | 20 | self.rnns = nn.ModuleDict({ 21 | 't': nn.LSTM( 22 | input_size=feature_sizes[0], 23 | hidden_size=feature_sizes[0], 24 | num_layers=num_layers, 25 | dropout=(dropout if num_layers > 1 else 0), 26 | bidirectional=bi 27 | ), 28 | 'a': nn.LSTM( 29 | input_size=feature_sizes[1], 30 | hidden_size=feature_sizes[1], 31 | num_layers=num_layers, 32 | dropout=(dropout if num_layers > 1 else 0), 33 | bidirectional=bi 34 | ), 35 | 'v': nn.LSTM( 36 | input_size=feature_sizes[2], 37 | hidden_size=feature_sizes[2], 38 | num_layers=num_layers, 39 | dropout=(dropout if num_layers > 1 else 0), 40 | bidirectional=bi 41 | ) 42 | }) 43 | 44 | linear_in_sizes = feature_sizes if not bi else feature_sizes * 2 45 | 46 | self.affines = nn.ModuleDict({ 47 | 't': nn.Sequential( 48 | nn.Linear(linear_in_sizes[0], linear_in_sizes[0] // 2), 49 | nn.ReLU(), 50 | nn.Linear(linear_in_sizes[0] // 2, num_classes) 51 | ), 52 | 'a': nn.Sequential( 53 | nn.Linear(linear_in_sizes[1], linear_in_sizes[1] // 2), 54 | nn.ReLU(), 55 | nn.Linear(linear_in_sizes[1] // 2, num_classes) 56 | ), 57 | 'v': nn.Sequential( 58 | nn.Linear(linear_in_sizes[2], linear_in_sizes[2] // 2), 59 | nn.ReLU(), 60 | nn.Linear(linear_in_sizes[2] // 2, num_classes) 61 | ) 62 | }) 63 | 64 | self.weighted_fusion = nn.Linear(len(self.mods), 1, bias=False) 65 | 66 | def forward(self, img_features, img_features_lens, audio_features, audio_features_lens, texts): 67 | all_logits = [] 68 | 69 | if 't' in self.mods: 70 | output_t, _ = self.rnns['t'](texts.transpose(0, 1)) 71 | output_t = output_t[-1, :, :] 72 | output_t = self.affines['t'](output_t) 73 | all_logits.append(output_t) 74 | 75 | if 'a' in self.mods: 76 | max_len = max(audio_features_lens) 77 | audio_features = audio_features.split(audio_features_lens, dim=0) 78 | audio_features = [padTensor(s, max_len) for s in audio_features] 79 | audio_features = torch.stack(audio_features, dim=1) # (seq_len, batch, dim) 80 | _, (audio_hn, _) = self.rnns['a'](audio_features) 81 | audio_hn = audio_hn.transpose(0, 1).flatten(start_dim=1) # (batch, hid_dim * 2) 82 | audio_hn = self.affines['a'](audio_hn) 83 | all_logits.append(audio_hn) 84 | 85 | if 'v' in self.mods: 86 | max_len = max(img_features_lens) 87 | img_features = img_features.split(img_features_lens, dim=0) 88 | img_features = [padTensor(s, max_len) for s in img_features] 89 | img_features = torch.stack(img_features, dim=1) # (seq_len, batch, dim) 90 | _, (img_hn, _) = self.rnns['v'](img_features) 91 | img_hn = img_hn.transpose(0, 1).flatten(start_dim=1) # (batch, hid_dim * 2) 92 | img_hn = self.affines['v'](img_hn) 93 | all_logits.append(img_hn) 94 | 95 | if len(self.mods) == 1: 96 | 
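            # single-modality case: no learned fusion is needed, return that head's logits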
return all_logits[0] 97 | 98 | return self.weighted_fusion(torch.stack(all_logits, dim=-1)).squeeze(-1) 99 | -------------------------------------------------------------------------------- /V2EM/src/model/baselines/lf_transformer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from torch import nn 4 | from typing import List 5 | from src.model.transformer_encoder import WrappedTransformerEncoder 6 | 7 | 8 | class LF_Transformer(nn.Module): 9 | def __init__(self, args): 10 | super(LF_Transformer, self).__init__() 11 | num_classes = args['num_emotions'] 12 | self.mods = args['modalities'] 13 | feature_sizes = np.array(args['hfc_sizes']) 14 | nlayers = args['trans_nlayers'] 15 | # nheads = args['trans_nheads'] 16 | # trans_dim = args['trans_dim'] 17 | 18 | self.transformers = nn.ModuleDict({ 19 | 't': WrappedTransformerEncoder( 20 | dim=feature_sizes[0], # 300 21 | num_layers=nlayers, 22 | num_heads=4 23 | ), 24 | 'a': WrappedTransformerEncoder( 25 | dim=feature_sizes[1], # 2 padding features are added to bring the size to 144, so it divides evenly by the number of heads 26 | num_layers=nlayers, 27 | num_heads=2 28 | ), 29 | 'v': WrappedTransformerEncoder( 30 | dim=feature_sizes[2], # 35 31 | num_layers=nlayers, 32 | num_heads=5 33 | ) 34 | }) 35 | 36 | self.affines = nn.ModuleDict({ 37 | 't': nn.Sequential( 38 | nn.Linear(feature_sizes[0], feature_sizes[0] // 2), 39 | nn.ReLU(), 40 | nn.Linear(feature_sizes[0] // 2, num_classes) 41 | ), 42 | 'a': nn.Sequential( 43 | nn.Linear(feature_sizes[1], feature_sizes[1] // 2), 44 | nn.ReLU(), 45 | nn.Linear(feature_sizes[1] // 2, num_classes) 46 | ), 47 | 'v': nn.Sequential( 48 | nn.Linear(feature_sizes[2], feature_sizes[2] // 2), 49 | nn.ReLU(), 50 | nn.Linear(feature_sizes[2] // 2, num_classes) 51 | ) 52 | }) 53 | 54 | self.weighted_fusion = nn.Linear(len(self.mods), 1, bias=False) 55 | 56 | def forward(self, img_features, img_features_lens, audio_features, audio_features_lens, texts): 57 | all_logits = [] 58 | 59 | if 't' in self.mods: 60 | texts = self.transformers['t'](texts, get_cls=True) 61 | texts = self.affines['t'](texts) 62 | all_logits.append(texts) 63 | 64 | if 'a' in self.mods: 65 | audio_features = self.transformers['a'](audio_features, audio_features_lens, get_cls=True) 66 | audio_features = self.affines['a'](audio_features) 67 | all_logits.append(audio_features) 68 | 69 | if 'v' in self.mods: 70 | img_features = self.transformers['v'](img_features, img_features_lens, get_cls=True) 71 | img_features = self.affines['v'](img_features) 72 | all_logits.append(img_features) 73 | 74 | if len(self.mods) == 1: 75 | return all_logits[0] 76 | 77 | return self.weighted_fusion(torch.stack(all_logits, dim=-1)).squeeze(-1) 78 | -------------------------------------------------------------------------------- /V2EM/src/model/e2e_t.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | from transformers import AlbertModel 3 | 4 | class MME2E_T(nn.Module): 5 | def __init__(self, feature_dim, num_classes=4, size='base'): 6 | super(MME2E_T, self).__init__() 7 | self.albert = AlbertModel.from_pretrained(f'albert-{size}-v2') 8 | # self.albert = AlbertModel.from_pretrained('./src/models/albert-base-v2') 9 | 10 | 11 | def forward(self, text, get_cls=False): 12 | last_hidden_state = self.albert(**text).last_hidden_state 13 | # print(last_hidden_state) 14 | if get_cls: 15 | cls_feature = last_hidden_state[:, 0] 16 | return cls_feature 17 | 18 | 
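# NOTE: `text_feature_affine` is used below but never defined in __init__ above,
# so this branch (get_cls=False) would raise AttributeError as written. A minimal
# sketch of a fix, assuming the otherwise-unused `feature_dim` argument is the
# intended output size (this is an assumption, not the authors' definition):
#   self.text_feature_affine = nn.Linear(self.albert.config.hidden_size, feature_dim)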
text_features = self.text_feature_affine(last_hidden_state).sum(1) 19 | return text_features 20 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/fx_features.py: -------------------------------------------------------------------------------- 1 | """ PyTorch FX Based Feature Extraction Helpers 2 | Using https://pytorch.org/vision/stable/feature_extraction.html 3 | """ 4 | from typing import Callable 5 | from torch import nn 6 | 7 | from .features import _get_feature_info 8 | 9 | try: 10 | from torchvision.models.feature_extraction import create_feature_extractor 11 | has_fx_feature_extraction = True 12 | except ImportError: 13 | has_fx_feature_extraction = False 14 | 15 | # Layers we want to treat as leaf modules 16 | from .layers import Conv2dSame, ScaledStdConv2dSame, BatchNormAct2d, BlurPool2d, CondConv2d, StdConv2dSame, DropPath 17 | from .layers.non_local_attn import BilinearAttnTransform 18 | from .layers.pool2d_same import MaxPool2dSame, AvgPool2dSame 19 | 20 | # NOTE: By default, any modules from timm.models.layers that we want to treat as leaf modules go here 21 | # BUT modules from timm.models should use the registration mechanism below 22 | _leaf_modules = { 23 | BatchNormAct2d, # reason: flow control for jit scripting 24 | BilinearAttnTransform, # reason: flow control t <= 1 25 | BlurPool2d, # reason: TypeError: F.conv2d received Proxy in groups=x.shape[1] 26 | # Reason: get_same_padding has a max which raises a control flow error 27 | Conv2dSame, MaxPool2dSame, ScaledStdConv2dSame, StdConv2dSame, AvgPool2dSame, 28 | CondConv2d, # reason: TypeError: F.conv2d received Proxy in groups=self.groups * B (because B = x.shape[0]) 29 | DropPath, # reason: TypeError: rand received Proxy in `size` argument 30 | } 31 | 32 | try: 33 | from .layers import InplaceAbn 34 | _leaf_modules.add(InplaceAbn) 35 | except ImportError: 36 | pass 37 | 38 | 39 | def register_notrace_module(module: nn.Module): 40 | """ 41 | Any module not under timm.models.layers should get this decorator if we don't want to trace through it.
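    A usage sketch (MyDynamicModule stands in for any module with untraceable,
    data-dependent control flow):

        @register_notrace_module
        class MyDynamicModule(nn.Module):
            ...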
42 | """ 43 | _leaf_modules.add(module) 44 | return module 45 | 46 | 47 | # Functions we want to autowrap (treat them as leaves) 48 | _autowrap_functions = set() 49 | 50 | 51 | def register_notrace_function(func: Callable): 52 | """ 53 | Decorator for functions which ought not to be traced through 54 | """ 55 | _autowrap_functions.add(func) 56 | return func 57 | 58 | 59 | class FeatureGraphNet(nn.Module): 60 | def __init__(self, model, out_indices, out_map=None): 61 | super().__init__() 62 | assert has_fx_feature_extraction, 'Please update to PyTorch 1.10+, torchvision 0.11+ for FX feature extraction' 63 | self.feature_info = _get_feature_info(model, out_indices) 64 | if out_map is not None: 65 | assert len(out_map) == len(out_indices) 66 | return_nodes = {info['module']: out_map[i] if out_map is not None else info['module'] 67 | for i, info in enumerate(self.feature_info) if i in out_indices} 68 | self.graph_module = create_feature_extractor( 69 | model, return_nodes, 70 | tracer_kwargs={'leaf_modules': list(_leaf_modules), 'autowrap_functions': list(_autowrap_functions)}) 71 | 72 | def forward(self, x): 73 | return list(self.graph_module(x).values()) -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .activations import * 2 | from .adaptive_avgmax_pool import \ 3 | adaptive_avgmax_pool2d, select_adaptive_pool2d, AdaptiveAvgMaxPool2d, SelectAdaptivePool2d 4 | from .blur_pool import BlurPool2d 5 | from .classifier import ClassifierHead, create_classifier 6 | from .cond_conv2d import CondConv2d, get_condconv_initializer 7 | from .config import is_exportable, is_scriptable, is_no_jit, set_exportable, set_scriptable, set_no_jit,\ 8 | set_layer_config 9 | from .conv2d_same import Conv2dSame, conv2d_same 10 | from .conv_bn_act import ConvBnAct 11 | from .create_act import create_act_layer, get_act_layer, get_act_fn 12 | from .create_attn import get_attn, create_attn 13 | from .create_conv2d import create_conv2d 14 | from .create_norm_act import get_norm_act_layer, create_norm_act, convert_norm_act 15 | from .drop import DropBlock2d, DropPath, drop_block_2d, drop_path 16 | from .eca import EcaModule, CecaModule, EfficientChannelAttn, CircularEfficientChannelAttn 17 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 18 | from .gather_excite import GatherExcite 19 | from .global_context import GlobalContext 20 | from .helpers import to_ntuple, to_2tuple, to_3tuple, to_4tuple, make_divisible 21 | from .inplace_abn import InplaceAbn 22 | from .linear import Linear 23 | from .mixed_conv2d import MixedConv2d 24 | from .mlp import Mlp, GluMlp, GatedMlp, ConvMlp 25 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 26 | from .norm import GroupNorm, LayerNorm2d 27 | from .norm_act import BatchNormAct2d, GroupNormAct 28 | from .padding import get_padding, get_same_padding, pad_same 29 | from .patch_embed import PatchEmbed 30 | from .pool2d_same import AvgPool2dSame, create_pool2d 31 | from .squeeze_excite import SEModule, SqueezeExcite, EffectiveSEModule, EffectiveSqueezeExcite 32 | from .selective_kernel import SelectiveKernel 33 | from .separable_conv import SeparableConv2d, SeparableConvBnAct 34 | from .space_to_depth import SpaceToDepthModule 35 | from .split_attn import SplitAttn 36 | from .split_batchnorm import SplitBatchNorm2d, convert_splitbn_model 37 | from .std_conv import StdConv2d, StdConv2dSame, ScaledStdConv2d, 
ScaledStdConv2dSame 38 | from .test_time_pool import TestTimePoolHead, apply_test_time_pool 39 | from .trace_utils import _assert, _float_to_int 40 | from .weight_init import trunc_normal_, variance_scaling_, lecun_normal_ 41 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/activations_jit.py: -------------------------------------------------------------------------------- 1 | """ Activations 2 | 3 | A collection of jit-scripted activations fn and modules with a common interface so that they can 4 | easily be swapped. All have an `inplace` arg even if not used. 5 | 6 | All jit scripted activations are lacking in-place variations on purpose, scripted kernel fusion does not 7 | currently work across in-place op boundaries, thus performance is equal to or less than the non-scripted 8 | versions if they contain in-place ops. 9 | 10 | Hacked together by / Copyright 2020 Ross Wightman 11 | """ 12 | 13 | import torch 14 | from torch import nn as nn 15 | from torch.nn import functional as F 16 | 17 | 18 | @torch.jit.script 19 | def swish_jit(x, inplace: bool = False): 20 | """Swish - Described in: https://arxiv.org/abs/1710.05941 21 | """ 22 | return x.mul(x.sigmoid()) 23 | 24 | 25 | @torch.jit.script 26 | def mish_jit(x, _inplace: bool = False): 27 | """Mish: A Self Regularized Non-Monotonic Neural Activation Function - https://arxiv.org/abs/1908.08681 28 | """ 29 | return x.mul(F.softplus(x).tanh()) 30 | 31 | 32 | class SwishJit(nn.Module): 33 | def __init__(self, inplace: bool = False): 34 | super(SwishJit, self).__init__() 35 | 36 | def forward(self, x): 37 | return swish_jit(x) 38 | 39 | 40 | class MishJit(nn.Module): 41 | def __init__(self, inplace: bool = False): 42 | super(MishJit, self).__init__() 43 | 44 | def forward(self, x): 45 | return mish_jit(x) 46 | 47 | 48 | @torch.jit.script 49 | def hard_sigmoid_jit(x, inplace: bool = False): 50 | # return F.relu6(x + 3.) / 6. 51 | return (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 52 | 53 | 54 | class HardSigmoidJit(nn.Module): 55 | def __init__(self, inplace: bool = False): 56 | super(HardSigmoidJit, self).__init__() 57 | 58 | def forward(self, x): 59 | return hard_sigmoid_jit(x) 60 | 61 | 62 | @torch.jit.script 63 | def hard_swish_jit(x, inplace: bool = False): 64 | # return x * (F.relu6(x + 3.) / 6) 65 | return x * (x + 3).clamp(min=0, max=6).div(6.) # clamp seems ever so slightly faster? 
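# A quick equivalence sketch (assuming a recent PyTorch where F.hardswish exists):
# hard_swish_jit computes x * clamp(x + 3, 0, 6) / 6, which matches the built-in
# hard-swish, so the following should hold:
#   >>> x = torch.randn(8)
#   >>> torch.allclose(hard_swish_jit(x), torch.nn.functional.hardswish(x))
#   True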
66 | 67 | 68 | class HardSwishJit(nn.Module): 69 | def __init__(self, inplace: bool = False): 70 | super(HardSwishJit, self).__init__() 71 | 72 | def forward(self, x): 73 | return hard_swish_jit(x) 74 | 75 | 76 | @torch.jit.script 77 | def hard_mish_jit(x, inplace: bool = False): 78 | """ Hard Mish 79 | Experimental, based on notes by Mish author Diganta Misra at 80 | https://github.com/digantamisra98/H-Mish/blob/0da20d4bc58e696b6803f2523c58d3c8a82782d0/README.md 81 | """ 82 | return 0.5 * x * (x + 2).clamp(min=0, max=2) 83 | 84 | 85 | class HardMishJit(nn.Module): 86 | def __init__(self, inplace: bool = False): 87 | super(HardMishJit, self).__init__() 88 | 89 | def forward(self, x): 90 | return hard_mish_jit(x) 91 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/blur_pool.py: -------------------------------------------------------------------------------- 1 | """ 2 | BlurPool layer inspired by 3 | - Kornia's Max_BlurPool2d 4 | - Making Convolutional Networks Shift-Invariant Again :cite:`zhang2019shiftinvar` 5 | 6 | Hacked together by Chris Ha and Ross Wightman 7 | """ 8 | 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | import numpy as np 13 | from .padding import get_padding 14 | 15 | 16 | class BlurPool2d(nn.Module): 17 | r"""Creates a module that blurs and downsamples a given feature map. 18 | See :cite:`zhang2019shiftinvar` for more details. 19 | Corresponds to the Downsample class, which does blurring and subsampling 20 | 21 | Args: 22 | channels (int): number of input channels 23 | filt_size (int): binomial filter size for blurring. currently supports 3 (default) and 5. 24 | stride (int): downsampling filter stride 25 | 26 | Returns: 27 | torch.Tensor: the transformed tensor.
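    Shape sketch (with the default 3-tap filter, stride 2, and symmetric padding 1):
        >>> pool = BlurPool2d(channels=8)
        >>> pool(torch.randn(1, 8, 32, 32)).shape
        torch.Size([1, 8, 16, 16])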
28 | """ 29 | def __init__(self, channels, filt_size=3, stride=2) -> None: 30 | super(BlurPool2d, self).__init__() 31 | assert filt_size > 1 32 | self.channels = channels 33 | self.filt_size = filt_size 34 | self.stride = stride 35 | self.padding = [get_padding(filt_size, stride, dilation=1)] * 4 36 | coeffs = torch.tensor((np.poly1d((0.5, 0.5)) ** (self.filt_size - 1)).coeffs.astype(np.float32)) 37 | blur_filter = (coeffs[:, None] * coeffs[None, :])[None, None, :, :].repeat(self.channels, 1, 1, 1) 38 | self.register_buffer('filt', blur_filter, persistent=False) 39 | 40 | def forward(self, x: torch.Tensor) -> torch.Tensor: 41 | x = F.pad(x, self.padding, 'reflect') 42 | return F.conv2d(x, self.filt, stride=self.stride, groups=x.shape[1]) 43 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/classifier.py: -------------------------------------------------------------------------------- 1 | """ Classifier head and layer factory 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | from torch.nn import functional as F 7 | 8 | from .adaptive_avgmax_pool import SelectAdaptivePool2d 9 | 10 | 11 | def _create_pool(num_features, num_classes, pool_type='avg', use_conv=False): 12 | flatten_in_pool = not use_conv # flatten when we use a Linear layer after pooling 13 | if not pool_type: 14 | assert num_classes == 0 or use_conv,\ 15 | 'Pooling can only be disabled if classifier is also removed or conv classifier is used' 16 | flatten_in_pool = False # disable flattening if pooling is pass-through (no pooling) 17 | global_pool = SelectAdaptivePool2d(pool_type=pool_type, flatten=flatten_in_pool) 18 | num_pooled_features = num_features * global_pool.feat_mult() 19 | return global_pool, num_pooled_features 20 | 21 | 22 | def _create_fc(num_features, num_classes, use_conv=False): 23 | if num_classes <= 0: 24 | fc = nn.Identity() # pass-through (no classifier) 25 | elif use_conv: 26 | fc = nn.Conv2d(num_features, num_classes, 1, bias=True) 27 | else: 28 | fc = nn.Linear(num_features, num_classes, bias=True) 29 | return fc 30 | 31 | 32 | def create_classifier(num_features, num_classes, pool_type='avg', use_conv=False): 33 | global_pool, num_pooled_features = _create_pool(num_features, num_classes, pool_type, use_conv=use_conv) 34 | fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 35 | return global_pool, fc 36 | 37 | 38 | class ClassifierHead(nn.Module): 39 | """Classifier head w/ configurable global pooling and dropout.""" 40 | 41 | def __init__(self, in_chs, num_classes, pool_type='avg', drop_rate=0., use_conv=False): 42 | super(ClassifierHead, self).__init__() 43 | self.drop_rate = drop_rate 44 | self.global_pool, num_pooled_features = _create_pool(in_chs, num_classes, pool_type, use_conv=use_conv) 45 | self.fc = _create_fc(num_pooled_features, num_classes, use_conv=use_conv) 46 | self.flatten = nn.Flatten(1) if use_conv and pool_type else nn.Identity() 47 | 48 | def forward(self, x): 49 | x = self.global_pool(x) 50 | if self.drop_rate: 51 | x = F.dropout(x, p=float(self.drop_rate), training=self.training) 52 | x = self.fc(x) 53 | x = self.flatten(x) 54 | return x 55 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/config.py: -------------------------------------------------------------------------------- 1 | """ Model / Layer Config singleton state 2 | """ 3 | from typing import Any, Optional 4 | 5 | __all__ = [ 6 
| 'is_exportable', 'is_scriptable', 'is_no_jit', 7 | 'set_exportable', 'set_scriptable', 'set_no_jit', 'set_layer_config' 8 | ] 9 | 10 | # Set to True if prefer to have layers with no jit optimization (includes activations) 11 | _NO_JIT = False 12 | 13 | # Set to True if prefer to have activation layers with no jit optimization 14 | # NOTE not currently used as no difference between no_jit and no_activation jit as only layers obeying 15 | # the jit flags so far are activations. This will change as more layers are updated and/or added. 16 | _NO_ACTIVATION_JIT = False 17 | 18 | # Set to True if exporting a model with Same padding via ONNX 19 | _EXPORTABLE = False 20 | 21 | # Set to True if wanting to use torch.jit.script on a model 22 | _SCRIPTABLE = False 23 | 24 | 25 | def is_no_jit(): 26 | return _NO_JIT 27 | 28 | 29 | class set_no_jit: 30 | def __init__(self, mode: bool) -> None: 31 | global _NO_JIT 32 | self.prev = _NO_JIT 33 | _NO_JIT = mode 34 | 35 | def __enter__(self) -> None: 36 | pass 37 | 38 | def __exit__(self, *args: Any) -> bool: 39 | global _NO_JIT 40 | _NO_JIT = self.prev 41 | return False 42 | 43 | 44 | def is_exportable(): 45 | return _EXPORTABLE 46 | 47 | 48 | class set_exportable: 49 | def __init__(self, mode: bool) -> None: 50 | global _EXPORTABLE 51 | self.prev = _EXPORTABLE 52 | _EXPORTABLE = mode 53 | 54 | def __enter__(self) -> None: 55 | pass 56 | 57 | def __exit__(self, *args: Any) -> bool: 58 | global _EXPORTABLE 59 | _EXPORTABLE = self.prev 60 | return False 61 | 62 | 63 | def is_scriptable(): 64 | return _SCRIPTABLE 65 | 66 | 67 | class set_scriptable: 68 | def __init__(self, mode: bool) -> None: 69 | global _SCRIPTABLE 70 | self.prev = _SCRIPTABLE 71 | _SCRIPTABLE = mode 72 | 73 | def __enter__(self) -> None: 74 | pass 75 | 76 | def __exit__(self, *args: Any) -> bool: 77 | global _SCRIPTABLE 78 | _SCRIPTABLE = self.prev 79 | return False 80 | 81 | 82 | class set_layer_config: 83 | """ Layer config context manager that allows setting all layer config flags at once. 84 | If a flag arg is None, it will not change the current value. 
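    Usage sketch (build_model is a stand-in for any model constructor, not a
    function from this codebase):

        with set_layer_config(scriptable=True, no_jit=True):
            model = build_model()  # layers created here see the temporary flags
        # on exit the previous flag values are restored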
85 | """ 86 | def __init__( 87 | self, 88 | scriptable: Optional[bool] = None, 89 | exportable: Optional[bool] = None, 90 | no_jit: Optional[bool] = None, 91 | no_activation_jit: Optional[bool] = None): 92 | global _SCRIPTABLE 93 | global _EXPORTABLE 94 | global _NO_JIT 95 | global _NO_ACTIVATION_JIT 96 | self.prev = _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT 97 | if scriptable is not None: 98 | _SCRIPTABLE = scriptable 99 | if exportable is not None: 100 | _EXPORTABLE = exportable 101 | if no_jit is not None: 102 | _NO_JIT = no_jit 103 | if no_activation_jit is not None: 104 | _NO_ACTIVATION_JIT = no_activation_jit 105 | 106 | def __enter__(self) -> None: 107 | pass 108 | 109 | def __exit__(self, *args: Any) -> bool: 110 | global _SCRIPTABLE 111 | global _EXPORTABLE 112 | global _NO_JIT 113 | global _NO_ACTIVATION_JIT 114 | _SCRIPTABLE, _EXPORTABLE, _NO_JIT, _NO_ACTIVATION_JIT = self.prev 115 | return False 116 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/conv2d_same.py: -------------------------------------------------------------------------------- 1 | """ Conv2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import Tuple, Optional 9 | 10 | from .padding import pad_same, get_padding_value 11 | 12 | 13 | def conv2d_same( 14 | x, weight: torch.Tensor, bias: Optional[torch.Tensor] = None, stride: Tuple[int, int] = (1, 1), 15 | padding: Tuple[int, int] = (0, 0), dilation: Tuple[int, int] = (1, 1), groups: int = 1): 16 | x = pad_same(x, weight.shape[-2:], stride, dilation) 17 | return F.conv2d(x, weight, bias, stride, (0, 0), dilation, groups) 18 | 19 | 20 | class Conv2dSame(nn.Conv2d): 21 | """ Tensorflow like 'SAME' convolution wrapper for 2D convolutions 22 | """ 23 | 24 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, 25 | padding=0, dilation=1, groups=1, bias=True): 26 | super(Conv2dSame, self).__init__( 27 | in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias) 28 | 29 | def forward(self, x): 30 | return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) 31 | 32 | 33 | def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): 34 | padding = kwargs.pop('padding', '') 35 | kwargs.setdefault('bias', False) 36 | padding, is_dynamic = get_padding_value(padding, kernel_size, **kwargs) 37 | if is_dynamic: 38 | return Conv2dSame(in_chs, out_chs, kernel_size, **kwargs) 39 | else: 40 | return nn.Conv2d(in_chs, out_chs, kernel_size, padding=padding, **kwargs) 41 | 42 | 43 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/conv_bn_act.py: -------------------------------------------------------------------------------- 1 | """ Conv2d + BN + Act 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from torch import nn as nn 6 | 7 | from .create_conv2d import create_conv2d 8 | from .create_norm_act import convert_norm_act 9 | 10 | 11 | class ConvBnAct(nn.Module): 12 | def __init__(self, in_channels, out_channels, kernel_size=1, stride=1, padding='', dilation=1, groups=1, 13 | bias=False, apply_act=True, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, aa_layer=None, 14 | drop_block=None): 15 | super(ConvBnAct, self).__init__() 16 | use_aa = aa_layer is not None 17 | 18 | self.conv = create_conv2d( 19 | in_channels, out_channels, 
kernel_size, stride=1 if use_aa else stride, 20 | padding=padding, dilation=dilation, groups=groups, bias=bias) 21 | 22 | # NOTE for backwards compatibility with models that use separate norm and act layer definitions 23 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 24 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 25 | self.aa = aa_layer(channels=out_channels) if stride == 2 and use_aa else None 26 | 27 | @property 28 | def in_channels(self): 29 | return self.conv.in_channels 30 | 31 | @property 32 | def out_channels(self): 33 | return self.conv.out_channels 34 | 35 | def forward(self, x): 36 | x = self.conv(x) 37 | x = self.bn(x) 38 | if self.aa is not None: 39 | x = self.aa(x) 40 | return x 41 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/create_attn.py: -------------------------------------------------------------------------------- 1 | """ Attention Factory 2 | 3 | Hacked together by / Copyright 2021 Ross Wightman 4 | """ 5 | import torch 6 | from functools import partial 7 | 8 | from .bottleneck_attn import BottleneckAttn 9 | from .cbam import CbamModule, LightCbamModule 10 | from .eca import EcaModule, CecaModule 11 | from .gather_excite import GatherExcite 12 | from .global_context import GlobalContext 13 | from .halo_attn import HaloAttn 14 | from .lambda_layer import LambdaLayer 15 | from .non_local_attn import NonLocalAttn, BatNonLocalAttn 16 | from .selective_kernel import SelectiveKernel 17 | from .split_attn import SplitAttn 18 | from .squeeze_excite import SEModule, EffectiveSEModule 19 | 20 | 21 | def get_attn(attn_type): 22 | if isinstance(attn_type, torch.nn.Module): 23 | return attn_type 24 | module_cls = None 25 | if attn_type is not None: 26 | if isinstance(attn_type, str): 27 | attn_type = attn_type.lower() 28 | # Lightweight attention modules (channel and/or coarse spatial). 29 | # Typically added to existing network architecture blocks in addition to existing convolutions. 30 | if attn_type == 'se': 31 | module_cls = SEModule 32 | elif attn_type == 'ese': 33 | module_cls = EffectiveSEModule 34 | elif attn_type == 'eca': 35 | module_cls = EcaModule 36 | elif attn_type == 'ecam': 37 | module_cls = partial(EcaModule, use_mlp=True) 38 | elif attn_type == 'ceca': 39 | module_cls = CecaModule 40 | elif attn_type == 'ge': 41 | module_cls = GatherExcite 42 | elif attn_type == 'gc': 43 | module_cls = GlobalContext 44 | elif attn_type == 'gca': 45 | module_cls = partial(GlobalContext, fuse_add=True, fuse_scale=False) 46 | elif attn_type == 'cbam': 47 | module_cls = CbamModule 48 | elif attn_type == 'lcbam': 49 | module_cls = LightCbamModule 50 | 51 | # Attention / attention-like modules w/ significant params 52 | # Typically replace some of the existing workhorse convs in a network architecture. 53 | # All of these accept a stride argument and can spatially downsample the input. 54 | elif attn_type == 'sk': 55 | module_cls = SelectiveKernel 56 | elif attn_type == 'splat': 57 | module_cls = SplitAttn 58 | 59 | # Self-attention / attention-like modules w/ significant compute and/or params 60 | # Typically replace some of the existing workhorse convs in a network architecture. 61 | # All of these accept a stride argument and can spatially downsample the input. 
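            # A factory sketch for orientation: get_attn('se') returns the SEModule
            # class (not an instance); create_attn('se', 64) at the bottom of this
            # file then instantiates it for 64 input channels.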
62 | elif attn_type == 'lambda': 63 | return LambdaLayer 64 | elif attn_type == 'bottleneck': 65 | return BottleneckAttn 66 | elif attn_type == 'halo': 67 | return HaloAttn 68 | elif attn_type == 'nl': 69 | module_cls = NonLocalAttn 70 | elif attn_type == 'bat': 71 | module_cls = BatNonLocalAttn 72 | 73 | # Woops! 74 | else: 75 | assert False, "Invalid attn module (%s)" % attn_type 76 | elif isinstance(attn_type, bool): 77 | if attn_type: 78 | module_cls = SEModule 79 | else: 80 | module_cls = attn_type 81 | return module_cls 82 | 83 | 84 | def create_attn(attn_type, channels, **kwargs): 85 | module_cls = get_attn(attn_type) 86 | if module_cls is not None: 87 | # NOTE: it's expected the first (positional) argument of all attention layers is the # input channels 88 | return module_cls(channels, **kwargs) 89 | return None 90 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/create_conv2d.py: -------------------------------------------------------------------------------- 1 | """ Create Conv2d Factory Method 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | from .mixed_conv2d import MixedConv2d 7 | from .cond_conv2d import CondConv2d 8 | from .conv2d_same import create_conv2d_pad 9 | 10 | 11 | def create_conv2d(in_channels, out_channels, kernel_size, **kwargs): 12 | """ Select a 2d convolution implementation based on arguments 13 | Creates and returns one of torch.nn.Conv2d, Conv2dSame, MixedConv2d, or CondConv2d. 14 | 15 | Used extensively by EfficientNet, MobileNetv3 and related networks. 16 | """ 17 | if isinstance(kernel_size, list): 18 | assert 'num_experts' not in kwargs # MixNet + CondConv combo not supported currently 19 | assert 'groups' not in kwargs # MixedConv groups are defined by kernel list 20 | # We're going to use only lists for defining the MixedConv2d kernel groups, 21 | # ints, tuples, other iterables will continue to pass to normal conv and specify h, w. 22 | m = MixedConv2d(in_channels, out_channels, kernel_size, **kwargs) 23 | else: 24 | depthwise = kwargs.pop('depthwise', False) 25 | # for DW out_channels must be multiple of in_channels as must have out_channels % groups == 0 26 | groups = in_channels if depthwise else kwargs.pop('groups', 1) 27 | if 'num_experts' in kwargs and kwargs['num_experts'] > 0: 28 | m = CondConv2d(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 29 | else: 30 | m = create_conv2d_pad(in_channels, out_channels, kernel_size, groups=groups, **kwargs) 31 | return m 32 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/create_norm_act.py: -------------------------------------------------------------------------------- 1 | """ NormAct (Normalization + Activation Layer) Factory 2 | 3 | Create norm + act combo modules that attempt to be backwards compatible with separate norm + act 4 | instances in models. Where these are used it will be possible to swap separate BN + act layers with 5 | combined modules like IABN or EvoNorms.
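A usage sketch:

    norm_act = create_norm_act('batchnorm', 64)  # BatchNormAct2d(64); ReLU applied after the norm by default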
6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | import types 10 | import functools 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .evo_norm import EvoNormBatch2d, EvoNormSample2d 16 | from .norm_act import BatchNormAct2d, GroupNormAct 17 | from .inplace_abn import InplaceAbn 18 | 19 | _NORM_ACT_TYPES = {BatchNormAct2d, GroupNormAct, EvoNormBatch2d, EvoNormSample2d, InplaceAbn} 20 | _NORM_ACT_REQUIRES_ARG = {BatchNormAct2d, GroupNormAct, InplaceAbn} # requires act_layer arg to define act type 21 | 22 | 23 | def get_norm_act_layer(layer_class): 24 | layer_class = layer_class.replace('_', '').lower() 25 | if layer_class.startswith("batchnorm"): 26 | layer = BatchNormAct2d 27 | elif layer_class.startswith("groupnorm"): 28 | layer = GroupNormAct 29 | elif layer_class == "evonormbatch": 30 | layer = EvoNormBatch2d 31 | elif layer_class == "evonormsample": 32 | layer = EvoNormSample2d 33 | elif layer_class == "iabn" or layer_class == "inplaceabn": 34 | layer = InplaceAbn 35 | else: 36 | assert False, "Invalid norm_act layer (%s)" % layer_class 37 | return layer 38 | 39 | 40 | def create_norm_act(layer_type, num_features, apply_act=True, jit=False, **kwargs): 41 | layer_parts = layer_type.split('-') # e.g. batchnorm-leaky_relu 42 | assert len(layer_parts) in (1, 2) 43 | layer = get_norm_act_layer(layer_parts[0]) 44 | #activation_class = layer_parts[1].lower() if len(layer_parts) > 1 else '' # FIXME support string act selection? 45 | layer_instance = layer(num_features, apply_act=apply_act, **kwargs) 46 | if jit: 47 | layer_instance = torch.jit.script(layer_instance) 48 | return layer_instance 49 | 50 | 51 | def convert_norm_act(norm_layer, act_layer): 52 | assert isinstance(norm_layer, (type, str, types.FunctionType, functools.partial)) 53 | assert act_layer is None or isinstance(act_layer, (type, str, types.FunctionType, functools.partial)) 54 | norm_act_kwargs = {} 55 | 56 | # unbind partial fn, so args can be rebound later 57 | if isinstance(norm_layer, functools.partial): 58 | norm_act_kwargs.update(norm_layer.keywords) 59 | norm_layer = norm_layer.func 60 | 61 | if isinstance(norm_layer, str): 62 | norm_act_layer = get_norm_act_layer(norm_layer) 63 | elif norm_layer in _NORM_ACT_TYPES: 64 | norm_act_layer = norm_layer 65 | elif isinstance(norm_layer, types.FunctionType): 66 | # if function type, must be a lambda/fn that creates a norm_act layer 67 | norm_act_layer = norm_layer 68 | else: 69 | type_name = norm_layer.__name__.lower() 70 | if type_name.startswith('batchnorm'): 71 | norm_act_layer = BatchNormAct2d 72 | elif type_name.startswith('groupnorm'): 73 | norm_act_layer = GroupNormAct 74 | else: 75 | assert False, f"No equivalent norm_act layer for {type_name}" 76 | 77 | if norm_act_layer in _NORM_ACT_REQUIRES_ARG: 78 | # pass `act_layer` through for backwards compat where `act_layer=None` implies no activation. 
79 | # In the future, may force use of `apply_act` with `act_layer` arg bound to relevant NormAct types 80 | norm_act_kwargs.setdefault('act_layer', act_layer) 81 | if norm_act_kwargs: 82 | norm_act_layer = functools.partial(norm_act_layer, **norm_act_kwargs) # bind/rebind args 83 | return norm_act_layer 84 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/evo_norm.py: -------------------------------------------------------------------------------- 1 | """EvoNormB0 (Batched) and EvoNormS0 (Sample) in PyTorch 2 | 3 | An attempt at getting decent performing EvoNorms running in PyTorch. 4 | While currently faster than other impl, still quite a ways off the built-in BN 5 | in terms of memory usage and throughput (roughly 5x mem, 1/2 - 1/3x speed). 6 | 7 | Still very much a WIP, fiddling with buffer usage, in-place/jit optimizations, and layouts. 8 | 9 | Hacked together by / Copyright 2020 Ross Wightman 10 | """ 11 | 12 | import torch 13 | import torch.nn as nn 14 | 15 | from .trace_utils import _assert 16 | 17 | 18 | class EvoNormBatch2d(nn.Module): 19 | def __init__(self, num_features, apply_act=True, momentum=0.1, eps=1e-5, drop_block=None): 20 | super(EvoNormBatch2d, self).__init__() 21 | self.apply_act = apply_act # apply activation (non-linearity) 22 | self.momentum = momentum 23 | self.eps = eps 24 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 25 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 26 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 27 | self.register_buffer('running_var', torch.ones(num_features)) 28 | self.reset_parameters() 29 | 30 | def reset_parameters(self): 31 | nn.init.ones_(self.weight) 32 | nn.init.zeros_(self.bias) 33 | if self.apply_act: 34 | nn.init.ones_(self.v) 35 | 36 | def forward(self, x): 37 | _assert(x.dim() == 4, 'expected 4D input') 38 | x_type = x.dtype 39 | if self.v is not None: 40 | running_var = self.running_var.view(1, -1, 1, 1) 41 | if self.training: 42 | var = x.var(dim=(0, 2, 3), unbiased=False, keepdim=True) 43 | n = x.numel() / x.shape[1] 44 | running_var = var.detach() * self.momentum * (n / (n - 1)) + running_var * (1 - self.momentum) 45 | self.running_var.copy_(running_var.view(self.running_var.shape)) 46 | else: 47 | var = running_var 48 | v = self.v.to(dtype=x_type).reshape(1, -1, 1, 1) 49 | d = x * v + (x.var(dim=(2, 3), unbiased=False, keepdim=True) + self.eps).sqrt().to(dtype=x_type) 50 | d = d.max((var + self.eps).sqrt().to(dtype=x_type)) 51 | x = x / d 52 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 53 | 54 | 55 | class EvoNormSample2d(nn.Module): 56 | def __init__(self, num_features, apply_act=True, groups=32, eps=1e-5, drop_block=None): 57 | super(EvoNormSample2d, self).__init__() 58 | self.apply_act = apply_act # apply activation (non-linearity) 59 | self.groups = groups 60 | self.eps = eps 61 | self.weight = nn.Parameter(torch.ones(num_features), requires_grad=True) 62 | self.bias = nn.Parameter(torch.zeros(num_features), requires_grad=True) 63 | self.v = nn.Parameter(torch.ones(num_features), requires_grad=True) if apply_act else None 64 | self.reset_parameters() 65 | 66 | def reset_parameters(self): 67 | nn.init.ones_(self.weight) 68 | nn.init.zeros_(self.bias) 69 | if self.apply_act: 70 | nn.init.ones_(self.v) 71 | 72 | def forward(self, x): 73 | _assert(x.dim() == 4, 'expected 4D input') 74 | B, C, H, W = x.shape 75 | _assert(C % 
self.groups == 0, '') 76 | if self.v is not None: 77 | n = x * (x * self.v.view(1, -1, 1, 1)).sigmoid() 78 | x = x.reshape(B, self.groups, -1) 79 | x = n.reshape(B, self.groups, -1) / (x.var(dim=-1, unbiased=False, keepdim=True) + self.eps).sqrt() 80 | x = x.reshape(B, C, H, W) 81 | return x * self.weight.view(1, -1, 1, 1) + self.bias.view(1, -1, 1, 1) 82 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/gather_excite.py: -------------------------------------------------------------------------------- 1 | """ Gather-Excite Attention Block 2 | 3 | Paper: `Gather-Excite: Exploiting Feature Context in CNNs` - https://arxiv.org/abs/1810.12348 4 | 5 | Official code here, but it's only partial impl in Caffe: https://github.com/hujie-frank/GENet 6 | 7 | I've tried to support all of the extent both w/ and w/o params. I don't believe I've seen another 8 | impl that covers all of the cases. 9 | 10 | NOTE: extent=0 + extra_params=False is equivalent to Squeeze-and-Excitation 11 | 12 | Hacked together by / Copyright 2021 Ross Wightman 13 | """ 14 | import math 15 | 16 | from torch import nn as nn 17 | import torch.nn.functional as F 18 | 19 | from .create_act import create_act_layer, get_act_layer 20 | from .create_conv2d import create_conv2d 21 | from .helpers import make_divisible 22 | from .mlp import ConvMlp 23 | 24 | 25 | class GatherExcite(nn.Module): 26 | """ Gather-Excite Attention Module 27 | """ 28 | def __init__( 29 | self, channels, feat_size=None, extra_params=False, extent=0, use_mlp=True, 30 | rd_ratio=1./16, rd_channels=None, rd_divisor=1, add_maxpool=False, 31 | act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, gate_layer='sigmoid'): 32 | super(GatherExcite, self).__init__() 33 | self.add_maxpool = add_maxpool 34 | act_layer = get_act_layer(act_layer) 35 | self.extent = extent 36 | if extra_params: 37 | self.gather = nn.Sequential() 38 | if extent == 0: 39 | assert feat_size is not None, 'spatial feature size must be specified for global extent w/ params' 40 | self.gather.add_module( 41 | 'conv1', create_conv2d(channels, channels, kernel_size=feat_size, stride=1, depthwise=True)) 42 | if norm_layer: 43 | self.gather.add_module(f'norm1', nn.BatchNorm2d(channels)) 44 | else: 45 | assert extent % 2 == 0 46 | num_conv = int(math.log2(extent)) 47 | for i in range(num_conv): 48 | self.gather.add_module( 49 | f'conv{i + 1}', 50 | create_conv2d(channels, channels, kernel_size=3, stride=2, depthwise=True)) 51 | if norm_layer: 52 | self.gather.add_module(f'norm{i + 1}', nn.BatchNorm2d(channels)) 53 | if i != num_conv - 1: 54 | self.gather.add_module(f'act{i + 1}', act_layer(inplace=True)) 55 | else: 56 | self.gather = None 57 | if self.extent == 0: 58 | self.gk = 0 59 | self.gs = 0 60 | else: 61 | assert extent % 2 == 0 62 | self.gk = self.extent * 2 - 1 63 | self.gs = self.extent 64 | 65 | if not rd_channels: 66 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
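        # Worked example of the reduction arithmetic above (a sketch): with
        # channels=512 and rd_ratio=1/16, make_divisible(32.0, 1, round_limit=0.)
        # returns 32, so the excite MLP below bottlenecks 512 -> 32 -> 512.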
67 | self.mlp = ConvMlp(channels, rd_channels, act_layer=act_layer) if use_mlp else nn.Identity() 68 | self.gate = create_act_layer(gate_layer) 69 | 70 | def forward(self, x): 71 | size = x.shape[-2:] 72 | if self.gather is not None: 73 | x_ge = self.gather(x) 74 | else: 75 | if self.extent == 0: 76 | # global extent 77 | x_ge = x.mean(dim=(2, 3), keepdims=True) 78 | if self.add_maxpool: 79 | # experimental codepath, may remove or change 80 | x_ge = 0.5 * x_ge + 0.5 * x.amax((2, 3), keepdim=True) 81 | else: 82 | x_ge = F.avg_pool2d( 83 | x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2, count_include_pad=False) 84 | if self.add_maxpool: 85 | # experimental codepath, may remove or change 86 | x_ge = 0.5 * x_ge + 0.5 * F.max_pool2d(x, kernel_size=self.gk, stride=self.gs, padding=self.gk // 2) 87 | x_ge = self.mlp(x_ge) 88 | if x_ge.shape[-1] != 1 or x_ge.shape[-2] != 1: 89 | x_ge = F.interpolate(x_ge, size=size) 90 | return x * self.gate(x_ge) 91 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/global_context.py: -------------------------------------------------------------------------------- 1 | """ Global Context Attention Block 2 | 3 | Paper: `GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond` 4 | - https://arxiv.org/abs/1904.11492 5 | 6 | Official code consulted as reference: https://github.com/xvjiarui/GCNet 7 | 8 | Hacked together by / Copyright 2021 Ross Wightman 9 | """ 10 | from torch import nn as nn 11 | import torch.nn.functional as F 12 | 13 | from .create_act import create_act_layer, get_act_layer 14 | from .helpers import make_divisible 15 | from .mlp import ConvMlp 16 | from .norm import LayerNorm2d 17 | 18 | 19 | class GlobalContext(nn.Module): 20 | 21 | def __init__(self, channels, use_attn=True, fuse_add=False, fuse_scale=True, init_last_zero=False, 22 | rd_ratio=1./8, rd_channels=None, rd_divisor=1, act_layer=nn.ReLU, gate_layer='sigmoid'): 23 | super(GlobalContext, self).__init__() 24 | act_layer = get_act_layer(act_layer) 25 | 26 | self.conv_attn = nn.Conv2d(channels, 1, kernel_size=1, bias=True) if use_attn else None 27 | 28 | if rd_channels is None: 29 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
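        # Shape sketch: the block is shape-preserving, e.g. with the defaults
        #   >>> gc = GlobalContext(64)
        #   >>> gc(torch.randn(2, 64, 8, 8)).shape
        #   torch.Size([2, 64, 8, 8])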
30 | if fuse_add: 31 | self.mlp_add = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 32 | else: 33 | self.mlp_add = None 34 | if fuse_scale: 35 | self.mlp_scale = ConvMlp(channels, rd_channels, act_layer=act_layer, norm_layer=LayerNorm2d) 36 | else: 37 | self.mlp_scale = None 38 | 39 | self.gate = create_act_layer(gate_layer) 40 | self.init_last_zero = init_last_zero 41 | self.reset_parameters() 42 | 43 | def reset_parameters(self): 44 | if self.conv_attn is not None: 45 | nn.init.kaiming_normal_(self.conv_attn.weight, mode='fan_in', nonlinearity='relu') 46 | if self.mlp_add is not None: 47 | nn.init.zeros_(self.mlp_add.fc2.weight) 48 | 49 | def forward(self, x): 50 | B, C, H, W = x.shape 51 | 52 | if self.conv_attn is not None: 53 | attn = self.conv_attn(x).reshape(B, 1, H * W) # (B, 1, H * W) 54 | attn = F.softmax(attn, dim=-1).unsqueeze(3) # (B, 1, H * W, 1) 55 | context = x.reshape(B, C, H * W).unsqueeze(1) @ attn 56 | context = context.view(B, C, 1, 1) 57 | else: 58 | context = x.mean(dim=(2, 3), keepdim=True) 59 | 60 | if self.mlp_scale is not None: 61 | mlp_x = self.mlp_scale(context) 62 | x = x * self.gate(mlp_x) 63 | if self.mlp_add is not None: 64 | mlp_x = self.mlp_add(context) 65 | x = x + mlp_x 66 | 67 | return x 68 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/helpers.py: -------------------------------------------------------------------------------- 1 | """ Layer/Module Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | from itertools import repeat 6 | import collections.abc 7 | 8 | 9 | # From PyTorch internals 10 | def _ntuple(n): 11 | def parse(x): 12 | if isinstance(x, collections.abc.Iterable): 13 | return x 14 | return tuple(repeat(x, n)) 15 | return parse 16 | 17 | 18 | to_1tuple = _ntuple(1) 19 | to_2tuple = _ntuple(2) 20 | to_3tuple = _ntuple(3) 21 | to_4tuple = _ntuple(4) 22 | to_ntuple = _ntuple 23 | 24 | 25 | def make_divisible(v, divisor=8, min_value=None, round_limit=.9): 26 | min_value = min_value or divisor 27 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 28 | # Make sure that round down does not go down by more than 10%. 29 | if new_v < round_limit * v: 30 | new_v += divisor 31 | return new_v 32 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/inplace_abn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn as nn 3 | 4 | try: 5 | from inplace_abn.functions import inplace_abn, inplace_abn_sync 6 | has_iabn = True 7 | except ImportError: 8 | has_iabn = False 9 | 10 | def inplace_abn(x, weight, bias, running_mean, running_var, 11 | training=True, momentum=0.1, eps=1e-05, activation="leaky_relu", activation_param=0.01): 12 | raise ImportError( 13 | "Please install InplaceABN:'pip install git+https://github.com/mapillary/inplace_abn.git@v1.0.12'") 14 | 15 | def inplace_abn_sync(**kwargs): 16 | inplace_abn(**kwargs) 17 | 18 | 19 | class InplaceAbn(nn.Module): 20 | """Activated Batch Normalization 21 | 22 | This gathers a BatchNorm and an activation function in a single module 23 | 24 | Parameters 25 | ---------- 26 | num_features : int 27 | Number of feature channels in the input and output. 28 | eps : float 29 | Small constant to prevent numerical issues. 30 | momentum : float 31 | Momentum factor applied to compute running statistics. 
32 | affine : bool 33 | If `True` apply learned scale and shift transformation after normalization. 34 | act_layer : str or nn.Module type 35 | Name or type of the activation functions, one of: `leaky_relu`, `elu` 36 | act_param : float 37 | Negative slope for the `leaky_relu` activation. 38 | """ 39 | 40 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, apply_act=True, 41 | act_layer="leaky_relu", act_param=0.01, drop_block=None): 42 | super(InplaceAbn, self).__init__() 43 | self.num_features = num_features 44 | self.affine = affine 45 | self.eps = eps 46 | self.momentum = momentum 47 | if apply_act: 48 | if isinstance(act_layer, str): 49 | assert act_layer in ('leaky_relu', 'elu', 'identity', '') 50 | self.act_name = act_layer if act_layer else 'identity' 51 | else: 52 | # convert act layer passed as type to string 53 | if act_layer == nn.ELU: 54 | self.act_name = 'elu' 55 | elif act_layer == nn.LeakyReLU: 56 | self.act_name = 'leaky_relu' 57 | elif act_layer == nn.Identity: 58 | self.act_name = 'identity' 59 | else: 60 | assert False, f'Invalid act layer {act_layer.__name__} for IABN' 61 | else: 62 | self.act_name = 'identity' 63 | self.act_param = act_param 64 | if self.affine: 65 | self.weight = nn.Parameter(torch.ones(num_features)) 66 | self.bias = nn.Parameter(torch.zeros(num_features)) 67 | else: 68 | self.register_parameter('weight', None) 69 | self.register_parameter('bias', None) 70 | self.register_buffer('running_mean', torch.zeros(num_features)) 71 | self.register_buffer('running_var', torch.ones(num_features)) 72 | self.reset_parameters() 73 | 74 | def reset_parameters(self): 75 | nn.init.constant_(self.running_mean, 0) 76 | nn.init.constant_(self.running_var, 1) 77 | if self.affine: 78 | nn.init.constant_(self.weight, 1) 79 | nn.init.constant_(self.bias, 0) 80 | 81 | def forward(self, x): 82 | output = inplace_abn( 83 | x, self.weight, self.bias, self.running_mean, self.running_var, 84 | self.training, self.momentum, self.eps, self.act_name, self.act_param) 85 | if isinstance(output, tuple): 86 | output = output[0] 87 | return output 88 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/linear.py: -------------------------------------------------------------------------------- 1 | """ Linear layer (alternate definition) 2 | """ 3 | import torch 4 | import torch.nn.functional as F 5 | from torch import nn as nn 6 | 7 | 8 | class Linear(nn.Linear): 9 | r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b` 10 | 11 | Wraps torch.nn.Linear to support AMP + torchscript usage by manually casting 12 | weight & bias to input.dtype to work around an issue w/ torch.addmm in this use case. 
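    Behaviour sketch: outside of scripting this is exactly nn.Linear; under
    torch.jit.script the weight/bias are first cast to the input dtype (e.g.
    fp16 activations under AMP) before calling F.linear.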
13 | """ 14 | def forward(self, input: torch.Tensor) -> torch.Tensor: 15 | if torch.jit.is_scripting(): 16 | bias = self.bias.to(dtype=input.dtype) if self.bias is not None else None 17 | return F.linear(input, self.weight.to(dtype=input.dtype), bias=bias) 18 | else: 19 | return F.linear(input, self.weight, self.bias) 20 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/median_pool.py: -------------------------------------------------------------------------------- 1 | """ Median Pool 2 | Hacked together by / Copyright 2020 Ross Wightman 3 | """ 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from .helpers import to_2tuple, to_4tuple 7 | 8 | 9 | class MedianPool2d(nn.Module): 10 | """ Median pool (usable as median filter when stride=1) module. 11 | 12 | Args: 13 | kernel_size: size of pooling kernel, int or 2-tuple 14 | stride: pool stride, int or 2-tuple 15 | padding: pool padding, int or 4-tuple (l, r, t, b) as in pytorch F.pad 16 | same: override padding and enforce same padding, boolean 17 | """ 18 | def __init__(self, kernel_size=3, stride=1, padding=0, same=False): 19 | super(MedianPool2d, self).__init__() 20 | self.k = to_2tuple(kernel_size) 21 | self.stride = to_2tuple(stride) 22 | self.padding = to_4tuple(padding) # convert to l, r, t, b 23 | self.same = same 24 | 25 | def _padding(self, x): 26 | if self.same: 27 | ih, iw = x.size()[2:] 28 | if ih % self.stride[0] == 0: 29 | ph = max(self.k[0] - self.stride[0], 0) 30 | else: 31 | ph = max(self.k[0] - (ih % self.stride[0]), 0) 32 | if iw % self.stride[1] == 0: 33 | pw = max(self.k[1] - self.stride[1], 0) 34 | else: 35 | pw = max(self.k[1] - (iw % self.stride[1]), 0) 36 | pl = pw // 2 37 | pr = pw - pl 38 | pt = ph // 2 39 | pb = ph - pt 40 | padding = (pl, pr, pt, pb) 41 | else: 42 | padding = self.padding 43 | return padding 44 | 45 | def forward(self, x): 46 | x = F.pad(x, self._padding(x), mode='reflect') 47 | x = x.unfold(2, self.k[0], self.stride[0]).unfold(3, self.k[1], self.stride[1]) 48 | x = x.contiguous().view(x.size()[:4] + (-1,)).median(dim=-1)[0] 49 | return x 50 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/mixed_conv2d.py: -------------------------------------------------------------------------------- 1 | """ PyTorch Mixed Convolution 2 | 3 | Paper: MixConv: Mixed Depthwise Convolutional Kernels (https://arxiv.org/abs/1907.09595) 4 | 5 | Hacked together by / Copyright 2020 Ross Wightman 6 | """ 7 | 8 | import torch 9 | from torch import nn as nn 10 | 11 | from .conv2d_same import create_conv2d_pad 12 | 13 | 14 | def _split_channels(num_chan, num_groups): 15 | split = [num_chan // num_groups for _ in range(num_groups)] 16 | split[0] += num_chan - sum(split) 17 | return split 18 | 19 | 20 | class MixedConv2d(nn.ModuleDict): 21 | """ Mixed Grouped Convolution 22 | 23 | Based on MDConv and GroupedConv in MixNet impl: 24 | https://github.com/tensorflow/tpu/blob/master/models/official/mnasnet/mixnet/custom_layers.py 25 | """ 26 | def __init__(self, in_channels, out_channels, kernel_size=3, 27 | stride=1, padding='', dilation=1, depthwise=False, **kwargs): 28 | super(MixedConv2d, self).__init__() 29 | 30 | kernel_size = kernel_size if isinstance(kernel_size, list) else [kernel_size] 31 | num_groups = len(kernel_size) 32 | in_splits = _split_channels(in_channels, num_groups) 33 | out_splits = _split_channels(out_channels, num_groups) 34 | self.in_channels = sum(in_splits) 35 | 
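        # Worked example of _split_channels above (a sketch):
        #   _split_channels(13, 3) -> [5, 4, 4]  (the first group absorbs the remainder)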
self.out_channels = sum(out_splits) 36 | for idx, (k, in_ch, out_ch) in enumerate(zip(kernel_size, in_splits, out_splits)): 37 | conv_groups = in_ch if depthwise else 1 38 | # use add_module to keep key space clean 39 | self.add_module( 40 | str(idx), 41 | create_conv2d_pad( 42 | in_ch, out_ch, k, stride=stride, 43 | padding=padding, dilation=dilation, groups=conv_groups, **kwargs) 44 | ) 45 | self.splits = in_splits 46 | 47 | def forward(self, x): 48 | x_split = torch.split(x, self.splits, 1) 49 | x_out = [c(x_split[i]) for i, c in enumerate(self.values())] 50 | x = torch.cat(x_out, 1) 51 | return x 52 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/norm.py: -------------------------------------------------------------------------------- 1 | """ Normalization layers and wrappers 2 | """ 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | 8 | class GroupNorm(nn.GroupNorm): 9 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True): 10 | # NOTE num_channels is swapped to first arg for consistency in swapping norm layers with BN 11 | super().__init__(num_groups, num_channels, eps=eps, affine=affine) 12 | 13 | def forward(self, x): 14 | return F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 15 | 16 | 17 | class LayerNorm2d(nn.LayerNorm): 18 | """ LayerNorm for channels of '2D' spatial BCHW tensors """ 19 | def __init__(self, num_channels): 20 | super().__init__(num_channels) 21 | 22 | def forward(self, x: torch.Tensor) -> torch.Tensor: 23 | return F.layer_norm( 24 | x.permute(0, 2, 3, 1), self.normalized_shape, self.weight, self.bias, self.eps).permute(0, 3, 1, 2) 25 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/norm_act.py: -------------------------------------------------------------------------------- 1 | """ Normalization + Activation Layers 2 | """ 3 | import torch 4 | from torch import nn as nn 5 | from torch.nn import functional as F 6 | 7 | from .create_act import get_act_layer 8 | 9 | 10 | class BatchNormAct2d(nn.BatchNorm2d): 11 | """BatchNorm + Activation 12 | 13 | This module performs BatchNorm + Activation in a manner that will remain backwards 14 | compatible with weights trained with separate bn, act. This is why we inherit from BN 15 | instead of composing it as a .bn member. 16 | """ 17 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, track_running_stats=True, 18 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 19 | super(BatchNormAct2d, self).__init__( 20 | num_features, eps=eps, momentum=momentum, affine=affine, track_running_stats=track_running_stats) 21 | if isinstance(act_layer, str): 22 | act_layer = get_act_layer(act_layer) 23 | if act_layer is not None and apply_act: 24 | act_args = dict(inplace=True) if inplace else {} 25 | self.act = act_layer(**act_args) 26 | else: 27 | self.act = nn.Identity() 28 | 29 | def _forward_jit(self, x): 30 | """ A cut & paste of the contents of the PyTorch BatchNorm2d forward function 31 | """ 32 | # exponential_average_factor is set to self.momentum 33 | # (when it is available) only so that it gets updated 34 | # in ONNX graph when this node is exported to ONNX.
35 | if self.momentum is None: 36 | exponential_average_factor = 0.0 37 | else: 38 | exponential_average_factor = self.momentum 39 | 40 | if self.training and self.track_running_stats: 41 | # TODO: if statement only here to tell the jit to skip emitting this when it is None 42 | if self.num_batches_tracked is not None: 43 | self.num_batches_tracked += 1 44 | if self.momentum is None: # use cumulative moving average 45 | exponential_average_factor = 1.0 / float(self.num_batches_tracked) 46 | else: # use exponential moving average 47 | exponential_average_factor = self.momentum 48 | 49 | x = F.batch_norm( 50 | x, self.running_mean, self.running_var, self.weight, self.bias, 51 | self.training or not self.track_running_stats, 52 | exponential_average_factor, self.eps) 53 | return x 54 | 55 | @torch.jit.ignore 56 | def _forward_python(self, x): 57 | return super(BatchNormAct2d, self).forward(x) 58 | 59 | def forward(self, x): 60 | # FIXME cannot call parent forward() and maintain jit.script compatibility? 61 | if torch.jit.is_scripting(): 62 | x = self._forward_jit(x) 63 | else: 64 | x = self._forward_python(x) 65 | x = self.act(x) 66 | return x 67 | 68 | 69 | class GroupNormAct(nn.GroupNorm): 70 | # NOTE num_channel and num_groups order flipped for easier layer swaps / binding of fixed args 71 | def __init__(self, num_channels, num_groups=32, eps=1e-5, affine=True, 72 | apply_act=True, act_layer=nn.ReLU, inplace=True, drop_block=None): 73 | super(GroupNormAct, self).__init__(num_groups, num_channels, eps=eps, affine=affine) 74 | if isinstance(act_layer, str): 75 | act_layer = get_act_layer(act_layer) 76 | if act_layer is not None and apply_act: 77 | act_args = dict(inplace=True) if inplace else {} 78 | self.act = act_layer(**act_args) 79 | else: 80 | self.act = nn.Identity() 81 | 82 | def forward(self, x): 83 | x = F.group_norm(x, self.num_groups, self.weight, self.bias, self.eps) 84 | x = self.act(x) 85 | return x 86 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/padding.py: -------------------------------------------------------------------------------- 1 | """ Padding Helpers 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import math 6 | from typing import List, Tuple 7 | 8 | import torch.nn.functional as F 9 | 10 | 11 | # Calculate symmetric padding for a convolution 12 | def get_padding(kernel_size: int, stride: int = 1, dilation: int = 1, **_) -> int: 13 | padding = ((stride - 1) + dilation * (kernel_size - 1)) // 2 14 | return padding 15 | 16 | 17 | # Calculate asymmetric TensorFlow-like 'SAME' padding for a convolution 18 | def get_same_padding(x: int, k: int, s: int, d: int): 19 | return max((math.ceil(x / s) - 1) * s + (k - 1) * d + 1 - x, 0) 20 | 21 | 22 | # Can SAME padding for given args be done statically? 
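# Worked example (a sketch): for k=3, s=1, d=1 the total SAME pad from
# get_same_padding above is (x - 1) + 2 + 1 - x = 2 regardless of input size,
# so the padding is static; with s=2 it depends on x (e.g.
# get_same_padding(7, 3, 2, 1) == 2 but get_same_padding(8, 3, 2, 1) == 1),
# and dynamic padding is required.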
23 | def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_): 24 | return stride == 1 and (dilation * (kernel_size - 1)) % 2 == 0 25 | 26 | 27 | # Dynamically pad input x with 'SAME' padding for conv with specified args 28 | def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0): 29 | ih, iw = x.size()[-2:] 30 | pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding(iw, k[1], s[1], d[1]) 31 | if pad_h > 0 or pad_w > 0: 32 | x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value) 33 | return x 34 | 35 | 36 | def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]: 37 | dynamic = False 38 | if isinstance(padding, str): 39 | # for any string padding, the padding will be calculated for you, one of three ways 40 | padding = padding.lower() 41 | if padding == 'same': 42 | # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact 43 | if is_static_pad(kernel_size, **kwargs): 44 | # static case, no extra overhead 45 | padding = get_padding(kernel_size, **kwargs) 46 | else: 47 | # dynamic 'SAME' padding, has runtime/GPU memory overhead 48 | padding = 0 49 | dynamic = True 50 | elif padding == 'valid': 51 | # 'VALID' padding, same as padding=0 52 | padding = 0 53 | else: 54 | # Default to PyTorch style 'same'-ish symmetric padding 55 | padding = get_padding(kernel_size, **kwargs) 56 | return padding, dynamic 57 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/patch_embed.py: -------------------------------------------------------------------------------- 1 | """ Image to Patch Embedding using Conv2d 2 | 3 | A convolution based approach to patchifying a 2D image w/ embedding projection. 
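NOTE: this copy replaces the usual kernel_size=patch_size, stride=patch_size
projection with a 3x3 stride-1 conv (the original is kept below as a comment),
so the spatial grid is not downsampled here and self.num_patches, still derived
from patch_size, no longer reflects the actual token count.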
4 | 5 | Based on the impl in https://github.com/google-research/vision_transformer 6 | 7 | Hacked together by / Copyright 2020 Ross Wightman 8 | """ 9 | from torch import nn as nn 10 | 11 | from .helpers import to_2tuple 12 | from .trace_utils import _assert 13 | 14 | 15 | class PatchEmbed(nn.Module): 16 | """ 2D Image to Patch Embedding 17 | """ 18 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None, flatten=True): 19 | super().__init__() 20 | img_size = to_2tuple(img_size) 21 | patch_size = to_2tuple(patch_size) 22 | self.img_size = img_size 23 | self.patch_size = patch_size 24 | self.grid_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) 25 | self.num_patches = self.grid_size[0] * self.grid_size[1] 26 | self.flatten = flatten 27 | 28 | # self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 29 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=3, stride=1,padding=1) 30 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 31 | 32 | def forward(self, x): 33 | B, C, H, W = x.shape 34 | _assert(H == self.img_size[0], f"Input image height ({H}) doesn't match model ({self.img_size[0]}).") 35 | _assert(W == self.img_size[1], f"Input image width ({W}) doesn't match model ({self.img_size[1]}).") 36 | x = self.proj(x) 37 | if self.flatten: 38 | x = x.flatten(2).transpose(1, 2) # BCHW -> BNC 39 | x = self.norm(x) 40 | return x 41 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/pool2d_same.py: -------------------------------------------------------------------------------- 1 | """ AvgPool2d w/ Same Padding 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from typing import List, Tuple, Optional 9 | 10 | from .helpers import to_2tuple 11 | from .padding import pad_same, get_padding_value 12 | 13 | 14 | def avg_pool2d_same(x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 15 | ceil_mode: bool = False, count_include_pad: bool = True): 16 | # FIXME how to deal with count_include_pad vs not for external padding? 
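    # Added note (not in the upstream file): pad_same derives the pad amounts
    # from the runtime input size. E.g. for H = 7, k = 3, s = 2 the output has
    # ceil(7/2) = 4 rows, requiring (4-1)*2 + 3 - 7 = 2 padded rows, split as
    # 1 top / 1 bottom, after which a plain unpadded avg_pool2d is applied.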
17 | x = pad_same(x, kernel_size, stride) 18 | return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 19 | 20 | 21 | class AvgPool2dSame(nn.AvgPool2d): 22 | """ Tensorflow like 'SAME' wrapper for 2D average pooling 23 | """ 24 | def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True): 25 | kernel_size = to_2tuple(kernel_size) 26 | stride = to_2tuple(stride) 27 | super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad) 28 | 29 | def forward(self, x): 30 | x = pad_same(x, self.kernel_size, self.stride) 31 | return F.avg_pool2d( 32 | x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad) 33 | 34 | 35 | def max_pool2d_same( 36 | x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), 37 | dilation: List[int] = (1, 1), ceil_mode: bool = False): 38 | x = pad_same(x, kernel_size, stride, value=-float('inf')) 39 | return F.max_pool2d(x, kernel_size, stride, (0, 0), dilation, ceil_mode) 40 | 41 | 42 | class MaxPool2dSame(nn.MaxPool2d): 43 | """ Tensorflow like 'SAME' wrapper for 2D max pooling 44 | """ 45 | def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False): 46 | kernel_size = to_2tuple(kernel_size) 47 | stride = to_2tuple(stride) 48 | dilation = to_2tuple(dilation) 49 | super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, ceil_mode) 50 | 51 | def forward(self, x): 52 | x = pad_same(x, self.kernel_size, self.stride, value=-float('inf')) 53 | return F.max_pool2d(x, self.kernel_size, self.stride, (0, 0), self.dilation, self.ceil_mode) 54 | 55 | 56 | def create_pool2d(pool_type, kernel_size, stride=None, **kwargs): 57 | stride = stride or kernel_size 58 | padding = kwargs.pop('padding', '') 59 | padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, **kwargs) 60 | if is_dynamic: 61 | if pool_type == 'avg': 62 | return AvgPool2dSame(kernel_size, stride=stride, **kwargs) 63 | elif pool_type == 'max': 64 | return MaxPool2dSame(kernel_size, stride=stride, **kwargs) 65 | else: 66 | assert False, f'Unsupported pool type {pool_type}' 67 | else: 68 | if pool_type == 'avg': 69 | return nn.AvgPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 70 | elif pool_type == 'max': 71 | return nn.MaxPool2d(kernel_size, stride=stride, padding=padding, **kwargs) 72 | else: 73 | assert False, f'Unsupported pool type {pool_type}' 74 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/separable_conv.py: -------------------------------------------------------------------------------- 1 | """ Depthwise Separable Conv Modules 2 | 3 | Basic DWS convs. Other variations of DWS exist with batch norm or activations between the 4 | DW and PW convs such as the Depthwise modules in MobileNetV2 / EfficientNet and Xception. 
5 | 6 | Hacked together by / Copyright 2020 Ross Wightman 7 | """ 8 | from torch import nn as nn 9 | 10 | from .create_conv2d import create_conv2d 11 | from .create_norm_act import convert_norm_act 12 | 13 | 14 | class SeparableConvBnAct(nn.Module): 15 | """ Separable Conv w/ trailing Norm and Activation 16 | """ 17 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 18 | channel_multiplier=1.0, pw_kernel_size=1, norm_layer=nn.BatchNorm2d, act_layer=nn.ReLU, 19 | apply_act=True, drop_block=None): 20 | super(SeparableConvBnAct, self).__init__() 21 | 22 | self.conv_dw = create_conv2d( 23 | in_channels, int(in_channels * channel_multiplier), kernel_size, 24 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 25 | 26 | self.conv_pw = create_conv2d( 27 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 28 | 29 | norm_act_layer = convert_norm_act(norm_layer, act_layer) 30 | self.bn = norm_act_layer(out_channels, apply_act=apply_act, drop_block=drop_block) 31 | 32 | @property 33 | def in_channels(self): 34 | return self.conv_dw.in_channels 35 | 36 | @property 37 | def out_channels(self): 38 | return self.conv_pw.out_channels 39 | 40 | def forward(self, x): 41 | x = self.conv_dw(x) 42 | x = self.conv_pw(x) 43 | if self.bn is not None: 44 | x = self.bn(x) 45 | return x 46 | 47 | 48 | class SeparableConv2d(nn.Module): 49 | """ Separable Conv 50 | """ 51 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=1, dilation=1, padding='', bias=False, 52 | channel_multiplier=1.0, pw_kernel_size=1): 53 | super(SeparableConv2d, self).__init__() 54 | 55 | self.conv_dw = create_conv2d( 56 | in_channels, int(in_channels * channel_multiplier), kernel_size, 57 | stride=stride, dilation=dilation, padding=padding, depthwise=True) 58 | 59 | self.conv_pw = create_conv2d( 60 | int(in_channels * channel_multiplier), out_channels, pw_kernel_size, padding=padding, bias=bias) 61 | 62 | @property 63 | def in_channels(self): 64 | return self.conv_dw.in_channels 65 | 66 | @property 67 | def out_channels(self): 68 | return self.conv_pw.out_channels 69 | 70 | def forward(self, x): 71 | x = self.conv_dw(x) 72 | x = self.conv_pw(x) 73 | return x 74 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/space_to_depth.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class SpaceToDepth(nn.Module): 6 | def __init__(self, block_size=4): 7 | super().__init__() 8 | assert block_size == 4 9 | self.bs = block_size 10 | 11 | def forward(self, x): 12 | N, C, H, W = x.size() 13 | x = x.view(N, C, H // self.bs, self.bs, W // self.bs, self.bs) # (N, C, H//bs, bs, W//bs, bs) 14 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 15 | x = x.view(N, C * (self.bs ** 2), H // self.bs, W // self.bs) # (N, C*bs^2, H//bs, W//bs) 16 | return x 17 | 18 | 19 | @torch.jit.script 20 | class SpaceToDepthJit(object): 21 | def __call__(self, x: torch.Tensor): 22 | # assuming hard-coded that block_size==4 for acceleration 23 | N, C, H, W = x.size() 24 | x = x.view(N, C, H // 4, 4, W // 4, 4) # (N, C, H//bs, bs, W//bs, bs) 25 | x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # (N, bs, bs, C, H//bs, W//bs) 26 | x = x.view(N, C * 16, H // 4, W // 4) # (N, C*bs^2, H//bs, W//bs) 27 | return x 28 | 29 | 30 | class SpaceToDepthModule(nn.Module): 31 | def 
__init__(self, no_jit=False): 32 | super().__init__() 33 | if not no_jit: 34 | self.op = SpaceToDepthJit() 35 | else: 36 | self.op = SpaceToDepth() 37 | 38 | def forward(self, x): 39 | return self.op(x) 40 | 41 | 42 | class DepthToSpace(nn.Module): 43 | 44 | def __init__(self, block_size): 45 | super().__init__() 46 | self.bs = block_size 47 | 48 | def forward(self, x): 49 | N, C, H, W = x.size() 50 | x = x.view(N, self.bs, self.bs, C // (self.bs ** 2), H, W) # (N, bs, bs, C//bs^2, H, W) 51 | x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # (N, C//bs^2, H, bs, W, bs) 52 | x = x.view(N, C // (self.bs ** 2), H * self.bs, W * self.bs) # (N, C//bs^2, H * bs, W * bs) 53 | return x 54 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/split_attn.py: -------------------------------------------------------------------------------- 1 | """ Split Attention Conv2d (for ResNeSt Models) 2 | 3 | Paper: `ResNeSt: Split-Attention Networks` - https://arxiv.org/abs/2004.08955 4 | 5 | Adapted from original PyTorch impl at https://github.com/zhanghang1989/ResNeSt 6 | 7 | Modified for torchscript compat, performance, and consistency with timm by Ross Wightman 8 | """ 9 | import torch 10 | import torch.nn.functional as F 11 | from torch import nn 12 | 13 | from .helpers import make_divisible 14 | 15 | 16 | class RadixSoftmax(nn.Module): 17 | def __init__(self, radix, cardinality): 18 | super(RadixSoftmax, self).__init__() 19 | self.radix = radix 20 | self.cardinality = cardinality 21 | 22 | def forward(self, x): 23 | batch = x.size(0) 24 | if self.radix > 1: 25 | x = x.view(batch, self.cardinality, self.radix, -1).transpose(1, 2) 26 | x = F.softmax(x, dim=1) 27 | x = x.reshape(batch, -1) 28 | else: 29 | x = torch.sigmoid(x) 30 | return x 31 | 32 | 33 | class SplitAttn(nn.Module): 34 | """Split-Attention (aka Splat) 35 | """ 36 | def __init__(self, in_channels, out_channels=None, kernel_size=3, stride=1, padding=None, 37 | dilation=1, groups=1, bias=False, radix=2, rd_ratio=0.25, rd_channels=None, rd_divisor=8, 38 | act_layer=nn.ReLU, norm_layer=None, drop_block=None, **kwargs): 39 | super(SplitAttn, self).__init__() 40 | out_channels = out_channels or in_channels 41 | self.radix = radix 42 | self.drop_block = drop_block 43 | mid_chs = out_channels * radix 44 | if rd_channels is None: 45 | attn_chs = make_divisible(in_channels * radix * rd_ratio, min_value=32, divisor=rd_divisor) 46 | else: 47 | attn_chs = rd_channels * radix 48 | 49 | padding = kernel_size // 2 if padding is None else padding 50 | self.conv = nn.Conv2d( 51 | in_channels, mid_chs, kernel_size, stride, padding, dilation, 52 | groups=groups * radix, bias=bias, **kwargs) 53 | self.bn0 = norm_layer(mid_chs) if norm_layer else nn.Identity() 54 | self.act0 = act_layer(inplace=True) 55 | self.fc1 = nn.Conv2d(out_channels, attn_chs, 1, groups=groups) 56 | self.bn1 = norm_layer(attn_chs) if norm_layer else nn.Identity() 57 | self.act1 = act_layer(inplace=True) 58 | self.fc2 = nn.Conv2d(attn_chs, mid_chs, 1, groups=groups) 59 | self.rsoftmax = RadixSoftmax(radix, groups) 60 | 61 | def forward(self, x): 62 | x = self.conv(x) 63 | x = self.bn0(x) 64 | if self.drop_block is not None: 65 | x = self.drop_block(x) 66 | x = self.act0(x) 67 | 68 | B, RC, H, W = x.shape 69 | if self.radix > 1: 70 | x = x.reshape((B, self.radix, RC // self.radix, H, W)) 71 | x_gap = x.sum(dim=1) 72 | else: 73 | x_gap = x 74 | x_gap = x_gap.mean((2, 3), keepdim=True) 75 | x_gap = self.fc1(x_gap) 76 | x_gap = 
self.bn1(x_gap) 77 | x_gap = self.act1(x_gap) 78 | x_attn = self.fc2(x_gap) 79 | 80 | x_attn = self.rsoftmax(x_attn).view(B, -1, 1, 1) 81 | if self.radix > 1: 82 | out = (x * x_attn.reshape((B, self.radix, RC // self.radix, 1, 1))).sum(dim=1) 83 | else: 84 | out = x * x_attn 85 | return out.contiguous() 86 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/split_batchnorm.py: -------------------------------------------------------------------------------- 1 | """ Split BatchNorm 2 | 3 | A PyTorch BatchNorm layer that splits input batch into N equal parts and passes each through 4 | a separate BN layer. The first split is passed through the parent BN layers with weight/bias 5 | keys the same as the original BN. All other splits pass through BN sub-layers under the '.aux_bn' 6 | namespace. 7 | 8 | This allows easily removing the auxiliary BN layers after training to efficiently 9 | achieve the 'Auxiliary BatchNorm' as described in the AdvProp Paper, section 4.2, 10 | 'Disentangled Learning via An Auxiliary BN' 11 | 12 | Hacked together by / Copyright 2020 Ross Wightman 13 | """ 14 | import torch 15 | import torch.nn as nn 16 | 17 | 18 | class SplitBatchNorm2d(torch.nn.BatchNorm2d): 19 | 20 | def __init__(self, num_features, eps=1e-5, momentum=0.1, affine=True, 21 | track_running_stats=True, num_splits=2): 22 | super().__init__(num_features, eps, momentum, affine, track_running_stats) 23 | assert num_splits > 1, 'Should have at least one aux BN layer (num_splits at least 2)' 24 | self.num_splits = num_splits 25 | self.aux_bn = nn.ModuleList([ 26 | nn.BatchNorm2d(num_features, eps, momentum, affine, track_running_stats) for _ in range(num_splits - 1)]) 27 | 28 | def forward(self, input: torch.Tensor): 29 | if self.training: # aux BN only relevant while training 30 | split_size = input.shape[0] // self.num_splits 31 | assert input.shape[0] == split_size * self.num_splits, "batch size must be evenly divisible by num_splits" 32 | split_input = input.split(split_size) 33 | x = [super().forward(split_input[0])] 34 | for i, a in enumerate(self.aux_bn): 35 | x.append(a(split_input[i + 1])) 36 | return torch.cat(x, dim=0) 37 | else: 38 | return super().forward(input) 39 | 40 | 41 | def convert_splitbn_model(module, num_splits=2): 42 | """ 43 | Recursively traverse module and its children to replace all instances of 44 | ``torch.nn.modules.batchnorm._BatchNorm`` with `SplitBatchnorm2d`. 
45 | Args: 46 | module (torch.nn.Module): input module 47 | num_splits: number of separate batchnorm layers to split input across 48 | Example:: 49 | >>> # model is an instance of torch.nn.Module 50 | >>> model = timm.models.convert_splitbn_model(model, num_splits=2) 51 | """ 52 | mod = module 53 | if isinstance(module, torch.nn.modules.instancenorm._InstanceNorm): 54 | return module 55 | if isinstance(module, torch.nn.modules.batchnorm._BatchNorm): 56 | mod = SplitBatchNorm2d( 57 | module.num_features, module.eps, module.momentum, module.affine, 58 | module.track_running_stats, num_splits=num_splits) 59 | mod.running_mean = module.running_mean 60 | mod.running_var = module.running_var 61 | mod.num_batches_tracked = module.num_batches_tracked 62 | if module.affine: 63 | mod.weight.data = module.weight.data.clone().detach() 64 | mod.bias.data = module.bias.data.clone().detach() 65 | for aux in mod.aux_bn: 66 | aux.running_mean = module.running_mean.clone() 67 | aux.running_var = module.running_var.clone() 68 | aux.num_batches_tracked = module.num_batches_tracked.clone() 69 | if module.affine: 70 | aux.weight.data = module.weight.data.clone().detach() 71 | aux.bias.data = module.bias.data.clone().detach() 72 | for name, child in module.named_children(): 73 | mod.add_module(name, convert_splitbn_model(child, num_splits=num_splits)) 74 | del module 75 | return mod 76 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/squeeze_excite.py: -------------------------------------------------------------------------------- 1 | """ Squeeze-and-Excitation Channel Attention 2 | 3 | An SE implementation originally based on PyTorch SE-Net impl. 4 | Has since evolved with additional functionality / configuration. 5 | 6 | Paper: `Squeeze-and-Excitation Networks` - https://arxiv.org/abs/1709.01507 7 | 8 | Also included is Effective Squeeze-Excitation (ESE). 9 | Paper: `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 10 | 11 | Hacked together by / Copyright 2021 Ross Wightman 12 | """ 13 | from torch import nn as nn 14 | 15 | from .create_act import create_act_layer 16 | from .helpers import make_divisible 17 | 18 | 19 | class SEModule(nn.Module): 20 | """ SE Module as defined in original SE-Nets with a few additions 21 | Additions include: 22 | * divisor can be specified to keep channels % div == 0 (default: 8) 23 | * reduction channels can be specified directly by arg (if rd_channels is set) 24 | * reduction channels can be specified by float rd_ratio (default: 1/16) 25 | * global max pooling can be added to the squeeze aggregation 26 | * customizable activation, normalization, and gate layer 27 | """ 28 | def __init__( 29 | self, channels, rd_ratio=1. / 16, rd_channels=None, rd_divisor=8, add_maxpool=False, 30 | act_layer=nn.ReLU, norm_layer=None, gate_layer='sigmoid'): 31 | super(SEModule, self).__init__() 32 | self.add_maxpool = add_maxpool 33 | if not rd_channels: 34 | rd_channels = make_divisible(channels * rd_ratio, rd_divisor, round_limit=0.) 
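        # Worked example (added for clarity; not in the upstream file): with
        # channels=256 and the default rd_ratio=1/16, rd_channels becomes
        # make_divisible(256 / 16, 8) = 16, so fc1 squeezes 256 -> 16 and fc2
        # expands 16 -> 256 before the sigmoid gate rescales each channel.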
35 | self.fc1 = nn.Conv2d(channels, rd_channels, kernel_size=1, bias=True) 36 | self.bn = norm_layer(rd_channels) if norm_layer else nn.Identity() 37 | self.act = create_act_layer(act_layer, inplace=True) 38 | self.fc2 = nn.Conv2d(rd_channels, channels, kernel_size=1, bias=True) 39 | self.gate = create_act_layer(gate_layer) 40 | 41 | def forward(self, x): 42 | x_se = x.mean((2, 3), keepdim=True) 43 | if self.add_maxpool: 44 | # experimental codepath, may remove or change 45 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 46 | x_se = self.fc1(x_se) 47 | x_se = self.act(self.bn(x_se)) 48 | x_se = self.fc2(x_se) 49 | return x * self.gate(x_se) 50 | 51 | 52 | SqueezeExcite = SEModule # alias 53 | 54 | 55 | class EffectiveSEModule(nn.Module): 56 | """ 'Effective Squeeze-Excitation 57 | From `CenterMask : Real-Time Anchor-Free Instance Segmentation` - https://arxiv.org/abs/1911.06667 58 | """ 59 | def __init__(self, channels, add_maxpool=False, gate_layer='hard_sigmoid', **_): 60 | super(EffectiveSEModule, self).__init__() 61 | self.add_maxpool = add_maxpool 62 | self.fc = nn.Conv2d(channels, channels, kernel_size=1, padding=0) 63 | self.gate = create_act_layer(gate_layer) 64 | 65 | def forward(self, x): 66 | x_se = x.mean((2, 3), keepdim=True) 67 | if self.add_maxpool: 68 | # experimental codepath, may remove or change 69 | x_se = 0.5 * x_se + 0.5 * x.amax((2, 3), keepdim=True) 70 | x_se = self.fc(x_se) 71 | return x * self.gate(x_se) 72 | 73 | 74 | EffectiveSqueezeExcite = EffectiveSEModule # alias 75 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/test_time_pool.py: -------------------------------------------------------------------------------- 1 | """ Test Time Pooling (Average-Max Pool) 2 | 3 | Hacked together by / Copyright 2020 Ross Wightman 4 | """ 5 | 6 | import logging 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | from .adaptive_avgmax_pool import adaptive_avgmax_pool2d 11 | 12 | 13 | _logger = logging.getLogger(__name__) 14 | 15 | 16 | class TestTimePoolHead(nn.Module): 17 | def __init__(self, base, original_pool=7): 18 | super(TestTimePoolHead, self).__init__() 19 | self.base = base 20 | self.original_pool = original_pool 21 | base_fc = self.base.get_classifier() 22 | if isinstance(base_fc, nn.Conv2d): 23 | self.fc = base_fc 24 | else: 25 | self.fc = nn.Conv2d( 26 | self.base.num_features, self.base.num_classes, kernel_size=1, bias=True) 27 | self.fc.weight.data.copy_(base_fc.weight.data.view(self.fc.weight.size())) 28 | self.fc.bias.data.copy_(base_fc.bias.data.view(self.fc.bias.size())) 29 | self.base.reset_classifier(0) # delete original fc layer 30 | 31 | def forward(self, x): 32 | x = self.base.forward_features(x) 33 | x = F.avg_pool2d(x, kernel_size=self.original_pool, stride=1) 34 | x = self.fc(x) 35 | x = adaptive_avgmax_pool2d(x, 1) 36 | return x.view(x.size(0), -1) 37 | 38 | 39 | def apply_test_time_pool(model, config, use_test_size=True): 40 | test_time_pool = False 41 | if not hasattr(model, 'default_cfg') or not model.default_cfg: 42 | return model, False 43 | if use_test_size and 'test_input_size' in model.default_cfg: 44 | df_input_size = model.default_cfg['test_input_size'] 45 | else: 46 | df_input_size = model.default_cfg['input_size'] 47 | if config['input_size'][-1] > df_input_size[-1] and config['input_size'][-2] > df_input_size[-2]: 48 | _logger.info('Target input size %s > pretrained default %s, using test time pooling' % 49 | (str(config['input_size'][-2:]), 
str(df_input_size[-2:]))) 50 | model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size']) 51 | test_time_pool = True 52 | return model, test_time_pool 53 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/trace_utils.py: -------------------------------------------------------------------------------- 1 | try: 2 | from torch import _assert 3 | except ImportError: 4 | def _assert(condition: bool, message: str): 5 | assert condition, message 6 | 7 | 8 | def _float_to_int(x: float) -> int: 9 | """ 10 | Symbolic tracing helper to substitute for inbuilt `int`. 11 | Hint: Inbuilt `int` can't accept an argument of type `Proxy` 12 | """ 13 | return int(x) 14 | -------------------------------------------------------------------------------- /V2EM/src/model/nestnet/layers/weight_init.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import warnings 4 | 5 | from torch.nn.init import _calculate_fan_in_and_fan_out 6 | 7 | 8 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 9 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 10 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 11 | def norm_cdf(x): 12 | # Computes standard normal cumulative distribution function 13 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 14 | 15 | if (mean < a - 2 * std) or (mean > b + 2 * std): 16 | warnings.warn("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 17 | "The distribution of values may be incorrect.", 18 | stacklevel=2) 19 | 20 | with torch.no_grad(): 21 | # Values are generated by using a truncated uniform distribution and 22 | # then using the inverse CDF for the normal distribution. 23 | # Get upper and lower cdf values 24 | l = norm_cdf((a - mean) / std) 25 | u = norm_cdf((b - mean) / std) 26 | 27 | # Uniformly fill tensor with values from [l, u], then translate to 28 | # [2l-1, 2u-1]. 29 | tensor.uniform_(2 * l - 1, 2 * u - 1) 30 | 31 | # Use inverse cdf transform for normal distribution to get truncated 32 | # standard normal 33 | tensor.erfinv_() 34 | 35 | # Transform to proper mean, std 36 | tensor.mul_(std * math.sqrt(2.)) 37 | tensor.add_(mean) 38 | 39 | # Clamp to ensure it's in the proper range 40 | tensor.clamp_(min=a, max=b) 41 | return tensor 42 | 43 | 44 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 45 | # type: (Tensor, float, float, float, float) -> Tensor 46 | r"""Fills the input Tensor with values drawn from a truncated 47 | normal distribution. The values are effectively drawn from the 48 | normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)` 49 | with values outside :math:`[a, b]` redrawn until they are within 50 | the bounds. The method used for generating the random values works 51 | best when :math:`a \leq \text{mean} \leq b`. 
52 | Args: 53 | tensor: an n-dimensional `torch.Tensor` 54 | mean: the mean of the normal distribution 55 | std: the standard deviation of the normal distribution 56 | a: the minimum cutoff value 57 | b: the maximum cutoff value 58 | Examples: 59 | >>> w = torch.empty(3, 5) 60 | >>> nn.init.trunc_normal_(w) 61 | """ 62 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 63 | 64 | 65 | def variance_scaling_(tensor, scale=1.0, mode='fan_in', distribution='normal'): 66 | fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) 67 | if mode == 'fan_in': 68 | denom = fan_in 69 | elif mode == 'fan_out': 70 | denom = fan_out 71 | elif mode == 'fan_avg': 72 | denom = (fan_in + fan_out) / 2 73 | 74 | variance = scale / denom 75 | 76 | if distribution == "truncated_normal": 77 | # constant is stddev of standard normal truncated to (-2, 2) 78 | trunc_normal_(tensor, std=math.sqrt(variance) / .87962566103423978) 79 | elif distribution == "normal": 80 | tensor.normal_(std=math.sqrt(variance)) 81 | elif distribution == "uniform": 82 | bound = math.sqrt(3 * variance) 83 | tensor.uniform_(-bound, bound) 84 | else: 85 | raise ValueError(f"invalid distribution {distribution}") 86 | 87 | 88 | def lecun_normal_(tensor): 89 | variance_scaling_(tensor, mode='fan_in', distribution='truncated_normal') 90 | -------------------------------------------------------------------------------- /V2EM/src/model/se_block.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | # https://openaccess.thecvf.com/content_cvpr_2018/html/Hu_Squeeze-and-Excitation_Networks_CVPR_2018_paper.html 6 | 7 | class SEBlock(nn.Module): 8 | 9 | def __init__(self, input_channels, internal_neurons): 10 | super(SEBlock, self).__init__() 11 | self.down = nn.Conv2d(in_channels=input_channels, out_channels=internal_neurons, kernel_size=1, stride=1, bias=True) 12 | self.up = nn.Conv2d(in_channels=internal_neurons, out_channels=input_channels, kernel_size=1, stride=1, bias=True) 13 | self.input_channels = input_channels 14 | 15 | def forward(self, inputs): 16 | x = F.avg_pool2d(inputs, kernel_size=inputs.size(3)) 17 | x = self.down(x) 18 | x = F.relu(x) 19 | x = self.up(x) 20 | x = torch.sigmoid(x) 21 | x = x.view(-1, self.input_channels, 1, 1) 22 | return inputs * x -------------------------------------------------------------------------------- /V2EM/src/model/transformer_encoder.py: -------------------------------------------------------------------------------- 1 | import math 2 | from typing import Optional, List 3 | import torch 4 | from torch import nn 5 | from src.utils import padTensor 6 | 7 | class WrappedTransformerEncoder(nn.Module): 8 | def __init__(self, dim, num_layers, num_heads): 9 | super(WrappedTransformerEncoder, self).__init__() 10 | self.dim = dim 11 | encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=num_heads) 12 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers) 13 | self.cls_emb = nn.Embedding(num_embeddings=1, embedding_dim=dim) 14 | 15 | def prepend_cls(self, inputs): 16 | # print(f"inputs before prepending CLS: {inputs}") 17 | index = torch.LongTensor([0]).to(device=inputs.device) 18 | # print(f"index after LongTensor: {index}") 19 | cls_emb = self.cls_emb(index) 20 | # print(f"cls_emb after embedding the index: {cls_emb}") 21 | cls_emb = cls_emb.expand(inputs.size(0), 1, self.dim) 22 | # print(f"cls_emb after expanding: {cls_emb.shape}, values: {cls_emb}") 23 | outputs = torch.cat((cls_emb, inputs), dim=1) 24 | # print(f"outputs after concatenating cls and inputs: {outputs.shape}, values: {outputs}")
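        # Shape note (added; not in the original source): for inputs of shape
        # (batch, seq_len, dim), cls_emb is expanded to (batch, 1, dim) and
        # concatenated on dim=1, so outputs is (batch, seq_len + 1, dim) with
        # the learnable CLS vector at position 0 of every sequence.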
print(f"合并cls和input之后{outputs.shape},具体{outputs}") 25 | return outputs 26 | 27 | def forward(self, inputs: torch.Tensor, lens: Optional[List[int]] = None, get_cls: Optional[bool] = False): 28 | if lens is not None: 29 | max_len = max(lens) 30 | 31 | mask = [([False] * (l + int(get_cls)) + [True] * (max_len - l)) for l in lens] 32 | mask = torch.tensor(mask).to(device=inputs.device) 33 | 34 | inputs = list(inputs.split(lens, dim=0)) 35 | inputs = [padTensor(inp, max_len) for inp in inputs] 36 | inputs = torch.stack(inputs, dim=0) 37 | # print(inputs.shape) 38 | else: 39 | mask = None 40 | 41 | if get_cls: 42 | inputs = self.prepend_cls(inputs) 43 | # print(inputs) 44 | 45 | inputs = inputs.permute(1, 0, 2) 46 | # inputs = self.pos_encoder(inputs) 47 | # print("input shape") ## 48 | # print(inputs.shape) ## 49 | inputs = self.encoder(src=inputs, src_key_padding_mask=mask) # (seq_len, bs, dim) 50 | 51 | if get_cls: 52 | return inputs[0] 53 | 54 | return inputs[1:].permute(1, 0, 2) 55 | 56 | -------------------------------------------------------------------------------- /V2EM/src/trainers/basetrainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import torch 4 | from src.utils import save 5 | 6 | 7 | class TrainerBase(): 8 | def __init__(self, args, model, criterion, optimizer, scheduler, device, dataloaders): 9 | self.args = args 10 | self.model = model 11 | self.best_model = copy.deepcopy(model.state_dict()) 12 | self.device = device 13 | self.criterion = criterion 14 | self.optimizer = optimizer 15 | self.dataloaders = dataloaders 16 | self.scheduler = scheduler 17 | self.earlyStop = args['early_stop'] 18 | 19 | self.saving_path = f"./savings/" 20 | 21 | def make_stat(self, prev, curr): 22 | new_stats = [] 23 | for i in range(len(prev)): 24 | if curr[i] > prev[i]: 25 | new_stats.append(f'{curr[i]:.4f} \u2191') 26 | elif curr[i] < prev[i]: 27 | new_stats.append(f'{curr[i]:.4f} \u2193') 28 | else: 29 | new_stats.append(f'{curr[i]:.4f} -') 30 | return new_stats 31 | 32 | def get_saving_file_name(self): 33 | best_test_stats = self.all_test_stats[self.best_epoch - 1] 34 | 35 | name = f'{self.args["model"]}_{self.args["modalities"]}_' 36 | 37 | if self.args['loss'] == 'bce': 38 | name += f'Acc_{best_test_stats[0][-1]:.4f}_' 39 | name += f'F1_{best_test_stats[3][-1]:.4f}_' 40 | name += f'AUC_{best_test_stats[4][-1]:.4f}_' 41 | else: 42 | name += f'{best_test_stats[0]:.4f}_' 43 | name += f'{best_test_stats[1]:.4f}_' 44 | name += f'{best_test_stats[2]:.4f}_' 45 | name += f'{best_test_stats[3]:.4f}_' 46 | 47 | name += f'imginvl{self.args["img_interval"]}_' 48 | 49 | if self.args['model'] == 'mme2e_sparse': 50 | name += f'st_{self.args["sparse_threshold"]}_' 51 | 52 | name += f'seed{self.args["seed"]}' 53 | name += '.pt' 54 | 55 | return name 56 | 57 | def save_stats(self): 58 | stats = { 59 | 'args': self.args, 60 | 'train_stats': self.all_train_stats, 61 | 'valid_stats': self.all_valid_stats, 62 | 'test_stats': self.all_test_stats, 63 | 'best_valid_stats': self.best_valid_stats, 64 | 'best_epoch': self.best_epoch 65 | } 66 | 67 | save(stats, os.path.join(self.saving_path, 'stats', self.get_saving_file_name())) 68 | 69 | # csv_path = os.path.join(self.saving_path, 'csv', self.get_saving_file_name()).replace('.pt', '.csv') 70 | # dirname = os.path.dirname(csv_path) 71 | # if not os.path.exists(dirname): 72 | # os.makedirs(dirname) 73 | # with open(csv_path, 'w') as f: 74 | # for stat in self.all_test_stats[self.best_epoch - 1]: 
75 | # for n in stat: 76 | # f.write(f'{n:.4f},') 77 | # f.write('\n') 78 | # f.write(str(self.args)) 79 | # f.write('\n') 80 | 81 | def save_model(self): 82 | torch.save(self.best_model, os.path.join(self.saving_path, 'models', self.get_saving_file_name())) 83 | -------------------------------------------------------------------------------- /V2EM/src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import torch 4 | import numpy as np 5 | from PIL import Image 6 | from torchvision import transforms 7 | 8 | def save(toBeSaved, filename, mode='wb'): 9 | dirname = os.path.dirname(filename) 10 | if not os.path.exists(dirname): 11 | os.makedirs(dirname) 12 | file = open(filename, mode) 13 | pickle.dump(toBeSaved, file, protocol=4) 14 | file.close() 15 | 16 | def load(filename, mode='rb'): 17 | file = open(filename, mode) 18 | loaded = pickle.load(file) 19 | file.close() 20 | return loaded 21 | 22 | # For python2 23 | def load2(path): 24 | with open(path, 'rb') as f: 25 | u = pickle._Unpickler(f) 26 | u.encoding = 'latin1' 27 | p = u.load() 28 | return p 29 | 30 | def pad_sents(sents, pad_token): 31 | sents_padded = [] 32 | lens = get_lens(sents) 33 | max_len = max(lens) 34 | sents_padded = [sents[i] + [pad_token] * (max_len - l) for i, l in enumerate(lens)] 35 | return sents_padded, lens 36 | 37 | def sort_sents(sents, reverse=True): 38 | sents.sort(key=(lambda s: len(s)), reverse=reverse) 39 | return sents 40 | 41 | def get_mask(sents, unmask_idx=1, mask_idx=0): 42 | lens = get_lens(sents) 43 | max_len = max(lens) 44 | mask = [([unmask_idx] * l + [mask_idx] * (max_len - l)) for l in lens] 45 | return mask 46 | 47 | def get_lens(sents): 48 | return [len(sent) for sent in sents] 49 | 50 | def get_max_len(sents): 51 | max_len = max([len(sent) for sent in sents]) 52 | return max_len 53 | 54 | def truncate_sents(sents, length): 55 | sents = [sent[:length] for sent in sents] 56 | return sents 57 | 58 | def get_loss_weight(labels, label_order): 59 | nums = [np.sum(labels == lo) for lo in label_order] 60 | loss_weight = torch.tensor([n / len(labels) for n in nums]) 61 | return loss_weight 62 | 63 | def capitalize_first_letter(data): 64 | return [word.capitalize() for word in data] 65 | 66 | def cmumosei_round(a): 67 | if a < -2: 68 | res = -3 69 | if -2 <= a and a < -1: 70 | res = -2 71 | if -1 <= a and a < 0: 72 | res = -1 73 | if 0 <= a and a <= 0: 74 | res = 0 75 | if 0 < a and a <= 1: 76 | res = 1 77 | if 1 < a and a <= 2: 78 | res = 2 79 | if a > 2: 80 | res = 3 81 | return res 82 | 83 | # From MTCNN 84 | def fixed_image_standardization(image_tensor: torch.tensor) -> torch.tensor: 85 | processed_tensor = (image_tensor - 127.5) / 128.0 86 | return processed_tensor 87 | 88 | def padTensor(t: torch.tensor, targetLen: int) -> torch.tensor: 89 | oriLen, dim = t.size() 90 | return torch.cat((t, torch.zeros(targetLen - oriLen, dim).to(t.device)), dim=0) 91 | 92 | def calc_percent(x: torch.tensor): 93 | total = np.prod(np.array(x.size())) 94 | positive = x.sum().item() 95 | return positive / total 96 | -------------------------------------------------------------------------------- /dataset_demo/Readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /dataset_demo/Ses01F_impro01.avi: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/dataset_demo/Ses01F_impro01.avi -------------------------------------------------------------------------------- /dataset_demo/Ses01F_impro01.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MultimodalAffectiveComputing/FV2ES/f2128f1bccd08381314886b09a530f4fc8cadcc4/dataset_demo/Ses01F_impro01.wav -------------------------------------------------------------------------------- /dataset_demo/Ses01F_impro01_text.txt: -------------------------------------------------------------------------------- 1 | Ses01F_impro01_F000 [006.2901-008.2357]: Excuse me. 2 | Ses01F_impro01_M000 [007.5712-010.4750]: Do you have your forms? 3 | Ses01F_impro01_F001 [010.0100-011.3925]: Yeah. 4 | Ses01F_impro01_M001 [010.9266-014.6649]: Let me see them. 5 | Ses01F_impro01_F002 [014.8872-018.0175]: Is there a problem? 6 | Ses01F_impro01_M002 [016.8352-019.7175]: Who told you to get in this line? 7 | Ses01F_impro01_F003 [019.2900-020.7875]: You did. 8 | Ses01F_impro01_F004 [021.3257-024.7400]: You were standing at the beginning and you directed me. 9 | Ses01F_impro01_M003 [023.4700-028.0300]: Okay. But I didn't tell you to get in this line if you are filling out this particular form. 10 | Ses01F_impro01_F005 [027.4600-031.4900]: Well what's the problem? Let me change it. 11 | Ses01F_impro01_M004 [028.3950-031.2117]: This form is a Z.X.four. 12 | Ses01F_impro01_M005 [031.2660-039.3875]: You can't-- This is not the line for Z.X.four. If you're going to fill out the Z.X.four, you need to have a different form of ID. 13 | Ses01F_impro01_F006 [038.9650-043.5900]: What? I'm getting an ID. This is why I'm here. My wallet was stolen. 14 | Ses01F_impro01_M006 [041.2300-046.9800]: No. I need another set of ID to prove this is actually you. 15 | Ses01F_impro01_F007 [046.5800-052.1900]: How am I supposed to get an ID without an ID? How does a person get an ID in the first place? 16 | Ses01F_impro01_M007 [051.4000-057.6400]: I don't know. But I need an ID to pass this form along. I can't just send it along without an ID. 17 | Ses01F_impro01_F008 [056.1600-058.8225]: I'm here to get an ID. 18 | Ses01F_impro01_M008 [058.1800-062.5900]: No. I need another ID, a separate one. 19 | Ses01F_impro01_F009 [061.8700-065.9700]: Like what? Like a birth certificate? 20 | Ses01F_impro01_M009 [065.5100-073.0000]: A birth certificate, a passport...a student ID; didn't you go to school? Anything? 21 | Ses01F_impro01_F010 [066.4200-069.3400]: Who the hell has a birth certificate? 22 | Ses01F_impro01_F011 [072.4500-082.2600]: Yes but my wallet was stolen, I don't have anything. I don't have any credit cards, I don't have my ID. Don't you have things on file here? 23 | Ses01F_impro01_M010 [081.5900-086.0300]: Yeah. We keep it on file, but we need an ID to access that file. 24 | Ses01F_impro01_F012 [085.2700-088.0200]: That's out of control. 25 | Ses01F_impro01_M011 [087.1500-094.3900]: I don't understand why this is so complicated for people when they get here. It's just a simple form. I just need an ID. 26 | Ses01F_impro01_F013 [093.6700-097.0218]: How long have you been working here? 27 | Ses01F_impro01_M012 [095.8600-098.6800]: Actually too long. 28 | Ses01F_impro01_F014 [097.8900-102.9600]: Clearly. You know, do you have like a supervisor or something? 29 | Ses01F_impro01_M013 [101.8400-107.8700]: Yeah. Do you want to see my supervisor? Huh? Yeah. 
Do you want to see my supervisor? Fine. I'll be right back. 30 | Ses01F_impro01_F015 [103.9700-106.7100]: That would - I would appreciate that. Yeah. 31 | --------------------------------------------------------------------------------
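Usage sketch (illustrative only, not a file from this repository): the pieces above compose roughly as follows — `padTensor` (V2EM/src/utils.py) right-pads each clip's variable-length feature sequence, and `WrappedTransformerEncoder` (V2EM/src/model/transformer_encoder.py) batches them, masks the padding, and returns one CLS embedding per clip. The shapes below are assumed from the code as shown.

    import torch
    from src.model.transformer_encoder import WrappedTransformerEncoder

    # Two clips with 5 and 3 feature vectors of dim 64, stacked along dim 0,
    # which is the layout WrappedTransformerEncoder.forward expects with `lens`.
    feats = torch.randn(5 + 3, 64)
    lens = [5, 3]

    enc = WrappedTransformerEncoder(dim=64, num_layers=2, num_heads=4)
    cls_per_clip = enc(feats, lens=lens, get_cls=True)
    print(cls_per_clip.shape)  # torch.Size([2, 64]) -- one CLS vector per clip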