├── README.md ├── TX2_object-detection └── ECO-pytorch ├── pyActionRecog ├── utils │ ├── __init__.py │ ├── metrics.py │ ├── video_funcs.py │ └── io.py ├── __init__.py ├── action_caffe.py ├── action_parrots.py ├── benchmark_db.py └── anet_db.py ├── ops ├── __init__.py ├── __pycache__ │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── basic_ops.cpython-35.pyc │ └── basic_ops.cpython-36.pyc ├── utils.py └── basic_ops.py ├── output_acc.jpg ├── output_number.jpg ├── img ├── ONNX │ ├── output_acc.jpg │ └── output_number.jpg ├── split_07_03_order │ ├── output_acc.jpg │ └── output_number.jpg ├── split_07_03_random │ ├── output_acc.jpg │ └── output_number.jpg └── onnx_ecofull_split_07_03_order │ ├── output_acc.jpg │ └── output_number.jpg ├── model └── download_models.sh ├── scripts ├── LICENSE ├── run_ECOLite_kinetics.sh ├── run_demo_ECO_Lite.sh ├── run_demo_ECO_Full.sh ├── run_c3dres_kinetics.sh ├── run_ECOLite_finetune_UCF101.sh └── README.md ├── data ├── ucf101_splits │ └── classInd.txt └── dataset_labels │ └── something-something-v1-labels.csv ├── extract_frames.py ├── get_path.py ├── generate_list.py ├── dataset.py ├── opts.py ├── transformer_model.py ├── gen_dataset_lists.py ├── onnx_infer.py ├── test_model_Violence.py ├── transforms.py ├── transform_video.py ├── log ├── 202104271523.log └── 202104141348.log └── models.py /README.md: -------------------------------------------------------------------------------- 1 | Only part of the code has been uploaded; apologies for the inconvenience. 2 | -------------------------------------------------------------------------------- /TX2_object-detection: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ECO-pytorch/ops/__init__.py: -------------------------------------------------------------------------------- 1 | from ops.basic_ops import * -------------------------------------------------------------------------------- /ECO-pytorch/output_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/output_acc.jpg -------------------------------------------------------------------------------- /ECO-pytorch/output_number.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/output_number.jpg -------------------------------------------------------------------------------- /ECO-pytorch/img/ONNX/output_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/ONNX/output_acc.jpg -------------------------------------------------------------------------------- /ECO-pytorch/img/ONNX/output_number.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/ONNX/output_number.jpg --------------------------------------------------------------------------------
/ECO-pytorch/img/split_07_03_order/output_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/split_07_03_order/output_acc.jpg -------------------------------------------------------------------------------- /ECO-pytorch/img/split_07_03_random/output_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/split_07_03_random/output_acc.jpg -------------------------------------------------------------------------------- /ECO-pytorch/img/split_07_03_order/output_number.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/split_07_03_order/output_number.jpg -------------------------------------------------------------------------------- /ECO-pytorch/ops/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/ops/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /ECO-pytorch/ops/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/ops/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /ECO-pytorch/img/split_07_03_random/output_number.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/split_07_03_random/output_number.jpg -------------------------------------------------------------------------------- /ECO-pytorch/ops/__pycache__/basic_ops.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/ops/__pycache__/basic_ops.cpython-35.pyc -------------------------------------------------------------------------------- /ECO-pytorch/ops/__pycache__/basic_ops.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/ops/__pycache__/basic_ops.cpython-36.pyc -------------------------------------------------------------------------------- /ECO-pytorch/img/onnx_ecofull_split_07_03_order/output_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/onnx_ecofull_split_07_03_order/output_acc.jpg -------------------------------------------------------------------------------- /ECO-pytorch/img/onnx_ecofull_split_07_03_order/output_number.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/DruidsTest/PaddlePaddle-Industrial_Park_Intelligent_Inspection_System/HEAD/ECO-pytorch/img/onnx_ecofull_split_07_03_order/output_number.jpg -------------------------------------------------------------------------------- /ECO-pytorch/model/download_models.sh: -------------------------------------------------------------------------------- 1 | 2 | python gd_download.py 1J2mV0Kl9pWOK0FJ23ApHnJHQ3eq76D8a C3DResNet18_rgb_16F_kinetics_v1.pth.tar 3 | 4 | #python gd_download.py 17rQFmTgB_NSjY8Fq3UaBHOrbZOdwQxFM eco_lite_rgb_16F_kinetics_v1.pth.tar 5 | 6 | #python gd_download.py 1GsMyNxkbEr_2q6pn5cgVo3TMxAFthnrZ eco_lite_rgb_16F_kinetics_v2.pth.tar 7 | 8 | python gd_download.py 17SnoxH8tkuUCvW-4ifa4Hk7ITm93nMqI eco_lite_rgb_16F_kinetics_v3.pth.tar #14October 2018 9 | 10 | -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/__init__.py: -------------------------------------------------------------------------------- 1 | from .benchmark_db import * 2 | 3 | 4 | split_parsers = dict() 5 | split_parsers['ucf101'] = parse_ucf_splits 6 | split_parsers['hmdb51'] = parse_hmdb51_splits 7 | split_parsers['activitynet_1.2'] = lambda : parse_activitynet_splits("1.2") 8 | split_parsers['activitynet_1.3'] = lambda : parse_activitynet_splits("1.3") 9 | 10 | 11 | def parse_split_file(dataset): 12 | sp = split_parsers[dataset] 13 | return sp() 14 | 15 | -------------------------------------------------------------------------------- /ECO-pytorch/ops/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from sklearn.metrics import confusion_matrix 4 | 5 | def get_grad_hook(name): 6 | def hook(m, grad_in, grad_out): 7 | print((name, grad_out[0].data.abs().mean(), grad_in[0].data.abs().mean())) 8 | print((grad_out[0].size())) 9 | print((grad_in[0].size())) 10 | 11 | print((grad_out[0])) 12 | print((grad_in[0])) 13 | 14 | return hook 15 | 16 | 17 | def softmax(scores): 18 | es = np.exp(scores - scores.max(axis=-1)[..., None]) 19 | return es / es.sum(axis=-1)[..., None] 20 | 21 | 22 | def log_add(log_a, log_b): 23 | return log_a + np.log(1 + np.exp(log_b - log_a)) 24 | 25 | 26 | def class_accuracy(prediction, label): 27 | cf = confusion_matrix(prediction, label) 28 | cls_cnt = cf.sum(axis=1) 29 | cls_hit = np.diag(cf) 30 | 31 | cls_acc = cls_hit / cls_cnt.astype(float) 32 | 33 | mean_cls_acc = cls_acc.mean() 34 | 35 | return cls_acc, mean_cls_acc -------------------------------------------------------------------------------- /ECO-pytorch/scripts/LICENSE: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2017, Multimedia Laboratary, The Chinese University of Hong Kong 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 
15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 20 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 22 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 23 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | -------------------------------------------------------------------------------- /ECO-pytorch/ops/basic_ops.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | 4 | 5 | class Identity(torch.nn.Module): 6 | def forward(self, input): 7 | return input 8 | 9 | 10 | class SegmentConsensus(torch.autograd.Function): 11 | 12 | def __init__(self, consensus_type, dim=1): 13 | self.consensus_type = consensus_type 14 | self.dim = dim 15 | self.shape = None 16 | 17 | def forward(self, input_tensor): 18 | self.shape = input_tensor.size() 19 | if self.consensus_type == 'avg': 20 | output = input_tensor.mean(dim=self.dim, keepdim=True) 21 | elif self.consensus_type == 'identity': 22 | output = input_tensor 23 | else: 24 | output = None 25 | 26 | return output 27 | 28 | def backward(self, grad_output): 29 | if self.consensus_type == 'avg': 30 | grad_in = grad_output.expand(self.shape) / float(self.shape[self.dim]) 31 | elif self.consensus_type == 'identity': 32 | grad_in = grad_output 33 | else: 34 | grad_in = None 35 | 36 | return grad_in 37 | 38 | 39 | class ConsensusModule(torch.nn.Module): 40 | 41 | def __init__(self, consensus_type, dim=1): 42 | super(ConsensusModule, self).__init__() 43 | self.consensus_type = consensus_type if consensus_type != 'rnn' else 'identity' 44 | self.dim = dim 45 | 46 | def forward(self, input): 47 | return SegmentConsensus(self.consensus_type, self.dim)(input) 48 | -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/utils/metrics.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides some utils for calculating metrics 3 | """ 4 | import numpy as np 5 | from sklearn.metrics import average_precision_score, confusion_matrix 6 | 7 | 8 | def softmax(raw_score, T=1): 9 | exp_s = np.exp((raw_score - raw_score.max(axis=-1)[..., None])*T) 10 | sum_s = exp_s.sum(axis=-1) 11 | return exp_s / sum_s[..., None] 12 | 13 | 14 | def top_k_acc(lb_set, scores, k=3): 15 | idx = np.argsort(scores)[-k:] 16 | return len(lb_set.intersection(idx)), len(lb_set) 17 | 18 | 19 | def top_k_hit(lb_set, scores, k=3): 20 | idx = np.argsort(scores)[-k:] 21 | return len(lb_set.intersection(idx)) > 0, 1 22 | 23 | 24 | def top_3_accuracy(score_dict, video_list): 25 | return top_k_accuracy(score_dict, video_list, 3) 26 | 27 | 28 | def top_k_accuracy(score_dict, video_list, k): 29 | video_labels = [set([i.num_label for i in v.instances]) for v in video_list] 30 | 31 | video_top_k_acc = np.array( 32 | [top_k_hit(lb, score_dict[v.id], k=k) for v, lb in zip(video_list, video_labels) 33 | if v.id in score_dict]) 34 | 35 | tmp = 
video_top_k_acc.sum(axis=0).astype(float) 36 | top_k_acc = tmp[0] / tmp[1] 37 | 38 | return top_k_acc 39 | 40 | 41 | def video_mean_ap(score_dict, video_list): 42 | avail_video_labels = [set([i.num_label for i in v.instances]) for v in video_list if 43 | v.id in score_dict] 44 | pred_array = np.array([score_dict[v.id] for v in video_list if v.id in score_dict]) 45 | gt_array = np.zeros(pred_array.shape) 46 | 47 | for i in xrange(pred_array.shape[0]): 48 | gt_array[i, list(avail_video_labels[i])] = 1 49 | mean_ap = average_precision_score(gt_array, pred_array, average='macro') 50 | return mean_ap 51 | 52 | 53 | def mean_class_accuracy(scores, labels): 54 | pred = np.argmax(scores, axis=1) 55 | cf = confusion_matrix(labels, pred).astype(float) 56 | 57 | cls_cnt = cf.sum(axis=1) 58 | cls_hit = np.diag(cf) 59 | 60 | return np.mean(cls_hit/cls_cnt) -------------------------------------------------------------------------------- /ECO-pytorch/data/ucf101_splits/classInd.txt: -------------------------------------------------------------------------------- 1 | 1 ApplyEyeMakeup 2 | 2 ApplyLipstick 3 | 3 Archery 4 | 4 BabyCrawling 5 | 5 BalanceBeam 6 | 6 BandMarching 7 | 7 BaseballPitch 8 | 8 Basketball 9 | 9 BasketballDunk 10 | 10 BenchPress 11 | 11 Biking 12 | 12 Billiards 13 | 13 BlowDryHair 14 | 14 BlowingCandles 15 | 15 BodyWeightSquats 16 | 16 Bowling 17 | 17 BoxingPunchingBag 18 | 18 BoxingSpeedBag 19 | 19 BreastStroke 20 | 20 BrushingTeeth 21 | 21 CleanAndJerk 22 | 22 CliffDiving 23 | 23 CricketBowling 24 | 24 CricketShot 25 | 25 CuttingInKitchen 26 | 26 Diving 27 | 27 Drumming 28 | 28 Fencing 29 | 29 FieldHockeyPenalty 30 | 30 FloorGymnastics 31 | 31 FrisbeeCatch 32 | 32 FrontCrawl 33 | 33 GolfSwing 34 | 34 Haircut 35 | 35 Hammering 36 | 36 HammerThrow 37 | 37 HandstandPushups 38 | 38 HandstandWalking 39 | 39 HeadMassage 40 | 40 HighJump 41 | 41 HorseRace 42 | 42 HorseRiding 43 | 43 HulaHoop 44 | 44 IceDancing 45 | 45 JavelinThrow 46 | 46 JugglingBalls 47 | 47 JumpingJack 48 | 48 JumpRope 49 | 49 Kayaking 50 | 50 Knitting 51 | 51 LongJump 52 | 52 Lunges 53 | 53 MilitaryParade 54 | 54 Mixing 55 | 55 MoppingFloor 56 | 56 Nunchucks 57 | 57 ParallelBars 58 | 58 PizzaTossing 59 | 59 PlayingCello 60 | 60 PlayingDaf 61 | 61 PlayingDhol 62 | 62 PlayingFlute 63 | 63 PlayingGuitar 64 | 64 PlayingPiano 65 | 65 PlayingSitar 66 | 66 PlayingTabla 67 | 67 PlayingViolin 68 | 68 PoleVault 69 | 69 PommelHorse 70 | 70 PullUps 71 | 71 Punch 72 | 72 PushUps 73 | 73 Rafting 74 | 74 RockClimbingIndoor 75 | 75 RopeClimbing 76 | 76 Rowing 77 | 77 SalsaSpin 78 | 78 ShavingBeard 79 | 79 Shotput 80 | 80 SkateBoarding 81 | 81 Skiing 82 | 82 Skijet 83 | 83 SkyDiving 84 | 84 SoccerJuggling 85 | 85 SoccerPenalty 86 | 86 StillRings 87 | 87 SumoWrestling 88 | 88 Surfing 89 | 89 Swing 90 | 90 TableTennisShot 91 | 91 TaiChi 92 | 92 TennisSwing 93 | 93 ThrowDiscus 94 | 94 TrampolineJumping 95 | 95 Typing 96 | 96 UnevenBars 97 | 97 VolleyballSpiking 98 | 98 WalkingWithDog 99 | 99 WallPushups 100 | 100 WritingOnBoard 101 | 101 YoYo 102 | -------------------------------------------------------------------------------- /ECO-pytorch/extract_frames.py: -------------------------------------------------------------------------------- 1 | 2 | import threading 3 | import cv2 4 | import os 5 | import sys 6 | from glob import glob 7 | 8 | 9 | 10 | 11 | def dump_frames(vid_path,out_path,video_flag): 12 | 13 | video = cv2.VideoCapture(vid_path) 14 | vid_name = os.path.basename(vid_path) 15 | # vid_name = out_path.split('/')[-1] 16 | 
print(vid_name) 17 | 18 | try: 19 | os.mkdir(out_path) 20 | except OSError: 21 | pass 22 | file_list = [] 23 | i=1 24 | while(True): 25 | 26 | ret, frame = video.read() 27 | # print(frame) 28 | if ret is False: 29 | break 30 | # print('{}/{:06d}.jpg'.format(out_full_path, i)) 31 | cv2.imwrite('{}/{:05d}.jpg'.format(out_path, i), frame) 32 | access_path = '{}/{:05d}.jpg'.format(vid_name, i) 33 | i=i+1 34 | file_list.append(access_path) 35 | print('{} done'.format(vid_name)) 36 | sys.stdout.flush() 37 | return file_list 38 | 39 | 40 | 41 | def extract_frame(abnormal_video): 42 | 43 | NUM_THREADS = 20 44 | VIDEO_ROOT = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut_5s/for4/' + abnormal_video 45 | FRAME_ROOT = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/for4/' + abnormal_video # Directory for extracted frames 46 | 47 | try: 48 | os.mkdir(FRAME_ROOT) 49 | except OSError: 50 | pass 51 | 52 | cut5s_clips = sorted(glob(VIDEO_ROOT+'/*.mp4')) 53 | flag_video = 3651 54 | for i,cut5s_clip in enumerate(cut5s_clips): 55 | 56 | clip_basename = os.path.basename(cut5s_clip) 57 | # frames_path = FRAME_ROOT + '/' + clip_basename.split('.')[0] 58 | frames_path = FRAME_ROOT + '/' + abnormal_video + '_' +str(flag_video).zfill(5) 59 | print(frames_path) 60 | dump_frames(cut5s_clip, frames_path, flag_video) 61 | flag_video += 1 62 | # if i==0: 63 | # break 64 | 65 | 66 | if __name__ == '__main__': 67 | 68 | 69 | # abnormal_video = 'Escape' 70 | labels = ['Normal'] 71 | 72 | for abnormal_video in labels: 73 | extract_frame(abnormal_video) 74 | 75 | 76 | 77 | -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/utils/video_funcs.py: -------------------------------------------------------------------------------- 1 | """ 2 | This module provides our implementation of different functions to do video-level classification and stream fusion 3 | """ 4 | import numpy as np 5 | from metrics import softmax 6 | 7 | 8 | def default_aggregation_func(score_arr, normalization=True, crop_agg=None): 9 | """ 10 | This is the default function for make video-level prediction 11 | :param score_arr: a 3-dim array with (frame, crop, class) layout 12 | :return: 13 | """ 14 | crop_agg = np.mean if crop_agg is None else crop_agg 15 | if normalization: 16 | return softmax(crop_agg(score_arr, axis=1).mean(axis=0)) 17 | else: 18 | return crop_agg(score_arr, axis=1).mean(axis=0) 19 | 20 | 21 | def top_k_aggregation_func(score_arr, k, normalization=True, crop_agg=None): 22 | crop_agg = np.mean if crop_agg is None else crop_agg 23 | if normalization: 24 | return softmax(np.sort(crop_agg(score_arr, axis=1), axis=0)[-k:, :].mean(axis=0)) 25 | else: 26 | return np.sort(crop_agg(score_arr, axis=1), axis=0)[-k:, :].mean(axis=0) 27 | 28 | 29 | def sliding_window_aggregation_func(score, spans=[1, 2, 4, 8, 16], overlap=0.2, norm=True, fps=1): 30 | """ 31 | This is the aggregation function used for ActivityNet Challenge 2016 32 | :param score: 33 | :param spans: 34 | :param overlap: 35 | :param norm: 36 | :param fps: 37 | :return: 38 | """ 39 | frm_max = score.max(axis=1) 40 | slide_score = [] 41 | 42 | def top_k_pool(scores, k): 43 | return np.sort(scores, axis=0)[-k:, :].mean(axis=0) 44 | 45 | for t_span in spans: 46 | span = t_span * fps 47 | step = int(np.ceil(span * (1-overlap))) 48 | local_agg = [frm_max[i: i+span].max(axis=0) for i in xrange(0, frm_max.shape[0], step)] 49 | k = max(15, len(local_agg)/4) 50 | 
slide_score.append(top_k_pool(np.array(local_agg), k)) 51 | 52 | out_score = np.mean(slide_score, axis=0) 53 | 54 | if norm: 55 | return softmax(out_score) 56 | else: 57 | return out_score 58 | 59 | 60 | def default_fusion_func(major_score, other_scores, fusion_weights, norm=True): 61 | assert len(other_scores) == len(fusion_weights) 62 | out_score = major_score 63 | for s, w in zip(other_scores, fusion_weights): 64 | out_score += s * w 65 | 66 | if norm: 67 | return softmax(out_score) 68 | else: 69 | return out_score 70 | -------------------------------------------------------------------------------- /ECO-pytorch/get_path.py: -------------------------------------------------------------------------------- 1 | import os 2 | import random 3 | from glob import glob 4 | ''' 5 | train_out = open("./test_split_all.txt",'w') 6 | #test_out = open("./test_split_all.txt",'w') 7 | train_lines=[] 8 | #test_lines = [] 9 | with open("./test_split.txt",'r') as infile: 10 | for i,line in enumerate(infile): 11 | print(i,line) 12 | line = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/ERA/'+ line[11:] 13 | print(i,line) 14 | train_lines.append(line) 15 | 16 | #random.shuffle(train_lines) 17 | #print(train_lines) 18 | 19 | for train_line in train_lines: 20 | train_out.write(train_line) 21 | 22 | ''' 23 | train_out = open("./train_split_07.txt",'w') 24 | test_out = open("./test_split_03.txt",'w') 25 | train_lines = [] 26 | test_lines = [] 27 | data_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/UCF12_crime' 28 | train_id = [[1,390,558,627],[1,17,23,40],[1,137,202,352],[1,559,806,1365,1606,1745],[1,80,115,1516],[1,12],[1,2407,3651,3860,3927,4091,4171,4304,4363,4470],[1,553,792,808,817,834],[1,233,334,372,390,426],[1,175],[1,169],[1,74,105,224]] 29 | print(train_id[0]) 30 | test_id = [[391,557,628,657],[18,22,41,46],[138,201,353,418],[560,805,1366,1605,1746,1805],[81,114,1517,2127],[13,16],[2408,3650,3861,3926,4092,4170,4305,4362,4471,4517],[554,791,809,816,835,841],[234,333,373,389,427,442],[176,250],[170,243],[75,104,225,276]] 31 | video_labels = sorted(glob(data_root+'/*')) 32 | for i,video_label in enumerate(video_labels): 33 | videos = sorted(glob(video_label+'/*')) 34 | for j,video in enumerate(videos): 35 | 36 | frames = sorted(glob(video+'/*.jpg')) 37 | frame_number = len(frames) 38 | #print(video,frame_number,i) 39 | train_flag = False 40 | test_flag = False 41 | for k in range(len(train_id[i])): 42 | if(k%2==0): 43 | if((train_id[i][k]-1)<=j<=(train_id[i][k+1]-1)): 44 | train_flag = True 45 | 46 | for k in range(len(test_id[i])): 47 | if(k%2==0): 48 | if((test_id[i][k]-1)<=j<=(test_id[i][k+1]-1)): 49 | test_flag = True 50 | if(train_flag): 51 | train_lines.append(video + " " + str(frame_number) + " " + str(i) + "\n") 52 | if(test_flag): 53 | 54 | test_lines.append(video + " " + str(frame_number) + " " + str(i) + "\n") 55 | print(len(train_lines)) 56 | print(len(test_lines)) 57 | for i,line in enumerate(train_lines): 58 | train_out.write(line) 59 | 60 | for j,line in enumerate(test_lines): 61 | test_out.write(line) 62 | 63 | -------------------------------------------------------------------------------- /ECO-pytorch/scripts/run_ECOLite_kinetics.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ##################################################################### 6 | # Train on local machine 7 | if [ "$1" != "local" ] && [ "$2" != "local" ] && [ "$3" != "local" ]; then 8 | cd 
$PBS_O_WORKDIR 9 | fi 10 | 11 | 12 | ##################################################################### 13 | # Parameters! 14 | mainFolder="net_runs" 15 | subFolder="ECO_lite_run1" 16 | snap_pref="eco_lite" 17 | 18 | train_path="list/kinetics_train.txt" 19 | val_path="list/kinetics_val.txt" 20 | 21 | ############################################# 22 | #--- training hyperparams --- 23 | dataset_name="kinetics" 24 | netType="ECO" 25 | batch_size=15 26 | learning_rate=0.001 27 | num_segments=16 28 | dropout=0.3 29 | iter_size=4 30 | num_workers=5 31 | 32 | ##################################################################### 33 | mkdir -p ${mainFolder} 34 | mkdir -p ${mainFolder}/${subFolder}/training 35 | 36 | echo "Current network folder: " 37 | echo ${mainFolder}/${subFolder} 38 | 39 | 40 | ##################################################################### 41 | # Find the latest checkpoint of network 42 | checkpointIter="$(ls ${mainFolder}/${subFolder}/*checkpoint* 2>/dev/null | grep -o "epoch_[0-9]*_" | sed -e "s/^epoch_//" -e "s/_$//" | xargs printf "%d\n" | sort -V | tail -1 | sed -e "s/^0*//")" 43 | ##################################################################### 44 | 45 | 46 | echo "${checkpointIter}" 47 | 48 | ##################################################################### 49 | # If there is a checkpoint then continue training otherwise train from scratch 50 | if [ "x${checkpointIter}" != "x" ]; then 51 | lastCheckpoint="${subFolder}/${snap_pref}_rgb_epoch_${checkpointIter}_checkpoint.pth.tar" 52 | echo "Continuing from checkpoint ${lastCheckpoint}" 53 | 54 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --lr_steps 15 30 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" --resume ${mainFolder}/${lastCheckpoint} 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 55 | 56 | else 57 | echo "Training with initialization" 58 | 59 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --lr_steps 15 30 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 60 | 61 | fi 62 | 63 | ##################################################################### 64 | 65 | 66 | -------------------------------------------------------------------------------- /ECO-pytorch/scripts/run_demo_ECO_Lite.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | ##################################################################### 4 | # Parameters! 
5 | mainFolder="net_runs" 6 | subFolder="ECO_lite_finetune_UCF101_run1" 7 | snap_pref="eco_lite_finetune_UCF101" 8 | 9 | 10 | 11 | 12 | train_path="list/ucf101_train_split1.txt" 13 | val_path="list/ucf101_val_split1.txt" 14 | 15 | 16 | 17 | n2D_model="nll" 18 | n3D_model="nll" 19 | 20 | nECO_model="ECO_Lite_rgb_model_Kinetics.pth.tar" 21 | ############################################# 22 | #--- training hyperparams --- 23 | dataset_name="ucf101" 24 | netType="ECO" 25 | batch_size=16 26 | learning_rate=0.001 27 | num_segments=16 28 | dropout=0.3 29 | iter_size=4 30 | num_workers=2 31 | 32 | ##################################################################### 33 | mkdir -p ${mainFolder} 34 | mkdir -p ${mainFolder}/${subFolder}/training 35 | 36 | echo "Current network folder: " 37 | echo ${mainFolder}/${subFolder} 38 | 39 | 40 | ##################################################################### 41 | # Find the latest checkpoint of network 42 | checkpointIter="$(ls ${mainFolder}/${subFolder}/*checkpoint* 2>/dev/null | grep -o "epoch_[0-9]*_" | sed -e "s/^epoch_//" -e "s/_$//" | xargs printf "%d\n" | sort -V | tail -1 | sed -e "s/^0*//")" 43 | ##################################################################### 44 | 45 | 46 | echo "${checkpointIter}" 47 | 48 | ##################################################################### 49 | # If there is a checkpoint then continue training otherwise train from scratch 50 | if [ "x${checkpointIter}" != "x" ]; then 51 | lastCheckpoint="${subFolder}/${snap_pref}_rgb_epoch_${checkpointIter}_checkpoint.pth.tar" 52 | echo "Continuing from checkpoint ${lastCheckpoint}" 53 | 54 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --num_saturate 5 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" --resume ${mainFolder}/${lastCheckpoint} 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 55 | 56 | else 57 | echo "Training with initialization" 58 | 59 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --num_saturate 5 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" --net_model2D ${n2D_model} --net_model3D ${n3D_model} --net_modelECO ${nECO_model} 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 60 | 61 | fi 62 | 63 | ##################################################################### 64 | 65 | 66 | -------------------------------------------------------------------------------- /ECO-pytorch/scripts/run_demo_ECO_Full.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ##################################################################### 5 | # Parameters! 
6 | mainFolder="net_runs" 7 | subFolder="ECO_full_finetune_UCF101_run1" 8 | snap_pref="eco_full _finetune_UCF101" 9 | 10 | 11 | 12 | 13 | train_path="list/ucf101_train_split1.txt" 14 | val_path="list/ucf101_val_split1.txt" 15 | 16 | 17 | 18 | n2D_model="nll" 19 | n3D_model="nll" 20 | 21 | nECO_model="ECO_Full_rgb_model_Kinetics.pth.tar" 22 | ############################################# 23 | #--- training hyperparams --- 24 | dataset_name="ucf101" 25 | netType="ECOfull" 26 | batch_size=16 27 | learning_rate=0.001 28 | num_segments=16 29 | dropout=0 30 | iter_size=4 31 | num_workers=2 32 | 33 | ##################################################################### 34 | mkdir -p ${mainFolder} 35 | mkdir -p ${mainFolder}/${subFolder}/training 36 | 37 | echo "Current network folder: " 38 | echo ${mainFolder}/${subFolder} 39 | 40 | 41 | ##################################################################### 42 | # Find the latest checkpoint of network 43 | checkpointIter="$(ls ${mainFolder}/${subFolder}/*checkpoint* 2>/dev/null | grep -o "epoch_[0-9]*_" | sed -e "s/^epoch_//" -e "s/_$//" | xargs printf "%d\n" | sort -V | tail -1 | sed -e "s/^0*//")" 44 | ##################################################################### 45 | 46 | 47 | echo "${checkpointIter}" 48 | 49 | ##################################################################### 50 | # If there is a checkpoint then continue training otherwise train from scratch 51 | if [ "x${checkpointIter}" != "x" ]; then 52 | lastCheckpoint="${subFolder}/${snap_pref}_rgb_epoch_${checkpointIter}_checkpoint.pth.tar" 53 | echo "Continuing from checkpoint ${lastCheckpoint}" 54 | 55 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --num_saturate 5 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" --resume ${mainFolder}/${lastCheckpoint} 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 56 | 57 | else 58 | echo "Training with initialization" 59 | 60 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --num_saturate 5 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" --net_model2D ${n2D_model} --net_model3D ${n3D_model} --net_modelECO ${nECO_model} 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 61 | 62 | fi 63 | 64 | ##################################################################### 65 | 66 | 67 | -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/action_caffe.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | import caffe 5 | from caffe.io import oversample 6 | import numpy as np 7 | from utils.io import flow_stack_oversample, fast_list2arr 8 | import cv2 9 | 10 | 11 | class CaffeNet(object): 12 | 13 | def __init__(self, net_proto, net_weights, device_id, input_size=None): 14 | caffe.set_mode_gpu() 15 | caffe.set_device(device_id) 16 | self._net = caffe.Net(net_proto, net_weights, caffe.TEST) 17 | 18 | input_shape = 
self._net.blobs['data'].data.shape 19 | 20 | if input_size is not None: 21 | input_shape = input_shape[:2] + input_size 22 | 23 | transformer = caffe.io.Transformer({'data': input_shape}) 24 | 25 | if self._net.blobs['data'].data.shape[1] == 3: 26 | transformer.set_transpose('data', (2, 0, 1)) # move image channels to outermost dimension 27 | transformer.set_mean('data', np.array([104, 117, 123])) # subtract the dataset-mean value in each channel 28 | else: 29 | pass # non RGB data need not use transformer 30 | 31 | self._transformer = transformer 32 | 33 | self._sample_shape = self._net.blobs['data'].data.shape 34 | 35 | def predict_single_frame(self, frame, score_name, over_sample=True, multiscale=None, frame_size=None): 36 | 37 | if frame_size is not None: 38 | frame = [cv2.resize(x, frame_size) for x in frame] 39 | 40 | if over_sample: 41 | if multiscale is None: 42 | os_frame = oversample(frame, (self._sample_shape[2], self._sample_shape[3])) 43 | else: 44 | os_frame = [] 45 | for scale in multiscale: 46 | resized_frame = [cv2.resize(x, (0,0), fx=1.0/scale, fy=1.0/scale) for x in frame] 47 | os_frame.extend(oversample(resized_frame, (self._sample_shape[2], self._sample_shape[3]))) 48 | else: 49 | os_frame = fast_list2arr(frame) 50 | data = fast_list2arr([self._transformer.preprocess('data', x) for x in os_frame]) 51 | 52 | self._net.blobs['data'].reshape(*data.shape) 53 | self._net.reshape() 54 | out = self._net.forward(blobs=[score_name,], data=data) 55 | return out[score_name].copy() 56 | 57 | def predict_single_flow_stack(self, frame, score_name, over_sample=True, frame_size=None): 58 | 59 | if frame_size is not None: 60 | frame = fast_list2arr([cv2.resize(x, frame_size) for x in frame]) 61 | else: 62 | frame = fast_list2arr(frame) 63 | 64 | if over_sample: 65 | os_frame = flow_stack_oversample(frame, (self._sample_shape[2], self._sample_shape[3])) 66 | else: 67 | os_frame = fast_list2arr([frame]) 68 | 69 | data = os_frame - np.float32(128.0) 70 | 71 | self._net.blobs['data'].reshape(*data.shape) 72 | self._net.reshape() 73 | out = self._net.forward(blobs=[score_name,], data=data) 74 | return out[score_name].copy() 75 | 76 | 77 | -------------------------------------------------------------------------------- /ECO-pytorch/scripts/run_c3dres_kinetics.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | ##################################################################### 4 | # Train on local machine 5 | if [ "$1" != "local" ] && [ "$2" != "local" ] && [ "$3" != "local" ]; then 6 | cd $PBS_O_WORKDIR 7 | fi 8 | 9 | 10 | ##################################################################### 11 | # output folder setting! 
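# checkpoints (${snap_pref}_rgb_epoch_*_checkpoint.pth.tar) and training/log.txt for this run are collected under ${mainFolder}/${subFolder}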
12 | mainFolder="net_runs" 13 | subFolder="C3Dresnet18_run1" 14 | snap_pref="C3DResNet18_16F" 15 | 16 | 17 | ### data list path ##### 18 | train_path="/list/kinetics_train.txt" 19 | val_path="/list/kinetics_val.txt" 20 | 21 | ############################################# 22 | #--- training hyperparams --- 23 | dataset_name="kinetics" 24 | netType="C3DRes18" 25 | batch_size=32 26 | learning_rate=0.001 27 | num_segments=16 28 | dropout=0.3 29 | iter_size=4 30 | num_workers=4 31 | 32 | 33 | ##################################################################### 34 | mkdir -p ${mainFolder} 35 | mkdir -p ${mainFolder}/${subFolder}/training 36 | 37 | echo "Current network folder: " 38 | echo ${mainFolder}/${subFolder} 39 | 40 | 41 | ##################################################################### 42 | # Find the latest checkpoint of network 43 | checkpointIter="$(ls ${mainFolder}/${subFolder}/*checkpoint* 2>/dev/null | grep -o "epoch_[0-9]*_" | sed -e "s/^epoch_//" -e "s/_$//" | xargs printf "%d\n" | sort -V | tail -1 | sed -e "s/^0*//")" 44 | ##################################################################### 45 | 46 | 47 | echo "${checkpointIter}" 48 | 49 | 50 | ### Start training - continue training or start from begining ### 51 | ##################################################################### 52 | # If there is a checkpoint then continue training otherwise train from scratch 53 | if [ "x${checkpointIter}" != "x" ]; then 54 | lastCheckpoint="${subFolder}/${snap_pref}_rgb_epoch_${checkpointIter}_checkpoint.pth.tar" 55 | echo "Continuing from checkpoint ${lastCheckpoint}" 56 | 57 | python3 main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --lr_steps 30 60 --epochs 80 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts 3D --no_partialbn --resume ${mainFolder}/${lastCheckpoint} 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 58 | 59 | else 60 | echo "Training with initialization" 61 | 62 | python3 main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --lr_steps 30 60 --epochs 80 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts 3D --no_partialbn 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 63 | 64 | fi 65 | 66 | ##################################################################### 67 | 68 | 69 | -------------------------------------------------------------------------------- /ECO-pytorch/scripts/run_ECOLite_finetune_UCF101.sh: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | ##################################################################### 6 | # Train on local machine 7 | if [ "$1" != "local" ] && [ "$2" != "local" ] && [ "$3" != "local" ]; then 8 | cd $PBS_O_WORKDIR 9 | fi 10 | 11 | 12 | ##################################################################### 13 | # Parameters! 
14 | mainFolder="net_runs" 15 | subFolder="ECO_lite_finetune_UCF101_run1" 16 | snap_pref="eco_lite_finetune_UCF101" 17 | 18 | train_path="list/ucf101_train_split1.txt" 19 | val_path="list/ucf101_val_split1.txt" 20 | 21 | ############################################# 22 | #--- training hyperparams --- 23 | dataset_name="ucf101" 24 | netType="ECO" 25 | batch_size=15 26 | learning_rate=0.001 27 | num_segments=16 28 | dropout=0.3 29 | iter_size=4 30 | num_workers=5 31 | 32 | ##################################################################### 33 | mkdir -p ${mainFolder} 34 | mkdir -p ${mainFolder}/${subFolder}/training 35 | 36 | echo "Current network folder: " 37 | echo ${mainFolder}/${subFolder} 38 | 39 | 40 | ##################################################################### 41 | # Find the latest checkpoint of network 42 | checkpointIter="$(ls ${mainFolder}/${subFolder}/*checkpoint* 2>/dev/null | grep -o "epoch_[0-9]*_" | sed -e "s/^epoch_//" -e "s/_$//" | xargs printf "%d\n" | sort -V | tail -1 | sed -e "s/^0*//")" 43 | ##################################################################### 44 | 45 | 46 | echo "${checkpointIter}" 47 | 48 | ##################################################################### 49 | # If there is a checkpoint then continue training otherwise train from scratch 50 | if [ "x${checkpointIter}" != "x" ]; then 51 | lastCheckpoint="${subFolder}/${snap_pref}_rgb_epoch_${checkpointIter}_checkpoint.pth.tar" 52 | echo "Continuing from checkpoint ${lastCheckpoint}" 53 | 54 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --lr_steps 15 30 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" --resume ${mainFolder}/${lastCheckpoint} 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 55 | 56 | else 57 | echo "Training with initialization" 58 | 59 | python3 -u main.py ${dataset_name} RGB ${train_path} ${val_path} --arch ${netType} --num_segments ${num_segments} --gd 50 --lr ${learning_rate} --lr_steps 15 30 --epochs 40 -b ${batch_size} -i ${iter_size} -j ${num_workers} --dropout ${dropout} --snapshot_pref ${mainFolder}/${subFolder}/${snap_pref} --consensus_type identity --eval-freq 1 --rgb_prefix img_ --pretrained_parts finetune --no_partialbn --nesterov "True" 2>&1 | tee -a ${mainFolder}/${subFolder}/training/log.txt 60 | 61 | fi 62 | 63 | ##################################################################### 64 | 65 | 66 | -------------------------------------------------------------------------------- /ECO-pytorch/scripts/README.md: -------------------------------------------------------------------------------- 1 | #### This is a fork of [Can Zhang](https://github.com/zhang-can/ECO-pytorch)'s PyTorch implementation for the [paper](https://arxiv.org/pdf/1804.09066.pdf): 2 | ##### " ECO: Efficient Convolutional Network for Online Video Understanding, European Conference on Computer Vision (ECCV), 2018." By Mohammadreza Zolfaghari, Kamaljeet Singh, Thomas Brox 3 | 4 | 5 | ### Update 6 | - **2019.3.05**: This is a major update. This update is more robust and we solved some problems in the previous version such as iter_size and ECO Full model definiation. Updating the training procedure (main.py) and providing the pretrained models for ECOLite and ECOFull. 
Please let us know if you find any problems or have suggestions to improve the code. 7 | 8 | 9 | ##### NOTE 10 | 11 | * Trained models on Kinetics dataset for ECO Lite and C3D are provided. 12 | * Pre-trained model for 2D-Net is provided by [tsn-pytorch](https://github.com/yjxiong/tsn-pytorch). 13 | * **Stay tuned for more updates** 14 | 15 | 16 | ### Environment: 17 | * Python 3.5.2 18 | * PyTorch 0.4.1 19 | * TorchVision: 0.2.1 20 | 21 | ### Clone this repo 22 | 23 | ``` 24 | git clone https://github.com/mzolfaghari/ECO-pytorch 25 | ``` 26 | 27 | ### Generate dataset lists 28 | 29 | ```bash 30 | python gen_dataset_lists.py <dataset> <frame_path> 31 | ``` 32 | e.g. python gen_dataset_lists.py something ~/dataset/20bn-something-something-v1/ 33 | 34 | > The dataset should be organized as:
35 | > `<frame_path>/<video_name>/<frame_images>` 36 | 37 | ### Training 38 | 1. Download the initialization and trained models: 39 | 40 | ```Shell 41 | ECO-Lite pretrained model on Kinetics: https://drive.google.com/open?id=1XNIq7byciKgrn011jLBggd2g79jKX4uD 42 | ECO-Full pretrained model on Kinetics: https://drive.google.com/open?id=1ATuN_KctsbFAbcNgWDlETZVsy2vhxZay 43 | 44 | ``` 45 | Other models: 46 | ```Shell 47 | sh model/download_models.sh 48 | ``` 49 | 50 | * If you cannot access Google Drive, please download the pretrained models from [BaiduYun](https://pan.baidu.com/s/1Hx52akJLR_ISfX406bkIog) and put them in the "model" folder. 51 | 52 | 2. Command for training the ECO Lite model: 53 | 54 | ```bash 55 | ./scripts/run_ECOLite_kinetics.sh local 56 | ``` 57 | 58 | 3. For training the C3D network, use the following command: 59 | 60 | ```bash 61 | ./scripts/run_c3dres_kinetics.sh local 62 | ``` 63 | 64 | 4. For finetuning on UCF101, use one of the following commands: 65 | 66 | ```bash 67 | sh run_demo_ECO_Lite.sh local 68 | or 69 | sh run_demo_ECO_Full.sh local 70 | ``` 71 | 72 | ### NOTE 73 | * If you want to train your model from scratch, change the config as follows: 74 | ```bash 75 | --pretrained_parts scratch 76 | ``` 77 | * Configurations are explained in "opts.py" 78 | 79 | #### TODO 80 | 1. Trained models on other datasets 81 | 82 | 83 | #### Citation 84 | If you use this code or ideas from the paper for your research, please cite our paper: 85 | ``` 86 | @inproceedings{ECO_eccv18, 87 | author={Mohammadreza Zolfaghari and 88 | Kamaljeet Singh and 89 | Thomas Brox}, 90 | title={{ECO:} Efficient Convolutional Network for Online Video Understanding}, 91 | booktitle={ECCV}, 92 | year={2018} 93 | } 94 | ``` 95 | 96 | #### Contact 97 | 98 | [Mohammadreza Zolfaghari](https://github.com/mzolfaghari/ECO-pytorch), [Can Zhang](https://github.com/zhang-can/ECO-pytorch) 99 | 100 | Questions can also be left as issues in the repository. We will be happy to answer them. 101 | -------------------------------------------------------------------------------- /ECO-pytorch/generate_list.py: -------------------------------------------------------------------------------- 1 | # processing the raw data of the video datasets (Something-something and jester) 2 | # generate the meta files: 3 | # category.txt: the list of categories.
4 | # train_videofolder.txt: each row contains [videoname num_frames classIDX] 5 | # val_videofolder.txt: same as above 6 | # 7 | # Bolei Zhou, Dec.2 2017 8 | # 9 | # 10 | import os 11 | import pdb 12 | from glob import glob 13 | def write_categories(): 14 | 15 | dataset_name = 'ERA' # 16 | with open('%s-labels.csv'% dataset_name) as f: 17 | lines = f.readlines() 18 | categories = [] 19 | for line in lines: 20 | line = line.rstrip() 21 | categories.append(line) 22 | categories = sorted(categories) 23 | with open('category.txt','w') as f: 24 | f.write('\n'.join(categories)) 25 | 26 | 27 | def write_video_txt(VIDEO_ROOT,flag,filename_output): 28 | video_all_path = sorted(os.listdir(os.path.join(VIDEO_ROOT,flag)),key=str.lower) 29 | print(video_all_path) 30 | output = [] 31 | for i,video_path_index in enumerate(video_all_path): 32 | # print(os.path.join(VIDEO_ROOT,flag,video_path_index)) 33 | video_paths = sorted(os.listdir(os.path.join(VIDEO_ROOT,flag,video_path_index)),key=str.lower) 34 | # print(video_paths) 35 | 36 | # print(video_paths) 37 | for j,video_path in enumerate(video_paths): 38 | 39 | frames_number = len(glob(os.path.join(VIDEO_ROOT,flag,video_path_index,video_path)+'/*')) 40 | output.append('%s %d %d' %("".join(os.path.join(VIDEO_ROOT,flag,video_path_index,video_path).split()),frames_number,i)) 41 | 42 | print(output) 43 | print(filename_output) 44 | with open(filename_output,'w') as f: 45 | f.write('\n'.join(output)) 46 | print(video_all_path) 47 | 48 | 49 | 50 | # VIDEO_ROOT = '../dataset/UCF-crime_frames/' 51 | # flag_train = 'train' 52 | # flag_test = 'test' 53 | # filename_output_train = '../dataset/UCF-crime_frames/train_split.txt' 54 | # filename_output_test = '../dataset/UCF-crime_frames/test_split.txt' 55 | # 56 | # 57 | # write_video_txt(VIDEO_ROOT,flag_train,filename_output_train) 58 | # write_video_txt(VIDEO_ROOT,flag_test,filename_output_test) 59 | 60 | def write_video_path(clip5s_frames_root,flag,output_path): 61 | 62 | all_paths = sorted(glob(clip5s_frames_root+'/'+flag+'/*')) 63 | 64 | output = [] 65 | for i,clip_frame_root in enumerate(all_paths): 66 | print(clip_frame_root) 67 | clip_frame_paths = sorted(glob(clip_frame_root+'/*')) 68 | 69 | for j,clip_frame_path in enumerate(clip_frame_paths): 70 | print(clip_frame_path) 71 | frames_number = len(glob(clip_frame_path + '/*')) 72 | if i==0: 73 | kk=3 74 | if i==1: 75 | kk=6 76 | output.append('%s %d %d' % (clip_frame_path, frames_number, kk)) 77 | 78 | # if i==0: 79 | # break 80 | # print(output) 81 | with open(output_path,'w') as f: 82 | f.write('\n'.join(output)) 83 | 84 | 85 | 86 | 87 | def split_train_test(): 88 | 89 | clip5s_frames_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames' 90 | flag_train = 'UCF12_crime' 91 | flag_test = 'test' 92 | train_output_path = clip5s_frames_root + '/train_split.txt' 93 | test_output_path = clip5s_frames_root + '/test_split.txt' 94 | 95 | write_video_txt(clip5s_frames_root,flag_train,train_output_path) 96 | 97 | def add_fighting_test(): 98 | clip5s_frames_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/RWF_frames' 99 | flag_train = 'test' 100 | train_output_path = clip5s_frames_root + '/test_split.txt' 101 | write_video_path(clip5s_frames_root, flag_train, train_output_path) 102 | 103 | 104 | if __name__ == '__main__': 105 | split_train_test() 106 | # add_fighting_test() -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/utils/io.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def flow_stack_oversample(flow_stack, crop_dims): 5 | """ 6 | This function performs oversampling on flow stacks. 7 | Adapted from pyCaffe's oversample function 8 | :param flow_stack: 9 | :param crop_dims: 10 | :return: 11 | """ 12 | im_shape = np.array(flow_stack.shape[1:]) 13 | stack_depth = flow_stack.shape[0] 14 | crop_dims = np.array(crop_dims) 15 | 16 | h_indices = (0, im_shape[0] - crop_dims[0]) 17 | w_indices = (0, im_shape[1] - crop_dims[1]) 18 | 19 | h_center_offset = (im_shape[0] - crop_dims[0])/2 20 | w_center_offset = (im_shape[1] - crop_dims[1])/2 21 | 22 | crop_ix = np.empty((5, 4), dtype=int) 23 | 24 | cnt = 0 25 | for i in h_indices: 26 | for j in w_indices: 27 | crop_ix[cnt, :] = (i, j, i+crop_dims[0], j+crop_dims[1]) 28 | cnt += 1 29 | crop_ix[4, :] = [h_center_offset, w_center_offset, 30 | h_center_offset+crop_dims[0], w_center_offset+crop_dims[1]] 31 | 32 | crop_ix = np.tile(crop_ix, (2,1)) 33 | 34 | crops = np.empty((10, flow_stack.shape[0], crop_dims[0], crop_dims[1]), 35 | dtype=flow_stack.dtype) 36 | 37 | for ix in xrange(10): 38 | cp = crop_ix[ix] 39 | crops[ix] = flow_stack[:, cp[0]:cp[2], cp[1]:cp[3]] 40 | crops[5:] = crops[5:, :, :, ::-1] 41 | crops[5:, range(0, stack_depth, 2), ...] = 255 - crops[5:, range(0, stack_depth, 2), ...] 42 | return crops 43 | 44 | 45 | def rgb_oversample(image, crop_dims): 46 | """ 47 | Crop images into the four corners, center, and their mirrored versions. 48 | Adapted from Caffe 49 | Parameters 50 | ---------- 51 | image : (H x W x K) ndarray 52 | crop_dims : (height, width) tuple for the crops. 53 | Returns 54 | ------- 55 | crops : (10 x H x W x K) ndarray of crops. 56 | """ 57 | # Dimensions and center. 58 | im_shape = np.array(image.shape) 59 | crop_dims = np.array(crop_dims) 60 | im_center = im_shape[:2] / 2.0 61 | 62 | # Make crop coordinates 63 | h_indices = (0, im_shape[0] - crop_dims[0]) 64 | w_indices = (0, im_shape[1] - crop_dims[1]) 65 | crops_ix = np.empty((5, 4), dtype=int) 66 | curr = 0 67 | for i in h_indices: 68 | for j in w_indices: 69 | crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1]) 70 | curr += 1 71 | crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([ 72 | -crop_dims / 2.0, 73 | crop_dims / 2.0 74 | ]) 75 | crops_ix = np.tile(crops_ix, (2, 1)) 76 | 77 | # Extract crops 78 | crops = np.empty((10 , crop_dims[0], crop_dims[1], 79 | im_shape[-1]), dtype=np.float32) 80 | 81 | ix = 0 82 | for crop in crops_ix: 83 | crops[ix] = image[crop[0]:crop[2], crop[1]:crop[3], :] 84 | ix += 1 85 | crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :] # flip for mirrors 86 | return crops 87 | 88 | 89 | def rgb_to_parrots(frame, oversample=True, mean_val=None, crop_size=None): 90 | """ 91 | Pre-process the rgb frame for Parrots input 92 | """ 93 | if mean_val is None: 94 | mean_val = [104, 117, 123] 95 | if not oversample: 96 | ret_frame = (frame - mean_val).transpose((2, 0, 1)) 97 | return ret_frame[None, ...] 98 | else: 99 | crops = rgb_oversample(frame, crop_size) - mean_val 100 | ret_frames = crops.transpose((0, 3, 1, 2)) 101 | return ret_frames 102 | 103 | 104 | def fast_list2arr(data, offset=None, dtype=None): 105 | """ 106 | Convert a list of numpy arrays with the same size to a large numpy array. 
107 | This is way more efficient than directly using numpy.array() 108 | See 109 | https://github.com/obspy/obspy/wiki/Known-Python-Issues 110 | :param data: [numpy.array] 111 | :param offset: array to be subtracted from the each array. 112 | :param dtype: data type 113 | :return: numpy.array 114 | """ 115 | num = len(data) 116 | out_data = np.empty((num,)+data[0].shape, dtype=dtype if dtype else data[0].dtype) 117 | for i in xrange(num): 118 | out_data[i] = data[i] - offset if offset else data[i] 119 | return out_data 120 | 121 | -------------------------------------------------------------------------------- /ECO-pytorch/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | 3 | from PIL import Image 4 | import os 5 | import os.path 6 | import numpy as np 7 | from numpy.random import randint 8 | 9 | class VideoRecord(object): 10 | def __init__(self, row): 11 | self._data = row 12 | 13 | @property 14 | def path(self): 15 | return self._data[0] 16 | 17 | @property 18 | def num_frames(self): 19 | return int(self._data[1]) 20 | 21 | @property 22 | def label(self): 23 | return int(self._data[2]) 24 | 25 | 26 | class TSNDataSet(data.Dataset): 27 | def __init__(self, root_path, list_file, 28 | num_segments=3, new_length=1, modality='RGB', 29 | image_tmpl='img_{:05d}.jpg', transform=None, 30 | force_grayscale=False, random_shift=True, test_mode=False): 31 | 32 | self.root_path = root_path 33 | self.list_file = list_file 34 | self.num_segments = num_segments 35 | self.new_length = new_length 36 | self.modality = modality 37 | self.image_tmpl = image_tmpl 38 | self.transform = transform 39 | self.random_shift = random_shift 40 | self.test_mode = test_mode 41 | 42 | if self.modality == 'RGBDiff': 43 | self.new_length += 1# Diff needs one more image to calculate diff 44 | 45 | self._parse_list() 46 | 47 | def _load_image(self, directory, idx): 48 | if self.modality == 'RGB' or self.modality == 'RGBDiff': 49 | return [Image.open(os.path.join(directory, self.image_tmpl.format(idx))).convert('RGB')] 50 | elif self.modality == 'Flow': 51 | x_img = Image.open(os.path.join(directory, self.image_tmpl.format('x', idx))).convert('L') 52 | y_img = Image.open(os.path.join(directory, self.image_tmpl.format('y', idx))).convert('L') 53 | 54 | return [x_img, y_img] 55 | 56 | def _parse_list(self): 57 | self.video_list = [VideoRecord(x.strip().split(' ')) for x in open(self.list_file)] 58 | 59 | def _sample_indices(self, record): 60 | """ 61 | 62 | :param record: VideoRecord 63 | :return: list 64 | """ 65 | 66 | average_duration = (record.num_frames - self.new_length + 1) // self.num_segments 67 | if average_duration > 0: 68 | offsets = np.multiply(list(range(self.num_segments)), average_duration) + randint(average_duration, size=self.num_segments) 69 | elif record.num_frames > self.num_segments: 70 | offsets = np.sort(randint(record.num_frames - self.new_length + 1, size=self.num_segments)) 71 | else: 72 | offsets = np.zeros((self.num_segments,)) 73 | return offsets + 1 74 | 75 | def _get_val_indices(self, record): 76 | if record.num_frames > self.num_segments + self.new_length - 1: 77 | tick = (record.num_frames - self.new_length + 1) / float(self.num_segments) 78 | offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)]) 79 | else: 80 | offsets = np.zeros((self.num_segments,)) 81 | return offsets + 1 82 | 83 | def _get_test_indices(self, record): 84 | 85 | tick = (record.num_frames - self.new_length + 1) / 
float(self.num_segments) 86 | 87 | offsets = np.array([int(tick / 2.0 + tick * x) for x in range(self.num_segments)]) 88 | 89 | return offsets + 1 90 | 91 | def __getitem__(self, index): 92 | record = self.video_list[index] 93 | 94 | if not self.test_mode: 95 | segment_indices = self._sample_indices(record) if self.random_shift else self._get_val_indices(record) 96 | else: 97 | segment_indices = self._get_test_indices(record) 98 | 99 | return self.get(record, segment_indices) 100 | 101 | def get(self, record, indices): 102 | 103 | images = list() 104 | for seg_ind in indices: 105 | p = int(seg_ind) 106 | for i in range(self.new_length): 107 | seg_imgs = self._load_image(record.path, p) 108 | images.extend(seg_imgs) 109 | if p < record.num_frames: 110 | p += 1 111 | 112 | process_data = self.transform(images) 113 | return process_data, record.label 114 | 115 | def __len__(self): 116 | return len(self.video_list) 117 | -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/action_parrots.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | 4 | import cv2 5 | from utils.io import flow_stack_oversample, rgb_to_parrots 6 | import pyparrots.dnn as dnn 7 | 8 | 9 | class ParrotsNet(object): 10 | 11 | def __init__(self, parrots_session_file, input_size=None): 12 | 13 | self._parrots_runner = dnn.Runner(parrots_session_file, extract=True) 14 | self._parrots_runner.setup() 15 | self._parrots_session = self._parrots_runner.session 16 | 17 | with self._parrots_session.flow('main') as flow: 18 | input_shape = flow.get_data_spec('data').shape[::-1] 19 | 20 | if input_size is not None: 21 | input_shape = input_shape[:2] + input_size 22 | 23 | self._sample_shape = input_shape 24 | self._channel_mean = [104, 117, 123] 25 | 26 | def predict_rgb_frame_list(self, frame_list, 27 | score_name, over_sample=True, 28 | multiscale=None, frame_size=None): 29 | 30 | if frame_size is not None: 31 | frame_list = [cv2.resize(x, frame_size) for x in frame_list] 32 | 33 | if over_sample: 34 | if multiscale is None: 35 | os_frame = np.concatenate([rgb_to_parrots(x, mean_val=self._channel_mean, 36 | crop_size=(self._sample_shape[2], self._sample_shape[3])) 37 | for x in frame_list], axis=0) 38 | else: 39 | os_frame = [] 40 | for scale in multiscale: 41 | resized_frame_list = [cv2.resize(x, (0, 0), fx=1.0 / scale, fy=1.0 / scale) for x in frame_list] 42 | os_frame.extend(np.concatenate([rgb_to_parrots(x, mean_val=self._channel_mean, 43 | crop_size=(self._sample_shape[2], self._sample_shape[3])) 44 | for x in resized_frame_list])) 45 | os_frame = np.concatenate(os_frame, axis=0) 46 | else: 47 | os_frame = rgb_to_parrots(False) 48 | 49 | bs = self._sample_shape[0] 50 | 51 | feed_data = np.zeros(self._sample_shape) 52 | 53 | score_list = [] 54 | for offset in xrange(0, os_frame.shape[0], bs): 55 | step = min(bs, os_frame.shape[0]-offset) 56 | feed_data[:step, ...] = os_frame[offset:offset+step, ...] 
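# The (possibly partially filled) batch is pushed through the Parrots session
# below; the score blob is transposed so that rows index samples, and only the
# first `step` rows are kept when the last batch is not full.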
57 | 58 | with self._parrots_session.flow("main") as flow: 59 | flow.set_input('data', feed_data.astype(np.float32, order='C')) 60 | flow.forward() 61 | score_list.append(flow.data(score_name).value().T[:step]) 62 | 63 | if over_sample: 64 | tmp = np.concatenate(score_list, axis=0) 65 | return tmp.reshape((len(os_frame) / 10, 10, score_list[0].shape[-1])) 66 | else: 67 | return np.concatenate(score_list, axis=0) 68 | 69 | def predict_flow_stack_list(self, flow_stack_list, score_name, over_sample=True, frame_size=None): 70 | 71 | if frame_size is not None: 72 | for i in xrange(len(flow_stack_list)): 73 | flow_stack_list[i] = np.array([cv2.resize(x, frame_size) for x in flow_stack_list[i]]) 74 | 75 | if over_sample: 76 | tmp = [flow_stack_oversample(stack, (self._sample_shape[2], self._sample_shape[3])) 77 | for stack in flow_stack_list] 78 | os_frame = np.concatenate(tmp, axis=0) 79 | else: 80 | os_frame = np.array(flow_stack_list) 81 | 82 | os_frame -= 128 83 | 84 | bs = self._sample_shape[0] 85 | feed_data = np.zeros(self._sample_shape) 86 | 87 | score_list = [] 88 | for offset in xrange(0, os_frame.shape[0], bs): 89 | step = min(bs, os_frame.shape[0] - offset) 90 | feed_data[:step, ...] = os_frame[offset:offset + step, ...] 91 | 92 | with self._parrots_session.flow("main") as flow: 93 | flow.set_input('data', feed_data.astype(np.float32, order='C')) 94 | flow.forward() 95 | score_list.append(flow.data(score_name).value().T[:step]) 96 | 97 | if over_sample: 98 | tmp = np.concatenate(score_list, axis=0) 99 | return tmp.reshape((len(os_frame) / 10, 10, score_list[0].shape[-1])) 100 | else: 101 | return np.concatenate(score_list, axis=0) 102 | -------------------------------------------------------------------------------- /ECO-pytorch/opts.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | parser = argparse.ArgumentParser(description="PyTorch implementation of ECO") 3 | parser.add_argument('--dataset', type=str, default='UCF12',choices=['UCF12','UCFcrime','RWF','ViolentFlows','ERA','ucf101', 'hmdb51', 'kinetics', 'something','jhmdb']) 4 | parser.add_argument('--modality', type=str,default='RGB', choices=['RGB', 'Flow', 'RGBDiff']) 5 | parser.add_argument('--train_list', type=str,default='/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/train_split_07.txt') 6 | parser.add_argument('--val_list', type=str,default='/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/test_split_03.txt') 7 | parser.add_argument('--net_model', type=str, default=None) 8 | parser.add_argument('--net_model2D', type=str, default= None) 9 | # parser.add_argument('--net_modelECO', type=str, default='./checkpoint/UCFcrime_expansion/ECOFULL/_rgb_model_best.pth.tar') 10 | parser.add_argument('--net_modelECO', type=str, default='./model/ECO_Lite_rgb_model_Kinetics.pth.tar') 11 | parser.add_argument('--net_model3D', type=str, default=None) 12 | # ========================= Model Configs ========================== 13 | parser.add_argument('--arch', type=str, default="ECO") 14 | parser.add_argument('--num_segments', type=int, default=8) 15 | parser.add_argument('--consensus_type', type=str, default='avg', 16 | choices=['avg', 'max', 'topk', 'identity', 'rnn', 'cnn']) 17 | parser.add_argument('--pretrained_parts', type=str, default='finetune', 18 | choices=['scratch', '2D', '3D', 'both','finetune']) 19 | parser.add_argument('--k', type=int, default=3) 20 | 21 | parser.add_argument('--dropout', '--do', 
default=0.6, type=float, 22 | metavar='DO', help='dropout ratio (default: 0.6)') 23 | parser.add_argument('--loss_type', type=str, default="nll", 24 | choices=['nll']) 25 | 26 | # ========================= Learning Configs ========================== 27 | parser.add_argument('--epochs', default=60, type=int, metavar='N', 28 | help='number of total epochs to run') 29 | parser.add_argument('-b', '--batch-size', default=16, type=int, 30 | metavar='N', help='mini-batch size (default: 16)') 31 | parser.add_argument('-i', '--iter-size', default=5, type=int, 32 | metavar='N', help='number of iterations before one update') 33 | parser.add_argument('--lr', '--learning-rate', default=0.001, type=float, 34 | metavar='LR', help='initial learning rate') 35 | parser.add_argument('--lr_steps', default=[20,40], type=float, nargs="+", 36 | metavar='LRSteps', help='epochs to decay learning rate by 10') 37 | parser.add_argument('--momentum', default=0.9, type=float, metavar='M', 38 | help='momentum') 39 | parser.add_argument('--weight-decay', '--wd', default=5e-4, type=float, 40 | metavar='W', help='weight decay (default: 5e-4)') 41 | parser.add_argument('--clip-gradient', '--gd', default=None, type=float, 42 | metavar='W', help='gradient norm clipping (default: disabled)') 43 | parser.add_argument('--no_partialbn', '--npb', default=False, action="store_true") 44 | parser.add_argument('--nesterov', default=False) 45 | parser.add_argument('--num_saturate', type=int, default=5, 46 | help='if number of epochs that validation Prec@1 saturates, then decrease lr by 10 (default: 5)') 47 | 48 | # ========================= Monitor Configs ========================== 49 | parser.add_argument('--print_freq', '-p', default=10, type=int, 50 | metavar='N', help='print frequency (default: 10)') 51 | parser.add_argument('--eval-freq', '-ef', default=5, type=int, 52 | metavar='N', help='evaluation frequency (default: 5)') 53 | 54 | 55 | # ========================= Runtime Configs ========================== 56 | parser.add_argument('-j', '--workers', default=2, type=int, metavar='N', 57 | help='number of data loading workers (default: 2)') 58 | parser.add_argument('--resume', default='', type=str, metavar='PATH', 59 | help='path to latest checkpoint (default: none)') 60 | parser.add_argument('-e', '--evaluate', dest='evaluate', action='store_true', 61 | help='evaluate model on validation set') 62 | parser.add_argument('--snapshot_pref', type=str, default="./checkpoint/") 63 | parser.add_argument('--start-epoch', default=0, type=int, metavar='N', 64 | help='manual epoch number (useful on restarts)') 65 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 66 | parser.add_argument('--flow_prefix', default="", type=str) 67 | parser.add_argument('--rgb_prefix', default="", type=str) 68 | 69 | 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /ECO-pytorch/transformer_model.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | ## Convert a .pth checkpoint to ONNX 4 | import numpy as np 5 | import torch 6 | import torchvision 7 | import argparse 8 | import time 9 | from models import TSN 10 | import warnings 11 | import onnx 12 | import onnxruntime 13 | warnings.filterwarnings("ignore") 14 | 15 | # options 16 | parser = argparse.ArgumentParser(description="Standard video-level testing") 17 | parser.add_argument('--dataset', type=str, default='ViolentFlows') 18 | parser.add_argument('--modality', type=str, default='RGB') 19 |
parser.add_argument('--test_list', type=str,default = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/test_split_03.txt') 20 | parser.add_argument('--weights', type=str,default ='./checkpoint/eco_ucf12_78.084/_rgb_model_best.pth.tar') 21 | parser.add_argument('--num_class',type=int,default=12) 22 | parser.add_argument('--split',type=str,default='train_split') 23 | parser.add_argument('--arch', type=str, default="ECO") 24 | parser.add_argument('--save_scores', type=str, default=None) 25 | parser.add_argument('--test_segments', type=int, default=8) 26 | parser.add_argument('--max_num', type=int, default=-1) 27 | parser.add_argument('--pretrained_parts', type=str, default='finetune', 28 | choices=['scratch', '2D', '3D', 'both','finetune']) 29 | parser.add_argument('--crop_fusion_type', type=str, default='avg', 30 | choices=['avg', 'max', 'topk']) 31 | parser.add_argument('--dropout', type=float, default=0.6) 32 | parser.add_argument('-j', '--workers', default=2, type=int, metavar='N', 33 | help='number of data loading workers (default: 2)') 34 | parser.add_argument('-b', '--batch_size', default=1, type=int, 35 | metavar='N', help='mini-batch size (default: 1)') 36 | parser.add_argument('--no_partialbn', '--npb', default=False, action="store_true") 37 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 38 | parser.add_argument('--flow_prefix', type=str, default='') 39 | 40 | args = parser.parse_args() 41 | def to_numpy(tensor): 42 | return tensor.cpu().numpy() 43 | 44 | def load_torch_model(): 45 | 46 | net = TSN(args.num_class, args.test_segments, args.pretrained_parts, args.modality, 47 | base_model=args.arch, 48 | consensus_type=args.crop_fusion_type, dropout=args.dropout, partial_bn=not args.no_partialbn) 49 | 50 | crop_size = net.crop_size 51 | scale_size = net.scale_size 52 | input_mean = net.input_mean 53 | input_std = net.input_std 54 | policies = net.get_optim_policies() 55 | 56 | checkpoint = torch.load(args.weights, map_location=torch.device('cuda')) 57 | # print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) 58 | 59 | base_dict = {'.'.join(k.split('.')[1:]): v for k, v in list(checkpoint['state_dict'].items())} 60 | 61 | net.load_state_dict(base_dict) 62 | return net 63 | 64 | def torch2onnx(): 65 | 66 | # net = torch.nn.DataParallel(net.cuda(0)) 67 | net = load_torch_model() 68 | net = net.cuda() 69 | input_shape = (3, 224, 224) 70 | # net.eval() 71 | # x = torch.randn(args.batch_size, args.test_segments, *input_shape, device='cuda') 72 | x = torch.randn(1,args.test_segments*3,224,224,device='cuda') 73 | 74 | export_onnx_file = 'ECO_8.onnx' 75 | # torch.onnx.export(net,x,export_onnx_file, 76 | # export_params=True) 77 | torch.onnx.export(net, 78 | x, 79 | export_onnx_file, 80 | verbose=True, 81 | do_constant_folding=True) 82 | 83 | def inferonnx(): 84 | 85 | input_shape = (3, 224, 224) 86 | torch_model = load_torch_model().cuda() 87 | torch_model.eval() 88 | export_onnx_file = 'ECO_8.onnx' 89 | x = torch.randn(args.batch_size, args.test_segments*3, 224,224, device='cuda') 90 | with torch.no_grad(): 91 | output = torch_model(x).cpu().numpy() 92 | print(output) 93 | 94 | 95 | onnx_model = onnx.load(export_onnx_file) 96 | onnx.checker.check_model(onnx_model) # check that the exported model is well-formed 97 | onnx.helper.printable_graph(onnx_model.graph) # print the computation graph 98 | ort_session = onnxruntime.InferenceSession(export_onnx_file) # create an ONNX Runtime inference session 99 | 100 | ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(x)} 101 |
ort_outputs = ort_session.run(None, ort_inputs) 102 | ort_out = ort_outputs[0] 103 | print(ort_out) 104 | 105 | torch_output = np.array(output.flatten(),dtype='float32') 106 | onnx_output = np.array(np.asarray(ort_outputs).flatten(),dtype='float32') 107 | np.testing.assert_almost_equal(torch_output,onnx_output,decimal=3) #判断输出的float 108 | 109 | 110 | if __name__ =='__main__': 111 | 112 | # torch2onnx() 113 | inferonnx() -------------------------------------------------------------------------------- /ECO-pytorch/gen_dataset_lists.py: -------------------------------------------------------------------------------- 1 | # processing the raw data of the video datasets (something-something and jester) 2 | # generate the meta files: 3 | # category.txt: the list of categories. 4 | # train_videofolder.txt: each row contains [videoname num_frames classIDX] 5 | # val_videofolder.txt: same as above 6 | # 7 | # Created by: Can Zhang 8 | # github: @zhang-can, May,27th 2018 9 | # 10 | 11 | import argparse 12 | import os 13 | 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('dataset', type=str, choices=['something', 'jester', 'ucf101', 'hmdb51', 'activitynet_1.2', 'activitynet_1.3']) 16 | parser.add_argument('frame_path', type=str, help="root directory holding the frames") 17 | parser.add_argument('--labels_path', type=str, default='data/dataset_labels/', help="root directory holding the 20bn csv files: labels, train & validation") 18 | parser.add_argument('--out_list_path', type=str, default='data/') 19 | parser.add_argument('--rgb_prefix', type=str, help="prefix of RGB frames", default='img_') 20 | parser.add_argument('--flow_x_prefix', type=str, help="prefix of x direction flow images", default='flow_x') 21 | parser.add_argument('--flow_y_prefix', type=str, help="prefix of y direction flow images", default='flow_y') 22 | parser.add_argument('--num_split', type=int, default=3) 23 | parser.add_argument('--shuffle', action='store_true', default=True) 24 | 25 | args = parser.parse_args() 26 | 27 | dataset = args.dataset 28 | labels_path = args.labels_path 29 | frame_path = args.frame_path 30 | 31 | if dataset == 'something': 32 | 33 | import pdb 34 | 35 | dataset_name = 'something-something-v1' 36 | 37 | print('\nProcessing dataset: {}\n'.format(dataset)) 38 | 39 | print('- Generating {}_category.txt ......'.format(dataset)) 40 | with open(os.path.join(labels_path, '{}-labels.csv'.format(dataset_name))) as f: 41 | lines = f.readlines() 42 | categories = [] 43 | for line in lines: 44 | line = line.rstrip() 45 | categories.append(line) 46 | categories = sorted(categories) 47 | open(os.path.join(args.out_list_path, '{}_category.txt'.format(dataset)),'w').write('\n'.join(categories)) 48 | print('- Saved as:', os.path.join(args.out_list_path, '{}_category.txt!\n'.format(dataset))) 49 | 50 | dict_categories = {} 51 | for i, category in enumerate(categories): 52 | dict_categories[category] = i 53 | 54 | files_input = ['{}-validation.csv'.format(dataset_name),'{}-train.csv'.format(dataset_name)] 55 | files_output = ['{}_val.txt'.format(dataset),'{}_train.txt'.format(dataset)] 56 | for (filename_input, filename_output) in zip(files_input, files_output): 57 | with open(os.path.join(labels_path, filename_input)) as f: 58 | lines = f.readlines() 59 | folders = [] 60 | idx_categories = [] 61 | for line in lines: 62 | line = line.rstrip() 63 | items = line.split(';') 64 | folders.append(items[0]) 65 | idx_categories.append(os.path.join(str(dict_categories[items[1]]))) 66 | output = [] 67 | for i in 
range(len(folders)): 68 | curFolder = folders[i] 69 | curIDX = idx_categories[i] 70 | # counting the number of frames in each video folders 71 | dir_files = os.listdir(os.path.join(frame_path, curFolder)) 72 | output.append('{} {} {}'.format(os.path.join(frame_path, curFolder), len(dir_files), curIDX)) 73 | if i % 1000 == 0: 74 | print('- Generating {} ({}/{})'.format(filename_output, i, len(folders))) 75 | with open(os.path.join(args.out_list_path, filename_output),'w') as f: 76 | f.write('\n'.join(output)) 77 | print('- Saved as:', os.path.join(args.out_list_path, '{}!\n'.format(filename_output))) 78 | 79 | elif dataset == 'ucf101': 80 | 81 | import sys 82 | 83 | from pyActionRecog import parse_directory, build_split_list 84 | from pyActionRecog import parse_split_file 85 | 86 | rgb_p = args.rgb_prefix 87 | flow_x_p = args.flow_x_prefix 88 | flow_y_p = args.flow_y_prefix 89 | num_split = args.num_split 90 | out_path = args.out_list_path 91 | shuffle = args.shuffle 92 | 93 | # operation 94 | print('\nProcessing dataset {}:\n'.format(dataset)) 95 | split_tp = parse_split_file(dataset) 96 | f_info = parse_directory(frame_path, rgb_p, flow_x_p, flow_y_p) 97 | 98 | print('- Writing list files for training/testing') 99 | for i in range(max(num_split, len(split_tp))): 100 | lists = build_split_list(split_tp, f_info, i, shuffle) 101 | open(os.path.join(out_path, '{}_rgb_train_split_{}.txt'.format(dataset, i+1)), 'w').writelines(lists[0][0]) 102 | open(os.path.join(out_path, '{}_rgb_val_split_{}.txt'.format(dataset, i+1)), 'w').writelines(lists[0][1]) 103 | 104 | print('- List files successfully saved to "data/" folder!\n') 105 | 106 | else: 107 | print('"{}" dataset have not been tested yet!'.format(dataset)) -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/benchmark_db.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import fnmatch 3 | import os 4 | import random 5 | from .anet_db import ANetDB 6 | 7 | 8 | def parse_directory(path, rgb_prefix='img_', flow_x_prefix='flow_x_', flow_y_prefix='flow_y_'): 9 | """ 10 | Parse directories holding extracted frames from standard benchmarks 11 | """ 12 | print('- Parse frames under folder {}\n'.format(path)) 13 | frame_folders = glob.glob(os.path.join(path, '*')) 14 | 15 | def count_files(directory, prefix_list): 16 | lst = os.listdir(directory) 17 | cnt_list = [len(fnmatch.filter(lst, x+'*')) for x in prefix_list] 18 | return cnt_list 19 | 20 | # check RGB 21 | rgb_counts = {} 22 | flow_counts = {} 23 | dir_dict = {} 24 | for i,f in enumerate(frame_folders): 25 | all_cnt = count_files(f, (rgb_prefix, flow_x_prefix, flow_y_prefix)) 26 | k = f.split('/')[-1] 27 | rgb_counts[k] = all_cnt[0] 28 | dir_dict[k] = f 29 | 30 | x_cnt = all_cnt[1] 31 | y_cnt = all_cnt[2] 32 | if x_cnt != y_cnt: 33 | raise ValueError('x and y direction have different number of flow images. 
video: '+f) 34 | flow_counts[k] = x_cnt 35 | if i % 1000 == 0: 36 | print('- {}/{} videos parsed'.format(i, len(frame_folders))) 37 | 38 | print('\n- Frame folder analysis done') 39 | return dir_dict, rgb_counts, flow_counts 40 | 41 | 42 | def build_split_list(split_tuple, frame_info, split_idx, shuffle=False): 43 | split = split_tuple[split_idx] 44 | 45 | def build_set_list(set_list): 46 | rgb_list, flow_list = list(), list() 47 | for item in set_list: 48 | frame_dir = frame_info[0][item[0]] 49 | rgb_cnt = frame_info[1][item[0]] 50 | flow_cnt = frame_info[2][item[0]] 51 | rgb_list.append('{} {} {}\n'.format(frame_dir, rgb_cnt, item[1])) 52 | flow_list.append('{} {} {}\n'.format(frame_dir, flow_cnt, item[1])) 53 | if shuffle: 54 | random.shuffle(rgb_list) 55 | random.shuffle(flow_list) 56 | return rgb_list, flow_list 57 | 58 | train_rgb_list, train_flow_list = build_set_list(split[0]) 59 | test_rgb_list, test_flow_list = build_set_list(split[1]) 60 | return (train_rgb_list, test_rgb_list), (train_flow_list, test_flow_list) 61 | 62 | 63 | ## Dataset specific split file parse 64 | def parse_ucf_splits(): 65 | class_ind = [x.strip().split() for x in open('data/ucf101_splits/classInd.txt')] 66 | class_mapping = {x[1]:int(x[0])-1 for x in class_ind} 67 | 68 | def line2rec(line): 69 | items = line.strip().split('/') 70 | label = class_mapping[items[0]] 71 | vid = items[1].split('.')[0] 72 | return vid, label 73 | 74 | splits = [] 75 | for i in range(1, 4): 76 | train_list = [line2rec(x) for x in open('data/ucf101_splits/trainlist{:02d}.txt'.format(i))] 77 | test_list = [line2rec(x) for x in open('data/ucf101_splits/testlist{:02d}.txt'.format(i))] 78 | splits.append((train_list, test_list)) 79 | return splits 80 | 81 | 82 | def parse_hmdb51_splits(): 83 | # load split file 84 | class_files = glob.glob('data/hmdb51_splits/*split*.txt') 85 | 86 | # load class list 87 | class_list = [x.strip() for x in open('data/hmdb51_splits/class_list.txt')] 88 | class_dict = {x: i for i, x in enumerate(class_list)} 89 | 90 | def parse_class_file(filename): 91 | # parse filename parts 92 | filename_parts = filename.split('/')[-1][:-4].split('_') 93 | split_id = int(filename_parts[-1][-1]) 94 | class_name = '_'.join(filename_parts[:-2]) 95 | 96 | # parse class file contents 97 | contents = [x.strip().split() for x in open(filename).readlines()] 98 | train_videos = [ln[0][:-4] for ln in contents if ln[1] == '1'] 99 | test_videos = [ln[0][:-4] for ln in contents if ln[1] == '2'] 100 | 101 | return class_name, split_id, train_videos, test_videos 102 | 103 | class_info_list = map(parse_class_file, class_files) 104 | 105 | splits = [] 106 | for i in range(1, 4): 107 | train_list = [ 108 | (vid, class_dict[cls[0]]) for cls in class_info_list for vid in cls[2] if cls[1] == i 109 | ] 110 | test_list = [ 111 | (vid, class_dict[cls[0]]) for cls in class_info_list for vid in cls[3] if cls[1] == i 112 | ] 113 | splits.append((train_list, test_list)) 114 | return splits 115 | 116 | 117 | def parse_activitynet_splits(version): 118 | db = ANetDB.get_db(version) 119 | train_instance = db.get_subset_instance('training') 120 | val_instance = db.get_subset_instance('validation') 121 | test_instance = db.get_subset_videos('testing') 122 | 123 | splits = [] 124 | 125 | train_list = [(x.name, x.num_label) for x in train_instance] 126 | val_list = [(x.name, x.num_label) for x in val_instance] 127 | test_list = [(x.id, 0) for x in test_instance] 128 | 129 | splits.append((train_list, val_list)) 130 | splits.append((train_list + val_list, 
test_list)) 131 | 132 | return splits -------------------------------------------------------------------------------- /ECO-pytorch/pyActionRecog/anet_db.py: -------------------------------------------------------------------------------- 1 | from .utils import * 2 | 3 | 4 | class Instance(object): 5 | """ 6 | Representing an instance of activity in the videos 7 | """ 8 | 9 | def __init__(self, idx, anno, vid_id, vid_info, name_num_mapping): 10 | self._starting, self._ending = anno['segment'][0], anno['segment'][1] 11 | self._str_label = anno['label'] 12 | self._total_duration = vid_info['duration'] 13 | self._idx = idx 14 | self._vid_id = vid_id 15 | self._file_path = None 16 | 17 | if name_num_mapping: 18 | self._num_label = name_num_mapping[self._str_label] 19 | 20 | @property 21 | def time_span(self): 22 | return self._starting, self._ending 23 | 24 | @property 25 | def covering_ratio(self): 26 | return self._starting / float(self._total_duration), self._ending / float(self._total_duration) 27 | 28 | @property 29 | def num_label(self): 30 | return self._num_label 31 | 32 | @property 33 | def label(self): 34 | return self._str_label 35 | 36 | @property 37 | def name(self): 38 | return '{}_{}'.format(self._vid_id, self._idx) 39 | 40 | @property 41 | def path(self): 42 | if self._file_path is None: 43 | raise ValueError("This instance is not associated to a file on disk. Maybe the file is missing?") 44 | return self._file_path 45 | 46 | @path.setter 47 | def path(self, path): 48 | self._file_path = path 49 | 50 | 51 | class Video(object): 52 | """ 53 | This class represents one video in the activity-net db 54 | """ 55 | def __init__(self, key, info, name_idx_mapping=None): 56 | self._id = key 57 | self._info_dict = info 58 | self._instances = [Instance(i, x, self._id, self._info_dict, name_idx_mapping) 59 | for i, x in enumerate(self._info_dict['annotations'])] 60 | self._file_path = None 61 | 62 | @property 63 | def id(self): 64 | return self._id 65 | 66 | @property 67 | def url(self): 68 | return self._info_dict['url'] 69 | 70 | @property 71 | def instances(self): 72 | return self._instances 73 | 74 | @property 75 | def duration(self): 76 | return self._info_dict['duration'] 77 | 78 | @property 79 | def subset(self): 80 | return self._info_dict['subset'] 81 | 82 | @property 83 | def instance(self): 84 | return self._instances 85 | 86 | @property 87 | def path(self): 88 | if self._file_path is None: 89 | raise ValueError("This video is not associated to a file on disk. 
Maybe the file is missing?") 90 | return self._file_path 91 | 92 | @path.setter 93 | def path(self, path): 94 | self._file_path = path 95 | 96 | 97 | class ANetDB(object): 98 | """ 99 | This class is the abstraction of the activity-net db 100 | """ 101 | 102 | _CONSTRUCTOR_LOCK = object() 103 | 104 | def __init__(self, token): 105 | """ 106 | Disabled constructor 107 | :param token: 108 | :return: 109 | """ 110 | if token is not self._CONSTRUCTOR_LOCK: 111 | raise ValueError("Use get_db to construct an instance, do not directly use the constructor") 112 | 113 | @classmethod 114 | def get_db(cls, version="1.2"): 115 | """ 116 | Build the internal representation of Activity Net databases 117 | We use the alphabetic order to transfer the label string to its numerical index in learning 118 | :param version: 119 | :return: 120 | """ 121 | if version not in ["1.2","1.3"]: 122 | raise ValueError("Unsupported database version {}".format(version)) 123 | 124 | import os 125 | raw_db_file = os.path.join("data/activitynet_splits", 126 | "activity_net.v{}.min.json".format(version.replace('.', '-'))) 127 | 128 | import json 129 | db_data = json.load(open(raw_db_file)) 130 | 131 | me = cls(cls._CONSTRUCTOR_LOCK) 132 | me.version = version 133 | me.prepare_data(db_data) 134 | 135 | return me 136 | 137 | def prepare_data(self, raw_db): 138 | self._version = raw_db['version'] 139 | 140 | # deal with taxonomy 141 | self._taxonomy = raw_db['taxonomy'] 142 | self._parse_taxonomy() 143 | 144 | self._database = raw_db['database'] 145 | self._video_dict = {k: Video(k, v, self._name_idx_table) for k,v in self._database.items()} 146 | 147 | # split testing/training/validation set 148 | self._testing_dict = {k: v for k, v in self._video_dict.items() if v.subset == 'testing'} 149 | self._training_dict = {k: v for k, v in self._video_dict.items() if v.subset == 'training'} 150 | self._validation_dict = {k: v for k, v in self._video_dict.items() if v.subset == 'validation'} 151 | 152 | self._training_inst_dict = {i.name: i for v in self._training_dict.values() for i in v.instances} 153 | self._validation_inst_dict = {i.name: i for v in self._validation_dict.values() for i in v.instances} 154 | 155 | def get_subset_videos(self, subset_name): 156 | if subset_name == 'training': 157 | return self._training_dict.values() 158 | elif subset_name == 'validation': 159 | return self._validation_dict.values() 160 | elif subset_name == 'testing': 161 | return self._testing_dict.values() 162 | else: 163 | raise ValueError("Unknown subset {}".format(subset_name)) 164 | 165 | def get_subset_instance(self, subset_name): 166 | if subset_name == 'training': 167 | return self._training_inst_dict.values() 168 | elif subset_name == 'validation': 169 | return self._validation_inst_dict.values() 170 | else: 171 | raise ValueError("Unknown subset {}".format(subset_name)) 172 | 173 | def get_ordered_label_list(self): 174 | return [self._idx_name_table[x] for x in sorted(self._idx_name_table.keys())] 175 | 176 | def _parse_taxonomy(self): 177 | """ 178 | This function just parse the taxonomy file 179 | It gives alphabetical ordered indices to the classes in competition 180 | :return: 181 | """ 182 | name_dict = {x['nodeName']: x for x in self._taxonomy} 183 | parents = set() 184 | for x in self._taxonomy: 185 | parents.add(x['parentName']) 186 | 187 | # leaf nodes are those without any child 188 | leaf_nodes = [name_dict[x] for x 189 | in list(set(name_dict.keys()).difference(parents))] 190 | sorted_lead_nodes = sorted(leaf_nodes, key=lambda 
l: l['nodeName']) 191 | self._idx_name_table = {i: e['nodeName'] for i, e in enumerate(sorted_lead_nodes)} 192 | self._name_idx_table = {e['nodeName']: i for i, e in enumerate(sorted_lead_nodes)} 193 | self._name_table = {x['nodeName']: x for x in sorted_lead_nodes} 194 | 195 | 196 | if __name__ == '__main__': 197 | db = ANetDB.get_db("1.3") 198 | -------------------------------------------------------------------------------- /ECO-pytorch/onnx_infer.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import numpy as np 5 | import torch.nn.parallel 6 | import torch.optim 7 | from sklearn.metrics import confusion_matrix 8 | 9 | from dataset import TSNDataSet 10 | from models import TSN 11 | from transforms import * 12 | import seaborn as sns 13 | import matplotlib.pyplot as plt 14 | import matplotlib 15 | import onnx 16 | import onnxruntime 17 | matplotlib.use('Agg') 18 | import torch.nn.functional as F 19 | 20 | import warnings 21 | 22 | warnings.filterwarnings("ignore") 23 | 24 | # options 25 | parser = argparse.ArgumentParser( 26 | description="Standard video-level testing") 27 | parser.add_argument('--dataset', type=str, default='UCF12') 28 | parser.add_argument('--modality', type=str, default='RGB') 29 | parser.add_argument('--test_list', type=str,default = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/test_split_03.txt') 30 | parser.add_argument('--weights', type=str,default ='./checkpoint/eco_ucf12_78.084/_rgb_model_best.pth.tar') 31 | parser.add_argument('--split',type=str,default='train07_test03_split') 32 | parser.add_argument('--arch', type=str, default="ECO") 33 | parser.add_argument('--save_scores', type=str, default=None) 34 | parser.add_argument('--test_segments', type=int, default=8) 35 | parser.add_argument('--max_num', type=int, default=-1) 36 | parser.add_argument('--pretrained_parts', type=str, default='finetune', 37 | choices=['scratch', '2D', '3D', 'both','finetune']) 38 | parser.add_argument('--crop_fusion_type', type=str, default='avg', 39 | choices=['avg', 'max', 'topk']) 40 | parser.add_argument('--dropout', type=float, default=0.6) 41 | parser.add_argument('-j', '--workers', default=2, type=int, metavar='N', 42 | help='nupip mber of data loading workers (default: 4)') 43 | parser.add_argument('-b', '--batch-size', default=1, type=int, 44 | metavar='N', help='mini-batch size (default: 256)') 45 | parser.add_argument('--no_partialbn', '--npb', default=False, action="store_true") 46 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 47 | parser.add_argument('--flow_prefix', type=str, default='') 48 | def to_numpy(tensor): 49 | return tensor.cpu().numpy() 50 | args = parser.parse_args() 51 | labels = ['Demonstration','Escape','Explosion','Fighting','Fire','Gather','Normal','Revolt','RoadAccidents','Shooting','Stampede','StreetRobbery'] 52 | def main(): 53 | 54 | num_class = 12 55 | 56 | net = TSN(num_class, args.test_segments, args.pretrained_parts, args.modality, 57 | base_model=args.arch, 58 | consensus_type=args.crop_fusion_type, dropout=args.dropout, partial_bn=not args.no_partialbn) 59 | 60 | crop_size = net.crop_size 61 | scale_size = net.scale_size 62 | input_mean = net.input_mean 63 | input_std = net.input_std 64 | policies = net.get_optim_policies() 65 | 66 | 67 | if args.modality != 'RGBDiff': 68 | 69 | normalize = GroupNormalize(input_mean, input_std) 70 | else: 71 | normalize = IdentityTransform() 72 | 73 | 74 | data_loader = 
torch.utils.data.DataLoader( 75 | TSNDataSet("", args.test_list, num_segments=args.test_segments, 76 | new_length=1, 77 | modality=args.modality, 78 | image_tmpl="{:05d}.jpg" if args.modality in ['RGB','RGBDiff'] else args.flow_prefix + "{}_{:05d}.jpg", 79 | random_shift=False, 80 | transform=torchvision.transforms.Compose([ 81 | GroupScale(int(scale_size)), 82 | GroupCenterCrop(crop_size), 83 | Stack(roll=True), 84 | ToTorchFormatTensor(div=False), 85 | #Stack(roll=(args.arch == 'C3DRes18') or (args.arch == 'ECO') or (args.arch == 'ECOfull') or (args.arch == 'ECO_2FC')), 86 | #ToTorchFormatTensor(div=(args.arch != 'C3DRes18') and (args.arch != 'ECO') and (args.arch != 'ECOfull') and (args.arch != 'ECO_2FC')), 87 | normalize, 88 | ])), 89 | batch_size=args.batch_size, shuffle=False, 90 | num_workers=args.workers, pin_memory=True) 91 | 92 | export_onnx_file = 'ECO_8.onnx' 93 | # torch.onnx.export(net,x,export_onnx_file, 94 | # export_params=True) 95 | # input_shape = (3, 224, 224) 96 | 97 | # x = torch.randn(args.batch_size, args.test_segments, *input_shape) 98 | # onnx_model = onnx.load(export_onnx_file) 99 | # onnx.checker.check_model(onnx_model) 100 | ort_session = onnxruntime.InferenceSession(export_onnx_file) 101 | 102 | 103 | 104 | 105 | data_gen = enumerate(data_loader) 106 | 107 | total_num = len(data_loader.dataset) 108 | 109 | proc_start_time = time.time() 110 | max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset) 111 | video_pred = [] 112 | video_labels = [] 113 | for i, (data, label) in data_gen: 114 | if i >= max_num: 115 | break 116 | #data = data.view(1,16,3,224,224).cuda() 117 | print(data.size()) 118 | data = data.cuda() 119 | ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(data)} 120 | ort_outputs = ort_session.run(None, ort_inputs) 121 | ort_out = torch.from_numpy(ort_outputs[0]) 122 | # print(ort_out) 123 | softmax_output = F.softmax(ort_out) 124 | # rst = eval_video(i, data, label,net,num_class) 125 | # softmax_output = F.softmax(rst[1]) 126 | print(softmax_output.size()) 127 | _, pred = softmax_output.topk(1, 1, True, True) 128 | pred_text = labels[pred.item()] 129 | label_text = labels[label.item()] 130 | video_pred.append(pred.squeeze(1).data.cpu()) 131 | video_labels.append(label) 132 | cnt_time = time.time() - proc_start_time 133 | print('video {} done, total {}/{}, average {:5f} sec/video,true:{} predict: {} score:{:5f}'.format(i+1, i + 1, 134 | total_num, 135 | float(cnt_time) / (i + 1), 136 | label_text, 137 | pred_text, 138 | _.item())) 139 | 140 | 141 | # print(video_pred) 142 | # print(video_labels) 143 | cf = confusion_matrix(video_labels, video_pred).astype(float) 144 | # print(type(cf)) 145 | # 146 | 147 | cls_cnt = cf.sum(axis=1) 148 | 149 | cls_cnt_expand = np.expand_dims(cls_cnt,0).repeat(cf.shape[0],axis=0).T 150 | # print(cls_cnt_expand) 151 | # print(cls_cnt_expand.shape) 152 | acc_cf = cf / (cls_cnt_expand + np.ones((cls_cnt_expand.shape[0],cls_cnt_expand.shape[0]))*0.001) 153 | cls_hit = np.diag(cf) 154 | 155 | cls_acc = cls_hit / (cls_cnt + np.ones(cls_cnt.shape[0])*0.001) 156 | # print(cls_acc) 157 | test_acc = np.sum(cls_hit) / np.sum(cls_cnt+ np.ones(cls_cnt.shape[0])*0.01) 158 | print('TestAccuracy {:.02f}%'.format(test_acc * 100)) 159 | 160 | f,ax = plt.subplots(figsize=(10, 10)) 161 | sns.heatmap(data = cf,square=True,annot=True) 162 | ax.set_title('{} Accuracy:{:.02f}%'.format(args.split,test_acc*100)) 163 | f.savefig('./output_number.jpg') 164 | plt.show() 165 | 166 | f1, ax1 = plt.subplots(figsize=(10, 10)) 
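# Second heatmap: acc_cf is the row-normalised confusion matrix, so each cell
# is the fraction of a true class predicted as each label (per-class recall
# sits on the diagonal), rather than the raw counts plotted above.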
167 | sns.heatmap(data=acc_cf, square=True, annot=True) 168 | ax1.set_title('{} Accuracy:{:.02f}%'.format(args.split, test_acc * 100)) 169 | f1.savefig('./output_acc.jpg') 170 | plt.show() 171 | 172 | def eval_video(i,data,label,net,num_class,): 173 | # i, data, label = video_data 174 | # num_crop = args.test_crops 175 | 176 | if args.modality == 'RGB': 177 | length = 3 178 | elif args.modality == 'Flow': 179 | length = 10 180 | elif args.modality == 'RGBDiff': 181 | length = 18 182 | else: 183 | raise ValueError("Unknown modality "+args.modality) 184 | 185 | rst = net(data) 186 | 187 | return i,rst,label 188 | 189 | 190 | 191 | 192 | 193 | if __name__ == '__main__': 194 | main() 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /ECO-pytorch/data/dataset_labels/something-something-v1-labels.csv: -------------------------------------------------------------------------------- 1 | Holding something 2 | Turning something upside down 3 | Turning the camera left while filming something 4 | Stacking number of something 5 | Turning the camera right while filming something 6 | Opening something 7 | Approaching something with your camera 8 | Picking something up 9 | Pushing something so that it almost falls off but doesn't 10 | Folding something 11 | Moving something away from the camera 12 | Closing something 13 | Moving away from something with your camera 14 | Turning the camera downwards while filming something 15 | Pushing something so that it slightly moves 16 | Turning the camera upwards while filming something 17 | Pretending to pick something up 18 | Showing something to the camera 19 | Moving something up 20 | Plugging something into something 21 | Unfolding something 22 | Putting something onto something 23 | Showing that something is empty 24 | Pretending to put something on a surface 25 | Taking something from somewhere 26 | Putting something next to something 27 | Moving something towards the camera 28 | Showing a photo of something to the camera 29 | Pushing something with something 30 | Throwing something 31 | Pushing something from left to right 32 | Something falling like a feather or paper 33 | Throwing something in the air and letting it fall 34 | Throwing something against something 35 | Lifting something with something on it 36 | Taking one of many similar things on the table 37 | Showing something behind something 38 | Putting something into something 39 | Tearing something just a little bit 40 | Moving something away from something 41 | Tearing something into two pieces 42 | Pushing something from right to left 43 | Holding something next to something 44 | Putting something, something and something on the table 45 | Pretending to take something from somewhere 46 | Moving something closer to something 47 | Pretending to put something next to something 48 | Uncovering something 49 | Something falling like a rock 50 | Putting something and something on the table 51 | Pouring something into something 52 | Moving something down 53 | Pulling something from right to left 54 | Throwing something in the air and catching it 55 | Tilting something with something on it until it falls off 56 | Putting something in front of something 57 | Pretending to turn something upside down 58 | Putting something on a surface 59 | Pretending to throw something 60 | Showing something on top of something 61 | Covering something with something 62 | Squeezing something 63 | Putting something similar to other things that are already on the table 64 | 
Lifting up one end of something, then letting it drop down 65 | Taking something out of something 66 | Moving part of something 67 | Pulling something from left to right 68 | Lifting something up completely without letting it drop down 69 | Attaching something to something 70 | Putting something behind something 71 | Moving something and something closer to each other 72 | Holding something in front of something 73 | Pushing something so that it falls off the table 74 | Holding something over something 75 | Pretending to open something without actually opening it 76 | Removing something, revealing something behind 77 | Hitting something with something 78 | Moving something and something away from each other 79 | Touching (without moving) part of something 80 | Pretending to put something into something 81 | Showing that something is inside something 82 | Lifting something up completely, then letting it drop down 83 | Pretending to take something out of something 84 | Holding something behind something 85 | Laying something on the table on its side, not upright 86 | Poking something so it slightly moves 87 | Pretending to close something without actually closing it 88 | Putting something upright on the table 89 | Dropping something in front of something 90 | Dropping something behind something 91 | Lifting up one end of something without letting it drop down 92 | Rolling something on a flat surface 93 | Throwing something onto a surface 94 | Showing something next to something 95 | Dropping something onto something 96 | Stuffing something into something 97 | Dropping something into something 98 | Piling something up 99 | Letting something roll along a flat surface 100 | Twisting something 101 | Spinning something that quickly stops spinning 102 | Putting number of something onto something 103 | Putting something underneath something 104 | Moving something across a surface without it falling down 105 | Plugging something into something but pulling it right out as you remove your hand 106 | Dropping something next to something 107 | Poking something so that it falls over 108 | Spinning something so it continues spinning 109 | Poking something so lightly that it doesn't or almost doesn't move 110 | Wiping something off of something 111 | Moving something across a surface until it falls down 112 | Pretending to poke something 113 | Putting something that cannot actually stand upright upright on the table, so it falls on its side 114 | Pulling something out of something 115 | Scooping something up with something 116 | Pretending to be tearing something that is not tearable 117 | Burying something in something 118 | Tipping something over 119 | Tilting something with something on it slightly so it doesn't fall down 120 | Pretending to put something onto something 121 | Bending something until it breaks 122 | Letting something roll down a slanted surface 123 | Trying to bend something unbendable so nothing happens 124 | Bending something so that it deforms 125 | Digging something out of something 126 | Pretending to put something underneath something 127 | Putting something on a flat surface without letting it roll 128 | Putting something on the edge of something so it is not supported and falls down 129 | Spreading something onto something 130 | Pretending to put something behind something 131 | Sprinkling something onto something 132 | Something colliding with something and both come to a halt 133 | Pushing something off of something 134 | Putting something that can't roll onto a slanted 
surface, so it stays where it is 135 | Lifting a surface with something on it until it starts sliding down 136 | Pretending or failing to wipe something off of something 137 | Trying but failing to attach something to something because it doesn't stick 138 | Pulling something from behind of something 139 | Pushing something so it spins 140 | Pouring something onto something 141 | Pulling two ends of something but nothing happens 142 | Moving something and something so they pass each other 143 | Pretending to sprinkle air onto something 144 | Putting something that can't roll onto a slanted surface, so it slides down 145 | Something colliding with something and both are being deflected 146 | Pretending to squeeze something 147 | Pulling something onto something 148 | Putting something onto something else that cannot support it so it falls down 149 | Lifting a surface with something on it but not enough for it to slide down 150 | Pouring something out of something 151 | Moving something and something so they collide with each other 152 | Tipping something with something in it over, so something in it falls out 153 | Letting something roll up a slanted surface, so it rolls back down 154 | Pretending to scoop something up with something 155 | Pretending to pour something out of something, but something is empty 156 | Pulling two ends of something so that it gets stretched 157 | Failing to put something into something because something does not fit 158 | Pretending or trying and failing to twist something 159 | Trying to pour something into something, but missing so it spills next to it 160 | Something being deflected from something 161 | Poking a stack of something so the stack collapses 162 | Spilling something onto something 163 | Pulling two ends of something so that it separates into two pieces 164 | Pouring something into something until it overflows 165 | Pretending to spread air onto something 166 | Twisting (wringing) something wet until water comes out 167 | Poking a hole into something soft 168 | Spilling something next to something 169 | Poking a stack of something without the stack collapsing 170 | Putting something onto a slanted surface but it doesn't glide down 171 | Pushing something onto something 172 | Poking something so that it spins around 173 | Spilling something behind something 174 | Poking a hole into some substance 175 | -------------------------------------------------------------------------------- /ECO-pytorch/test_model_Violence.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | 4 | import numpy as np 5 | import torch.nn.parallel 6 | import torch.optim 7 | from sklearn.metrics import confusion_matrix 8 | 9 | from dataset import TSNDataSet 10 | from models import TSN 11 | from transforms import * 12 | import seaborn as sns 13 | import matplotlib.pyplot as plt 14 | import matplotlib 15 | matplotlib.use('Agg') 16 | import torch.nn.functional as F 17 | 18 | import warnings 19 | 20 | warnings.filterwarnings("ignore") 21 | 22 | # options 23 | parser = argparse.ArgumentParser( 24 | description="Standard video-level testing") 25 | parser.add_argument('--dataset', type=str, default='UCF12') 26 | parser.add_argument('--modality', type=str, default='RGB') 27 | parser.add_argument('--test_list', type=str,default = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/test_split_03.txt') 28 | parser.add_argument('--weights', type=str,default 
='./checkpoint/UCF12_07_03_order/_rgb_model_best.pth.tar') 29 | parser.add_argument('--split',type=str,default='train07_test03_split') 30 | parser.add_argument('--arch', type=str, default="ECOfull") 31 | parser.add_argument('--save_scores', type=str, default=None) 32 | parser.add_argument('--test_segments', type=int, default=8) 33 | parser.add_argument('--max_num', type=int, default=-1) 34 | parser.add_argument('--pretrained_parts', type=str, default='finetune', 35 | choices=['scratch', '2D', '3D', 'both','finetune']) 36 | parser.add_argument('--crop_fusion_type', type=str, default='avg', 37 | choices=['avg', 'max', 'topk']) 38 | parser.add_argument('--dropout', type=float, default=0.6) 39 | parser.add_argument('-j', '--workers', default=2, type=int, metavar='N', 40 | help='number of data loading workers (default: 4)') 41 | parser.add_argument('-b', '--batch-size', default=1, type=int, 42 | metavar='N', help='mini-batch size (default: 256)') 43 | parser.add_argument('--no_partialbn', '--npb', default=False, action="store_true") 44 | parser.add_argument('--gpus', nargs='+', type=int, default=None) 45 | parser.add_argument('--flow_prefix', type=str, default='') 46 | 47 | args = parser.parse_args() 48 | labels = ['Demonstration','Escape','Explosion','Fighting','Fire','Gather','Normal','Revolt','RoadAccidents','Shooting','Stampede','StreetRobbery'] 49 | def main(): 50 | if args.dataset == 'ucf101': 51 | num_class = 101 52 | elif args.dataset == 'hmdb51': 53 | num_class = 51 54 | elif args.dataset == 'kinetics': 55 | num_class = 400 56 | elif args.dataset == 'ViolentFlows': 57 | num_class = 2 58 | elif args.dataset == 'UCF12': 59 | num_class = 12 60 | else: 61 | raise ValueError('Unknown dataset '+args.dataset) 62 | print(args.modality,args.arch,args.crop_fusion_type,args.dropout) 63 | # net = TSN(num_class, 1, args.modality, 64 | # base_model=args.arch, 65 | # consensus_type=args.crop_fusion_type, 66 | # dropout=args.dropout) 67 | net = TSN(num_class, args.test_segments, args.pretrained_parts, args.modality, 68 | base_model=args.arch, 69 | consensus_type=args.crop_fusion_type, dropout=args.dropout, partial_bn=not args.no_partialbn) 70 | 71 | crop_size = net.crop_size 72 | scale_size = net.scale_size 73 | input_mean = net.input_mean 74 | input_std = net.input_std 75 | policies = net.get_optim_policies() 76 | 77 | 78 | checkpoint = torch.load(args.weights) 79 | # print("model epoch {} best prec@1: {}".format(checkpoint['epoch'], checkpoint['best_prec1'])) 80 | 81 | base_dict = {'.'.join(k.split('.')[1:]): v for k,v in list(checkpoint['state_dict'].items())} 82 | 83 | net.load_state_dict(base_dict) 84 | 85 | if args.modality != 'RGBDiff': 86 | 87 | normalize = GroupNormalize(input_mean, input_std) 88 | else: 89 | normalize = IdentityTransform() 90 | 91 | 92 | data_loader = torch.utils.data.DataLoader( 93 | TSNDataSet("", args.test_list, num_segments=args.test_segments, 94 | new_length=1, 95 | modality=args.modality, 96 | image_tmpl="{:05d}.jpg" if args.modality in ['RGB','RGBDiff'] else args.flow_prefix + "{}_{:05d}.jpg", 97 | random_shift=False, 98 | transform=torchvision.transforms.Compose([ 99 | GroupScale(int(scale_size)), 100 | GroupCenterCrop(crop_size), 101 | Stack(roll=True), 102 | ToTorchFormatTensor(div=False), 103 | #Stack(roll=(args.arch == 'C3DRes18') or (args.arch == 'ECO') or (args.arch == 'ECOfull') or (args.arch == 'ECO_2FC')), 104 | #ToTorchFormatTensor(div=(args.arch != 'C3DRes18') and (args.arch != 'ECO') and (args.arch != 'ECOfull') and (args.arch != 'ECO_2FC')), 105 | 
normalize, 106 | ])), 107 | batch_size=args.batch_size, shuffle=False, 108 | num_workers=args.workers, pin_memory=True) 109 | 110 | if args.gpus is not None: 111 | devices = [args.gpus[i] for i in range(args.workers)] 112 | print(devices) 113 | else: 114 | devices = list(range(args.workers)) 115 | 116 | 117 | net.eval() 118 | net = torch.nn.DataParallel(net.cuda(devices[0]), device_ids=devices) 119 | 120 | 121 | data_gen = enumerate(data_loader) 122 | 123 | total_num = len(data_loader.dataset) 124 | 125 | proc_start_time = time.time() 126 | max_num = args.max_num if args.max_num > 0 else len(data_loader.dataset) 127 | video_pred = [] 128 | video_labels = [] 129 | for i, (data, label) in data_gen: 130 | if i >= max_num: 131 | break 132 | 133 | rst = eval_video(i, data, label,net,num_class) 134 | print(rst[1]) 135 | softmax_output = F.softmax(rst[1]) 136 | _, pred = softmax_output.topk(1, 1, True, True) 137 | pred_text = labels[pred.item()] 138 | label_text = labels[label.item()] 139 | video_pred.append(pred.squeeze(1).data.cpu()) 140 | video_labels.append(label) 141 | cnt_time = time.time() - proc_start_time 142 | print('video {} done, total {}/{}, average {:5f} sec/video,true:{} predict: {} score:{:5f}'.format(i+1, i + 1, 143 | total_num, 144 | float(cnt_time) / (i + 1), 145 | label_text, 146 | pred_text, 147 | _.item())) 148 | 149 | 150 | # print(video_pred) 151 | # print(video_labels) 152 | cf = confusion_matrix(video_labels, video_pred).astype(float) 153 | # print(type(cf)) 154 | # 155 | 156 | cls_cnt = cf.sum(axis=1) 157 | 158 | cls_cnt_expand = np.expand_dims(cls_cnt,0).repeat(cf.shape[0],axis=0).T 159 | # print(cls_cnt_expand) 160 | # print(cls_cnt_expand.shape) 161 | acc_cf = cf / (cls_cnt_expand + np.ones((cls_cnt_expand.shape[0],cls_cnt_expand.shape[0]))*0.001) 162 | cls_hit = np.diag(cf) 163 | 164 | cls_acc = cls_hit / (cls_cnt + np.ones(cls_cnt.shape[0])*0.001) 165 | # print(cls_acc) 166 | test_acc = np.sum(cls_hit) / np.sum(cls_cnt+ np.ones(cls_cnt.shape[0])*0.01) 167 | print('TestAccuracy {:.02f}%'.format(test_acc * 100)) 168 | 169 | f,ax = plt.subplots(figsize=(10, 10)) 170 | sns.heatmap(data = cf,square=True,annot=True) 171 | ax.set_title('{} Accuracy:{:.02f}%'.format(args.split,test_acc*100)) 172 | f.savefig('./output_number.jpg') 173 | plt.show() 174 | 175 | f1, ax1 = plt.subplots(figsize=(10, 10)) 176 | sns.heatmap(data=acc_cf, square=True, annot=True) 177 | ax1.set_title('{} Accuracy:{:.02f}%'.format(args.split, test_acc * 100)) 178 | f1.savefig('./output_acc.jpg') 179 | plt.show() 180 | 181 | def eval_video(i,data,label,net,num_class,): 182 | # i, data, label = video_data 183 | # num_crop = args.test_crops 184 | 185 | if args.modality == 'RGB': 186 | length = 3 187 | elif args.modality == 'Flow': 188 | length = 10 189 | elif args.modality == 'RGBDiff': 190 | length = 18 191 | else: 192 | raise ValueError("Unknown modality "+args.modality) 193 | 194 | rst = net(data) 195 | 196 | return i,rst,label 197 | 198 | 199 | 200 | 201 | ''' 202 | for i, (data, label) in data_gen: 203 | if i >= max_num: 204 | break 205 | rst = eval_video((i, data, label)) 206 | output.append(rst[1:]) 207 | cnt_time = time.time() - proc_start_time 208 | print('video {} done, total {}/{}, average {} sec/video'.format(i, i+1, 209 | total_num, 210 | float(cnt_time) / (i+1))) 211 | 212 | video_pred = [np.argmax(np.mean(x[0], axis=0)) for x in output] 213 | 214 | video_labels = [x[1] for x in output] 215 | 216 | 217 | cf = confusion_matrix(video_labels, video_pred).astype(float) 218 | 219 | cls_cnt = 
cf.sum(axis=1) 220 | cls_hit = np.diag(cf) 221 | 222 | cls_acc = cls_hit / cls_cnt 223 | 224 | print(cls_acc) 225 | 226 | print('Accuracy {:.02f}%'.format(np.mean(cls_acc) * 100)) 227 | 228 | if args.save_scores is not None: 229 | 230 | # reorder before saving 231 | name_list = [x.strip().split()[0] for x in open(args.test_list)] 232 | 233 | order_dict = {e:i for i, e in enumerate(sorted(name_list))} 234 | 235 | reorder_output = [None] * len(output) 236 | reorder_label = [None] * len(output) 237 | 238 | for i in range(len(output)): 239 | idx = order_dict[name_list[i]] 240 | reorder_output[idx] = output[i] 241 | reorder_label[idx] = video_labels[i] 242 | 243 | np.savez(args.save_scores, scores=reorder_output, labels=reorder_label) 244 | 245 | ''' 246 | 247 | 248 | if __name__ == '__main__': 249 | main() 250 | 251 | 252 | 253 | -------------------------------------------------------------------------------- /ECO-pytorch/transforms.py: -------------------------------------------------------------------------------- 1 | import torchvision 2 | import random 3 | from PIL import Image, ImageOps 4 | import numpy as np 5 | import numbers 6 | import math 7 | import torch 8 | 9 | 10 | class GroupRandomCrop(object): 11 | def __init__(self, size): 12 | if isinstance(size, numbers.Number): 13 | self.size = (int(size), int(size)) 14 | else: 15 | self.size = size 16 | 17 | def __call__(self, img_group): 18 | 19 | w, h = img_group[0].size 20 | th, tw = self.size 21 | 22 | out_images = list() 23 | 24 | x1 = random.randint(0, w - tw) 25 | y1 = random.randint(0, h - th) 26 | 27 | for img in img_group: 28 | assert(img.size[0] == w and img.size[1] == h) 29 | if w == tw and h == th: 30 | out_images.append(img) 31 | else: 32 | out_images.append(img.crop((x1, y1, x1 + tw, y1 + th))) 33 | 34 | return out_images 35 | 36 | 37 | class GroupCenterCrop(object): 38 | def __init__(self, size): 39 | self.worker = torchvision.transforms.CenterCrop(size) 40 | 41 | def __call__(self, img_group): 42 | return [self.worker(img) for img in img_group] 43 | 44 | 45 | class GroupRandomHorizontalFlip(object): 46 | """Randomly horizontally flips the given PIL.Image with a probability of 0.5 47 | """ 48 | def __init__(self, is_flow=False): 49 | self.is_flow = is_flow 50 | 51 | def __call__(self, img_group, is_flow=False): 52 | v = random.random() 53 | if v < 0.5: 54 | ret = [img.transpose(Image.FLIP_LEFT_RIGHT) for img in img_group] 55 | if self.is_flow: 56 | for i in range(0, len(ret), 2): 57 | ret[i] = ImageOps.invert(ret[i]) # invert flow pixel values when flipping 58 | return ret 59 | else: 60 | return img_group 61 | 62 | 63 | class GroupNormalize(object): 64 | def __init__(self, mean, std): 65 | self.mean = mean 66 | self.std = std 67 | 68 | def __call__(self, tensor): 69 | rep_mean = self.mean * (tensor.size()[0]//len(self.mean)) 70 | rep_std = self.std * (tensor.size()[0]//len(self.std)) 71 | 72 | # TODO: make efficient 73 | for t, m, s in zip(tensor, rep_mean, rep_std): 74 | t.sub_(m).div_(s) 75 | 76 | return tensor 77 | 78 | 79 | class GroupScale(object): 80 | """ Rescales the input PIL.Image to the given 'size'. 81 | 'size' will be the size of the smaller edge. 
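A minimal usage sketch (img_group is assumed to be a list of PIL images taken from the same clip):
    resized_group = GroupScale(256)(img_group)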
82 | For example, if height > width, then image will be 83 | rescaled to (size * height / width, size) 84 | size: size of the smaller edge 85 | interpolation: Default: PIL.Image.BILINEAR 86 | """ 87 | 88 | def __init__(self, size, interpolation=Image.BILINEAR): 89 | self.worker = torchvision.transforms.Resize(size, interpolation) 90 | 91 | def __call__(self, img_group): 92 | return [self.worker(img) for img in img_group] 93 | 94 | 95 | class GroupOverSample(object): 96 | def __init__(self, crop_size, scale_size=None): 97 | self.crop_size = crop_size if not isinstance(crop_size, int) else (crop_size, crop_size) 98 | 99 | if scale_size is not None: 100 | self.scale_worker = GroupScale(scale_size) 101 | else: 102 | self.scale_worker = None 103 | 104 | def __call__(self, img_group): 105 | 106 | if self.scale_worker is not None: 107 | img_group = self.scale_worker(img_group) 108 | 109 | image_w, image_h = img_group[0].size 110 | crop_w, crop_h = self.crop_size 111 | 112 | offsets = GroupMultiScaleCrop.fill_fix_offset(False, image_w, image_h, crop_w, crop_h) 113 | oversample_group = list() 114 | for o_w, o_h in offsets: 115 | normal_group = list() 116 | flip_group = list() 117 | for i, img in enumerate(img_group): 118 | crop = img.crop((o_w, o_h, o_w + crop_w, o_h + crop_h)) 119 | normal_group.append(crop) 120 | flip_crop = crop.copy().transpose(Image.FLIP_LEFT_RIGHT) 121 | 122 | if img.mode == 'L' and i % 2 == 0: 123 | flip_group.append(ImageOps.invert(flip_crop)) 124 | else: 125 | flip_group.append(flip_crop) 126 | 127 | oversample_group.extend(normal_group) 128 | oversample_group.extend(flip_group) 129 | return oversample_group 130 | 131 | 132 | class GroupMultiScaleCrop(object): 133 | 134 | def __init__(self, input_size, scales=None, max_distort=1, fix_crop=True, more_fix_crop=True): 135 | self.scales = scales if scales is not None else [1, .875, .75, .66] 136 | self.max_distort = max_distort 137 | self.fix_crop = fix_crop 138 | self.more_fix_crop = more_fix_crop 139 | self.input_size = input_size if not isinstance(input_size, int) else [input_size, input_size] 140 | self.interpolation = Image.BILINEAR 141 | 142 | def __call__(self, img_group): 143 | 144 | im_size = img_group[0].size 145 | 146 | crop_w, crop_h, offset_w, offset_h = self._sample_crop_size(im_size) 147 | crop_img_group = [img.crop((offset_w, offset_h, offset_w + crop_w, offset_h + crop_h)) for img in img_group] 148 | ret_img_group = [img.resize((self.input_size[0], self.input_size[1]), self.interpolation) 149 | for img in crop_img_group] 150 | return ret_img_group 151 | 152 | def _sample_crop_size(self, im_size): 153 | image_w, image_h = im_size[0], im_size[1] 154 | 155 | # find a crop size 156 | base_size = min(image_w, image_h) 157 | crop_sizes = [int(base_size * x) for x in self.scales] 158 | crop_h = [self.input_size[1] if abs(x - self.input_size[1]) < 3 else x for x in crop_sizes] 159 | crop_w = [self.input_size[0] if abs(x - self.input_size[0]) < 3 else x for x in crop_sizes] 160 | 161 | pairs = [] 162 | for i, h in enumerate(crop_h): 163 | for j, w in enumerate(crop_w): 164 | if abs(i - j) <= self.max_distort: 165 | pairs.append((w, h)) 166 | 167 | crop_pair = random.choice(pairs) 168 | if not self.fix_crop: 169 | w_offset = random.randint(0, image_w - crop_pair[0]) 170 | h_offset = random.randint(0, image_h - crop_pair[1]) 171 | else: 172 | w_offset, h_offset = self._sample_fix_offset(image_w, image_h, crop_pair[0], crop_pair[1]) 173 | 174 | return crop_pair[0], crop_pair[1], w_offset, h_offset 175 | 176 | def 
_sample_fix_offset(self, image_w, image_h, crop_w, crop_h): 177 | offsets = self.fill_fix_offset(self.more_fix_crop, image_w, image_h, crop_w, crop_h) 178 | return random.choice(offsets) 179 | 180 | @staticmethod 181 | def fill_fix_offset(more_fix_crop, image_w, image_h, crop_w, crop_h): 182 | w_step = (image_w - crop_w) // 4 183 | h_step = (image_h - crop_h) // 4 184 | 185 | ret = list() 186 | ret.append((0, 0)) # upper left 187 | ret.append((4 * w_step, 0)) # upper right 188 | ret.append((0, 4 * h_step)) # lower left 189 | ret.append((4 * w_step, 4 * h_step)) # lower right 190 | ret.append((2 * w_step, 2 * h_step)) # center 191 | 192 | if more_fix_crop: 193 | ret.append((0, 2 * h_step)) # center left 194 | ret.append((4 * w_step, 2 * h_step)) # center right 195 | ret.append((2 * w_step, 4 * h_step)) # lower center 196 | ret.append((2 * w_step, 0 * h_step)) # upper center 197 | 198 | ret.append((1 * w_step, 1 * h_step)) # upper left quarter 199 | ret.append((3 * w_step, 1 * h_step)) # upper right quarter 200 | ret.append((1 * w_step, 3 * h_step)) # lower left quarter 201 | ret.append((3 * w_step, 3 * h_step)) # lower righ quarter 202 | 203 | return ret 204 | 205 | 206 | class GroupRandomSizedCrop(object): 207 | """Random crop the given PIL.Image to a random size of (0.08 to 1.0) of the original size 208 | and and a random aspect ratio of 3/4 to 4/3 of the original aspect ratio 209 | This is popularly used to train the Inception networks 210 | size: size of the smaller edge 211 | interpolation: Default: PIL.Image.BILINEAR 212 | """ 213 | def __init__(self, size, interpolation=Image.BILINEAR): 214 | self.size = size 215 | self.interpolation = interpolation 216 | 217 | def __call__(self, img_group): 218 | for attempt in range(10): 219 | area = img_group[0].size[0] * img_group[0].size[1] 220 | target_area = random.uniform(0.08, 1.0) * area 221 | aspect_ratio = random.uniform(3. / 4, 4. 
/ 3) 222 | 223 | w = int(round(math.sqrt(target_area * aspect_ratio))) 224 | h = int(round(math.sqrt(target_area / aspect_ratio))) 225 | 226 | if random.random() < 0.5: 227 | w, h = h, w 228 | 229 | if w <= img_group[0].size[0] and h <= img_group[0].size[1]: 230 | x1 = random.randint(0, img_group[0].size[0] - w) 231 | y1 = random.randint(0, img_group[0].size[1] - h) 232 | found = True 233 | break 234 | else: 235 | found = False 236 | x1 = 0 237 | y1 = 0 238 | 239 | if found: 240 | out_group = list() 241 | for img in img_group: 242 | img = img.crop((x1, y1, x1 + w, y1 + h)) 243 | assert(img.size == (w, h)) 244 | out_group.append(img.resize((self.size, self.size), self.interpolation)) 245 | return out_group 246 | else: 247 | # Fallback 248 | scale = GroupScale(self.size, interpolation=self.interpolation) 249 | crop = GroupRandomCrop(self.size) 250 | return crop(scale(img_group)) 251 | 252 | 253 | class Stack(object): 254 | 255 | def __init__(self, roll=False): 256 | self.roll = roll 257 | 258 | def __call__(self, img_group): 259 | if img_group[0].mode == 'L': 260 | return np.concatenate([np.expand_dims(x, 2) for x in img_group], axis=2) 261 | elif img_group[0].mode == 'RGB': 262 | if self.roll: 263 | return np.concatenate([np.array(x)[:, :, ::-1] for x in img_group], axis=2) 264 | else: 265 | return np.concatenate(img_group, axis=2) 266 | 267 | 268 | class ToTorchFormatTensor(object): 269 | """ Converts a PIL.Image (RGB) or numpy.ndarray (H x W x C) in the range [0, 255] 270 | to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] """ 271 | def __init__(self, div=True): 272 | self.div = div 273 | 274 | def __call__(self, pic): 275 | if isinstance(pic, np.ndarray): 276 | # handle numpy array 277 | img = torch.from_numpy(pic).permute(2, 0, 1).contiguous() 278 | else: 279 | # handle PIL Image 280 | img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes())) 281 | img = img.view(pic.size[1], pic.size[0], len(pic.mode)) 282 | # put it from HWC to CHW format 283 | # yikes, this transpose takes 80% of the loading time/CPU 284 | img = img.transpose(0, 1).transpose(0, 2).contiguous() 285 | return img.float().div(255) if self.div else img.float() 286 | 287 | 288 | class IdentityTransform(object): 289 | 290 | def __call__(self, data): 291 | return data 292 | 293 | 294 | if __name__ == "__main__": 295 | trans = torchvision.transforms.Compose([ 296 | GroupScale(256), 297 | GroupRandomCrop(224), 298 | Stack(), 299 | ToTorchFormatTensor(), 300 | GroupNormalize( 301 | mean=[.485, .456, .406], 302 | std=[.229, .224, .225] 303 | )] 304 | ) 305 | 306 | im = Image.open('../tensorflow-model-zoo.torch/lena_299.png') 307 | 308 | color_group = [im] * 3 309 | rst = trans(color_group) 310 | 311 | gray_group = [im.convert('L')] * 9 312 | gray_rst = trans(gray_group) 313 | 314 | trans2 = torchvision.transforms.Compose([ 315 | GroupRandomSizedCrop(256), 316 | Stack(), 317 | ToTorchFormatTensor(), 318 | GroupNormalize( 319 | mean=[.485, .456, .406], 320 | std=[.229, .224, .225]) 321 | ]) 322 | print(trans2(color_group)) 323 | -------------------------------------------------------------------------------- /ECO-pytorch/transform_video.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from glob import glob 4 | import cv2 5 | import os 6 | import shutil 7 | 8 | def cut_video_clip(): 9 | 10 | video_data_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_total/for4' 11 | ann_root = 
'/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_ann/for4' 12 | video_cut_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut/for4' 13 | other_video_cut_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut/other_video' 14 | event_paths = sorted(glob(video_data_root+'/*')) 15 | ann_paths = sorted(glob(ann_root+'/*')) 16 | save_paths = sorted(glob(video_cut_root+'/*')) 17 | # print(event_paths,ann_paths) 18 | other_label_video = [] 19 | labels = [12,9] 20 | for i,event_name in enumerate(event_paths): 21 | video_names = sorted(glob(event_name+'/*.mp4')) 22 | ann_names = sorted(glob(ann_paths[i]+'/*.txt')) 23 | save_path = save_paths[i] 24 | assert len(video_names)==len(ann_names) 25 | for j,ann_name in enumerate(ann_names): 26 | 27 | video_name = video_names[j] 28 | # print(video_name,ann_name) 29 | assert video_name.split('/')[-1].split('.')[0] == ann_name.split('/')[-1].split('.')[0] 30 | anns = np.loadtxt(ann_name).reshape(-1,4) 31 | # print(anns) 32 | if not anns.all(): 33 | print(ann_name) 34 | # print(anns.shape) 35 | 36 | 37 | 38 | for k in range(anns.shape[0]): 39 | 40 | clip_label = anns[k][3] 41 | 42 | video_basename = os.path.basename(video_name) 43 | if clip_label != labels[i]: 44 | other_label_video.append(video_basename) 45 | print('error video label----------------------------------------------') 46 | print(int(clip_label)) 47 | print(other_label_video) 48 | save_clip_name = other_video_cut_root + '/' + str(int(clip_label)) + '/' + video_basename.split('.')[0] + '_{}'.format(k+1)+'.mp4' 49 | print(save_clip_name) 50 | print('___________________________________________________') 51 | # print(len(other_label_video)) 52 | else: 53 | print(video_basename) 54 | save_clip_name = save_path + '/' + video_basename.split('.')[0] + '_{}'.format(k+1)+'.mp4' 55 | print(save_clip_name) 56 | 57 | ## fourcc = cv2.VideoWriter_fourcc('M','J','P','G') 58 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 59 | 60 | cap = cv2.VideoCapture(video_name) 61 | 62 | frames_fps = int(cap.get(5)) 63 | img_width = int(cap.get(3)) 64 | img_height = int(cap.get(4)) 65 | img_size=(img_width,img_height) 66 | video_writer = cv2.VideoWriter(save_clip_name, fourcc, 30, img_size) 67 | # print("frames_fps:{}".format(frames_fps)) 68 | frame_count = 0 69 | start_frame = int(anns[k][1] * frames_fps) 70 | end_frame = int(anns[k][2] * frames_fps) 71 | print(start_frame,end_frame) 72 | frame_diff = end_frame-start_frame 73 | if frame_diff<120: 74 | end_frame = end_frame + (120-frame_diff) + 5 75 | success = True 76 | 77 | while (success): 78 | success, frame = cap.read() 79 | if frame_count>=start_frame and frame_count=0: 104 | for j, ann_name in enumerate(ann_names): 105 | 106 | txt_f = open(ann_name) 107 | lines = txt_f.readlines() 108 | true_lines = [] 109 | for k in range(len(lines)): 110 | if k>0: 111 | true_lines.append(lines[k][1:]) 112 | else: 113 | true_lines.append(lines[k]) 114 | # print(true_lines) 115 | txt_f.close() 116 | # print(ann_name) 117 | write_f = open(ann_name,"w") 118 | 119 | 120 | write_f.writelines(true_lines) 121 | write_f.close() 122 | # while line: 123 | # context = txt_f.readline() 124 | # print(context) 125 | 126 | 127 | # if j==0: 128 | # break 129 | 130 | 131 | # if i==0: 132 | # break 133 | 134 | 135 | def change_filename(): 136 | ann_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut_5s/all/ParadeProtest' 137 | txt_names = sorted(glob(ann_root + '/P*.mp4')) 138 | 139 | for 
i,txt_name in enumerate(txt_names): 140 | # print(txt_name) 141 | 142 | new_txt_name = ann_root + '/' + 'Demonstration_' +str(i+52).zfill(5)+'.mp4' 143 | # print(new_txt_name) 144 | os.rename(txt_name,new_txt_name) 145 | 146 | # if i==0: 147 | # break 148 | 149 | def transform_videotime(abnormal_name): 150 | 151 | # abnormal_name = 'Demonstration' 152 | clip_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut/for4/' + abnormal_name 153 | short_clip_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut_5s/all/clip_lower5s' 154 | clip_5s_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut_5s/for4/' + abnormal_name 155 | 156 | clip_names = sorted(glob(clip_root + '/*.mp4')) 157 | clip_5s_fps = 24 158 | clip_5s_frames = 120 159 | start_clip5s_id = 3860 160 | for i,clip_name in enumerate(clip_names): 161 | if i<4: continue 162 | clip_basename = os.path.basename(clip_name) 163 | print(clip_basename) 164 | 165 | cap = cv2.VideoCapture(clip_name) 166 | # print(cap.get(5)) 167 | frames_fps = int(cap.get(5)) #得到帧率 168 | frames_num = int(cap.get(7)) #得到总帧数 169 | img_width = int(cap.get(3)) #图像宽度 170 | img_height = int(cap.get(4)) #图像高度 171 | clip_time = frames_num / frames_fps 172 | img_size = (img_width, img_height) 173 | 174 | print('clip_time',clip_time,'frames_fps:',frames_fps,'img_width:',img_width,'img_height:',img_height) 175 | 176 | #判断是否可裁剪成一段视频 177 | if clip_time < 4: 178 | print(clip_basename,clip_time) 179 | short_clip_oldpath = clip_name 180 | short_clip_newpath = short_clip_root + '/' + clip_basename 181 | print(short_clip_oldpath,short_clip_newpath) 182 | shutil.move(short_clip_oldpath,short_clip_newpath) 183 | 184 | else: 185 | 186 | # print(clip_basename) 187 | # print('clip_time', clip_time, 'frames_fps:', frames_fps, 'img_width:', img_width, 'img_height:', img_height) 188 | 189 | #判断视频是否属于同一个宽度和高度 190 | # if img_height != 240 or img_width != 320: 191 | # print(clip_basename) 192 | 193 | clip5s_num = int(frames_num / clip_5s_frames) + 1 194 | print(frames_num) 195 | for j in range(clip5s_num): 196 | 197 | start_clip5s_id += 1 198 | # save_clip5s_name = clip_5s_root + '/' + clip_basename.split('_')[0] + '_' + str(start_clip5s_id).zfill(5) + '.mp4' 199 | save_clip5s_name = clip_5s_root + '/' + abnormal_name + '_' + str(start_clip5s_id).zfill(5) + '.mp4' 200 | # print(save_clip5s_name) 201 | 202 | 203 | if j + 1 < clip5s_num: 204 | 205 | start_frame = j * clip_5s_frames 206 | end_frame = (j+1) * clip_5s_frames 207 | print(start_frame,end_frame) 208 | 209 | else: 210 | start_frame = frames_num - clip_5s_frames 211 | end_frame = frames_num 212 | print(start_frame, end_frame) 213 | 214 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 215 | cap = cv2.VideoCapture(clip_name) 216 | video_writer = cv2.VideoWriter(save_clip5s_name, fourcc, clip_5s_fps, img_size) 217 | success = True 218 | frame_count = 0 219 | while (success): 220 | success, frame = cap.read() 221 | if frame_count >= start_frame and frame_count < end_frame: 222 | video_writer.write(frame) 223 | 224 | frame_count += 1 225 | 226 | video_writer.release() 227 | 228 | 229 | # if i>=1: 230 | # break 231 | 232 | 233 | def read_clip5s(): 234 | 235 | 236 | clip5s_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut/for3/Fire' 237 | 238 | clip5s_names = sorted(glob(clip5s_root + '/*.mp4')) 239 | index = 0 240 | for i,clip5s_name in enumerate(clip5s_names): 241 | 242 | cap = cv2.VideoCapture(clip5s_name) 243 | 244 | 
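# Note on the cap.get() calls just below: the integer indices are OpenCV
# property IDs; the named constants express the same lookups more clearly:
#   cap.get(cv2.CAP_PROP_FRAME_WIDTH)   # == cap.get(3), frame width in pixels
#   cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # == cap.get(4), frame height in pixels
#   cap.get(cv2.CAP_PROP_FPS)           # == cap.get(5), frame rate
#   cap.get(cv2.CAP_PROP_FRAME_COUNT)   # == cap.get(7), total number of frames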
frames_fps = int(cap.get(5)) # 得到帧率 245 | frames_num = int(cap.get(7)) # 得到总帧数 246 | img_width = int(cap.get(3)) # 图像宽度 247 | img_height = int(cap.get(4)) # 图像高度 248 | print('frames_num', frames_num, 'frames_fps:', frames_fps, 'img_width:', img_width, 'img_height:', img_height) 249 | index += (frames_num/120)+1 250 | 251 | print(clip5s_name,index) 252 | # if i>5: 253 | # break 254 | 255 | 256 | def change_foldername(): 257 | folder_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/RWF/Fight_test' 258 | folder_names = sorted(glob(folder_root + '/*')) 259 | 260 | start_id = 1606 261 | for i, folder_name in enumerate(folder_names): 262 | # print(folder_name) 263 | # folder_basename = os.path.basename(folder_name).split('_')[0] 264 | change_folder_name = 'Fighting' 265 | new_folder_name = folder_root + '/' + change_folder_name + '_' +str(start_id).zfill(5) 266 | print(new_folder_name) 267 | 268 | os.rename(folder_name,new_folder_name) 269 | start_id +=1 270 | # if i==0: 271 | # break 272 | 273 | def visdrone2normal(): 274 | 275 | data_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/for3/visdrone' 276 | save_root = '/media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/for4/Visdrone' 277 | 278 | three_files = sorted(glob(data_root +'/*')) 279 | start_clip5s_id = 3926 280 | for i,three_file in enumerate(three_files): 281 | videos = sorted(glob(three_file + '/*')) 282 | for j,video in enumerate(videos): 283 | 284 | frames = sorted(glob(video+'/*.jpg')) 285 | if(len(frames)<120): 286 | cut_frames_num = len(frames) 287 | else: 288 | cut_frames_num = int(len(frames)/120) * 120 289 | 290 | frame_id = 1 291 | save_video_path = '' 292 | print(video) 293 | print(len(frames),cut_frames_num) 294 | for k,frame in enumerate(frames): 295 | if k == cut_frames_num: 296 | break 297 | 298 | if(k%120==0): 299 | # print(k) 300 | try: 301 | start_clip5s_id += 1 302 | frame_id = 1 303 | save_video_path = save_root + '/Normal_' + str(start_clip5s_id).zfill(5) 304 | os.mkdir(save_video_path) 305 | 306 | except OSError: 307 | pass 308 | 309 | img = cv2.imread(frame) 310 | save_img_path = save_video_path + '/' + str(frame_id).zfill(5) +'.jpg' 311 | frame_id += 1 312 | # print(save_img_path) 313 | cv2.imwrite(save_img_path,img) 314 | 315 | 316 | 317 | # if j==0: 318 | # break 319 | # if i==0: 320 | # break 321 | 322 | 323 | if __name__ == "__main__": 324 | 325 | 326 | #2、根据标注裁剪长视频 327 | # cut_video_clip() 328 | 329 | #1、去除标注文档txt的逗号 330 | # change_ann() 331 | 332 | #改变文件名称 333 | # change_filename() 334 | 335 | 336 | #将裁剪后的短视频处理成5S的视频 337 | # transform_videotime(abnormal_name) 338 | # labels = ['StreetRobbery'] 339 | # # 340 | # for label in labels: 341 | # print(label) 342 | # transform_videotime(label) 343 | 344 | #读取短片段,查看信息 345 | # read_clip5s() 346 | 347 | #将normal_video裁剪成5s视频 348 | # transform_videotime('Normal') 349 | 350 | #修改文件夹名称 351 | change_foldername() 352 | 353 | #将visdrone序列转换为normal 354 | # visdrone2normal() 355 | 356 | 357 | 358 | 359 | 360 | -------------------------------------------------------------------------------- /ECO-pytorch/log/202104271523.log: -------------------------------------------------------------------------------- 1 | 2021-04-27 15:23:06,169 - main.py[line:54] - INFO: ------------------------------------ 2 | 2021-04-27 15:23:06,169 - main.py[line:55] - INFO: Environment Versions: 3 | 2021-04-27 15:23:06,169 - main.py[line:56] - INFO: - Python: 3.6.13 (default, Feb 
20 2021, 21:42:50) 4 | [GCC 5.4.0 20160609] 5 | 2021-04-27 15:23:06,169 - main.py[line:57] - INFO: - PyTorch: 1.3.0 6 | 2021-04-27 15:23:06,169 - main.py[line:58] - INFO: - TorchVison: 0.4.1 7 | 2021-04-27 15:23:06,169 - main.py[line:61] - INFO: ------------------------------------ 8 | 2021-04-27 15:23:06,169 - main.py[line:62] - INFO: ECO Configurations: 9 | 2021-04-27 15:23:06,169 - main.py[line:64] - INFO: - dataset: UCF12 10 | 2021-04-27 15:23:06,169 - main.py[line:64] - INFO: - modality: RGB 11 | 2021-04-27 15:23:06,169 - main.py[line:64] - INFO: - train_list: /media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/train_split_07.txt 12 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - val_list: /media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/test_split_03.txt 13 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - net_model: None 14 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - net_model2D: None 15 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - net_modelECO: ./model/ECO_Lite_rgb_model_Kinetics.pth.tar 16 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - net_model3D: None 17 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - arch: ECO 18 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - num_segments: 8 19 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - consensus_type: avg 20 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - pretrained_parts: finetune 21 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - k: 3 22 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - dropout: 0.6 23 | 2021-04-27 15:23:06,170 - main.py[line:64] - INFO: - loss_type: nll 24 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - epochs: 60 25 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - batch_size: 4 26 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - iter_size: 5 27 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - lr: 0.001 28 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - lr_steps: [20, 40] 29 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - momentum: 0.9 30 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - weight_decay: 0.0005 31 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - clip_gradient: None 32 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - no_partialbn: False 33 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - nesterov: False 34 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - num_saturate: 5 35 | 2021-04-27 15:23:06,171 - main.py[line:64] - INFO: - print_freq: 10 36 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - eval_freq: 5 37 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - workers: 2 38 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - resume: 39 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - evaluate: False 40 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - snapshot_pref: ./checkpoint/ 41 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - start_epoch: 0 42 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - gpus: None 43 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - flow_prefix: 44 | 2021-04-27 15:23:06,172 - main.py[line:64] - INFO: - rgb_prefix: 45 | 2021-04-27 15:23:06,172 - main.py[line:65] - INFO: ------------------------------------ 46 | 2021-04-27 15:23:11,524 - main.py[line:119] - INFO: DataParallel( 47 | (module): TSN( 48 | (base_model): ECO( 49 | (conv1_7x7_s2): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)) 50 | (conv1_7x7_s2_bn): 
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 51 | (conv1_relu_7x7): ReLU(inplace=True) 52 | (pool1_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True) 53 | (conv2_3x3_reduce): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1)) 54 | (conv2_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 55 | (conv2_relu_3x3_reduce): ReLU(inplace=True) 56 | (conv2_3x3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 57 | (conv2_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 58 | (conv2_relu_3x3): ReLU(inplace=True) 59 | (pool2_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True) 60 | (inception_3a_1x1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1)) 61 | (inception_3a_1x1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 62 | (inception_3a_relu_1x1): ReLU(inplace=True) 63 | (inception_3a_3x3_reduce): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1)) 64 | (inception_3a_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 65 | (inception_3a_relu_3x3_reduce): ReLU(inplace=True) 66 | (inception_3a_3x3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 67 | (inception_3a_3x3_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 68 | (inception_3a_relu_3x3): ReLU(inplace=True) 69 | (inception_3a_double_3x3_reduce): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1)) 70 | (inception_3a_double_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 71 | (inception_3a_relu_double_3x3_reduce): ReLU(inplace=True) 72 | (inception_3a_double_3x3_1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 73 | (inception_3a_double_3x3_1_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 74 | (inception_3a_relu_double_3x3_1): ReLU(inplace=True) 75 | (inception_3a_double_3x3_2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 76 | (inception_3a_double_3x3_2_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 77 | (inception_3a_relu_double_3x3_2): ReLU(inplace=True) 78 | (inception_3a_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 79 | (inception_3a_pool_proj): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1)) 80 | (inception_3a_pool_proj_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 81 | (inception_3a_relu_pool_proj): ReLU(inplace=True) 82 | (inception_3b_1x1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) 83 | (inception_3b_1x1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 84 | (inception_3b_relu_1x1): ReLU(inplace=True) 85 | (inception_3b_3x3_reduce): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) 86 | (inception_3b_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 87 | (inception_3b_relu_3x3_reduce): ReLU(inplace=True) 88 | (inception_3b_3x3): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 89 | (inception_3b_3x3_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 90 | (inception_3b_relu_3x3): ReLU(inplace=True) 91 | (inception_3b_double_3x3_reduce): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) 92 | (inception_3b_double_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, 
momentum=0.1, affine=True, track_running_stats=True) 93 | (inception_3b_relu_double_3x3_reduce): ReLU(inplace=True) 94 | (inception_3b_double_3x3_1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 95 | (inception_3b_double_3x3_1_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 96 | (inception_3b_relu_double_3x3_1): ReLU(inplace=True) 97 | (inception_3b_double_3x3_2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 98 | (inception_3b_double_3x3_2_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 99 | (inception_3b_relu_double_3x3_2): ReLU(inplace=True) 100 | (inception_3b_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 101 | (inception_3b_pool_proj): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) 102 | (inception_3b_pool_proj_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 103 | (inception_3b_relu_pool_proj): ReLU(inplace=True) 104 | (inception_3c_double_3x3_reduce): Conv2d(320, 64, kernel_size=(1, 1), stride=(1, 1)) 105 | (inception_3c_double_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 106 | (inception_3c_relu_double_3x3_reduce): ReLU(inplace=True) 107 | (inception_3c_double_3x3_1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 108 | (inception_3c_double_3x3_1_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 109 | (inception_3c_relu_double_3x3_1): ReLU(inplace=True) 110 | (res3a_2): Conv3d(96, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 111 | (res3a_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 112 | (res3a_relu): ReLU(inplace=True) 113 | (res3b_1): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 114 | (res3b_1_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 115 | (res3b_1_relu): ReLU(inplace=True) 116 | (res3b_2): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 117 | (res3b_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 118 | (res3b_relu): ReLU(inplace=True) 119 | (res4a_1): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 120 | (res4a_1_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 121 | (res4a_1_relu): ReLU(inplace=True) 122 | (res4a_2): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 123 | (res4a_down): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 124 | (res4a_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 125 | (res4a_relu): ReLU(inplace=True) 126 | (res4b_1): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 127 | (res4b_1_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 128 | (res4b_1_relu): ReLU(inplace=True) 129 | (res4b_2): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 130 | (res4b_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 131 | (res4b_relu): ReLU(inplace=True) 132 | (res5a_1): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 133 | (res5a_1_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 134 | (res5a_1_relu): ReLU(inplace=True) 135 | (res5a_2): Conv3d(512, 512, 
kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 136 | (res5a_down): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 137 | (res5a_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 138 | (res5a_relu): ReLU(inplace=True) 139 | (res5b_1): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 140 | (res5b_1_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 141 | (res5b_1_relu): ReLU(inplace=True) 142 | (res5b_2): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 143 | (res5b_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 144 | (res5b_relu): ReLU(inplace=True) 145 | (global_pool): AvgPool3d(kernel_size=(2, 7, 7), stride=1, padding=0) 146 | (fc_final): Dropout(p=0.6, inplace=False) 147 | ) 148 | (new_fc): Linear(in_features=512, out_features=12, bias=True) 149 | (consensus): ConsensusModule() 150 | ) 151 | ) 152 | 2021-04-27 15:23:11,530 - main.py[line:120] - INFO: finetune 153 | 2021-04-27 15:23:11,530 - main.py[line:322] - INFO: ./model/ECO_Lite_rgb_model_Kinetics.pth.tar 154 | 2021-04-27 15:23:11,530 - main.py[line:323] - INFO: 88888888888888888888888888888888888888888888888888888888888888888888888888888888 155 | 2021-04-27 15:23:12,261 - main.py[line:326] - INFO: => loading model-finetune: './model/ECO_Lite_rgb_model_Kinetics.pth.tar' 156 | 2021-04-27 15:23:12,262 - main.py[line:335] - INFO: ************************************************** 157 | 2021-04-27 15:23:12,262 - main.py[line:336] - INFO: Start finetuning .. 158 | 2021-04-27 15:23:12,262 - main.py[line:152] - INFO: ['module.new_fc.weight', 'module.new_fc.bias'] 159 | 2021-04-27 15:23:12,262 - main.py[line:153] - INFO: 160 | ------------------------------------ 161 | 2021-04-27 15:23:12,263 - main.py[line:162] - INFO: module.new_fc.weight init as: xavier 162 | 2021-04-27 15:23:12,263 - main.py[line:165] - INFO: module.new_fc.bias init as: 0 163 | 2021-04-27 15:23:12,263 - main.py[line:168] - INFO: ------------------------------------ 164 | 2021-04-27 15:23:12,319 - main.py[line:229] - INFO: group: first_3d_conv_weight has 1 params, lr_mult: 1, decay_mult: 1 165 | 2021-04-27 15:23:12,319 - main.py[line:229] - INFO: group: first_3d_conv_bias has 1 params, lr_mult: 2, decay_mult: 0 166 | 2021-04-27 15:23:12,319 - main.py[line:229] - INFO: group: normal_weight has 32 params, lr_mult: 1, decay_mult: 1 167 | 2021-04-27 15:23:12,319 - main.py[line:229] - INFO: group: normal_bias has 32 params, lr_mult: 2, decay_mult: 0 168 | 2021-04-27 15:23:12,319 - main.py[line:229] - INFO: group: BN scale/shift has 2 params, lr_mult: 1, decay_mult: 0 169 | 2021-04-27 15:23:23,299 - main.py[line:528] - INFO: Epoch: [0][4/1999], lr: 0.0010000 Time 0.17 (2.70) UTime 2021/04/27-15:23:23 Data 0.00 (0.37) Loss 4.334 (2.639) Prec@1 25.00 (20.00) Prec@5 50.00 (55.00) 170 | 2021-04-27 15:23:25,438 - main.py[line:528] - INFO: Epoch: [0][9/1999], lr: 0.0010000 Time 0.77 (1.41) UTime 2021/04/27-15:23:25 Data 0.30 (0.34) Loss 2.780 (2.154) Prec@1 0.00 (15.00) Prec@5 75.00 (60.00) 171 | 2021-04-27 15:23:28,094 - main.py[line:528] - INFO: Epoch: [0][14/1999], lr: 0.0010000 Time 0.16 (1.06) UTime 2021/04/27-15:23:27 Data 0.82 (0.36) Loss 2.391 (1.956) Prec@1 0.00 (16.67) Prec@5 100.00 (63.33) 172 | 2021-04-27 15:23:30,107 - main.py[line:528] - INFO: Epoch: [0][19/1999], lr: 0.0010000 Time 0.80 (0.93) UTime 2021/04/27-15:23:29 Data 0.00 (0.34) Loss 2.140 (1.797) Prec@1 
25.00 (17.50) Prec@5 50.00 (66.25) 173 | 2021-04-27 15:23:32,599 - main.py[line:528] - INFO: Epoch: [0][24/1999], lr: 0.0010000 Time 0.27 (0.81) UTime 2021/04/27-15:23:31 Data 0.68 (0.35) Loss 2.076 (1.672) Prec@1 25.00 (19.00) Prec@5 50.00 (69.00) 174 | 2021-04-27 15:23:34,763 - main.py[line:528] - INFO: Epoch: [0][29/1999], lr: 0.0010000 Time 0.70 (0.77) UTime 2021/04/27-15:23:34 Data 0.05 (0.35) Loss 2.052 (1.587) Prec@1 0.00 (24.17) Prec@5 25.00 (69.17) 175 | 2021-04-27 15:23:37,636 - main.py[line:528] - INFO: Epoch: [0][34/1999], lr: 0.0010000 Time 0.53 (0.72) UTime 2021/04/27-15:23:36 Data 0.89 (0.36) Loss 1.736 (1.509) Prec@1 50.00 (26.43) Prec@5 100.00 (71.43) 176 | 2021-04-27 15:23:39,586 - main.py[line:528] - INFO: Epoch: [0][39/1999], lr: 0.0010000 Time 0.66 (0.69) UTime 2021/04/27-15:23:39 Data 0.00 (0.35) Loss 2.812 (1.514) Prec@1 25.00 (26.88) Prec@5 50.00 (70.62) 177 | 2021-04-27 15:23:42,104 - main.py[line:528] - INFO: Epoch: [0][44/1999], lr: 0.0010000 Time 0.26 (0.66) UTime 2021/04/27-15:23:41 Data 0.54 (0.35) Loss 2.224 (1.505) Prec@1 50.00 (28.33) Prec@5 50.00 (70.00) 178 | 2021-04-27 15:23:44,589 - main.py[line:528] - INFO: Epoch: [0][49/1999], lr: 0.0010000 Time 0.61 (0.65) UTime 2021/04/27-15:23:44 Data 0.15 (0.36) Loss 2.182 (1.480) Prec@1 25.00 (29.00) Prec@5 50.00 (69.50) 179 | 2021-04-27 15:23:46,641 - main.py[line:528] - INFO: Epoch: [0][54/1999], lr: 0.0010000 Time 0.47 (0.63) UTime 2021/04/27-15:23:46 Data 0.19 (0.35) Loss 2.136 (1.468) Prec@1 75.00 (30.00) Prec@5 75.00 (70.00) 180 | 2021-04-27 15:23:49,092 - main.py[line:528] - INFO: Epoch: [0][59/1999], lr: 0.0010000 Time 0.43 (0.61) UTime 2021/04/27-15:23:48 Data 0.46 (0.36) Loss 2.207 (1.456) Prec@1 25.00 (30.00) Prec@5 75.00 (70.42) 181 | 2021-04-27 15:23:51,158 - main.py[line:528] - INFO: Epoch: [0][64/1999], lr: 0.0010000 Time 0.18 (0.60) UTime 2021/04/27-15:23:50 Data 0.42 (0.35) Loss 2.016 (1.434) Prec@1 50.00 (31.15) Prec@5 100.00 (71.54) 182 | 2021-04-27 15:23:53,681 - main.py[line:528] - INFO: Epoch: [0][69/1999], lr: 0.0010000 Time 1.00 (0.59) UTime 2021/04/27-15:23:53 Data 0.37 (0.36) Loss 2.421 (1.431) Prec@1 25.00 (32.14) Prec@5 100.00 (72.14) 183 | 2021-04-27 15:23:56,277 - main.py[line:528] - INFO: Epoch: [0][74/1999], lr: 0.0010000 Time 0.56 (0.59) UTime 2021/04/27-15:23:56 Data 0.00 (0.36) Loss 4.930 (1.523) Prec@1 0.00 (30.67) Prec@5 50.00 (71.00) 184 | 2021-04-27 15:23:59,260 - main.py[line:528] - INFO: Epoch: [0][79/1999], lr: 0.0010000 Time 0.16 (0.58) UTime 2021/04/27-15:23:58 Data 0.62 (0.36) Loss 4.706 (1.589) Prec@1 25.00 (30.94) Prec@5 50.00 (70.00) 185 | 2021-04-27 15:24:01,521 - main.py[line:528] - INFO: Epoch: [0][84/1999], lr: 0.0010000 Time 0.64 (0.58) UTime 2021/04/27-15:24:01 Data 0.00 (0.36) Loss 1.969 (1.565) Prec@1 50.00 (31.18) Prec@5 100.00 (70.59) 186 | -------------------------------------------------------------------------------- /ECO-pytorch/models.py: -------------------------------------------------------------------------------- 1 | from torch import nn 2 | 3 | from ops.basic_ops import ConsensusModule, Identity 4 | from transforms import * 5 | from torch.nn.init import xavier_uniform_, constant_ 6 | 7 | class TSN(nn.Module): 8 | def __init__(self, num_class, num_segments, pretrained_parts, modality, 9 | base_model='resnet101', new_length=None, 10 | consensus_type='avg', before_softmax=True, 11 | dropout=0.8, 12 | crop_num=1, partial_bn=True): 13 | super(TSN, self).__init__() 14 | self.modality = modality 15 | self.num_segments = num_segments 16 | 
self.pretrained_parts = pretrained_parts 17 | self.reshape = True 18 | self.before_softmax = before_softmax 19 | self.dropout = dropout 20 | self.crop_num = crop_num 21 | self.consensus_type = consensus_type 22 | self.base_model_name = base_model 23 | if not before_softmax and consensus_type != 'avg': 24 | raise ValueError("Only avg consensus can be used after Softmax") 25 | 26 | if new_length is None: 27 | self.new_length = 1 if modality == "RGB" else 5 28 | else: 29 | self.new_length = new_length 30 | 31 | print((""" 32 | Initializing TSN with base model: {}. 33 | TSN Configurations: 34 | input_modality: {} 35 | num_segments: {} 36 | new_length: {} 37 | consensus_module: {} 38 | dropout_ratio: {} 39 | """.format(base_model, self.modality, self.num_segments, self.new_length, consensus_type, self.dropout))) 40 | 41 | self._prepare_base_model(base_model) 42 | 43 | # zc comments 44 | feature_dim = self._prepare_tsn(num_class) 45 | # modules = list(self.modules()) 46 | # print(modules) 47 | # zc comments end 48 | 49 | ''' 50 | # zc: print "NN variable name" 51 | zc_params = self.base_model.state_dict() 52 | for zc_k in zc_params.items(): 53 | print(zc_k) 54 | 55 | # zc: print "Specified layer's weight and bias" 56 | print(zc_params['conv1_7x7_s2.weight']) 57 | print(zc_params['conv1_7x7_s2.bias']) 58 | ''' 59 | 60 | if self.modality == 'Flow': 61 | print("Converting the ImageNet model to a flow init model") 62 | self.base_model = self._construct_flow_model(self.base_model) 63 | print("Done. Flow model ready...") 64 | elif self.modality == 'RGBDiff': 65 | print("Converting the ImageNet model to RGB+Diff init model") 66 | self.base_model = self._construct_diff_model(self.base_model) 67 | print("Done. RGBDiff model ready.") 68 | 69 | self.consensus = ConsensusModule(consensus_type) 70 | 71 | if not self.before_softmax: 72 | self.softmax = nn.Softmax() 73 | 74 | self._enable_pbn = partial_bn 75 | if partial_bn: 76 | self.partialBN(True) 77 | 78 | def _prepare_tsn(self, num_class): 79 | feature_dim = getattr(self.base_model, self.base_model.last_layer_name).in_features 80 | # feature_dim =1024 81 | # print(feature_dim) 82 | if self.dropout == 0: 83 | setattr(self.base_model, self.base_model.last_layer_name, nn.Linear(feature_dim, num_class)) 84 | self.new_fc = None 85 | else: 86 | setattr(self.base_model, self.base_model.last_layer_name, nn.Dropout(p=self.dropout)) 87 | self.new_fc = nn.Linear(feature_dim, num_class) 88 | 89 | std = 0.001 90 | if self.new_fc is None: 91 | xavier_uniform_(getattr(self.base_model, self.base_model.last_layer_name).weight) 92 | constant_(getattr(self.base_model, self.base_model.last_layer_name).bias, 0) 93 | else: 94 | xavier_uniform_(self.new_fc.weight) 95 | constant_(self.new_fc.bias, 0) 96 | return feature_dim 97 | 98 | def _prepare_base_model(self, base_model): 99 | 100 | if 'resnet' in base_model or 'vgg' in base_model: 101 | self.base_model = getattr(torchvision.models, base_model)(True) 102 | self.base_model.last_layer_name = 'fc' 103 | self.input_size = 224 104 | self.input_mean = [0.485, 0.456, 0.406] 105 | self.input_std = [0.229, 0.224, 0.225] 106 | 107 | if self.modality == 'Flow': 108 | self.input_mean = [0.5] 109 | self.input_std = [np.mean(self.input_std)] 110 | elif self.modality == 'RGBDiff': 111 | self.input_mean = [0.485, 0.456, 0.406] + [0] * 3 * self.new_length 112 | self.input_std = self.input_std + [np.mean(self.input_std) * 2] * 3 * self.new_length 113 | 114 | elif base_model == 'C3DRes18': 115 | import tf_model_zoo 116 | self.base_model = 
getattr(tf_model_zoo, base_model)(num_segments=self.num_segments, pretrained_parts=self.pretrained_parts) 117 | self.base_model.last_layer_name = 'fc8' 118 | self.input_size = 112 119 | self.input_mean = [104, 117, 128] 120 | self.input_std = [1] 121 | 122 | if self.modality == 'Flow': 123 | self.input_mean = [128] 124 | elif self.modality == 'RGBDiff': 125 | self.input_mean = self.input_mean * (1 + self.new_length) 126 | 127 | elif base_model == 'ECO': 128 | import tf_model_zoo 129 | self.base_model = getattr(tf_model_zoo, base_model)(num_segments=self.num_segments, pretrained_parts=self.pretrained_parts) 130 | self.base_model.last_layer_name = 'fc_final' 131 | self.input_size = 224 132 | self.input_mean = [104, 117, 128] 133 | self.input_std = [1] 134 | 135 | if self.modality == 'Flow': 136 | self.input_mean = [128] 137 | elif self.modality == 'RGBDiff': 138 | self.input_mean = self.input_mean * (1 + self.new_length) 139 | 140 | elif base_model == 'ECOfull' : 141 | import tf_model_zoo 142 | self.base_model = getattr(tf_model_zoo, base_model)(num_segments=self.num_segments, pretrained_parts=self.pretrained_parts) 143 | self.base_model.last_layer_name = 'fc_final' 144 | self.input_size = 224 145 | self.input_mean = [104, 117, 128] 146 | self.input_std = [1] 147 | 148 | if self.modality == 'Flow': 149 | self.input_mean = [128] 150 | elif self.modality == 'RGBDiff': 151 | self.input_mean = self.input_mean * (1 + self.new_length) 152 | 153 | 154 | elif base_model == 'BN2to1D': 155 | import tf_model_zoo 156 | self.base_model = getattr(tf_model_zoo, base_model)(num_segments=self.num_segments) 157 | self.base_model.last_layer_name = 'fc' 158 | self.input_size = 224 159 | self.input_mean = [104, 117, 128] 160 | self.input_std = [1] 161 | 162 | if self.modality == 'Flow': 163 | self.input_mean = [128] 164 | elif self.modality == 'RGBDiff': 165 | self.input_mean = self.input_mean * (1 + self.new_length) 166 | 167 | elif 'inception' in base_model: 168 | import tf_model_zoo 169 | self.base_model = getattr(tf_model_zoo, base_model)() 170 | self.base_model.last_layer_name = 'classif' 171 | self.input_size = 299 172 | self.input_mean = [0.5] 173 | self.input_std = [0.5] 174 | else: 175 | raise ValueError('Unknown base model: {}'.format(base_model)) 176 | 177 | def train(self, mode=True): 178 | """ 179 | Override the default train() to freeze the BN parameters 180 | :return: 181 | """ 182 | super(TSN, self).train(mode) 183 | count = 0 184 | if self._enable_pbn: 185 | print("Freezing BatchNorm2D except the first one.") 186 | for m in self.base_model.modules(): 187 | if (isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm3d)): 188 | count += 1 189 | if count >= (2 if self._enable_pbn else 1): 190 | m.eval() 191 | 192 | # shutdown update in frozen mode 193 | m.weight.requires_grad = False 194 | m.bias.requires_grad = False 195 | else: 196 | print("No BN layer Freezing.") 197 | 198 | def partialBN(self, enable): 199 | self._enable_pbn = enable 200 | 201 | def get_optim_policies(self): 202 | first_3d_conv_weight = [] 203 | first_3d_conv_bias = [] 204 | normal_weight = [] 205 | normal_bias = [] 206 | bn = [] 207 | 208 | conv_2d_cnt = 0 209 | conv_3d_cnt = 0 210 | bn_cnt = 0 211 | for m in self.modules(): 212 | # (conv1d or conv2d) 1st layer's params will be append to list: first_conv_weight & first_conv_bias, total num 1 respectively(1 conv2d) 213 | # (conv1d or conv2d or Linear) from 2nd layers' params will be append to list: normal_weight & normal_bias, total num 69 respectively(68 Conv2d + 1 Linear) 
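# A minimal usage sketch of how the groups returned by get_optim_policies()
# are typically consumed (assumes a standard TSN-style training script with an
# `args` namespace providing lr / momentum / weight_decay; that script is not
# part of this file, so treat the snippet as illustrative only):
#
#     policies = model.get_optim_policies()
#     optimizer = torch.optim.SGD(policies, args.lr,
#                                 momentum=args.momentum,
#                                 weight_decay=args.weight_decay)
#     # each group keeps its 'lr_mult' / 'decay_mult' entries, so a
#     # learning-rate schedule can scale them per group:
#     for group in optimizer.param_groups:
#         group['lr'] = args.lr * group['lr_mult']
#         group['weight_decay'] = args.weight_decay * group['decay_mult']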
214 | if isinstance(m, torch.nn.Conv2d): 215 | ps = list(m.parameters()) 216 | conv_2d_cnt += 1 217 | normal_weight.append(ps[0]) 218 | if len(ps) == 2: 219 | normal_bias.append(ps[1]) 220 | 221 | elif isinstance(m, torch.nn.Conv3d): 222 | ps = list(m.parameters()) 223 | conv_3d_cnt += 1 224 | if conv_3d_cnt == 1: 225 | first_3d_conv_weight.append(ps[0]) 226 | if len(ps) == 2: 227 | first_3d_conv_bias.append(ps[1]) 228 | else: 229 | normal_weight.append(ps[0]) 230 | if len(ps) == 2: 231 | normal_bias.append(ps[1]) 232 | 233 | elif isinstance(m, torch.nn.Linear): 234 | ps = list(m.parameters()) 235 | normal_weight.append(ps[0]) 236 | if len(ps) == 2: 237 | normal_bias.append(ps[1]) 238 | # (BatchNorm1d or BatchNorm2d) params will be append to list: bn, total num 2 (enabled pbn, so only: 1st BN layer's weight + 1st BN layer's bias) 239 | elif isinstance(m, torch.nn.BatchNorm1d): 240 | bn.extend(list(m.parameters())) 241 | elif isinstance(m, torch.nn.BatchNorm2d): 242 | bn_cnt += 1 243 | # later BN's are frozen 244 | if not self._enable_pbn or bn_cnt == 1: 245 | bn.extend(list(m.parameters())) 246 | elif isinstance(m, torch.nn.BatchNorm3d): 247 | bn_cnt += 1 248 | # 4 249 | # later BN's are frozen 250 | if not self._enable_pbn or bn_cnt == 1: 251 | bn.extend(list(m.parameters())) 252 | elif len(m._modules) == 0: 253 | if len(list(m.parameters())) > 0: 254 | raise ValueError("New atomic module type: {}. Need to give it a learning policy".format(type(m))) 255 | return [ 256 | {'params': first_3d_conv_weight, 'lr_mult': 5 if self.modality == 'Flow' else 1, 'decay_mult': 1, 257 | 'name': "first_3d_conv_weight"}, 258 | {'params': first_3d_conv_bias, 'lr_mult': 10 if self.modality == 'Flow' else 2, 'decay_mult': 0, 259 | 'name': "first_3d_conv_bias"}, 260 | {'params': normal_weight, 'lr_mult': 1, 'decay_mult': 1, 261 | 'name': "normal_weight"}, 262 | {'params': normal_bias, 'lr_mult': 2, 'decay_mult': 0, 263 | 'name': "normal_bias"}, 264 | {'params': bn, 'lr_mult': 1, 'decay_mult': 0, 265 | 'name': "BN scale/shift"}, 266 | ] 267 | 268 | def get_optim_policies_BN2to1D(self): 269 | first_conv_weight = [] 270 | first_conv_bias = [] 271 | normal_weight = [] 272 | normal_bias = [] 273 | bn = [] 274 | last_conv_weight = [] 275 | last_conv_bias = [] 276 | 277 | conv_cnt = 0 278 | bn_cnt = 0 279 | for m in self.modules(): 280 | # (conv1d or conv2d) 1st layer's params will be append to list: first_conv_weight & first_conv_bias, total num 1 respectively(1 conv2d) 281 | # (conv1d or conv2d or Linear) from 2nd layers' params will be append to list: normal_weight & normal_bias, total num 69 respectively(68 Conv2d + 1 Linear) 282 | if isinstance(m, torch.nn.Conv2d) or isinstance(m, torch.nn.Conv1d): 283 | ps = list(m.parameters()) 284 | conv_cnt += 1 285 | if conv_cnt == 1: 286 | first_conv_weight.append(ps[0]) 287 | if len(ps) == 2: 288 | first_conv_bias.append(ps[1]) 289 | else: 290 | normal_weight.append(ps[0]) 291 | if len(ps) == 2: 292 | normal_bias.append(ps[1]) 293 | elif isinstance(m, torch.nn.Conv3d): 294 | ps = list(m.parameters()) 295 | last_conv_weight.append(ps[0]) 296 | if len(ps) == 2: 297 | last_conv_bias.append(ps[1]) 298 | elif isinstance(m, torch.nn.Linear): 299 | ps = list(m.parameters()) 300 | normal_weight.append(ps[0]) 301 | if len(ps) == 2: 302 | normal_bias.append(ps[1]) 303 | # (BatchNorm1d or BatchNorm2d) params will be append to list: bn, total num 2 (enabled pbn, so only: 1st BN layer's weight + 1st BN layer's bias) 304 | elif isinstance(m, torch.nn.BatchNorm1d): 305 | 
bn.extend(list(m.parameters())) 306 | elif isinstance(m, torch.nn.BatchNorm2d): 307 | bn_cnt += 1 308 | # later BN's are frozen 309 | if not self._enable_pbn or bn_cnt == 1: 310 | bn.extend(list(m.parameters())) 311 | elif isinstance(m, torch.nn.BatchNorm3d): 312 | bn_cnt += 1 313 | # 4 314 | # later BN's are frozen 315 | if not self._enable_pbn or bn_cnt == 1: 316 | bn.extend(list(m.parameters())) 317 | elif len(m._modules) == 0: 318 | if len(list(m.parameters())) > 0: 319 | raise ValueError("New atomic module type: {}. Need to give it a learning policy".format(type(m))) 320 | return [ 321 | {'params': first_conv_weight, 'lr_mult': 5 if self.modality == 'Flow' else 1, 'decay_mult': 1, 322 | 'name': "first_conv_weight"}, 323 | {'params': first_conv_bias, 'lr_mult': 10 if self.modality == 'Flow' else 2, 'decay_mult': 0, 324 | 'name': "first_conv_bias"}, 325 | {'params': normal_weight, 'lr_mult': 1, 'decay_mult': 1, 326 | 'name': "normal_weight"}, 327 | {'params': normal_bias, 'lr_mult': 2, 'decay_mult': 0, 328 | 'name': "normal_bias"}, 329 | {'params': last_conv_weight, 'lr_mult': 5, 'decay_mult': 1, 330 | 'name': "last_conv_weight"}, 331 | {'params': last_conv_bias, 'lr_mult': 10, 'decay_mult': 0, 332 | 'name': "last_conv_bias"}, 333 | {'params': bn, 'lr_mult': 1, 'decay_mult': 0, 334 | 'name': "BN scale/shift"}, 335 | ] 336 | 337 | def forward(self, input): 338 | sample_len = (3 if self.modality == "RGB" else 2) * self.new_length 339 | 340 | if self.modality == 'RGBDiff': 341 | sample_len = 3 * self.new_length 342 | input = self._get_diff(input) 343 | 344 | # input.size(): [32, 9, 224, 224] 345 | # after view() func: [96, 3, 224, 224] 346 | # print(input.view((-1, sample_len) + input.size()[-2:]).size()) 347 | if self.base_model_name == "C3DRes18": 348 | before_permute = input.view((-1, sample_len) + input.size()[-2:]) 349 | input_var = torch.transpose(before_permute.view((-1, self.num_segments) + before_permute.size()[1:]), 1, 2) 350 | else: 351 | input_var = input.view((-1, sample_len) + input.size()[-2:]) 352 | 353 | base_out = self.base_model(input_var) 354 | 355 | # zc comments 356 | if self.dropout > 0: 357 | # print('kkkkkkk') 358 | base_out = self.new_fc(base_out) 359 | 360 | 361 | if not self.before_softmax: 362 | base_out = self.softmax(base_out) 363 | 364 | # zc comments end 365 | # print(base_out) 366 | if self.reshape: 367 | 368 | if self.base_model_name == 'C3DRes18': 369 | output = base_out 370 | # print(output.size()) 371 | # output = self.consensus(base_out) 372 | return output 373 | elif self.base_model_name == 'ECO': 374 | output = base_out 375 | # print(output) 376 | # output = self.consensus(base_out) 377 | return output 378 | elif self.base_model_name == 'ECOfull': 379 | output = base_out 380 | # output = self.consensus(base_out) 381 | return output 382 | else: 383 | # base_out.size(): [32, 3, 101], [batch_size, num_segments, num_class] respectively 384 | base_out = base_out.view((-1, self.num_segments) + base_out.size()[1:]) 385 | # output.size(): [32, 1, 101] 386 | output = self.consensus(base_out) 387 | # output after squeeze(1): [32, 101], forward() returns size: [batch_size, num_class] 388 | return output.squeeze(1) 389 | 390 | 391 | def _get_diff(self, input, keep_rgb=False): 392 | input_c = 3 if self.modality in ["RGB", "RGBDiff"] else 2 393 | input_view = input.view((-1, self.num_segments, self.new_length + 1, input_c,) + input.size()[2:]) 394 | if keep_rgb: 395 | new_data = input_view.clone() 396 | else: 397 | new_data = input_view[:, :, 1:, :, :, :].clone() 
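# The loop below fills new_data with temporal differences between consecutive
# sampled frames along the new_length axis; e.g. with new_length = 5 and
# keep_rgb=False the result is
#   new_data[:, :, 0] = input_view[:, :, 1] - input_view[:, :, 0]
#   ...
#   new_data[:, :, 4] = input_view[:, :, 5] - input_view[:, :, 4]
# while keep_rgb=True keeps the raw RGB frame at index 0 and writes the
# differences into indices 1..new_length instead.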
398 | 399 | for x in reversed(list(range(1, self.new_length + 1))): 400 | if keep_rgb: 401 | new_data[:, :, x, :, :, :] = input_view[:, :, x, :, :, :] - input_view[:, :, x - 1, :, :, :] 402 | else: 403 | new_data[:, :, x - 1, :, :, :] = input_view[:, :, x, :, :, :] - input_view[:, :, x - 1, :, :, :] 404 | 405 | return new_data 406 | 407 | 408 | def _construct_flow_model(self, base_model): 409 | # modify the convolution layers 410 | # Torch models are usually defined in a hierarchical way. 411 | # nn.modules.children() return all sub modules in a DFS manner 412 | modules = list(self.base_model.modules()) 413 | first_conv_idx = list(filter(lambda x: isinstance(modules[x], nn.Conv2d), list(range(len(modules)))))[0] 414 | conv_layer = modules[first_conv_idx] 415 | container = modules[first_conv_idx - 1] 416 | 417 | # modify parameters, assume the first blob contains the convolution kernels 418 | params = [x.clone() for x in conv_layer.parameters()] 419 | kernel_size = params[0].size() 420 | new_kernel_size = kernel_size[:1] + (2 * self.new_length, ) + kernel_size[2:] 421 | new_kernels = params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous() 422 | 423 | new_conv = nn.Conv2d(2 * self.new_length, conv_layer.out_channels, 424 | conv_layer.kernel_size, conv_layer.stride, conv_layer.padding, 425 | bias=True if len(params) == 2 else False) 426 | new_conv.weight.data = new_kernels 427 | if len(params) == 2: 428 | new_conv.bias.data = params[1].data # add bias if neccessary 429 | layer_name = list(container.state_dict().keys())[0][:-7] # remove .weight suffix to get the layer name 430 | 431 | # replace the first convlution layer 432 | setattr(container, layer_name, new_conv) 433 | return base_model 434 | 435 | def _construct_diff_model(self, base_model, keep_rgb=False): 436 | # modify the convolution layers 437 | # Torch models are usually defined in a hierarchical way. 
438 | # nn.modules.children() return all sub modules in a DFS manner 439 | modules = list(self.base_model.modules()) 440 | first_conv_idx = filter(lambda x: isinstance(modules[x], nn.Conv2d), list(range(len(modules))))[0] 441 | conv_layer = modules[first_conv_idx] 442 | container = modules[first_conv_idx - 1] 443 | 444 | # modify parameters, assume the first blob contains the convolution kernels 445 | params = [x.clone() for x in conv_layer.parameters()] 446 | kernel_size = params[0].size() 447 | if not keep_rgb: 448 | new_kernel_size = kernel_size[:1] + (3 * self.new_length,) + kernel_size[2:] 449 | new_kernels = params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous() 450 | else: 451 | new_kernel_size = kernel_size[:1] + (3 * self.new_length,) + kernel_size[2:] 452 | new_kernels = torch.cat((params[0].data, params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()), 453 | 1) 454 | new_kernel_size = kernel_size[:1] + (3 + 3 * self.new_length,) + kernel_size[2:] 455 | 456 | new_conv = nn.Conv2d(new_kernel_size[1], conv_layer.out_channels, 457 | conv_layer.kernel_size, conv_layer.stride, conv_layer.padding, 458 | bias=True if len(params) == 2 else False) 459 | new_conv.weight.data = new_kernels 460 | if len(params) == 2: 461 | new_conv.bias.data = params[1].data # add bias if neccessary 462 | layer_name = list(container.state_dict().keys())[0][:-7] # remove .weight suffix to get the layer name 463 | 464 | # replace the first convolution layer 465 | setattr(container, layer_name, new_conv) 466 | return base_model 467 | 468 | @property 469 | def crop_size(self): 470 | return self.input_size 471 | 472 | @property 473 | def scale_size(self): 474 | return self.input_size * 256 // 224 475 | 476 | def get_augmentation(self): 477 | if self.modality == 'RGB': 478 | return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75, .66]), 479 | GroupRandomHorizontalFlip(is_flow=False)]) 480 | elif self.modality == 'Flow': 481 | return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75]), 482 | GroupRandomHorizontalFlip(is_flow=True)]) 483 | elif self.modality == 'RGBDiff': 484 | return torchvision.transforms.Compose([GroupMultiScaleCrop(self.input_size, [1, .875, .75]), 485 | GroupRandomHorizontalFlip(is_flow=False)]) 486 | -------------------------------------------------------------------------------- /ECO-pytorch/log/202104141348.log: -------------------------------------------------------------------------------- 1 | 2021-04-14 13:48:22,419 - main.py[line:54] - INFO: ------------------------------------ 2 | 2021-04-14 13:48:22,419 - main.py[line:55] - INFO: Environment Versions: 3 | 2021-04-14 13:48:22,419 - main.py[line:56] - INFO: - Python: 3.5.2 (default, Jan 26 2021, 13:30:48) 4 | [GCC 5.4.0 20160609] 5 | 2021-04-14 13:48:22,419 - main.py[line:57] - INFO: - PyTorch: 1.1.0 6 | 2021-04-14 13:48:22,419 - main.py[line:58] - INFO: - TorchVison: 0.3.0 7 | 2021-04-14 13:48:22,419 - main.py[line:61] - INFO: ------------------------------------ 8 | 2021-04-14 13:48:22,419 - main.py[line:62] - INFO: ECOfull Configurations: 9 | 2021-04-14 13:48:22,419 - main.py[line:64] - INFO: - print_freq: 10 10 | 2021-04-14 13:48:22,419 - main.py[line:64] - INFO: - net_model3D: None 11 | 2021-04-14 13:48:22,419 - main.py[line:64] - INFO: - resume: 12 | 2021-04-14 13:48:22,419 - main.py[line:64] - INFO: - train_list: /media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/train_split_07.txt 13 
| 2021-04-14 13:48:22,419 - main.py[line:64] - INFO: - dataset: UCF12 14 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - start_epoch: 0 15 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - rgb_prefix: 16 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - clip_gradient: None 17 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - net_model: None 18 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - workers: 2 19 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - num_segments: 16 20 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - pretrained_parts: finetune 21 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - arch: ECOfull 22 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - epochs: 30 23 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - val_list: /media/hp/085d9636-47e0-4970-96d1-5bf4b5c53b4d/u421/user/cc/dataset/UCF_cut5s_frames/test_split_03.txt 24 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - weight_decay: 0.0005 25 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - lr: 0.0001 26 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - eval_freq: 5 27 | 2021-04-14 13:48:22,420 - main.py[line:64] - INFO: - net_modelECO: ./model/ECO_Full_rgb_model_Kinetics.pth.tar 28 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - num_saturate: 5 29 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - net_model2D: None 30 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - iter_size: 5 31 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - momentum: 0.9 32 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - dropout: 0.6 33 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - nesterov: False 34 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - k: 3 35 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - gpus: None 36 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - loss_type: nll 37 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - batch_size: 16 38 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - flow_prefix: 39 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - consensus_type: avg 40 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - snapshot_pref: ./checkpoint/ 41 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - no_partialbn: False 42 | 2021-04-14 13:48:22,421 - main.py[line:64] - INFO: - modality: RGB 43 | 2021-04-14 13:48:22,422 - main.py[line:64] - INFO: - lr_steps: [10, 20] 44 | 2021-04-14 13:48:22,422 - main.py[line:64] - INFO: - evaluate: False 45 | 2021-04-14 13:48:22,422 - main.py[line:65] - INFO: ------------------------------------ 46 | 2021-04-14 13:48:27,250 - main.py[line:119] - INFO: DataParallel( 47 | (module): TSN( 48 | (base_model): ECOfull( 49 | (conv1_7x7_s2): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3)) 50 | (conv1_7x7_s2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 51 | (conv1_relu_7x7): ReLU(inplace) 52 | (pool1_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True) 53 | (conv2_3x3_reduce): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1)) 54 | (conv2_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 55 | (conv2_relu_3x3_reduce): ReLU(inplace) 56 | (conv2_3x3): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 57 | (conv2_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 58 | (conv2_relu_3x3): ReLU(inplace) 59 | (pool2_3x3_s2): MaxPool2d(kernel_size=3, stride=2, padding=0, 
dilation=1, ceil_mode=True) 60 | (inception_3a_1x1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1)) 61 | (inception_3a_1x1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 62 | (inception_3a_relu_1x1): ReLU(inplace) 63 | (inception_3a_3x3_reduce): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1)) 64 | (inception_3a_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 65 | (inception_3a_relu_3x3_reduce): ReLU(inplace) 66 | (inception_3a_3x3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 67 | (inception_3a_3x3_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 68 | (inception_3a_relu_3x3): ReLU(inplace) 69 | (inception_3a_double_3x3_reduce): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1)) 70 | (inception_3a_double_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 71 | (inception_3a_relu_double_3x3_reduce): ReLU(inplace) 72 | (inception_3a_double_3x3_1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 73 | (inception_3a_double_3x3_1_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 74 | (inception_3a_relu_double_3x3_1): ReLU(inplace) 75 | (inception_3a_double_3x3_2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 76 | (inception_3a_double_3x3_2_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 77 | (inception_3a_relu_double_3x3_2): ReLU(inplace) 78 | (inception_3a_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 79 | (inception_3a_pool_proj): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1)) 80 | (inception_3a_pool_proj_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 81 | (inception_3a_relu_pool_proj): ReLU(inplace) 82 | (inception_3b_1x1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) 83 | (inception_3b_1x1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 84 | (inception_3b_relu_1x1): ReLU(inplace) 85 | (inception_3b_3x3_reduce): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) 86 | (inception_3b_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 87 | (inception_3b_relu_3x3_reduce): ReLU(inplace) 88 | (inception_3b_3x3): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 89 | (inception_3b_3x3_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 90 | (inception_3b_relu_3x3): ReLU(inplace) 91 | (inception_3b_double_3x3_reduce): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1)) 92 | (inception_3b_double_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 93 | (inception_3b_relu_double_3x3_reduce): ReLU(inplace) 94 | (inception_3b_double_3x3_1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 95 | (inception_3b_double_3x3_1_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 96 | (inception_3b_relu_double_3x3_1): ReLU(inplace) 97 | (inception_3b_double_3x3_2): Conv2d(96, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 98 | (inception_3b_double_3x3_2_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 99 | (inception_3b_relu_double_3x3_2): ReLU(inplace) 100 | (inception_3b_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 101 | (inception_3b_pool_proj): Conv2d(256, 64, kernel_size=(1, 1), 
stride=(1, 1)) 102 | (inception_3b_pool_proj_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 103 | (inception_3b_relu_pool_proj): ReLU(inplace) 104 | (inception_3c_3x3_reduce): Conv2d(320, 128, kernel_size=(1, 1), stride=(1, 1)) 105 | (inception_3c_3x3_reduce_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 106 | (inception_3c_relu_3x3_reduce): ReLU(inplace) 107 | (inception_3c_3x3): Conv2d(128, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) 108 | (inception_3c_3x3_bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 109 | (inception_3c_relu_3x3): ReLU(inplace) 110 | (inception_3c_double_3x3_reduce): Conv2d(320, 64, kernel_size=(1, 1), stride=(1, 1)) 111 | (inception_3c_double_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 112 | (inception_3c_relu_double_3x3_reduce): ReLU(inplace) 113 | (inception_3c_double_3x3_1): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 114 | (inception_3c_double_3x3_1_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 115 | (inception_3c_relu_double_3x3_1): ReLU(inplace) 116 | (res3a_2): Conv3d(96, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 117 | (res3a_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 118 | (res3a_relu): ReLU(inplace) 119 | (res3b_1): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 120 | (res3b_1_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 121 | (res3b_1_relu): ReLU(inplace) 122 | (res3b_2): Conv3d(128, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 123 | (res3b_bn): BatchNorm3d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 124 | (res3b_relu): ReLU(inplace) 125 | (res4a_1): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 126 | (res4a_1_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 127 | (res4a_1_relu): ReLU(inplace) 128 | (res4a_2): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 129 | (res4a_down): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 130 | (res4a_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 131 | (res4a_relu): ReLU(inplace) 132 | (res4b_1): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 133 | (res4b_1_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 134 | (res4b_1_relu): ReLU(inplace) 135 | (res4b_2): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 136 | (res4b_bn): BatchNorm3d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 137 | (res4b_relu): ReLU(inplace) 138 | (res5a_1): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 139 | (res5a_1_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 140 | (res5a_1_relu): ReLU(inplace) 141 | (res5a_2): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 142 | (res5a_down): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1)) 143 | (res5a_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 144 | (res5a_relu): ReLU(inplace) 145 | (res5b_1): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 
1), padding=(1, 1, 1)) 146 | (res5b_1_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 147 | (res5b_1_relu): ReLU(inplace) 148 | (res5b_2): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1)) 149 | (res5b_bn): BatchNorm3d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 150 | (res5b_relu): ReLU(inplace) 151 | (inception_3c_double_3x3_2): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) 152 | (inception_3c_double_3x3_2_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 153 | (inception_3c_relu_double_3x3_2): ReLU(inplace) 154 | (inception_3c_pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True) 155 | (inception_4a_1x1): Conv2d(576, 224, kernel_size=(1, 1), stride=(1, 1)) 156 | (inception_4a_1x1_bn): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 157 | (inception_4a_relu_1x1): ReLU(inplace) 158 | (inception_4a_3x3_reduce): Conv2d(576, 64, kernel_size=(1, 1), stride=(1, 1)) 159 | (inception_4a_3x3_reduce_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 160 | (inception_4a_relu_3x3_reduce): ReLU(inplace) 161 | (inception_4a_3x3): Conv2d(64, 96, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 162 | (inception_4a_3x3_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 163 | (inception_4a_relu_3x3): ReLU(inplace) 164 | (inception_4a_double_3x3_reduce): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1)) 165 | (inception_4a_double_3x3_reduce_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 166 | (inception_4a_relu_double_3x3_reduce): ReLU(inplace) 167 | (inception_4a_double_3x3_1): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 168 | (inception_4a_double_3x3_1_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 169 | (inception_4a_relu_double_3x3_1): ReLU(inplace) 170 | (inception_4a_double_3x3_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 171 | (inception_4a_double_3x3_2_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 172 | (inception_4a_relu_double_3x3_2): ReLU(inplace) 173 | (inception_4a_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 174 | (inception_4a_pool_proj): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1)) 175 | (inception_4a_pool_proj_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 176 | (inception_4a_relu_pool_proj): ReLU(inplace) 177 | (inception_4b_1x1): Conv2d(576, 192, kernel_size=(1, 1), stride=(1, 1)) 178 | (inception_4b_1x1_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 179 | (inception_4b_relu_1x1): ReLU(inplace) 180 | (inception_4b_3x3_reduce): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1)) 181 | (inception_4b_3x3_reduce_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 182 | (inception_4b_relu_3x3_reduce): ReLU(inplace) 183 | (inception_4b_3x3): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 184 | (inception_4b_3x3_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 185 | (inception_4b_relu_3x3): ReLU(inplace) 186 | (inception_4b_double_3x3_reduce): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1)) 187 | (inception_4b_double_3x3_reduce_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, 
affine=True, track_running_stats=True) 188 | (inception_4b_relu_double_3x3_reduce): ReLU(inplace) 189 | (inception_4b_double_3x3_1): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 190 | (inception_4b_double_3x3_1_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 191 | (inception_4b_relu_double_3x3_1): ReLU(inplace) 192 | (inception_4b_double_3x3_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 193 | (inception_4b_double_3x3_2_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 194 | (inception_4b_relu_double_3x3_2): ReLU(inplace) 195 | (inception_4b_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 196 | (inception_4b_pool_proj): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1)) 197 | (inception_4b_pool_proj_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 198 | (inception_4b_relu_pool_proj): ReLU(inplace) 199 | (inception_4c_1x1): Conv2d(576, 160, kernel_size=(1, 1), stride=(1, 1)) 200 | (inception_4c_1x1_bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 201 | (inception_4c_relu_1x1): ReLU(inplace) 202 | (inception_4c_3x3_reduce): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1)) 203 | (inception_4c_3x3_reduce_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 204 | (inception_4c_relu_3x3_reduce): ReLU(inplace) 205 | (inception_4c_3x3): Conv2d(128, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 206 | (inception_4c_3x3_bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 207 | (inception_4c_relu_3x3): ReLU(inplace) 208 | (inception_4c_double_3x3_reduce): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1)) 209 | (inception_4c_double_3x3_reduce_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 210 | (inception_4c_relu_double_3x3_reduce): ReLU(inplace) 211 | (inception_4c_double_3x3_1): Conv2d(128, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 212 | (inception_4c_double_3x3_1_bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 213 | (inception_4c_relu_double_3x3_1): ReLU(inplace) 214 | (inception_4c_double_3x3_2): Conv2d(160, 160, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 215 | (inception_4c_double_3x3_2_bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 216 | (inception_4c_relu_double_3x3_2): ReLU(inplace) 217 | (inception_4c_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 218 | (inception_4c_pool_proj): Conv2d(576, 128, kernel_size=(1, 1), stride=(1, 1)) 219 | (inception_4c_pool_proj_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 220 | (inception_4c_relu_pool_proj): ReLU(inplace) 221 | (inception_4d_1x1): Conv2d(608, 96, kernel_size=(1, 1), stride=(1, 1)) 222 | (inception_4d_1x1_bn): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 223 | (inception_4d_relu_1x1): ReLU(inplace) 224 | (inception_4d_3x3_reduce): Conv2d(608, 128, kernel_size=(1, 1), stride=(1, 1)) 225 | (inception_4d_3x3_reduce_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 226 | (inception_4d_relu_3x3_reduce): ReLU(inplace) 227 | (inception_4d_3x3): Conv2d(128, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 228 | (inception_4d_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 229 | 
(inception_4d_relu_3x3): ReLU(inplace) 230 | (inception_4d_double_3x3_reduce): Conv2d(608, 160, kernel_size=(1, 1), stride=(1, 1)) 231 | (inception_4d_double_3x3_reduce_bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 232 | (inception_4d_relu_double_3x3_reduce): ReLU(inplace) 233 | (inception_4d_double_3x3_1): Conv2d(160, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 234 | (inception_4d_double_3x3_1_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 235 | (inception_4d_relu_double_3x3_1): ReLU(inplace) 236 | (inception_4d_double_3x3_2): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 237 | (inception_4d_double_3x3_2_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 238 | (inception_4d_relu_double_3x3_2): ReLU(inplace) 239 | (inception_4d_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 240 | (inception_4d_pool_proj): Conv2d(608, 128, kernel_size=(1, 1), stride=(1, 1)) 241 | (inception_4d_pool_proj_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 242 | (inception_4d_relu_pool_proj): ReLU(inplace) 243 | (inception_4e_3x3_reduce): Conv2d(608, 128, kernel_size=(1, 1), stride=(1, 1)) 244 | (inception_4e_3x3_reduce_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 245 | (inception_4e_relu_3x3_reduce): ReLU(inplace) 246 | (inception_4e_3x3): Conv2d(128, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) 247 | (inception_4e_3x3_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 248 | (inception_4e_relu_3x3): ReLU(inplace) 249 | (inception_4e_double_3x3_reduce): Conv2d(608, 192, kernel_size=(1, 1), stride=(1, 1)) 250 | (inception_4e_double_3x3_reduce_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 251 | (inception_4e_relu_double_3x3_reduce): ReLU(inplace) 252 | (inception_4e_double_3x3_1): Conv2d(192, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 253 | (inception_4e_double_3x3_1_bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 254 | (inception_4e_relu_double_3x3_1): ReLU(inplace) 255 | (inception_4e_double_3x3_2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1)) 256 | (inception_4e_double_3x3_2_bn): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 257 | (inception_4e_relu_double_3x3_2): ReLU(inplace) 258 | (inception_4e_pool): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True) 259 | (inception_5a_1x1): Conv2d(1056, 352, kernel_size=(1, 1), stride=(1, 1)) 260 | (inception_5a_1x1_bn): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 261 | (inception_5a_relu_1x1): ReLU(inplace) 262 | (inception_5a_3x3_reduce): Conv2d(1056, 192, kernel_size=(1, 1), stride=(1, 1)) 263 | (inception_5a_3x3_reduce_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 264 | (inception_5a_relu_3x3_reduce): ReLU(inplace) 265 | (inception_5a_3x3): Conv2d(192, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 266 | (inception_5a_3x3_bn): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 267 | (inception_5a_relu_3x3): ReLU(inplace) 268 | (inception_5a_double_3x3_reduce): Conv2d(1056, 160, kernel_size=(1, 1), stride=(1, 1)) 269 | (inception_5a_double_3x3_reduce_bn): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, 
track_running_stats=True) 270 | (inception_5a_relu_double_3x3_reduce): ReLU(inplace) 271 | (inception_5a_double_3x3_1): Conv2d(160, 224, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 272 | (inception_5a_double_3x3_1_bn): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 273 | (inception_5a_relu_double_3x3_1): ReLU(inplace) 274 | (inception_5a_double_3x3_2): Conv2d(224, 224, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 275 | (inception_5a_double_3x3_2_bn): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 276 | (inception_5a_relu_double_3x3_2): ReLU(inplace) 277 | (inception_5a_pool): AvgPool2d(kernel_size=3, stride=1, padding=1) 278 | (inception_5a_pool_proj): Conv2d(1056, 128, kernel_size=(1, 1), stride=(1, 1)) 279 | (inception_5a_pool_proj_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 280 | (inception_5a_relu_pool_proj): ReLU(inplace) 281 | (inception_5b_1x1): Conv2d(1024, 352, kernel_size=(1, 1), stride=(1, 1)) 282 | (inception_5b_1x1_bn): BatchNorm2d(352, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 283 | (inception_5b_relu_1x1): ReLU(inplace) 284 | (inception_5b_3x3_reduce): Conv2d(1024, 192, kernel_size=(1, 1), stride=(1, 1)) 285 | (inception_5b_3x3_reduce_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 286 | (inception_5b_relu_3x3_reduce): ReLU(inplace) 287 | (inception_5b_3x3): Conv2d(192, 320, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 288 | (inception_5b_3x3_bn): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 289 | (inception_5b_relu_3x3): ReLU(inplace) 290 | (inception_5b_double_3x3_reduce): Conv2d(1024, 192, kernel_size=(1, 1), stride=(1, 1)) 291 | (inception_5b_double_3x3_reduce_bn): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 292 | (inception_5b_relu_double_3x3_reduce): ReLU(inplace) 293 | (inception_5b_double_3x3_1): Conv2d(192, 224, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 294 | (inception_5b_double_3x3_1_bn): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 295 | (inception_5b_relu_double_3x3_1): ReLU(inplace) 296 | (inception_5b_double_3x3_2): Conv2d(224, 224, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) 297 | (inception_5b_double_3x3_2_bn): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 298 | (inception_5b_relu_double_3x3_2): ReLU(inplace) 299 | (inception_5b_pool): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=True) 300 | (inception_5b_pool_proj): Conv2d(1024, 128, kernel_size=(1, 1), stride=(1, 1)) 301 | (inception_5b_pool_proj_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True) 302 | (inception_5b_relu_pool_proj): ReLU(inplace) 303 | (global_pool2D_pre): AvgPool2d(kernel_size=7, stride=1, padding=0) 304 | (global_pool2D_pre_drop): Dropout(p=0.5) 305 | (global_pool2D_reshape_consensus): AvgPool3d(kernel_size=(16, 1, 1), stride=1, padding=0) 306 | (global_pool3D): AvgPool3d(kernel_size=(4, 7, 7), stride=1, padding=0) 307 | (global_pool3D_drop): Dropout(p=0.3) 308 | (fc_final): Dropout(p=0.6) 309 | ) 310 | (new_fc): Linear(in_features=1536, out_features=12, bias=True) 311 | (consensus): ConsensusModule() 312 | ) 313 | ) 314 | 2021-04-14 13:48:27,260 - main.py[line:120] - INFO: finetune 315 | 2021-04-14 13:48:27,261 - main.py[line:395] - INFO: ./model/ECO_Full_rgb_model_Kinetics.pth.tar 316 | 
2021-04-14 13:48:27,261 - main.py[line:396] - INFO: 88888888888888888888888888888888888888888888888888888888888888888888888888888888 317 | 2021-04-14 13:48:27,410 - main.py[line:399] - INFO: => loading model-finetune: './model/ECO_Full_rgb_model_Kinetics.pth.tar' 318 | 2021-04-14 13:48:27,411 - main.py[line:405] - INFO: ************************************************** 319 | 2021-04-14 13:48:27,411 - main.py[line:406] - INFO: Start finetuning .. 320 | 2021-04-14 13:48:27,411 - main.py[line:152] - INFO: ['module.new_fc.weight', 'module.new_fc.bias'] 321 | 2021-04-14 13:48:27,412 - main.py[line:153] - INFO: 322 | ------------------------------------ 323 | 2021-04-14 13:48:27,412 - main.py[line:162] - INFO: module.new_fc.weight init as: xavier 324 | 2021-04-14 13:48:27,412 - main.py[line:165] - INFO: module.new_fc.bias init as: 0 325 | 2021-04-14 13:48:27,412 - main.py[line:168] - INFO: ------------------------------------ 326 | 2021-04-14 13:48:27,481 - main.py[line:229] - INFO: group: first_3d_conv_weight has 1 params, lr_mult: 1, decay_mult: 1 327 | 2021-04-14 13:48:27,482 - main.py[line:229] - INFO: group: first_3d_conv_bias has 1 params, lr_mult: 2, decay_mult: 0 328 | 2021-04-14 13:48:27,482 - main.py[line:229] - INFO: group: normal_weight has 82 params, lr_mult: 1, decay_mult: 1 329 | 2021-04-14 13:48:27,482 - main.py[line:229] - INFO: group: normal_bias has 82 params, lr_mult: 2, decay_mult: 0 330 | 2021-04-14 13:48:27,482 - main.py[line:229] - INFO: group: BN scale/shift has 2 params, lr_mult: 1, decay_mult: 0 331 | 2021-04-14 13:48:47,800 - main.py[line:528] - INFO: Epoch: [0][4/500], lr: 0.0001000 Time 0.90 (4.79) UTime 2021/04/14-13:48:46 Data 0.00 (0.95) Loss 4.550 (2.680) Prec@1 12.50 (13.75) Prec@5 56.25 (53.75) 332 | --------------------------------------------------------------------------------
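The models.py excerpt above rebuilds the network's first Conv2d so that a backbone pretrained on 3-channel RGB input can ingest 3 * new_length stacked frames (e.g. for the RGBDiff modality): the pretrained kernels are averaged over the channel dimension and tiled across the new input channels, then the rebuilt layer is swapped in with setattr. Below is a minimal standalone sketch of that idea, written against torchvision's ResNet-18 purely for illustration; expand_first_conv and the other names here are assumptions of this sketch, not part of the repository's API.

import torch
import torch.nn as nn
import torchvision

def expand_first_conv(model, new_length):
    """Replace the first Conv2d so it accepts 3 * new_length input channels."""
    modules = list(model.modules())
    # index of the first Conv2d and the module that directly contains it
    first_idx = next(i for i, m in enumerate(modules) if isinstance(m, nn.Conv2d))
    conv, container = modules[first_idx], modules[first_idx - 1]

    params = [p.clone() for p in conv.parameters()]
    kernel_size = params[0].size()                      # (out_c, 3, kH, kW)
    new_in = 3 * new_length
    new_kernel_size = kernel_size[:1] + (new_in,) + kernel_size[2:]
    # average the pretrained RGB kernels and tile them over the new channels
    new_kernels = params[0].data.mean(dim=1, keepdim=True).expand(new_kernel_size).contiguous()

    new_conv = nn.Conv2d(new_in, conv.out_channels, conv.kernel_size,
                         conv.stride, conv.padding, bias=len(params) == 2)
    new_conv.weight.data = new_kernels
    if len(params) == 2:
        new_conv.bias.data = params[1].data
    # state_dict keys look like "<layer_name>.weight"; strip the 7-char suffix
    layer_name = list(container.state_dict().keys())[0][:-7]
    setattr(container, layer_name, new_conv)
    return model

if __name__ == "__main__":
    net = expand_first_conv(torchvision.models.resnet18(), new_length=5)
    out = net(torch.randn(2, 3 * 5, 224, 224))          # 15-channel stacked input
    print(out.shape)                                    # torch.Size([2, 1000])

The locate-and-setattr pattern is the same one the models.py code uses (list(model.modules()), then replacing the attribute on the containing module), so the sketch should carry over to the GoogLeNet-style ECO backbone as well; averaging rather than re-initialising the kernels is what lets the expanded first layer keep its pretrained low-level filters.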