├── .gitignore ├── 3d_net_visualization.py ├── README.md ├── action_feature_visualization.py ├── action_recognition.py ├── main.py ├── net ├── c3d.py ├── i3d.py ├── i3dpt_origin.py ├── mfnet_3d.py ├── model.py ├── mp_i3d.py └── r3d.py ├── output ├── imgs │ └── 79 │ │ ├── focusmap_000.png │ │ ├── focusmap_001.png │ │ ├── focusmap_002.png │ │ ├── focusmap_003.png │ │ ├── focusmap_004.png │ │ ├── focusmap_005.png │ │ ├── focusmap_006.png │ │ ├── heatmap_000.png │ │ ├── heatmap_001.png │ │ ├── heatmap_002.png │ │ ├── heatmap_003.png │ │ ├── heatmap_004.png │ │ ├── heatmap_005.png │ │ └── heatmap_006.png ├── ucf101_test_1 │ └── 0 │ │ ├── focusmap │ │ ├── 000.png │ │ ├── 001.png │ │ ├── 002.png │ │ ├── 003.png │ │ ├── 004.png │ │ ├── 005.png │ │ ├── 006.png │ │ ├── 007.png │ │ ├── 008.png │ │ ├── 009.png │ │ ├── 010.png │ │ ├── 011.png │ │ ├── 012.png │ │ ├── 013.png │ │ ├── 014.png │ │ └── 015.png │ │ ├── heatmap │ │ ├── 000.png │ │ ├── 001.png │ │ ├── 002.png │ │ ├── 003.png │ │ ├── 004.png │ │ ├── 005.png │ │ ├── 006.png │ │ ├── 007.png │ │ ├── 008.png │ │ ├── 009.png │ │ ├── 010.png │ │ ├── 011.png │ │ ├── 012.png │ │ ├── 013.png │ │ ├── 014.png │ │ └── 015.png │ │ └── info.txt └── video │ ├── label_0.mp4 │ ├── label_28.mp4 │ └── label_471.mp4 ├── process_all_hmdb51_videos.py ├── resources ├── HMDB_snapshot1.png ├── HMDB_snapshot2.png ├── classInd.txt ├── focusimg_1.png ├── heatmap_000.png ├── heatmap_000_sc.png ├── heatmap_003.png ├── heatmap_003_sc.png ├── heatmap_007.png ├── heatmap_007_sc.png ├── heatmap_1.png ├── hmdb51_classInd.txt ├── supervised.gif └── unsupervised.gif ├── scripts ├── c3d_unsupervised_demo.sh ├── demo.sh ├── i3d_demo.sh ├── i3d_mixup_demo.sh ├── i3d_rotate_demo.sh ├── i3d_unsupervised_demo.sh ├── mpi3d_demo.sh └── r3d_unsupervised_demo.sh ├── test_videos ├── 50_FIRST_DATES_drink_u_nm_np1_fr_goo_29.mp4 ├── BASE_Jumping_Compilation_-_Brilliant_dive_f_cm_np1_le_bad_3.mp4 ├── BaseballSwingAnalysis_swing_baseball_u_nm_np1_ba_med_0.mp4 ├── Bodenturnen_im_sportunterricht_handstand_f_cm_np1_le_med_1.mp4 ├── Bruno_Walks_up_Stairs_-_Chicago_Dog_Training_-_We_can_teach_ANYTHING_to_a_dog!!!!_climb_stairs_f_cm_np1_fr_med_0.mp4 ├── DefensivePistolShootingTechniques_shoot_gun_f_nm_np1_fr_med_3.mp4 ├── Documentario_Le_Parkour_Londrina_jump_f_cm_np1_ri_bad_6.mp4 ├── v_ApplyEyeMakeup_g01_c01.avi └── v_HeadMassage_g02_c05.avi ├── util.py └── utils ├── gen_new_video.py ├── gen_rotation_data.py └── video_cat.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Example user template template 3 | ### Example user template 4 | 5 | # IntelliJ project files 6 | .idea 7 | *.iml 8 | out 9 | gen 10 | pretrained_model 11 | /output/concat_videos/ 12 | /test_videos/bk/ 13 | /output/hmdb51_all_videos/ 14 | /output/ucf101_test_1/ 15 | /output/video/MultiMedia/ 16 | /output/video/BK/ 17 | /output/imgs/28/ 18 | -------------------------------------------------------------------------------- /3d_net_visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-03-17 10:09 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : Action_Video_Visualization 8 | # @File : 3d_net_visualization.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import os 14 | 15 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the 
warning 16 | os.environ["CUDA_VISIBLE_DEVICES"]='3' 17 | import os 18 | import cv2 19 | import torch 20 | import argparse 21 | import numpy as np 22 | from scipy.ndimage import zoom 23 | from net.mfnet_3d import MFNET_3D 24 | from util import load_images 25 | 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser(description='mfnet-base-parser') 29 | parser.add_argument("--num_classes", type=int, default=101) 30 | parser.add_argument("--model_weights", type=str, default='pretrained_model/MFNet3D_UCF-101_Split-1_96.3.pth') 31 | parser.add_argument("--frame_dir", type=str, default='test_videos/ucf101_test_1') 32 | parser.add_argument("--label", type=int, default=0) 33 | parser.add_argument("--base_output_dir", type=str, default="output") 34 | return parser.parse_args() 35 | args = parse_args() 36 | 37 | 38 | def load_model(): 39 | model_ft = MFNET_3D(args.num_classes) 40 | model_ft = torch.nn.DataParallel(model_ft).cuda() 41 | checkpoint = torch.load(args.model_weights) 42 | model_ft.load_state_dict(checkpoint['state_dict']) 43 | model_ft.cuda() 44 | model_ft.eval() 45 | return model_ft 46 | 47 | 48 | def split_imgs(): 49 | frame_names = os.listdir(args.frame_dir) 50 | frame_indices = list(np.linspace(0, len(frame_names) - 1, num=16, dtype=np.int)) 51 | selected_frames = [frame_names[i] for i in frame_indices] 52 | 53 | RGB_vid, vid = load_images(args.frame_dir, selected_frames) 54 | return RGB_vid, vid 55 | 56 | 57 | def cam_calculate(model_ft, vid): 58 | # get predictions, last convolution output and the weights of the prediction layer 59 | # i3d is two layer fc, need to modify here 60 | predictions, layerout = model_ft(torch.tensor(vid).cuda()) # 1x101 61 | layerout = torch.tensor(layerout[0].numpy().transpose(1, 2, 3, 0)) #8x7x7x768 62 | pred_weights = model_ft.module.classifier.weight.data.detach().cpu().numpy().transpose() # 768 x 101 63 | pred = torch.argmax(predictions).item() 64 | cam = np.zeros(dtype = np.float32, shape = layerout.shape[0:3]) 65 | for i, w in enumerate(pred_weights[:, args.label]): 66 | #i = 0, w:101 67 | # Compute cam for every kernel 68 | cam += w * layerout[:, :, :, i] # 8x7x7 69 | 70 | # Resize CAM to frame level 71 | cam = zoom(cam, (2, 32, 32)) # output map is 8x7x7, so multiply to get to 16x224x224 (original image size) 72 | 73 | # normalize 74 | cam -= np.min(cam) 75 | cam /= np.max(cam) - np.min(cam) 76 | return cam, pred 77 | 78 | 79 | def save_imgs(cam, pred, RGB_vid): 80 | # make dirs and filenames 81 | example_name = os.path.basename(args.frame_dir) 82 | heatmap_dir = os.path.join(args.base_output_dir, example_name, str(args.label), "heatmap") 83 | focusmap_dir = os.path.join(args.base_output_dir, example_name, str(args.label), "focusmap") 84 | for d in [heatmap_dir, focusmap_dir]: 85 | if not os.path.exists(d): 86 | os.makedirs(d) 87 | 88 | file = open(os.path.join(args.base_output_dir, example_name, str(args.label), "info.txt"), "a") 89 | file.write("Visualizing for class {}\n".format(args.label)) 90 | file.write("Predicted class {}\n".format(pred)) 91 | file.close() 92 | 93 | # produce heatmap and focusmap for every frame and activation map 94 | for i in range(0, cam.shape[0]): 95 | # Create colourmap 96 | # COLORMAP_AUTUMN = 0, 97 | # COLORMAP_BONE = 1, 98 | # COLORMAP_JET = 2, 99 | # COLORMAP_WINTER = 3, 100 | # COLORMAP_RAINBOW = 4, 101 | # COLORMAP_OCEAN = 5, 102 | # COLORMAP_SUMMER = 6, 103 | # COLORMAP_SPRING = 7, 104 | # COLORMAP_COOL = 8, 105 | # COLORMAP_HSV = 9, 106 | # COLORMAP_PINK = 10, 107 | # COLORMAP_HOT = 11 108 | 109 | 
heatmap = cv2.applyColorMap(np.uint8(255 * cam[i]), cv2.COLORMAP_WINTER) 110 | # Create focus map 111 | focusmap = np.uint8(255 * cam[i]) 112 | focusmap = cv2.normalize(cam[i], dst=focusmap, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_8UC1) 113 | 114 | # Create frame with heatmap 115 | heatframe = heatmap // 2 + RGB_vid[0][i] // 2 116 | cv2.imwrite(os.path.join(heatmap_dir, '{:03d}.png'.format(i)), heatframe) 117 | 118 | # Create frame with focus map in the alpha channel 119 | focusframe = RGB_vid[0][i] 120 | focusframe = cv2.cvtColor(np.uint8(focusframe), cv2.COLOR_BGR2BGRA) 121 | focusframe[:, :, 3] = focusmap 122 | cv2.imwrite(os.path.join(focusmap_dir, '{:03d}.png'.format(i)), focusframe) 123 | 124 | 125 | def main(): 126 | global args 127 | mfnet = load_model() 128 | RGB_vid, vid = split_imgs() 129 | cam, pred = cam_calculate(mfnet, vid) 130 | save_imgs(cam, pred, RGB_vid) 131 | 132 | 133 | if __name__ == '__main__': 134 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 3D Net Visualization Tools (PyTorch) 2 | 3 | ## Demo 4 | 5 | **This project shows which space-time regions a 3D model focuses on, 6 | in either a supervised or an unsupervised (no label available) setting. For an input video, 7 | it renders the attention map on both the video and the individual frames.** 8 | 9 | ### saved video 10 | 11 | Videos cannot be shown here, so here are a few GIFs. 12 | 13 | **supervised with label** 14 | 15 | ![gif](https://github.com/FingerRec/3DNet_Visualization/raw/master/resources/supervised.gif) 16 | 17 | **unsupervised (only the RGB video is available)** 18 | 19 | ![gif_2](https://github.com/FingerRec/3DNet_Visualization/raw/master/resources/unsupervised.gif) 20 | 21 | 22 | ### saved img 23 | 24 | **heatmap** 25 | 26 | ![heatmap_image](https://github.com/FingerRec/3DNet_Visualization/raw/master/resources/heatmap_1.png) 27 | 28 | **focus map** 29 | 30 | ![focus_image](https://github.com/FingerRec/3DNet_Visualization/raw/master/resources/focusimg_1.png) 31 | 32 | ### feature map average (without label) 33 | In some cases the true label of the video/action is not available. We then average all filters 34 | and visualize the resulting heatmap.
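A minimal sketch of this label-free heatmap (assumptions: `layerout` is the last conv feature map of one clip as a NumPy array of shape T x H x W x C, e.g. 8x7x7x1024, and the helper name `unsupervised_cam` is ours); it mirrors the channel-averaging idea used by `generate_unsupervised_cam` in action_recognition.py:

```python
import numpy as np
from scipy.ndimage import zoom


def unsupervised_cam(layerout, frames=16, size=224):
    """Average all channels of a T x H x W x C feature map into a frames x size x size heatmap."""
    cam = layerout.mean(axis=-1)                  # T x H x W; summing or averaging only differ by a scale
    cam = zoom(cam, (frames // cam.shape[0],      # e.g. 8x7x7 -> 16x224x224
                     size // cam.shape[1],
                     size // cam.shape[2]))
    cam -= cam.min()                              # min-max normalize to [0, 1]
    cam /= cam.max() - cam.min() + 1e-8
    return cam


# usage sketch: cam = unsupervised_cam(np.random.rand(8, 7, 7, 1024))
```

The final min-max normalization is what makes the later `cv2.applyColorMap(np.uint8(255 * cam[i]), ...)` call meaningful, since the colormap expects values in [0, 255].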
35 | 36 | ![average feature map (scratch)](https://github.com/FingerRec/3DNet_Visualization/raw/master/resources/heatmap_000_sc.png) 37 | ![average feature map (supervised)](https://github.com/FingerRec/3DNet_Visualization/raw/master/resources/heatmap_000.png) 38 | 39 | 40 | 41 | ## Requirements 42 | - pytorch 0.4 43 | - opencv 44 | - numpy 45 | - skvideo 46 | - ffmpeg 47 | 48 | ## Run 49 | ### 1. Create the pretrained_model dir 50 | ```bash 51 | git clone https://github.com/FingerRec/3DNet_Visualization.git 52 | cd 3DNet_Visualization 53 | mkdir pretrained_model 54 | ``` 55 | 56 | ### 2. Download a pretrained model and put it into the pretrained_model dir 57 | 58 | #### MF-Net 59 | Download MFNet pretrained on UCF101 from [google_drive](https://goo.gl/mML2gv) and put it into the pretrained_model directory; 60 | the weights come from [MFNet](https://github.com/cypw/PyTorch-MFNet). 61 | #### I3D 62 | [google_drive](https://drive.google.com/open?id=1feHEql9XhoV2pwXb5dTs4TFuaqsa1ajX) 63 | 64 | #### R3D 65 | 66 | [r3d](https://drive.google.com/file/d/1H52vT1T0sl7iWA7Up8wu1rSMFzgdwGZG/view?usp=sharing) 67 | 68 | The R3D pretrained model is from [3D-Resnet-Pytorch](https://github.com/kenshohara/3D-ResNets-PyTorch). 69 | 70 | #### C3D 71 | 72 | [C3D](https://drive.google.com/file/d/19NWziHWh1LgCcHU34geoKwYezAogv9fX/view?usp=sharing) 73 | 74 | The C3D pretrained model is from [C3D-Pytorch](https://github.com/jfzhang95/pytorch-video-recognition). 75 | 76 | ### 3. Run the demo 77 | 78 | I3D pretrained on HMDB51: 79 | ```bash 80 | bash scripts/demo.sh 81 | ``` 82 | #### c3d 83 | ```bash 84 | bash scripts/c3d_unsupervised_demo.sh 85 | ``` 86 | 87 | #### r3d 88 | ```bash 89 | bash scripts/r3d_unsupervised_demo.sh 90 | ``` 91 | 92 | The generated videos and images are written to output/video and output/imgs. 93 | 94 | Tip: in main.py, setting clip_steps to 1 generates a video of the same length as the original. 95 | 96 | ### 4. Test your own video 97 | 98 | The relevant part of demo.sh is shown below; change --video and --label according to your video, and refer to resources/classInd.txt for the label indices of UCF101 videos. 99 | 100 | ```bash 101 | python main.py --num_classes 101 \ 102 | --classes_list resources/classInd.txt \ 103 | --model_weights pretrained_model/MFNet3D_UCF-101_Split-1_96.3.pth \ 104 | --video test_videos/[your own video here] \ 105 | --frames_num 16 --label 0 --clip_steps 16 \ 106 | --output_dir output \ 107 | --supervised unsupervised # keep this line only when no label is available; remove it for supervised visualization 108 | 109 | ``` 110 | 111 | **Note: for unsupervised computation, the only change is adding --supervised unsupervised to the script.** 112 | 113 | 114 | Tip: UCF101/HMDB51 are supported out of the box; for Kinetics et al., just download a pretrained model and change --classes_list. 115 | 116 | ## To Do List 117 | - [X] support i3d, mpi3d 118 | - [X] support multiple fc layers and fully convolutional networks 119 | - [X] support feature map average without label 120 | - [X] support r3d and c3d 121 | - [ ] support Slow-Fast Net 122 | - [ ] visualize filters 123 | - [ ] grad-cam 124 | 125 | ## More information 126 | 127 | To support your own network: 128 | 129 | > 1. prepare a pretrained model; 2. update load_model() in main.py; 3. 
modify last linear layer name in generate_supervised_cam in action_recognition.py 130 | 131 | **Notice C3D and R3D are pretrained on Sports/Kinetics, for better visualization, you may need to finetune these networks on UCF/HMDB as in [RHE](https://github.com/FingerRec/RHE)** 132 | 133 | 134 | ## Acknowledgment 135 | This project is highly based on [SaliencyTubes](https://github.com/alexandrosstergiou/Saliency-Tubes-Visual-Explanations-for-Spatio-Temporal-Convolutions) 136 | , [MF-Net](https://github.com/cypw/PyTorch-MFNet) and [st-gcn](https://github.com/yysijie/st-gcn). -------------------------------------------------------------------------------- /action_feature_visualization.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-03-16 22:47 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : Action_Video_Visualization 8 | # @File : action_feature_visualization.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | import cv2 16 | from util import * 17 | import numpy as np 18 | 19 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 20 | #i want to do a show video such as openpose, show detection video and label real time, plot the weights of the prediction layer 21 | #plot heat map as well 22 | #this work may be finished in 2-3 days. 23 | 24 | 25 | class Visualization(object): 26 | def __init__(self): 27 | return 28 | 29 | def gen_heatmap(self, cam, frame): 30 | """ 31 | geneate headmap and focus map from images 32 | :return: 33 | """ 34 | # produce heatmap and focusmap for every frame and activation map 35 | # cam:16x224x224x3 frame:1x3x16x224x224 36 | # Create colourmap 37 | # COLORMAP_AUTUMN = 0, 38 | # COLORMAP_BONE = 1, 39 | # COLORMAP_JET = 2, 40 | # COLORMAP_WINTER = 3, 41 | # COLORMAP_RAINBOW = 4, 42 | # COLORMAP_OCEAN = 5, 43 | # COLORMAP_SUMMER = 6, 44 | # COLORMAP_SPRING = 7, 45 | # COLORMAP_COOL = 8, 46 | # COLORMAP_HSV = 9, 47 | # COLORMAP_PINK = 10, 48 | # COLORMAP_HOT = 11 49 | for i in range(cam.shape[0]): 50 | # Create colourmap 51 | heatmap = cv2.applyColorMap(np.uint8(255 * cam[i]), cv2.COLORMAP_JET) # for COLORMAP 5/8 52 | # heatmap = cv2.applyColorMap(np.uint8(255 * cam[i]), cv2.COLORMAP_COOL) 53 | # Create focus map 54 | focusmap = np.uint8(255 * cam[i]) 55 | focusmap = cv2.normalize(cam[i], dst=focusmap, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, 56 | dtype=cv2.CV_8UC1) 57 | # Create frame with heatmap 58 | heatframe = heatmap // 2 + frame[0][i] // 2 59 | # Create frame with focus map in the alpha channel 60 | focusframe = frame[0][i] 61 | # focusframe = cv2.cvtColor(np.uint8(focusframe), cv2.COLOR_BGR2BGRA) 62 | # focusframe[:, :, 3] = focusmap 63 | # focusmap = cv2.blur(focusmap, (30,30)) 64 | alpha = focusmap 65 | focusframe = np.dstack((focusframe, alpha)) 66 | return heatframe, focusframe 67 | 68 | @staticmethod 69 | def gen_mask_img(origin_img, heat_map, pred_top3, prob_top3, label, classes_list, text=True): 70 | """ 71 | a img will be divide into four parts, origin images, activation_map, heatmap, focusmap 72 | and add text into them 73 | may be want to visulization these filters, do it later 74 | :return: 75 | """ 76 | h, w, c = origin_img.shape 77 | assert h >= 224 and w >= 224 78 | x1 = int(round((w - 224) / 2.)) 79 | y1 = int(round((h - 224) / 2.)) 80 | cropped_img = origin_img[y1:(y1 + 224), x1:(x1 + 224), :] 81 | 
#focus_crop_img = np.zeros([224, 224, 3]) 82 | #for i in range(3): 83 | # focus_crop_img = focus_map[:,:,i] * focus_map[:, :, 3] 84 | #focus_crop_img = cv2.cvtColor(focus_map, cv2.COLOR_RGBA2RGB) 85 | #focus_map = np.resize(focus_crop_img, [224,224,3]) 86 | classes = [x.strip() for x in open(classes_list)] 87 | if text: 88 | label_name = 'real label: ' + classes[label - 1] 89 | put_text(cropped_img, label_name, (0.1, 0.5)) 90 | for i in range(3): 91 | label_text = " Top {}: label: {}".format(i+1, classes[pred_top3[i]]) 92 | put_text(heat_map[i], label_text, (0.1, 0.5)) 93 | prob_text = "prob: {}".format(str(prob_top3[i])[:7]) 94 | put_text(heat_map[i], prob_text, (0.2, 0.5)) 95 | img0 = np.concatenate((cropped_img, heat_map[0]), axis=1) 96 | img1 = np.concatenate((heat_map[1], heat_map[2]), axis=1) 97 | maskimg = np.concatenate((img0, img1), axis=0) 98 | return maskimg 99 | 100 | @staticmethod 101 | def gen_mp_mask_img(origin_img, heat_map, pred_top3, prob_top3, label, classes_list): 102 | """ 103 | a img will be divide into four parts, origin images, activation_map, heatmap, focusmap 104 | and add text into them 105 | may be want to visulization these filters, do it later 106 | :return: 107 | """ 108 | h, w, c = origin_img.shape 109 | assert h >= 224 and w >= 224 110 | x1 = int(round((w - 224) / 2.)) 111 | y1 = int(round((h - 224) / 2.)) 112 | cropped_img = origin_img[y1:(y1 + 224), x1:(x1 + 224), :] 113 | classes = [x.strip() for x in open(classes_list)] 114 | label_name = 'real label: ' + classes[label - 1] 115 | put_text(cropped_img, label_name, (0.1, 0.5)) 116 | pred_top3 = np.array(pred_top3) 117 | prob_top3 = np.array(prob_top3) 118 | strs = ['s', 'm', 'l'] 119 | for i in range(3): 120 | label_text = " Path {}: label: {}".format(strs[i], classes[pred_top3[i][0]]) 121 | put_text(heat_map[i], label_text, (0.1, 0.5)) 122 | prob_text = "prob: {}".format(str(prob_top3[i][0])[:7]) 123 | put_text(heat_map[i], prob_text, (0.2, 0.5)) 124 | img0 = np.concatenate((cropped_img, heat_map[0]), axis=1) 125 | img1 = np.concatenate((heat_map[1], heat_map[2]), axis=1) 126 | maskimg = np.concatenate((img0, img1), axis=1) 127 | return maskimg 128 | -------------------------------------------------------------------------------- /action_recognition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-03-17 13:00 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : Action_Video_Visualization 8 | # @File : action_recognition.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | import torch 16 | import numpy as np 17 | import cv2 18 | from util import center_crop 19 | from scipy.ndimage import zoom 20 | 21 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 22 | 23 | 24 | class ActionRecognition(object): 25 | def __init__(self, args, model): 26 | self.model = model 27 | self.args = args 28 | 29 | def img_process(self, imgs, frames_num): 30 | images = np.zeros((frames_num, 224, 224, 3)) 31 | orig_imgs = np.zeros_like(images) 32 | for i in range(frames_num): 33 | next_image = imgs[i] 34 | next_image = np.uint8(next_image) 35 | scaled_img = cv2.resize(next_image, (256, 256), interpolation=cv2.INTER_LINEAR) # resize to 256x256 36 | cropped_img = center_crop(scaled_img) # center crop 224x224 37 | final_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB) 38 | images[i] = final_img 
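# Two copies of every frame are kept on purpose: `images` (above) stores the RGB version that gets
# mean/std-normalized and fed to the network, while `orig_imgs` (next line) stores the raw BGR center
# crop so the heatmap/focus-map frames can later be blended and written with OpenCV.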
39 | orig_imgs[i] = cropped_img 40 | torch_imgs = torch.from_numpy(images.transpose(3, 0, 1, 2)) 41 | torch_imgs = torch_imgs.float() / 255.0 42 | mean_3d = [124 / 255, 117 / 255, 104 / 255] 43 | std_3d = [0.229, 0.224, 0.225] 44 | for t, m, s in zip(torch_imgs, mean_3d, std_3d): 45 | t.sub_(m).div_(s) 46 | return np.expand_dims(orig_imgs, 0), torch_imgs.unsqueeze(0) 47 | 48 | def recognition_video(self, imgs): 49 | """ 50 | recognition video's action 51 | :param imgs: preprocess imgs 52 | :return: 53 | """ 54 | prediction, _ = self.model(torch.tensor(imgs).cuda()) # 1x101 55 | pred = torch.argmax(prediction).item() 56 | return pred 57 | 58 | def generate_supervised_cam(self, imgs): 59 | predictions, layerout = self.model(torch.tensor(imgs).cuda()) # 1x101 60 | layerout = torch.tensor(layerout[0].numpy().transpose(1, 2, 3, 0)) # 8x7x7x768 61 | if self.args.arch == "i3d" or "mf_net": 62 | pred_weights = self.model.module.classifier.weight.data.detach().cpu().numpy().transpose() # 768 x 101 63 | elif self.args.arch == "r3d": 64 | pred_weights = self.model.module.fc.weight.data.detach().cpu().numpy().transpose() # 2048 x 101 65 | elif self.args.arch == "c3d": 66 | pred_weights = self.model.module.linear.weight.data.detach().cpu().numpy().transpose() # 512 x 101 67 | predictions = torch.nn.Softmax(dim=1)(predictions) 68 | pred_top3 = predictions.detach().cpu().numpy().argsort()[0][::-1][:3] 69 | probality_top3 = -np.sort(-predictions.detach().cpu().numpy())[0,0:3] 70 | #print(pred_top3) 71 | #pred_top3 = torch.argmax(predictions).item() 72 | cam_list = list() 73 | for k in range(len(pred_top3)): 74 | cam = np.zeros(dtype=np.float32, shape=layerout.shape[0:3]) 75 | for i, w in enumerate(pred_weights[:, pred_top3[k]]): 76 | # Compute cam for every kernel 77 | cam += w * layerout[:, :, :, i] # 8x7x7 78 | # Resize CAM to frame level 79 | cam = zoom(cam, (16//layerout.size(0), 224//layerout.size(1), 224//layerout.size(2))) 80 | # output map is 8x7x7, so multiply to get to 16x224x224 (original image size) 81 | 82 | # normalize 83 | cam -= np.min(cam) 84 | cam /= np.max(cam) - np.min(cam) 85 | cam_list.append(cam) 86 | return cam_list, pred_top3, probality_top3 87 | 88 | def generate_unsupervised_cam(self, imgs): 89 | """ 90 | 91 | :param imgs: 92 | :return: 93 | """ 94 | _, layerout = self.model(torch.tensor(imgs).cuda()) # 1x101 95 | layerout = torch.tensor(layerout[0].detach().cpu().numpy().transpose(1, 2, 3, 0)) # 8x7x7x1024 96 | cam_list = list() 97 | cam = np.zeros(dtype=np.float32, shape=layerout.shape[0:3]) 98 | # print(cam.shape) 99 | for i in range(layerout.size(3)): 100 | cam += layerout[:, :, :, i].cpu().numpy() # 8x7x7 101 | cam = zoom(cam, (16//layerout.size(0), 224//layerout.size(1), 224//layerout.size(2)), mode='wrap') 102 | # output map is 8x7x7, so multiply to get to 16x224x224 (original video size) 103 | 104 | # normalize 105 | cam -= np.min(cam) 106 | cam /= np.max(cam) - np.min(cam) 107 | cam_list.append(cam) 108 | cam_list.append(cam) 109 | cam_list.append(cam) 110 | return cam_list 111 | 112 | ''' 113 | def generate_mp_cam(self, imgs): 114 | """ 115 | mpi3d has three part, for each part we record the grad-cam 116 | :param imgs: 117 | :return: 118 | """ 119 | predictions, layerout_s, layerout_m, layerout_l, predictions_s, predictions_m, predictions_l = self.model(torch.tensor(imgs).cuda()) # 1x101 120 | layerout_s = torch.tensor(layerout_s[0].detach().cpu().numpy().transpose(1, 2, 3, 0)) # 32 x 28 x 28 x 480 121 | layerout_m = 
torch.tensor(layerout_m[0].detach().cpu().numpy().transpose(1, 2, 3, 0)) # 16x14x14x832 122 | layerout_l = torch.tensor(layerout_l[0].detach().cpu().numpy().transpose(1, 2, 3, 0)) # 8x7x7x1024 123 | pred_weights_s = self.model.module.s_depend.local_range_depen.conv3d.weight.data.detach().cpu().numpy().transpose() # 480 x 51 124 | pred_weights_s = np.reshape(pred_weights_s, (51, 51)) # may be need do squeeze rather than reshape 125 | pred_weights_m = self.model.module.m_depend.local_range_depen.conv3d.weight.data.detach().cpu().numpy().transpose() # 832 x 51 126 | pred_weights_m = np.reshape(pred_weights_m, (51, 51)) 127 | pred_weights_l = self.model.module.l_depend.local_range_depen.conv3d.weight.data.detach().cpu().numpy().transpose() # 1024 x 51 128 | pred_weights_l = np.reshape(pred_weights_l, (51, 51)) 129 | predictions = torch.nn.Softmax(dim=1)(predictions) 130 | pred_top3 = predictions.detach().cpu().numpy().argsort()[0][::-1][:3] 131 | probality_top3 = -np.sort(-predictions.detach().cpu().numpy())[0,0:3] 132 | predictions_s = torch.nn.Softmax(dim=1)(predictions_s) 133 | predictions_m = torch.nn.Softmax(dim=1)(predictions_m) 134 | predictions_l = torch.nn.Softmax(dim=1)(predictions_l) 135 | three_pred = [predictions_s.detach().cpu().numpy().argsort()[0][::-1][:1],predictions_m.detach().cpu().numpy().argsort()[0][::-1][:1],predictions_l.detach().cpu().numpy().argsort()[0][::-1][:1]] 136 | three_prob = [-np.sort(-predictions_s.detach().cpu().numpy())[0,0:1],-np.sort(-predictions_m.detach().cpu().numpy())[0,0:1],-np.sort(-predictions_l.detach().cpu().numpy())[0,0:1]] 137 | layerout = [layerout_s, layerout_m, layerout_l] 138 | pred_weights = [pred_weights_s, pred_weights_m, pred_weights_l] 139 | #print(pred_top3) 140 | #pred_top3 = torch.argmax(predictions).item() 141 | cam_list = list() 142 | for k in range(3): 143 | cam = np.zeros(dtype=np.float32, shape=layerout[k].shape[0:3]) 144 | cam = zoom(cam, (pow(2, k + 1), 224, 224)) 145 | for i, w in enumerate(pred_weights[k][:, pred_top3[0]]): 146 | print(i) 147 | # Compute cam for every kernel 148 | cam += zoom(w * layerout[k][:, :, :, i], (pow(2, k + 1), 224, 224)) 149 | #cam += w * layerout[k][:, :, :, i] # 8x7x7 150 | # Resize CAM to frame level 151 | #cam = zoom(cam, (pow(2,k+1), pow(2,3+k), pow(2,3+k))) # output map is 8x7x7, so multiply to get to 64x224x224 (original image size) 152 | #cam = zoom(cam, (pow(2, k + 1), 224, 224)) 153 | # normalize 154 | cam -= np.min(cam) 155 | cam /= np.max(cam) - np.min(cam) 156 | cam_list.append(cam) 157 | #return cam_list, pred_top3, probality_top3 158 | return cam_list, three_pred, three_prob 159 | ''' -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-03-17 12:55 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : Action_Video_Visualization 8 | # @File : main.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import argparse 14 | from net.mfnet_3d import MFNET_3D 15 | from net.mp_i3d import MultiPathI3d 16 | from net.i3dpt_origin import I3D, weights_init 17 | from net.c3d import C3D 18 | from net.r3d import resnet50 19 | from action_recognition import ActionRecognition 20 | from util import * 21 | from action_feature_visualization import Visualization 22 | import math 23 | import datetime 24 | 25 | 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 26 | os.environ["CUDA_VISIBLE_DEVICES"] = '2' 27 | date_time = datetime.datetime.today().strftime('%m-%d-%H%M') 28 | 29 | 30 | def parse_args(): 31 | parser = argparse.ArgumentParser(description='mfnet-base-parser') 32 | parser.add_argument("--num_classes", type=int, default=101) 33 | parser.add_argument("--classes_list", type=str, default='resources/classInd.txt') 34 | parser.add_argument("--arch", type=str, default='mf_net', choices=['s3d', 'i3d', 'mf_net', 'c3d', 'mpi3d', 'r3d']) 35 | parser.add_argument("--supervised", type=str, default='fully_supervised', 36 | choices=['fully_supervised', 'unsupervised']) 37 | parser.add_argument("--model_weights", type=str, default='pretrained_model/MFNet3D_UCF-101_Split-1_96.3.pth') 38 | parser.add_argument("--video", type=str, default='test_videos/v_Shotput_g05_c02.avi') 39 | parser.add_argument("--frames_num", type=int, default=16, help = "the frames num for the network input") 40 | parser.add_argument("--label", type=int, default=79) 41 | parser.add_argument("--clip_steps", type=int, default=16) 42 | parser.add_argument("--output_dir", type=str, default="output") 43 | parser.add_argument("--gpus", type=str, default="1") 44 | return parser.parse_args() 45 | 46 | 47 | args = parse_args() 48 | os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus 49 | 50 | 51 | def weight_transform(model_dict, pretrain_dict, supervised=True): 52 | ''' 53 | 54 | :return: 55 | ''' 56 | for k, _ in pretrain_dict.items(): 57 | print("pretrain: {}".format(k)) 58 | if supervised: 59 | weight_dict = {k:v for k, v in pretrain_dict.items() if k in model_dict} 60 | else: 61 | weight_dict = {k:v for k, v in pretrain_dict.items() if k in model_dict and 'fc' not in k and 'classifier' not in k} 62 | for k, _ in weight_dict.items(): 63 | print("have load: {}".format(k)) 64 | model_dict.update(weight_dict) 65 | return model_dict 66 | 67 | def c3d_weight_transform(model_dict, pretrain_dict, supervised=True): 68 | corresp_name = { 69 | # Conv1 70 | "features.0.weight": "conv1.weight", 71 | "features.0.bias": "conv1.bias", 72 | # Conv2 73 | "features.3.weight": "conv2.weight", 74 | "features.3.bias": "conv2.bias", 75 | # Conv3a 76 | "features.6.weight": "conv3a.weight", 77 | "features.6.bias": "conv3a.bias", 78 | # Conv3b 79 | "features.8.weight": "conv3b.weight", 80 | "features.8.bias": "conv3b.bias", 81 | # Conv4a 82 | "features.11.weight": "conv4a.weight", 83 | "features.11.bias": "conv4a.bias", 84 | # Conv4b 85 | "features.13.weight": "conv4b.weight", 86 | "features.13.bias": "conv4b.bias", 87 | # Conv5a 88 | "features.16.weight": "conv5a.weight", 89 | "features.16.bias": "conv5a.bias", 90 | # Conv5b 91 | "features.18.weight": "conv5b.weight", 92 | "features.18.bias": "conv5b.bias", 93 | # fc6 94 | "classifier.0.weight": "fc6.weight", 95 | "classifier.0.bias": "fc6.bias", 96 | # fc7 97 | "classifier.3.weight": "fc7.weight", 98 | "classifier.3.bias": "fc7.bias", 99 | } 100 | 101 | p_dict = pretrain_dict 102 | s_dict = model_dict 103 | for name in p_dict: 104 | if name not in corresp_name: 105 | continue 106 | if 'classifier' in name: 107 | continue 108 | s_dict[corresp_name[name]] = p_dict[name] 109 | print("have load: {}".format(corresp_name[name])) 110 | return s_dict 111 | 112 | 113 | def load_model(): 114 | if args.arch == 'mf_net': 115 | model_ft = MFNET_3D(args.num_classes) 116 | elif args.arch == 'mpi3d': 117 | model_ft = MultiPathI3d(args.num_classes, in_channels=3, dropout_prob=0) 118 | elif args.arch == 'i3d': 119 | 
model_ft = I3D(args.num_classes, modality='rgb', dropout_prob=0) 120 | elif args.arch == 'r3d': 121 | model_ft = resnet50(num_classes=args.num_classes) 122 | elif args.arch == 'c3d': 123 | model_ft = C3D(with_classifier=True, num_classes=args.num_classes) 124 | else: 125 | Exception("Not support network now!") 126 | if args.model_weights: 127 | checkpoint = torch.load(args.model_weights) 128 | if args.arch in ['mpi3d', 'i3d']: 129 | base_dict = {'.'.join(k.split('.')[1:]):v for k,v in list(checkpoint['state_dict'].items())} 130 | # model_ft.load_state_dict(base_dict) 131 | model_dict = model_ft.state_dict() 132 | model_dict = weight_transform(model_dict, base_dict) 133 | model_ft.load_state_dict(model_dict) 134 | else: 135 | if args.supervised == 'unsupervised': 136 | if args.arch == 'c3d': 137 | model_dict = model_ft.state_dict() 138 | model_dict = c3d_weight_transform(model_dict, checkpoint, supervised=False) 139 | model_ft.load_state_dict(model_dict) 140 | else: 141 | base_dict = {k : v for k, v in list(checkpoint['state_dict'].items())} 142 | model_dict = model_ft.state_dict() 143 | model_dict = weight_transform(model_dict, base_dict, supervised=False) 144 | model_ft.load_state_dict(model_dict) 145 | else: 146 | model_ft.load_state_dict(checkpoint['state_dict']) 147 | else: 148 | # print("????") 149 | weights_init(model_ft) 150 | model_ft.cuda() 151 | model_ft = torch.nn.DataParallel(model_ft).cuda() 152 | model_ft.eval() 153 | return model_ft 154 | 155 | 156 | def decode_on_the_fly(self): 157 | """ 158 | there incule two way to implement decode on the fly 159 | we need to consider the video at begin and at end 160 | :return: 161 | """ 162 | 163 | 164 | def heat_map_api(video, frames_num, clip_steps, output_dir, label, classes_list): 165 | args.arch = 'i3d' 166 | args.num_classes = 51 167 | args.gpus = 1 168 | # args.supervised = 'self_supervised' 169 | # args.model_weights = 'pretrained_model/77.254_mpi3d_rgb_model_best.pth.tar' 170 | # args.model_weights = 'pretrained_model/hmdb51_rgb_gl_randomrotation_3flip_mixup_way2_1loss_stride_1_12_26_checkpoint_37.77.pth.tar' 171 | # args.model_weights = 'pretrained_model/25.294_i3dpt_rgb_model_best.pth.tar' 172 | # args.model_weights = 'pretrained_model/36.209_i3dpt_rgb_model_best.pth.tar' 173 | # args.classes_list = 'resources/hmdb51_classInd.txt' 174 | # args.model_weights = "" 175 | reg_net = ActionRecognition(args, load_model()) 176 | visulaize = Visualization() 177 | 178 | length, width, height = video_frame_count(video) 179 | if length < frames_num: 180 | print( 181 | "the video's frame num is {}, shorter than {}, will loop the video.".format(length, frames_num)) 182 | cap = cv2.VideoCapture(video) 183 | # q = queue.Queue(self.frames_num) 184 | frames = list() 185 | count = 0 186 | while count < length: 187 | ret, frame = cap.read() 188 | if type(frame) == type(None): 189 | break 190 | else: 191 | frames.append(frame) 192 | # if video shorter than frames_num, repeat last frame 193 | index = 0 194 | while len(frames) < frames_num: 195 | frames.append(frames[index]) 196 | index += 1 197 | length += 1 198 | mask_imgs = list() 199 | focus_imgs = list() 200 | count = 0 201 | for i in range(math.ceil((length - frames_num) // clip_steps)+1): 202 | if 0 < length - frames_num - clip_steps*i: 203 | reg_imgs = frames[i * clip_steps:i * clip_steps + frames_num] 204 | else: 205 | if length > frames_num + 1: 206 | reg_imgs = frames[length - 1 - frames_num: -1] 207 | else: 208 | reg_imgs = frames 209 | for j in range(frames_num - length): 210 | 
reg_imgs.append(reg_imgs[j]) 211 | if len(reg_imgs) < frames_num: 212 | print("reg_imgs is too short") 213 | break 214 | RGB_vid, vid = reg_net.img_process(reg_imgs, frames_num) 215 | if args.supervised == 'unsupervised': 216 | cam_list = reg_net.generate_unsupervised_cam(vid) 217 | else: 218 | cam_list, pred_top3, prob_top3 = reg_net.generate_supervised_cam(vid) 219 | heat_maps = list() 220 | for j in range(len(cam_list)): 221 | heat_map, focus_map = visulaize.gen_heatmap(cam_list[j], RGB_vid) 222 | heat_maps.append(heat_map) 223 | focus_imgs.append(focus_map) # BGRA space 224 | if args.supervised == 'unsupervised': 225 | mask_img = visulaize.gen_mask_img(RGB_vid[0][args.frames_num // 2], heat_maps, None, None, 226 | args.label, args.classes_list, text=False) 227 | else: 228 | mask_img = visulaize.gen_mask_img(RGB_vid[0][args.frames_num // 2], heat_maps, pred_top3, prob_top3, 229 | args.label, args.classes_list) 230 | mask_imgs.append(mask_img) 231 | print("precoss video clips: {}/{}, wait a moment".format(i + 1, int(math.ceil(length - frames_num) // clip_steps) + 1)) 232 | count += 1 233 | # saved_video_path = save_as_video(output_dir, mask_imgs, label) 234 | save_as_imgs(output_dir, mask_imgs, count, label, 'heatmap_') 235 | save_as_imgs(output_dir, focus_imgs, count, label, 'focusmap_') 236 | 237 | 238 | def main(): 239 | global args 240 | reg_net = ActionRecognition(args, load_model()) 241 | visulaize = Visualization() 242 | 243 | length, width, height = video_frame_count(args.video) 244 | if length < args.frames_num: 245 | print("the video's frame num is {}, shorter than {}, will repeat the last frame".format(length, args.frames_num)) 246 | cap = cv2.VideoCapture(args.video) 247 | # q = queue.Queue(self.frames_num) 248 | frames = list() 249 | count = 0 250 | while count < length: 251 | ret, frame = cap.read() 252 | if type(frame) == type(None): 253 | break 254 | else: 255 | frames.append(frame) 256 | # if video shorter than frames_num, repeat last frame 257 | while len(frames) < args.frames_num: 258 | frames.append(frames[length - 1]) 259 | mask_imgs = list() 260 | focus_imgs = list() 261 | count = 0 262 | for i in range(int(length/args.clip_steps) -1): 263 | if i < length - args.frames_num: 264 | reg_imgs = frames[i*args.clip_steps:i*args.clip_steps + args.frames_num] 265 | else: 266 | reg_imgs = frames[length - 1 - args.frames_num: -1] 267 | if len(reg_imgs) < args.frames_num: 268 | print("reg_imgs is too short") 269 | break 270 | RGB_vid, vid = reg_net.img_process(reg_imgs, args.frames_num) 271 | if args.supervised == 'unsupervised': 272 | cam_list = reg_net.generate_unsupervised_cam(vid) 273 | else: 274 | cam_list, pred_top3, prob_top3 = reg_net.generate_supervised_cam(vid) 275 | heat_maps = list() 276 | for j in range(len(cam_list)): 277 | heat_map, focus_map = visulaize.gen_heatmap(cam_list[j], RGB_vid) 278 | heat_maps.append(heat_map) 279 | focus_imgs.append(focus_map) # BGRA space 280 | if args.supervised == 'unsupervised': 281 | mask_img = visulaize.gen_mask_img(RGB_vid[0][args.frames_num // 2], heat_maps, None, None, 282 | args.label, args.classes_list, text=False) 283 | else: 284 | mask_img = visulaize.gen_mask_img(RGB_vid[0][args.frames_num//2], heat_maps, pred_top3, prob_top3, 285 | args.label, args.classes_list) 286 | mask_imgs.append(mask_img) 287 | print("precoss video clips: {}/{}, wait a moment".format(i+1, int(length/args.clip_steps)-1)) 288 | count += 1 289 | saved_video_path = save_as_video(args.output_dir, mask_imgs, args.label) 290 | 
save_as_imgs(args.output_dir, mask_imgs, count, args.label, 'heatmap_') 291 | save_as_imgs(args.output_dir, focus_imgs, count, args.label, 'focusmap_') 292 | # visualization(saved_video_path) 293 | 294 | 295 | if __name__ == '__main__': 296 | main() -------------------------------------------------------------------------------- /net/c3d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2020-09-17 15:59 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : amax_Action_Video_Visualization 8 | # @File : c3d.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | 16 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 17 | 18 | """C3D""" 19 | import math 20 | from collections import OrderedDict 21 | 22 | import torch 23 | import torch.nn as nn 24 | from torch.nn.modules.utils import _triple 25 | 26 | 27 | class C3D(nn.Module): 28 | """C3D with BN and pool5 to be AdaptiveAvgPool3d(1).""" 29 | 30 | def __init__(self, with_classifier=True, num_classes=101): 31 | super(C3D, self).__init__() 32 | self.with_classifier = with_classifier 33 | self.num_classes = num_classes 34 | 35 | self.conv1 = nn.Conv3d(3, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 36 | self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)) 37 | 38 | self.conv2 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 39 | self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)) 40 | 41 | self.conv3a = nn.Conv3d(128, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 42 | self.conv3b = nn.Conv3d(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 43 | self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)) 44 | 45 | self.conv4a = nn.Conv3d(256, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 46 | self.conv4b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 47 | self.pool4 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2)) 48 | 49 | self.conv5a = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 50 | self.conv5b = nn.Conv3d(512, 512, kernel_size=(3, 3, 3), padding=(1, 1, 1)) 51 | self.pool5 = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=(0, 1, 1)) 52 | 53 | if self.with_classifier: 54 | self.fc6 = nn.Linear(8192, 4096) 55 | self.fc7 = nn.Linear(4096, 4096) 56 | self.fc8 = nn.Linear(4096, 487) 57 | 58 | self.dropout = nn.Dropout(p=0.5) 59 | 60 | self.relu = nn.ReLU() 61 | self.softmax = nn.Softmax() 62 | 63 | if self.with_classifier: 64 | self.linear = nn.Linear(512, self.num_classes) 65 | 66 | def forward(self, x, return_conv=False): 67 | h = self.relu(self.conv1(x)) 68 | h = self.pool1(h) 69 | 70 | h = self.relu(self.conv2(h)) 71 | h = self.pool2(h) 72 | 73 | h = self.relu(self.conv3a(h)) 74 | h = self.relu(self.conv3b(h)) 75 | h = self.pool3(h) 76 | 77 | h = self.relu(self.conv4a(h)) 78 | h = self.relu(self.conv4b(h)) 79 | h = self.pool4(h) 80 | 81 | h = self.relu(self.conv5a(h)) 82 | h = self.relu(self.conv5b(h)) 83 | h = self.pool5(h) 84 | feature = h 85 | if self.with_classifier: 86 | h = h.view(-1, 8192) 87 | h = self.relu(self.fc6(h)) 88 | h = self.dropout(h) 89 | h = self.relu(self.fc7(h)) 90 | h = self.dropout(h) 91 | logits = self.fc8(h) 92 | probs = self.softmax(logits) 93 | return probs, feature 94 | else: 95 | return feature 96 | 97 | 98 | if __name__ == '__main__': 99 | c3d = C3D() 
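Before the I3D definition below, a quick input/output check for this C3D can save some wiring time. This is only a sketch assuming random weights and a CPU run; the 112x112 crop is what makes the flattened pool5 output match fc6's 8192 inputs (main.py's img_process crops to 224x224, so sizes are worth double-checking), and note that forward() goes through the 487-way fc8 head while the `linear` layer is defined but unused on this path:

```python
import torch
from net.c3d import C3D

model = C3D(with_classifier=True, num_classes=101).eval()
clip = torch.randn(1, 3, 16, 112, 112)   # batch x channels x frames x H x W
with torch.no_grad():
    probs, feature = model(clip)
print(probs.shape)    # torch.Size([1, 487]) - fc8 is hard-coded to 487 outputs (Sports-1M head)
print(feature.shape)  # torch.Size([1, 512, 1, 4, 4]) - pool5 feature map, the tensor used for heatmaps
```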
-------------------------------------------------------------------------------- /net/i3d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-03-19 10:44 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : Action_Video_Visualization 8 | # @File : i3d.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | 16 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 17 | import torch 18 | import torch.nn as nn 19 | import torch.nn.functional as F 20 | from torch.autograd import Variable 21 | 22 | import numpy as np 23 | 24 | import os 25 | import sys 26 | from collections import OrderedDict 27 | 28 | 29 | class MaxPool3dSamePadding(nn.MaxPool3d): 30 | 31 | def compute_pad(self, dim, s): 32 | if s % self.stride[dim] == 0: 33 | return max(self.kernel_size[dim] - self.stride[dim], 0) 34 | else: 35 | return max(self.kernel_size[dim] - (s % self.stride[dim]), 0) 36 | 37 | def forward(self, x): 38 | # compute 'same' padding 39 | (batch, channel, t, h, w) = x.size() 40 | # print t,h,w 41 | out_t = np.ceil(float(t) / float(self.stride[0])) 42 | out_h = np.ceil(float(h) / float(self.stride[1])) 43 | out_w = np.ceil(float(w) / float(self.stride[2])) 44 | # print out_t, out_h, out_w 45 | pad_t = self.compute_pad(0, t) 46 | pad_h = self.compute_pad(1, h) 47 | pad_w = self.compute_pad(2, w) 48 | # print pad_t, pad_h, pad_w 49 | 50 | pad_t_f = pad_t // 2 51 | pad_t_b = pad_t - pad_t_f 52 | pad_h_f = pad_h // 2 53 | pad_h_b = pad_h - pad_h_f 54 | pad_w_f = pad_w // 2 55 | pad_w_b = pad_w - pad_w_f 56 | 57 | pad = (pad_w_f, pad_w_b, pad_h_f, pad_h_b, pad_t_f, pad_t_b) 58 | # print x.size() 59 | # print pad 60 | x = F.pad(x, pad) 61 | return super(MaxPool3dSamePadding, self).forward(x) 62 | 63 | 64 | class Unit3D(nn.Module): 65 | 66 | def __init__(self, in_channels, 67 | output_channels, 68 | kernel_shape=(1, 1, 1), 69 | stride=(1, 1, 1), 70 | padding=0, 71 | activation_fn=F.relu, 72 | use_batch_norm=True, 73 | use_bias=False, 74 | name='unit_3d'): 75 | 76 | """Initializes Unit3D module.""" 77 | super(Unit3D, self).__init__() 78 | 79 | self._output_channels = output_channels 80 | self._kernel_shape = kernel_shape 81 | self._stride = stride 82 | self._use_batch_norm = use_batch_norm 83 | self._activation_fn = activation_fn 84 | self._use_bias = use_bias 85 | self.name = name 86 | self.padding = padding 87 | 88 | self.conv3d = nn.Conv3d(in_channels=in_channels, 89 | out_channels=self._output_channels, 90 | kernel_size=self._kernel_shape, 91 | stride=self._stride, 92 | padding=0, 93 | # we always want padding to be 0 here. 
We will dynamically pad based on input size in forward function 94 | bias=self._use_bias) 95 | 96 | if self._use_batch_norm: 97 | self.bn = nn.BatchNorm3d(self._output_channels, eps=0.001, momentum=0.01) 98 | 99 | def compute_pad(self, dim, s): 100 | if s % self._stride[dim] == 0: 101 | return max(self._kernel_shape[dim] - self._stride[dim], 0) 102 | else: 103 | return max(self._kernel_shape[dim] - (s % self._stride[dim]), 0) 104 | 105 | def forward(self, x): 106 | # compute 'same' padding 107 | (batch, channel, t, h, w) = x.size() 108 | # print t,h,w 109 | out_t = np.ceil(float(t) / float(self._stride[0])) 110 | out_h = np.ceil(float(h) / float(self._stride[1])) 111 | out_w = np.ceil(float(w) / float(self._stride[2])) 112 | # print out_t, out_h, out_w 113 | pad_t = self.compute_pad(0, t) 114 | pad_h = self.compute_pad(1, h) 115 | pad_w = self.compute_pad(2, w) 116 | # print pad_t, pad_h, pad_w 117 | 118 | pad_t_f = pad_t // 2 119 | pad_t_b = pad_t - pad_t_f 120 | pad_h_f = pad_h // 2 121 | pad_h_b = pad_h - pad_h_f 122 | pad_w_f = pad_w // 2 123 | pad_w_b = pad_w - pad_w_f 124 | 125 | pad = (pad_w_f, pad_w_b, pad_h_f, pad_h_b, pad_t_f, pad_t_b) 126 | # print x.size() 127 | # print pad 128 | x = F.pad(x, pad) 129 | # print x.size() 130 | 131 | x = self.conv3d(x) 132 | if self._use_batch_norm: 133 | x = self.bn(x) 134 | if self._activation_fn is not None: 135 | x = self._activation_fn(x) 136 | return x 137 | 138 | 139 | class InceptionModule(nn.Module): 140 | def __init__(self, in_channels, out_channels, name): 141 | super(InceptionModule, self).__init__() 142 | 143 | self.b0 = Unit3D(in_channels=in_channels, output_channels=out_channels[0], kernel_shape=[1, 1, 1], padding=0, 144 | name=name + '/Branch_0/Conv3d_0a_1x1') 145 | self.b1a = Unit3D(in_channels=in_channels, output_channels=out_channels[1], kernel_shape=[1, 1, 1], padding=0, 146 | name=name + '/Branch_1/Conv3d_0a_1x1') 147 | self.b1b = Unit3D(in_channels=out_channels[1], output_channels=out_channels[2], kernel_shape=[3, 3, 3], 148 | name=name + '/Branch_1/Conv3d_0b_3x3') 149 | self.b2a = Unit3D(in_channels=in_channels, output_channels=out_channels[3], kernel_shape=[1, 1, 1], padding=0, 150 | name=name + '/Branch_2/Conv3d_0a_1x1') 151 | self.b2b = Unit3D(in_channels=out_channels[3], output_channels=out_channels[4], kernel_shape=[3, 3, 3], 152 | name=name + '/Branch_2/Conv3d_0b_3x3') 153 | self.b3a = MaxPool3dSamePadding(kernel_size=[3, 3, 3], 154 | stride=(1, 1, 1), padding=0) 155 | self.b3b = Unit3D(in_channels=in_channels, output_channels=out_channels[5], kernel_shape=[1, 1, 1], padding=0, 156 | name=name + '/Branch_3/Conv3d_0b_1x1') 157 | self.name = name 158 | 159 | def forward(self, x): 160 | b0 = self.b0(x) 161 | b1 = self.b1b(self.b1a(x)) 162 | b2 = self.b2b(self.b2a(x)) 163 | b3 = self.b3b(self.b3a(x)) 164 | return torch.cat([b0, b1, b2, b3], dim=1) 165 | 166 | 167 | class InceptionI3d(nn.Module): 168 | """Inception-v1 I3D architecture. 169 | The model is introduced in: 170 | Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset 171 | Joao Carreira, Andrew Zisserman 172 | https://arxiv.org/pdf/1705.07750v1.pdf. 173 | See also the Inception architecture, introduced in: 174 | Going deeper with convolutions 175 | Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed, 176 | Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich. 177 | http://arxiv.org/pdf/1409.4842v1.pdf. 178 | """ 179 | 180 | # Endpoints of the model in order. 
During construction, all the endpoints up 181 | # to a designated `final_endpoint` are returned in a dictionary as the 182 | # second return value. 183 | VALID_ENDPOINTS = ( 184 | 'Conv3d_1a_7x7', 185 | 'MaxPool3d_2a_3x3', 186 | 'Conv3d_2b_1x1', 187 | 'Conv3d_2c_3x3', 188 | 'MaxPool3d_3a_3x3', 189 | 'Mixed_3b', 190 | 'Mixed_3c', 191 | 'MaxPool3d_4a_3x3', 192 | 'Mixed_4b', 193 | 'Mixed_4c', 194 | 'Mixed_4d', 195 | 'Mixed_4e', 196 | 'Mixed_4f', 197 | 'MaxPool3d_5a_2x2', 198 | 'Mixed_5b', 199 | 'Mixed_5c', 200 | 'Logits', 201 | 'Predictions', 202 | ) 203 | 204 | def __init__(self, num_classes=400, spatial_squeeze=True, 205 | final_endpoint='Logits', name='inception_i3d', in_channels=3, dropout_prob=0.5): 206 | """Initializes I3D model instance. 207 | Args: 208 | num_classes: The number of outputs in the logit layer (default 400, which 209 | matches the Kinetics dataset). 210 | spatial_squeeze: Whether to squeeze the spatial dimensions for the logits 211 | before returning (default True). 212 | final_endpoint: The model contains many possible endpoints. 213 | `final_endpoint` specifies the last endpoint for the model to be built 214 | up to. In addition to the output at `final_endpoint`, all the outputs 215 | at endpoints up to `final_endpoint` will also be returned, in a 216 | dictionary. `final_endpoint` must be one of 217 | InceptionI3d.VALID_ENDPOINTS (default 'Logits'). 218 | name: A string (optional). The name of this module. 219 | Raises: 220 | ValueError: if `final_endpoint` is not recognized. 221 | """ 222 | 223 | if final_endpoint not in self.VALID_ENDPOINTS: 224 | raise ValueError('Unknown final endpoint %s' % final_endpoint) 225 | 226 | super(InceptionI3d, self).__init__() 227 | self._num_classes = num_classes 228 | self._spatial_squeeze = spatial_squeeze 229 | self._final_endpoint = final_endpoint 230 | self.logits = None 231 | 232 | if self._final_endpoint not in self.VALID_ENDPOINTS: 233 | raise ValueError('Unknown final endpoint %s' % self._final_endpoint) 234 | 235 | self.end_points = {} 236 | end_point = 'Conv3d_1a_7x7' 237 | self.end_points[end_point] = Unit3D(in_channels=in_channels, output_channels=64, kernel_shape=[7, 7, 7], 238 | stride=(2, 2, 2), padding=(3, 3, 3), name=name + end_point) 239 | if self._final_endpoint == end_point: return 240 | 241 | end_point = 'MaxPool3d_2a_3x3' 242 | self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[1, 3, 3], stride=(1, 2, 2), 243 | padding=0) 244 | if self._final_endpoint == end_point: return 245 | 246 | end_point = 'Conv3d_2b_1x1' 247 | self.end_points[end_point] = Unit3D(in_channels=64, output_channels=64, kernel_shape=[1, 1, 1], padding=0, 248 | name=name + end_point) 249 | if self._final_endpoint == end_point: return 250 | 251 | end_point = 'Conv3d_2c_3x3' 252 | self.end_points[end_point] = Unit3D(in_channels=64, output_channels=192, kernel_shape=[3, 3, 3], padding=1, 253 | name=name + end_point) 254 | if self._final_endpoint == end_point: return 255 | 256 | end_point = 'MaxPool3d_3a_3x3' 257 | self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[1, 3, 3], stride=(1, 2, 2), 258 | padding=0) 259 | if self._final_endpoint == end_point: return 260 | 261 | end_point = 'Mixed_3b' 262 | self.end_points[end_point] = InceptionModule(192, [64, 96, 128, 16, 32, 32], name + end_point) 263 | if self._final_endpoint == end_point: return 264 | 265 | end_point = 'Mixed_3c' 266 | self.end_points[end_point] = InceptionModule(256, [128, 128, 192, 32, 96, 64], name + end_point) 267 | if self._final_endpoint == end_point: 
return 268 | 269 | # ====================================Add Some Model To I3d 270 | """ 271 | end_point = 'attention_1' 272 | self.end_points[end_point] = Self_Attn(480, 'relu') 273 | if self._final_endpoint == end_point: return 274 | """ 275 | # ======================================= 276 | end_point = 'MaxPool3d_4a_3x3' 277 | self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[3, 3, 3], stride=(2, 2, 2), 278 | padding=0) 279 | if self._final_endpoint == end_point: return 280 | 281 | end_point = 'Mixed_4b' 282 | self.end_points[end_point] = InceptionModule(128 + 192 + 96 + 64, [192, 96, 208, 16, 48, 64], name + end_point) 283 | if self._final_endpoint == end_point: return 284 | 285 | end_point = 'Mixed_4c' 286 | self.end_points[end_point] = InceptionModule(192 + 208 + 48 + 64, [160, 112, 224, 24, 64, 64], name + end_point) 287 | if self._final_endpoint == end_point: return 288 | 289 | end_point = 'Mixed_4d' 290 | self.end_points[end_point] = InceptionModule(160 + 224 + 64 + 64, [128, 128, 256, 24, 64, 64], name + end_point) 291 | if self._final_endpoint == end_point: return 292 | 293 | end_point = 'Mixed_4e' 294 | self.end_points[end_point] = InceptionModule(128 + 256 + 64 + 64, [112, 144, 288, 32, 64, 64], name + end_point) 295 | if self._final_endpoint == end_point: return 296 | 297 | end_point = 'Mixed_4f' 298 | self.end_points[end_point] = InceptionModule(112 + 288 + 64 + 64, [256, 160, 320, 32, 128, 128], 299 | name + end_point) 300 | if self._final_endpoint == end_point: return 301 | 302 | end_point = 'MaxPool3d_5a_2x2' 303 | self.end_points[end_point] = MaxPool3dSamePadding(kernel_size=[2, 2, 2], stride=(2, 2, 2), 304 | padding=0) 305 | if self._final_endpoint == end_point: return 306 | 307 | end_point = 'Mixed_5b' 308 | self.end_points[end_point] = InceptionModule(256 + 320 + 128 + 128, [256, 160, 320, 32, 128, 128], 309 | name + end_point) 310 | if self._final_endpoint == end_point: return 311 | 312 | end_point = 'Mixed_5c' 313 | self.end_points[end_point] = InceptionModule(256 + 320 + 128 + 128, [384, 192, 384, 48, 128, 128], 314 | name + end_point) 315 | if self._final_endpoint == end_point: return 316 | 317 | end_point = 'Logits' 318 | self.avg_pool = nn.AvgPool3d(kernel_size=[2, 7, 7], 319 | stride=(1, 1, 1)) 320 | self.dropout = nn.Dropout(dropout_prob) 321 | self.dropout_probality = dropout_prob 322 | self.logits = Unit3D(in_channels=384 + 384 + 128 + 128, output_channels=400, 323 | kernel_shape=[1, 1, 1], 324 | padding=0, 325 | activation_fn=None, 326 | use_batch_norm=False, 327 | use_bias=True, 328 | name='logits') 329 | self.softmax = torch.nn.Softmax(dim=1) 330 | if self._num_classes != 400: 331 | self.fc_out = nn.Linear(400, self._num_classes, bias=True) 332 | self.build() 333 | if self._final_endpoint == end_point: return 334 | 335 | def replace_logits(self, num_classes): 336 | self._num_classes = num_classes 337 | self.logits = Unit3D(in_channels=384 + 384 + 128 + 128, output_channels=self._num_classes, 338 | kernel_shape=[1, 1, 1], 339 | padding=0, 340 | activation_fn=None, 341 | use_batch_norm=False, 342 | use_bias=True, 343 | name='logits') 344 | 345 | def replace_dropout(self, dropout_radio): 346 | self.dropout = nn.Dropout(dropout_radio) 347 | if self._num_classes != 400: 348 | self.logits_dropout = nn.Dropout(dropout_radio) 349 | 350 | def build(self): 351 | for k in self.end_points.keys(): 352 | self.add_module(k, self.end_points[k]) 353 | 354 | def forward(self, x): 355 | for end_point in self.VALID_ENDPOINTS: 356 | if end_point in 
self.end_points: 357 | x = self._modules[end_point](x) # use _modules to work with dataparallel 358 | 359 | x = self.logits(self.dropout(self.avg_pool(x))) 360 | # print(x.size()) 361 | if self._spatial_squeeze: 362 | logits = x.squeeze(3).squeeze(3) # remove dim whose size is 1 363 | logits = torch.mean(logits, 2) 364 | # print(logits) 365 | if self._num_classes != 400: 366 | logits_out = nn.Dropout(self.dropout_probality)(logits) 367 | fc_out = self.fc_out(logits_out) 368 | # print(fc_out.size()) # 4 x 101 369 | # print(self.softmax(fc_out)[0,:].data.cpu().numpy().sum()) 370 | return fc_out 371 | # return self.softmax(fc_out) 372 | else: 373 | # print(logits.size()) # 5 x101 374 | # logits is batch X time X classes, which is what we want to work with 375 | # return self.softmax(logits) 376 | return logits 377 | 378 | def extract_features(self, x): 379 | for end_point in self.VALID_ENDPOINTS: 380 | if end_point in self.end_points: 381 | x = self._modules[end_point](x) 382 | return self.avg_pool(x) 383 | 384 | 385 | def get_fine_tuning_parameters(model): 386 | ft_module_names = [] 387 | # ft_module_names.append('Mixed_5b') 388 | # ft_module_names.append('Mixed_5c') 389 | ft_module_names.append('fc_out') 390 | ft_module_names.append('logits') 391 | # ft_module_names.append('attention_1') 392 | 393 | parameters = [] 394 | for k, v in model.named_parameters(): 395 | for ft_module in ft_module_names: 396 | if ft_module in k: 397 | parameters.append({'params': v}) 398 | break 399 | else: 400 | parameters.append({'params': v, 'lr': 0.0001}) 401 | 402 | return parameters 403 | -------------------------------------------------------------------------------- /net/i3dpt_origin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-05-12 22:09 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : pytorch_i3d 8 | # @File : i3dpt.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | 16 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 17 | 18 | import math 19 | import os 20 | 21 | import numpy as np 22 | import torch 23 | from torch.nn import ReplicationPad3d 24 | import torch.nn.functional as F 25 | import torch.nn as nn 26 | from torch.nn.init import xavier_uniform_, constant_, normal_ 27 | 28 | 29 | def get_padding_shape(filter_shape, stride): 30 | def _pad_top_bottom(filter_dim, stride_val): 31 | pad_along = max(filter_dim - stride_val, 0) 32 | pad_top = pad_along // 2 33 | pad_bottom = pad_along - pad_top 34 | return pad_top, pad_bottom 35 | 36 | padding_shape = [] 37 | for filter_dim, stride_val in zip(filter_shape, stride): 38 | pad_top, pad_bottom = _pad_top_bottom(filter_dim, stride_val) 39 | padding_shape.append(pad_top) 40 | padding_shape.append(pad_bottom) 41 | depth_top = padding_shape.pop(0) 42 | depth_bottom = padding_shape.pop(0) 43 | padding_shape.append(depth_top) 44 | padding_shape.append(depth_bottom) 45 | 46 | return tuple(padding_shape) 47 | 48 | 49 | def simplify_padding(padding_shapes): 50 | all_same = True 51 | padding_init = padding_shapes[0] 52 | for pad in padding_shapes[1:]: 53 | if pad != padding_init: 54 | all_same = False 55 | return all_same, padding_init 56 | 57 | 58 | class Unit3Dpy(torch.nn.Module): 59 | def __init__(self, 60 | in_channels, 61 | out_channels, 62 | kernel_size=(1, 1, 1), 63 | stride=(1, 1, 1), 64 | activation='relu', 65 | 
padding='SAME', 66 | use_bias=False, 67 | use_bn=True): 68 | super(Unit3Dpy, self).__init__() 69 | 70 | self.padding = padding 71 | self.activation = activation 72 | self.use_bn = use_bn 73 | if padding == 'SAME': 74 | padding_shape = get_padding_shape(kernel_size, stride) 75 | simplify_pad, pad_size = simplify_padding(padding_shape) 76 | self.simplify_pad = simplify_pad 77 | elif padding == 'VALID': 78 | padding_shape = 0 79 | else: 80 | raise ValueError( 81 | 'padding should be in [VALID|SAME] but got {}'.format(padding)) 82 | 83 | if padding == 'SAME': 84 | if not simplify_pad: 85 | self.pad = torch.nn.ConstantPad3d(padding_shape, 0) 86 | self.conv3d = torch.nn.Conv3d( 87 | in_channels, 88 | out_channels, 89 | kernel_size, 90 | stride=stride, 91 | bias=use_bias) 92 | else: 93 | self.conv3d = torch.nn.Conv3d( 94 | in_channels, 95 | out_channels, 96 | kernel_size, 97 | stride=stride, 98 | padding=pad_size, 99 | bias=use_bias) 100 | elif padding == 'VALID': 101 | self.conv3d = torch.nn.Conv3d( 102 | in_channels, 103 | out_channels, 104 | kernel_size, 105 | padding=padding_shape, 106 | stride=stride, 107 | bias=use_bias) 108 | else: 109 | raise ValueError( 110 | 'padding should be in [VALID|SAME] but got {}'.format(padding)) 111 | 112 | if self.use_bn: 113 | self.batch3d = torch.nn.BatchNorm3d(out_channels) 114 | 115 | if activation == 'relu': 116 | self.activation = torch.nn.functional.relu 117 | 118 | def forward(self, inp): 119 | if self.padding == 'SAME' and self.simplify_pad is False: 120 | inp = self.pad(inp) 121 | out = self.conv3d(inp) 122 | if self.use_bn: 123 | out = self.batch3d(out) 124 | if self.activation is not None: 125 | out = torch.nn.functional.relu(out) 126 | return out 127 | 128 | 129 | class MaxPool3dTFPadding(torch.nn.Module): 130 | def __init__(self, kernel_size, stride=None, padding='SAME'): 131 | super(MaxPool3dTFPadding, self).__init__() 132 | if padding == 'SAME': 133 | padding_shape = get_padding_shape(kernel_size, stride) 134 | self.padding_shape = padding_shape 135 | self.pad = torch.nn.ConstantPad3d(padding_shape, 0) 136 | self.pool = torch.nn.MaxPool3d(kernel_size, stride, ceil_mode=True) 137 | 138 | def forward(self, inp): 139 | inp = self.pad(inp) 140 | out = self.pool(inp) 141 | return out 142 | 143 | 144 | class Mixed(torch.nn.Module): 145 | def __init__(self, in_channels, out_channels): 146 | super(Mixed, self).__init__() 147 | # Branch 0 148 | self.branch_0 = Unit3Dpy( 149 | in_channels, out_channels[0], kernel_size=(1, 1, 1)) 150 | 151 | # Branch 1 152 | branch_1_conv1 = Unit3Dpy( 153 | in_channels, out_channels[1], kernel_size=(1, 1, 1)) 154 | branch_1_conv2 = Unit3Dpy( 155 | out_channels[1], out_channels[2], kernel_size=(3, 3, 3)) 156 | self.branch_1 = torch.nn.Sequential(branch_1_conv1, branch_1_conv2) 157 | 158 | # Branch 2 159 | branch_2_conv1 = Unit3Dpy( 160 | in_channels, out_channels[3], kernel_size=(1, 1, 1)) 161 | branch_2_conv2 = Unit3Dpy( 162 | out_channels[3], out_channels[4], kernel_size=(3, 3, 3)) 163 | self.branch_2 = torch.nn.Sequential(branch_2_conv1, branch_2_conv2) 164 | 165 | # Branch3 166 | branch_3_pool = MaxPool3dTFPadding( 167 | kernel_size=(3, 3, 3), stride=(1, 1, 1), padding='SAME') 168 | branch_3_conv2 = Unit3Dpy( 169 | in_channels, out_channels[5], kernel_size=(1, 1, 1)) 170 | self.branch_3 = torch.nn.Sequential(branch_3_pool, branch_3_conv2) 171 | 172 | def forward(self, inp): 173 | out_0 = self.branch_0(inp) 174 | out_1 = self.branch_1(inp) 175 | out_2 = self.branch_2(inp) 176 | out_3 = self.branch_3(inp) 177 | out = 
torch.cat((out_0, out_1, out_2, out_3), 1) 178 | return out 179 | 180 | 181 | class I3D(torch.nn.Module): 182 | def __init__(self, 183 | num_classes, 184 | modality='rgb', 185 | dropout_prob=0, 186 | name='inception'): 187 | super(I3D, self).__init__() 188 | 189 | self.name = name 190 | self.num_classes = num_classes 191 | if modality == 'rgb': 192 | in_channels = 3 193 | elif modality == 'flow': 194 | in_channels = 2 195 | else: 196 | raise ValueError( 197 | '{} not among known modalities [rgb|flow]'.format(modality)) 198 | self.modality = modality 199 | 200 | conv3d_1a_7x7 = Unit3Dpy( 201 | out_channels=64, 202 | in_channels=in_channels, 203 | kernel_size=(7, 7, 7), 204 | stride=(2, 2, 2), 205 | padding='SAME') 206 | # 1st conv-pool 207 | self.conv3d_1a_7x7 = conv3d_1a_7x7 208 | self.maxPool3d_2a_3x3 = MaxPool3dTFPadding( 209 | kernel_size=(1, 3, 3), stride=(1, 2, 2), padding='SAME') 210 | # conv conv 211 | conv3d_2b_1x1 = Unit3Dpy( 212 | out_channels=64, 213 | in_channels=64, 214 | kernel_size=(1, 1, 1), 215 | padding='SAME') 216 | self.conv3d_2b_1x1 = conv3d_2b_1x1 217 | conv3d_2c_3x3 = Unit3Dpy( 218 | out_channels=192, 219 | in_channels=64, 220 | kernel_size=(3, 3, 3), 221 | padding='SAME') 222 | self.conv3d_2c_3x3 = conv3d_2c_3x3 #here padding = 1 may influence the result 223 | self.maxPool3d_3a_3x3 = MaxPool3dTFPadding( 224 | kernel_size=(1, 3, 3), stride=(1, 2, 2), padding='SAME') 225 | 226 | # Mixed_3b 227 | self.mixed_3b = Mixed(192, [64, 96, 128, 16, 32, 32]) 228 | self.mixed_3c = Mixed(256, [128, 128, 192, 32, 96, 64]) 229 | 230 | self.maxPool3d_4a_3x3 = MaxPool3dTFPadding( 231 | kernel_size=(3, 3, 3), stride=(2, 2, 2), padding='SAME') 232 | 233 | # Mixed 4 234 | self.mixed_4b = Mixed(480, [192, 96, 208, 16, 48, 64]) 235 | self.mixed_4c = Mixed(512, [160, 112, 224, 24, 64, 64]) 236 | self.mixed_4d = Mixed(512, [128, 128, 256, 24, 64, 64]) 237 | self.mixed_4e = Mixed(512, [112, 144, 288, 32, 64, 64]) 238 | self.mixed_4f = Mixed(528, [256, 160, 320, 32, 128, 128]) 239 | 240 | self.maxPool3d_5a_2x2 = MaxPool3dTFPadding( 241 | kernel_size=(2, 2, 2), stride=(2, 2, 2), padding='SAME') 242 | 243 | # Mixed 5 244 | self.mixed_5b = Mixed(832, [256, 160, 320, 32, 128, 128]) 245 | self.mixed_5c = Mixed(832, [384, 192, 384, 48, 128, 128]) 246 | 247 | self.avg_pool = torch.nn.AvgPool3d((2, 7, 7), (1, 1, 1)) 248 | self.dropout = torch.nn.Dropout(dropout_prob) 249 | self.conv3d_0c_1x1_custom = Unit3Dpy( 250 | in_channels=1024, 251 | out_channels=self.num_classes, 252 | kernel_size=(1, 1, 1), 253 | activation=None, 254 | use_bias=False, 255 | use_bn=False) 256 | self.softmax = torch.nn.Softmax(1) 257 | # ==========two dropout for temporal ensembling========= 258 | self.dropout1 = nn.Dropout(0.3) 259 | self.dropout2 = nn.Dropout(0.3) 260 | 261 | def forward(self, inp): 262 | out = self.conv3d_1a_7x7(inp) 263 | out = self.maxPool3d_2a_3x3(out) 264 | out = self.conv3d_2b_1x1(out) 265 | out = self.conv3d_2c_3x3(out) 266 | out = self.maxPool3d_3a_3x3(out) 267 | out = self.mixed_3b(out) 268 | out = self.mixed_3c(out) 269 | # out = self.s_depend(out) 270 | out = self.maxPool3d_4a_3x3(out) 271 | # out = self.dropout1(out) 272 | out = self.mixed_4b(out) 273 | out = self.mixed_4c(out) 274 | out = self.mixed_4d(out) 275 | out = self.mixed_4e(out) 276 | out = self.mixed_4f(out) 277 | # out = self.m_depend(out) 278 | out = self.maxPool3d_5a_2x2(out) 279 | # out = self.dropout2(out) 280 | out = self.mixed_5b(out) 281 | out = self.mixed_5c(out) 282 | features = out 283 | out = self.avg_pool(out) 284 | out = 
self.dropout(out) 285 | out = self.conv3d_0c_1x1_custom(out) 286 | out = out.squeeze(3) 287 | out = out.squeeze(3) 288 | out = out.mean(2) 289 | out_logits = out 290 | return F.log_softmax(out, dim=1), features 291 | 292 | def load_tf_weights(self, sess): 293 | state_dict = {} 294 | if self.modality == 'rgb': 295 | prefix = 'RGB/inception_i3d' 296 | elif self.modality == 'flow': 297 | prefix = 'Flow/inception_i3d' 298 | load_conv3d(state_dict, 'conv3d_1a_7x7', sess, 299 | os.path.join(prefix, 'Conv3d_1a_7x7')) 300 | load_conv3d(state_dict, 'conv3d_2b_1x1', sess, 301 | os.path.join(prefix, 'Conv3d_2b_1x1')) 302 | load_conv3d(state_dict, 'conv3d_2c_3x3', sess, 303 | os.path.join(prefix, 'Conv3d_2c_3x3')) 304 | 305 | load_mixed(state_dict, 'mixed_3b', sess, 306 | os.path.join(prefix, 'Mixed_3b')) 307 | load_mixed(state_dict, 'mixed_3c', sess, 308 | os.path.join(prefix, 'Mixed_3c')) 309 | load_mixed(state_dict, 'mixed_4b', sess, 310 | os.path.join(prefix, 'Mixed_4b')) 311 | load_mixed(state_dict, 'mixed_4c', sess, 312 | os.path.join(prefix, 'Mixed_4c')) 313 | load_mixed(state_dict, 'mixed_4d', sess, 314 | os.path.join(prefix, 'Mixed_4d')) 315 | load_mixed(state_dict, 'mixed_4e', sess, 316 | os.path.join(prefix, 'Mixed_4e')) 317 | # Here goest to 0.1 max error with tf 318 | load_mixed(state_dict, 'mixed_4f', sess, 319 | os.path.join(prefix, 'Mixed_4f')) 320 | 321 | load_mixed( 322 | state_dict, 323 | 'mixed_5b', 324 | sess, 325 | os.path.join(prefix, 'Mixed_5b'), 326 | fix_typo=True) 327 | load_mixed(state_dict, 'mixed_5c', sess, 328 | os.path.join(prefix, 'Mixed_5c')) 329 | load_conv3d( 330 | state_dict, 331 | 'conv3d_0c_1x1', 332 | sess, 333 | os.path.join(prefix, 'Logits', 'Conv3d_0c_1x1'), 334 | bias=True, 335 | bn=False) 336 | self.load_state_dict(state_dict) 337 | 338 | 339 | def get_conv_params(sess, name, bias=False): 340 | # Get conv weights 341 | conv_weights_tensor = sess.graph.get_tensor_by_name( 342 | os.path.join(name, 'w:0')) 343 | if bias: 344 | conv_bias_tensor = sess.graph.get_tensor_by_name( 345 | os.path.join(name, 'b:0')) 346 | conv_bias = sess.run(conv_bias_tensor) 347 | conv_weights = sess.run(conv_weights_tensor) 348 | conv_shape = conv_weights.shape 349 | 350 | kernel_shape = conv_shape[0:3] 351 | in_channels = conv_shape[3] 352 | out_channels = conv_shape[4] 353 | 354 | conv_op = sess.graph.get_operation_by_name( 355 | os.path.join(name, 'convolution')) 356 | padding_name = conv_op.get_attr('padding') 357 | padding = _get_padding(padding_name, kernel_shape) 358 | all_strides = conv_op.get_attr('strides') 359 | strides = all_strides[1:4] 360 | conv_params = [ 361 | conv_weights, kernel_shape, in_channels, out_channels, strides, padding 362 | ] 363 | if bias: 364 | conv_params.append(conv_bias) 365 | return conv_params 366 | 367 | 368 | def get_bn_params(sess, name): 369 | moving_mean_tensor = sess.graph.get_tensor_by_name( 370 | os.path.join(name, 'moving_mean:0')) 371 | moving_var_tensor = sess.graph.get_tensor_by_name( 372 | os.path.join(name, 'moving_variance:0')) 373 | beta_tensor = sess.graph.get_tensor_by_name(os.path.join(name, 'beta:0')) 374 | moving_mean = sess.run(moving_mean_tensor) 375 | moving_var = sess.run(moving_var_tensor) 376 | beta = sess.run(beta_tensor) 377 | return moving_mean, moving_var, beta 378 | 379 | 380 | def _get_padding(padding_name, conv_shape): 381 | padding_name = padding_name.decode("utf-8") 382 | if padding_name == "VALID": 383 | return [0, 0] 384 | elif padding_name == "SAME": 385 | # return [math.ceil(int(conv_shape[0])/2), 
math.ceil(int(conv_shape[1])/2)] 386 | return [ 387 | math.floor(int(conv_shape[0]) / 2), 388 | math.floor(int(conv_shape[1]) / 2), 389 | math.floor(int(conv_shape[2]) / 2) 390 | ] 391 | else: 392 | raise ValueError('Invalid padding name ' + padding_name) 393 | 394 | 395 | def load_conv3d(state_dict, name_pt, sess, name_tf, bias=False, bn=True): 396 | # Transfer convolution params 397 | conv_name_tf = os.path.join(name_tf, 'conv_3d') 398 | conv_params = get_conv_params(sess, conv_name_tf, bias=bias) 399 | if bias: 400 | conv_weights, kernel_shape, in_channels, out_channels, strides, padding, conv_bias = conv_params 401 | else: 402 | conv_weights, kernel_shape, in_channels, out_channels, strides, padding = conv_params 403 | 404 | conv_weights_rs = np.transpose( 405 | conv_weights, (4, 3, 0, 1, 406 | 2)) # to pt format (out_c, in_c, depth, height, width) 407 | state_dict[name_pt + '.conv3d.weight'] = torch.from_numpy(conv_weights_rs) 408 | if bias: 409 | state_dict[name_pt + '.conv3d.bias'] = torch.from_numpy(conv_bias) 410 | 411 | # Transfer batch norm params 412 | if bn: 413 | conv_tf_name = os.path.join(name_tf, 'batch_norm') 414 | moving_mean, moving_var, beta = get_bn_params(sess, conv_tf_name) 415 | 416 | out_planes = conv_weights_rs.shape[0] 417 | state_dict[name_pt + '.batch3d.weight'] = torch.ones(out_planes) 418 | state_dict[name_pt + 419 | '.batch3d.bias'] = torch.from_numpy(beta.squeeze()) 420 | state_dict[name_pt 421 | + '.batch3d.running_mean'] = torch.from_numpy(moving_mean.squeeze()) 422 | state_dict[name_pt 423 | + '.batch3d.running_var'] = torch.from_numpy(moving_var.squeeze()) 424 | 425 | 426 | def load_mixed(state_dict, name_pt, sess, name_tf, fix_typo=False): 427 | # Branch 0 428 | load_conv3d(state_dict, name_pt + '.branch_0', sess, 429 | os.path.join(name_tf, 'Branch_0/Conv3d_0a_1x1')) 430 | 431 | # Branch .1 432 | load_conv3d(state_dict, name_pt + '.branch_1.0', sess, 433 | os.path.join(name_tf, 'Branch_1/Conv3d_0a_1x1')) 434 | load_conv3d(state_dict, name_pt + '.branch_1.1', sess, 435 | os.path.join(name_tf, 'Branch_1/Conv3d_0b_3x3')) 436 | 437 | # Branch 2 438 | load_conv3d(state_dict, name_pt + '.branch_2.0', sess, 439 | os.path.join(name_tf, 'Branch_2/Conv3d_0a_1x1')) 440 | if fix_typo: 441 | load_conv3d(state_dict, name_pt + '.branch_2.1', sess, 442 | os.path.join(name_tf, 'Branch_2/Conv3d_0a_3x3')) 443 | else: 444 | load_conv3d(state_dict, name_pt + '.branch_2.1', sess, 445 | os.path.join(name_tf, 'Branch_2/Conv3d_0b_3x3')) 446 | 447 | # Branch 3 448 | load_conv3d(state_dict, name_pt + '.branch_3.1', sess, 449 | os.path.join(name_tf, 'Branch_3/Conv3d_0b_1x1')) 450 | 451 | 452 | def weights_init(model): 453 | """ Initializes the weights of the CNN model using the Xavier 454 | initialization. 
455 | """ 456 | if isinstance(model, nn.Conv2d) or isinstance(model, nn.Conv3d) or isinstance(model, nn.Conv1d): 457 | xavier_uniform_(model.weight, gain=math.sqrt(2.0)) 458 | constant_(model.bias, 0.1) 459 | elif isinstance(model, nn.BatchNorm2d) or isinstance(model, nn.BatchNorm1d) or isinstance(model, nn.BatchNorm3d): 460 | normal_(model.weight, 1.0, 0.02) 461 | constant_(model.bias, 0) 462 | # zeros_(model.bias) 463 | -------------------------------------------------------------------------------- /net/mfnet_3d.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Original Author: Yunpeng Chen 4 | https://github.com/cypw/PyTorch-MFNet/blob/master/network/mfnet_3d.py 5 | """ 6 | 7 | from collections import OrderedDict 8 | import torch.nn as nn 9 | 10 | class BN_AC_CONV3D(nn.Module): 11 | 12 | def __init__(self, num_in, num_filter, 13 | kernel=(1,1,1), pad=(0,0,0), stride=(1,1,1), g=1, bias=False): 14 | super(BN_AC_CONV3D, self).__init__() 15 | self.bn = nn.BatchNorm3d(num_in) 16 | self.relu = nn.ReLU(inplace=True) 17 | self.conv = nn.Conv3d(num_in, num_filter, kernel_size=kernel, padding=pad, 18 | stride=stride, groups=g, bias=bias) 19 | 20 | def forward(self, x): 21 | h = self.relu(self.bn(x)) 22 | h = self.conv(h) 23 | return h 24 | 25 | 26 | class MF_UNIT(nn.Module): 27 | 28 | def __init__(self, num_in, num_mid, num_out, g=1, stride=(1,1,1), first_block=False, use_3d=True): 29 | super(MF_UNIT, self).__init__() 30 | num_ix = int(num_mid/4) 31 | kt,pt = (3,1) if use_3d else (1,0) 32 | # prepare input 33 | self.conv_i1 = BN_AC_CONV3D(num_in=num_in, num_filter=num_ix, kernel=(1,1,1), pad=(0,0,0)) 34 | self.conv_i2 = BN_AC_CONV3D(num_in=num_ix, num_filter=num_in, kernel=(1,1,1), pad=(0,0,0)) 35 | # main part 36 | self.conv_m1 = BN_AC_CONV3D(num_in=num_in, num_filter=num_mid, kernel=(kt,3,3), pad=(pt,1,1), stride=stride, g=g) 37 | if first_block: 38 | self.conv_m2 = BN_AC_CONV3D(num_in=num_mid, num_filter=num_out, kernel=(1,1,1), pad=(0,0,0)) 39 | else: 40 | self.conv_m2 = BN_AC_CONV3D(num_in=num_mid, num_filter=num_out, kernel=(1,3,3), pad=(0,1,1), g=g) 41 | # adapter 42 | if first_block: 43 | self.conv_w1 = BN_AC_CONV3D(num_in=num_in, num_filter=num_out, kernel=(1,1,1), pad=(0,0,0), stride=stride) 44 | 45 | def forward(self, x): 46 | 47 | h = self.conv_i1(x) 48 | x_in = x + self.conv_i2(h) 49 | 50 | h = self.conv_m1(x_in) 51 | h = self.conv_m2(h) 52 | 53 | if hasattr(self, 'conv_w1'): 54 | x = self.conv_w1(x) 55 | 56 | return h + x 57 | 58 | 59 | class MFNET_3D(nn.Module): 60 | 61 | def __init__(self, num_classes, dropout=None, pretrained=False, pretrained_model="", **kwargs): 62 | super(MFNET_3D, self).__init__() 63 | 64 | groups = 16 65 | k_sec = { 2: 3, \ 66 | 3: 4, \ 67 | 4: 6, \ 68 | 5: 3 } 69 | 70 | # conv1 - x224 (x16) 71 | conv1_num_out = 16 72 | self.conv1 = nn.Sequential(OrderedDict([ 73 | ('conv', nn.Conv3d( 3, conv1_num_out, kernel_size=(3,5,5), padding=(1,2,2), stride=(1,2,2), bias=False)), 74 | ('bn', nn.BatchNorm3d(conv1_num_out)), 75 | ('relu', nn.ReLU(inplace=True)) 76 | ])) 77 | self.maxpool = nn.MaxPool3d(kernel_size=(1,3,3), stride=(1,2,2), padding=(0,1,1)) 78 | 79 | # conv2 - x56 (x8) 80 | num_mid = 96 81 | conv2_num_out = 96 82 | self.conv2 = nn.Sequential(OrderedDict([ 83 | ("B%02d"%i, MF_UNIT(num_in=conv1_num_out if i==1 else conv2_num_out, 84 | num_mid=num_mid, 85 | num_out=conv2_num_out, 86 | stride=(2,1,1) if i==1 else (1,1,1), 87 | g=groups, 88 | first_block=(i==1))) for i in 
range(1,k_sec[2]+1) 89 | ])) 90 | 91 | # conv3 - x28 (x8) 92 | num_mid *= 2 93 | conv3_num_out = 2 * conv2_num_out 94 | self.conv3 = nn.Sequential(OrderedDict([ 95 | ("B%02d"%i, MF_UNIT(num_in=conv2_num_out if i==1 else conv3_num_out, 96 | num_mid=num_mid, 97 | num_out=conv3_num_out, 98 | stride=(1,2,2) if i==1 else (1,1,1), 99 | g=groups, 100 | first_block=(i==1))) for i in range(1,k_sec[3]+1) 101 | ])) 102 | 103 | # conv4 - x14 (x8) 104 | num_mid *= 2 105 | conv4_num_out = 2 * conv3_num_out 106 | self.conv4 = nn.Sequential(OrderedDict([ 107 | ("B%02d"%i, MF_UNIT(num_in=conv3_num_out if i==1 else conv4_num_out, 108 | num_mid=num_mid, 109 | num_out=conv4_num_out, 110 | stride=(1,2,2) if i==1 else (1,1,1), 111 | g=groups, 112 | first_block=(i==1))) for i in range(1,k_sec[4]+1) 113 | ])) 114 | 115 | # conv5 - x7 (x8) 116 | num_mid *= 2 117 | conv5_num_out = 2 * conv4_num_out 118 | self.conv5 = nn.Sequential(OrderedDict([ 119 | ("B%02d"%i, MF_UNIT(num_in=conv4_num_out if i==1 else conv5_num_out, 120 | num_mid=num_mid, 121 | num_out=conv5_num_out, 122 | stride=(1,2,2) if i==1 else (1,1,1), 123 | g=groups, 124 | first_block=(i==1))) for i in range(1,k_sec[5]+1) 125 | ])) 126 | 127 | # final 128 | self.tail = nn.Sequential(OrderedDict([ 129 | ('bn', nn.BatchNorm3d(conv5_num_out)), 130 | ('relu', nn.ReLU(inplace=True)) 131 | ])) 132 | 133 | if dropout: 134 | self.globalpool = nn.Sequential(OrderedDict([ 135 | ('avg', nn.AvgPool3d(kernel_size=(8,7,7), stride=(1,1,1))), 136 | ('dropout', nn.Dropout(p=dropout)), 137 | ])) 138 | else: 139 | self.globalpool = nn.Sequential(OrderedDict([ 140 | ('avg', nn.AvgPool3d(kernel_size=(8,7,7), stride=(1,1,1))), 141 | # ('dropout', nn.Dropout(p=0.5)), only for fine-tuning 142 | ])) 143 | self.classifier = nn.Linear(conv5_num_out, num_classes) 144 | 145 | def forward(self, x): 146 | assert x.shape[2] == 16 147 | 148 | h = self.conv1(x) # x224 -> x112 149 | h = self.maxpool(h) # x112 -> x56 150 | 151 | h = self.conv2(h) # x56 -> x56 152 | h = self.conv3(h) # x56 -> x28 153 | h = self.conv4(h) # x28 -> x14 154 | h = self.conv5(h) # x14 -> x7 155 | h = self.tail(h) 156 | layerout = h.detach().cpu() 157 | h = self.globalpool(h) 158 | 159 | h = h.view(h.shape[0], -1) 160 | h = self.classifier(h) 161 | 162 | return h, layerout -------------------------------------------------------------------------------- /net/model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2020-09-17 16:02 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : amax_Action_Video_Visualization 8 | # @File : model.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | 16 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 17 | 18 | from torch import nn 19 | import torch.nn.functional as F 20 | import torch 21 | import numpy as np 22 | 23 | class Flatten(nn.Module): 24 | def __init__(self): 25 | super(Flatten, self).__init__() 26 | 27 | def forward(self, input): 28 | return input.view(input.size(0), -1) 29 | 30 | 31 | class Normalize(nn.Module): 32 | def __init__(self, power=2): 33 | super(Normalize, self).__init__() 34 | self.power = power 35 | 36 | def forward(self, x): 37 | norm = x.pow(self.power).sum(1, keepdim=True).pow(1./self.power) 38 | out = x.div(norm) 39 | return out 40 | 41 | 42 | class Sharpen(nn.Module): 43 | def __init__(self, tempeature=0.5): 44 | 
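        # Temperature sharpening (the operator used for pseudo-label sharpening in
        # MixMatch-style semi-supervised training): each probability is raised to 1/T
        # and the vector is re-normalised, so T < 1 concentrates mass on the likeliest
        # class. Worked example (illustrative, not executed by this module): with T = 0.5,
        # [0.6, 0.4] -> [0.36, 0.16] -> [0.36/0.52, 0.16/0.52] ~= [0.69, 0.31].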
super(Sharpen, self).__init__() 45 | self.T = tempeature 46 | 47 | def forward(self, probabilities): 48 | tempered = torch.pow(probabilities, 1 / self.T) 49 | tempered = tempered / tempered.sum(dim=-1, keepdim=True) 50 | return tempered 51 | 52 | class MotionEnhance(nn.Module): 53 | def __init__(self, beta=1, maxium_radio=0.3): 54 | super(MotionEnhance, self).__init__() 55 | self.beta = beta 56 | self.maxium_radio = maxium_radio 57 | 58 | def forward(self, x): 59 | b, c, t, h, w = x.size() 60 | mean = nn.AdaptiveAvgPool3d((1, h, w))(x) 61 | lam = np.random.beta(self.beta, self.beta) * self.maxium_radio 62 | out = (x - mean * lam) * (1 / (1 - lam)) 63 | return out -------------------------------------------------------------------------------- /net/mp_i3d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-04-27 16:18 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : Action_Video_Visualization 8 | # @File : mp_i3d.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | 16 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 17 | #!/usr/bin/env python 18 | # -*- coding: utf-8 -*- 19 | """ 20 | # @Time : 2019-04-08 18:32 21 | # @Author : Awiny 22 | # @Site : 23 | # @Project : pytorch_i3d 24 | # @File : multi_path_i3d.py 25 | # @Software: PyCharm 26 | # @Github : https://github.com/FingerRec 27 | # @Blog : http://fingerrec.github.io 28 | """ 29 | import scipy.io 30 | import os 31 | 32 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 33 | import scipy.io 34 | import os 35 | import random 36 | 37 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 38 | #======================================================================================== 39 | #This network is designed to capture different range dependencies and cobine them. 40 | #With dilated conv and downsample, i want to down the number of parameters. 41 | #The network are divided into 3 parllel network. and across information between them. 
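#(in the forward pass the per-path class scores of the three paths are summed by default,
# or concatenated and fused by a small fc layer when concat fusion is enabled; the intended
# per-path inputs and resolutions are listed next)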
42 | #1:64frame input, 56 x 56 input, long range temporal dependencies, call s 43 | #2:16frame input, 112x112, middle range temporal dependencies, call m 44 | #3:4frame input, 224x224, shortest temporal dependencies, call l 45 | #after these network, use tpp to combine them and put it into fc layer 46 | #======================================================================================== 47 | 48 | import torch 49 | import torch.nn as nn 50 | import torch.nn.functional as F 51 | from torch.autograd import Variable 52 | 53 | import numpy as np 54 | import math 55 | from math import exp 56 | import os 57 | import sys 58 | from collections import OrderedDict 59 | 60 | class MaxPool3dSamePadding(nn.MaxPool3d): 61 | 62 | def compute_pad(self, dim, s): 63 | if s % self.stride[dim] == 0: 64 | return max(self.kernel_size[dim] - self.stride[dim], 0) 65 | else: 66 | return max(self.kernel_size[dim] - (s % self.stride[dim]), 0) 67 | 68 | def forward(self, x): 69 | # compute 'same' padding 70 | (batch, channel, t, h, w) = x.size() 71 | # print t,h,w 72 | out_t = np.ceil(float(t) / float(self.stride[0])) 73 | out_h = np.ceil(float(h) / float(self.stride[1])) 74 | out_w = np.ceil(float(w) / float(self.stride[2])) 75 | # print out_t, out_h, out_w 76 | pad_t = self.compute_pad(0, t) 77 | pad_h = self.compute_pad(1, h) 78 | pad_w = self.compute_pad(2, w) 79 | # print pad_t, pad_h, pad_w 80 | 81 | pad_t_f = pad_t // 2 82 | pad_t_b = pad_t - pad_t_f 83 | pad_h_f = pad_h // 2 84 | pad_h_b = pad_h - pad_h_f 85 | pad_w_f = pad_w // 2 86 | pad_w_b = pad_w - pad_w_f 87 | 88 | pad = (pad_w_f, pad_w_b, pad_h_f, pad_h_b, pad_t_f, pad_t_b) 89 | # print x.size() 90 | # print pad 91 | x = F.pad(x, pad) 92 | return super(MaxPool3dSamePadding, self).forward(x) 93 | 94 | class Unit3D(nn.Module): 95 | 96 | def __init__(self, in_channels, 97 | output_channels, 98 | kernel_shape=(1, 1, 1), 99 | stride=(1, 1, 1), 100 | padding=0, 101 | dilation=1, 102 | activation_fn=F.relu, 103 | use_batch_norm=True, 104 | use_bias=False, 105 | name='unit_3d'): 106 | 107 | """Initializes Unit3D module.""" 108 | super(Unit3D, self).__init__() 109 | 110 | self._output_channels = output_channels 111 | self._kernel_shape = kernel_shape 112 | self._stride = stride 113 | self._use_batch_norm = use_batch_norm 114 | self._activation_fn = activation_fn 115 | self._use_bias = use_bias 116 | self.name = name 117 | self.padding = padding 118 | 119 | self.conv3d = nn.Conv3d(in_channels=in_channels, 120 | out_channels=self._output_channels, 121 | kernel_size=self._kernel_shape, 122 | stride=self._stride, 123 | dilation=dilation, 124 | padding=0, 125 | # we always want padding to be 0 here. 
We will dynamically pad based on input size in forward function 126 | bias=self._use_bias) 127 | 128 | if self._use_batch_norm: 129 | self.bn = nn.BatchNorm3d(self._output_channels, eps=0.001, momentum=0.01) 130 | 131 | def compute_pad(self, dim, s): 132 | if s % self._stride[dim] == 0: 133 | return max(self._kernel_shape[dim] - self._stride[dim], 0) 134 | else: 135 | return max(self._kernel_shape[dim] - (s % self._stride[dim]), 0) 136 | 137 | def forward(self, x): 138 | # compute 'same' padding 139 | (batch, channel, t, h, w) = x.size() 140 | # print t,h,w 141 | out_t = np.ceil(float(t) / float(self._stride[0])) 142 | out_h = np.ceil(float(h) / float(self._stride[1])) 143 | out_w = np.ceil(float(w) / float(self._stride[2])) 144 | # print out_t, out_h, out_w 145 | pad_t = self.compute_pad(0, t) 146 | pad_h = self.compute_pad(1, h) 147 | pad_w = self.compute_pad(2, w) 148 | # print pad_t, pad_h, pad_w 149 | 150 | pad_t_f = pad_t // 2 151 | pad_t_b = pad_t - pad_t_f 152 | pad_h_f = pad_h // 2 153 | pad_h_b = pad_h - pad_h_f 154 | pad_w_f = pad_w // 2 155 | pad_w_b = pad_w - pad_w_f 156 | 157 | pad = (pad_w_f, pad_w_b, pad_h_f, pad_h_b, pad_t_f, pad_t_b) 158 | # print x.size() 159 | # print pad 160 | x = F.pad(x, pad) 161 | # print x.size() 162 | 163 | x = self.conv3d(x) 164 | if self._use_batch_norm: 165 | x = self.bn(x) 166 | if self._activation_fn is not None: 167 | x = self._activation_fn(x) 168 | return x 169 | 170 | class TemporalPyramidPool3D_2(nn.Module): 171 | """ 172 | Args: 173 | out_side (tuple): Length of side in the pooling results of each pyramid layer. 174 | 175 | Inputs: 176 | - `input`: the input Tensor to invert ([batch, channel, width, height]) 177 | """ 178 | 179 | def __init__(self, out_side): 180 | super(TemporalPyramidPool3D_2, self).__init__() 181 | self.out_side = out_side 182 | self.out_t = out_side[0] + out_side[1] + out_side[2] 183 | 184 | def forward(self, x): 185 | out = None 186 | for n in self.out_side: 187 | t_r, w_r, h_r = map(lambda s: math.ceil(s / n), x.size()[2:]) # Receptive Field Size 188 | s_t, s_w, s_h = map(lambda s: math.floor(s / n), x.size()[2:]) # Stride 189 | max_pool = nn.MaxPool3d(kernel_size=(t_r, w_r, h_r), stride=(s_t, s_w, s_h)) 190 | y = max_pool(x) 191 | avg_pool = nn.AdaptiveAvgPool3d((y.size(2), 1, 1)) 192 | y = avg_pool(y) 193 | # print(y.size()) 194 | if out is None: 195 | out = y.view(y.size()[0], y.size()[1], -1, 1, 1) 196 | else: 197 | out = torch.cat((out, y.view(y.size()[0], y.size()[1], -1, 1, 1)), 2) 198 | return out 199 | 200 | class TemporalPyramidPool3D(nn.Module): 201 | """ 202 | Args: 203 | out_side (tuple): Length of side in the pooling results of each pyramid layer. 204 | 205 | Inputs: 206 | - `input`: the input Tensor to invert ([batch, channel, width, height]) 207 | """ 208 | 209 | def __init__(self, out_side): 210 | super(TemporalPyramidPool3D, self).__init__() 211 | self.out_side = out_side 212 | 213 | def forward(self, x): 214 | out = None 215 | for n in self.out_side: 216 | avg_pool = nn.AdaptiveMaxPool3d((n, 1, 1)) 217 | y = avg_pool(x) 218 | if out is None: 219 | out = y.view(y.size()[0], y.size()[1], -1, 1, 1) 220 | else: 221 | out = torch.cat((out, y.view(y.size()[0], y.size()[1], -1, 1, 1)), 2) 222 | return out 223 | 224 | class SpatialPyramidPool3D(nn.Module): 225 | """ 226 | Args: 227 | out_side (tuple): Length of side in the pooling results of each pyramid layer. 
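        Example (shape sketch only, assuming out_side=(1, 2, 4)): every pyramid level is
        max-pooled to (batch, channel, 1, n, n) and flattened, so an input of shape
        (b, c, t, h, w) yields an output of shape (b, c, 1, 1 + 4 + 16, 1) = (b, c, 1, 21, 1).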
228 | 229 | Inputs: 230 | - `input`: the input Tensor to invert ([batch, channel, width, height]) 231 | """ 232 | 233 | def __init__(self, out_side): 234 | super(SpatialPyramidPool3D, self).__init__() 235 | self.out_side = out_side 236 | 237 | def forward(self, x): 238 | out = None 239 | for n in self.out_side: 240 | max_pool = nn.AdaptiveMaxPool3d((1, n, n)) 241 | y = max_pool(x) 242 | if out is None: 243 | out = y.view(y.size()[0], y.size()[1], 1, n*n, 1) 244 | else: 245 | out = torch.cat((out, y.view(y.size()[0], y.size()[1], 1, n*n, 1)), 3) 246 | return out 247 | 248 | class InplaceShift(torch.autograd.Function): 249 | # Special thanks to @raoyongming for the help to this function 250 | @staticmethod 251 | def forward(ctx, input, fold): 252 | # not support higher order gradient 253 | # input = input.detach_() 254 | ctx.fold_ = fold 255 | n, t, c, h, w = input.size() 256 | buffer = input.data.new(n, t, fold, h, w).zero_() 257 | buffer[:, :-1] = input.data[:, 1:, :fold] 258 | input.data[:, :, :fold] = buffer 259 | buffer.zero_() 260 | buffer[:, 1:] = input.data[:, :-1, fold: 2 * fold] 261 | input.data[:, :, fold: 2 * fold] = buffer 262 | return input 263 | 264 | @staticmethod 265 | def backward(ctx, grad_output): 266 | # grad_output = grad_output.detach_() 267 | fold = ctx.fold_ 268 | n, t, c, h, w = grad_output.size() 269 | buffer = grad_output.data.new(n, t, fold, h, w).zero_() 270 | buffer[:, 1:] = grad_output.data[:, :-1, :fold] 271 | grad_output.data[:, :, :fold] = buffer 272 | buffer.zero_() 273 | buffer[:, :-1] = grad_output.data[:, 1:, fold: 2 * fold] 274 | grad_output.data[:, :, fold: 2 * fold] = buffer 275 | return grad_output, None 276 | 277 | class TemporalShuffle(nn.Module): 278 | def __init__(self, fold_div=8): 279 | super(TemporalShuffle, self).__init__() 280 | self.fold_div = fold_div 281 | 282 | def forward(self, x): 283 | b, t, c, h, w = x.size() 284 | fold = c // self.fold_div 285 | out = InplaceShift.apply(x, fold) 286 | return out.view(b, t, c, h, w) 287 | 288 | class MultiDependBlock(nn.Module): 289 | def __init__(self, in_channel, out_channel, concat=False, fc=False): 290 | super(MultiDependBlock, self).__init__() 291 | self.out_channel = out_channel 292 | self.channel_compress = Unit3D(in_channels=in_channel, output_channels=out_channel, 293 | kernel_shape=[1, 1, 1], 294 | stride=(1, 1, 1), 295 | padding=0, 296 | activation_fn=None, 297 | use_batch_norm=False, 298 | use_bias=True, 299 | name='channel_compress') 300 | self.long_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 301 | kernel_shape=[2, 1, 1], 302 | stride=(1, 1, 1), 303 | padding=0, 304 | activation_fn=None, 305 | use_batch_norm=False, 306 | use_bias=True, 307 | name='long_range_depen') 308 | self.middle_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 309 | kernel_shape=[2, 1, 1], 310 | stride=(1, 1, 1), 311 | padding=0, 312 | activation_fn=None, 313 | use_batch_norm=False, 314 | use_bias=True, 315 | name='middle_range_depen') 316 | self.small_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 317 | kernel_shape=[2, 1, 1], 318 | stride=(1, 1, 1), 319 | padding=0, 320 | activation_fn=None, 321 | use_batch_norm=False, 322 | use_bias=True, 323 | name='small_range_depen') 324 | self.local_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 325 | kernel_shape=[1, 1, 1], 326 | stride=(1, 1, 1), 327 | padding=0, 328 | activation_fn=None, 329 | use_batch_norm=False, 330 | use_bias=True, 331 | 
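                                        # In forward() these temporal convs see progressively
                                        # denser slices of the spatially pooled clip, from only
                                        # the first and last frame (long range) up to a
                                        # near-dense sampling (local range); in the default
                                        # non-concat path their globally max-pooled responses
                                        # are summed.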
name='local_range_depen') 332 | ''' 333 | self.single_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 334 | kernel_shape=[1, 1, 1], 335 | stride=(1, 1, 1), 336 | padding=0, 337 | activation_fn=None, 338 | use_batch_norm=False, 339 | use_bias=True, 340 | name='single_range_depen') 341 | ''' 342 | self.concat = concat 343 | self.fc = fc 344 | if self.fc: 345 | self.fc_fusion = nn.Sequential( 346 | nn.ReLU(), 347 | nn.Linear(3 * out_channel, 128), 348 | nn.ReLU(), 349 | nn.Linear(128, out_channel), 350 | ) 351 | #self.dropout_probality = 0.05 352 | def forward(self, x): 353 | b, c, t, h, w = x.size() 354 | spatial_pool_x = nn.AdaptiveAvgPool3d((t,1,1))(x)/2 + nn.AdaptiveMaxPool3d((t,1,1))(x)/2 355 | #spatial_pool_x = nn.Dropout(self.dropout_probality)(spatial_pool_x) 356 | spatial_pool_x = self.channel_compress(spatial_pool_x) 357 | long_range_depen = self.long_range_depen(spatial_pool_x[:,:,::(t-1),:,:]) 358 | middle_range_depen = self.middle_range_depen(spatial_pool_x[:,:,::(t-1)//2,:,:]) 359 | small_range_depen = self.small_range_depen(spatial_pool_x[:,:,::(t-1)//4,:,:]) 360 | local_range_depen = self.local_range_depen(spatial_pool_x[:,:,::(t-1)//7,:,:]) 361 | #single_range_depen = self.single_range_depen(spatial_pool_x[:, :, ::1, :, :]) 362 | ''' 363 | long_range_depen = self.long_range_depen(spatial_pool_x[:,:,::7,:,:]) 364 | middle_range_depen = self.middle_range_depen(spatial_pool_x[:,:,::4,:,:]) 365 | small_range_depen = self.small_range_depen(spatial_pool_x[:,:,::2,:,:]) 366 | local_range_depen = self.local_range_depen(spatial_pool_x[:,:,::1,:,:]) 367 | ''' 368 | if self.fc: 369 | out = torch.cat((nn.AdaptiveMaxPool3d((1, 1, 1))(long_range_depen).squeeze(2).squeeze(2).squeeze(2), nn.AdaptiveMaxPool3d((1, 1, 1))(middle_range_depen).squeeze(2).squeeze(2).squeeze(2), nn.AdaptiveMaxPool3d((1, 1, 1))(small_range_depen).squeeze(2).squeeze(2).squeeze(2)), dim = 1) 370 | return self.fc_fusion(out) 371 | elif self.concat: 372 | return torch.cat((nn.AdaptiveMaxPool3d((1, 1, 1))(long_range_depen).squeeze(2).squeeze(2).squeeze(2), nn.AdaptiveMaxPool3d((1, 1, 1))(middle_range_depen).squeeze(2).squeeze(2).squeeze(2), nn.AdaptiveMaxPool3d((1, 1, 1))(small_range_depen).squeeze(2).squeeze(2).squeeze(2)), dim = 1) 373 | else: 374 | return nn.AdaptiveMaxPool3d((1, 1, 1))(long_range_depen).squeeze(2).squeeze(2).squeeze(2) + \ 375 | nn.AdaptiveMaxPool3d((1, 1, 1))(middle_range_depen).squeeze(2).squeeze(2).squeeze(2) + \ 376 | nn.AdaptiveMaxPool3d((1, 1, 1))(small_range_depen).squeeze(2).squeeze(2).squeeze(2) + \ 377 | nn.AdaptiveMaxPool3d((1, 1, 1))(local_range_depen).squeeze(2).squeeze(2).squeeze(2) #+ \ 378 | #nn.AdaptiveMaxPool3d((1, 1, 1))(single_range_depen).squeeze(2).squeeze(2).squeeze(2) 379 | 380 | class TemporalDependBlock(nn.Module): 381 | def __init__(self, in_channel, out_channel): 382 | super(TemporalDependBlock, self).__init__() 383 | self.out_channel = out_channel 384 | self.channel_compress = Unit3D(in_channels=in_channel, output_channels=out_channel, 385 | kernel_shape=[1, 1, 1], 386 | stride=(1, 1, 1), 387 | padding=0, 388 | activation_fn=None, 389 | use_batch_norm=False, 390 | use_bias=True, 391 | name='channel_compress') 392 | self.tpp = TemporalPyramidPool3D((1,2,4,8)) 393 | self.temporal_conv = Unit3D(in_channels=out_channel, output_channels=out_channel, 394 | kernel_shape=[15, 1, 1], 395 | stride=(15, 1, 1), 396 | padding=0, 397 | activation_fn=None, 398 | use_batch_norm=False, 399 | use_bias=True, 400 | name='latter_temporal_conv') 401 | def 
forward(self, x): 402 | b, c, t, h, w = x.size() 403 | compress = self.channel_compress(x) 404 | tpp = self.tpp(compress) 405 | out = self.temporal_conv(tpp) 406 | return out.view(b, out.size(1)) 407 | 408 | class HeavyMultiDependBlock(nn.Module): 409 | def __init__(self, in_channel, out_channel): 410 | super(HeavyMultiDependBlock, self).__init__() 411 | self.out_channel = out_channel 412 | self.channel_compress = Unit3D(in_channels=in_channel, output_channels=out_channel, 413 | kernel_shape=[1, 1, 1], 414 | stride=(1, 1, 1), 415 | padding=0, 416 | activation_fn=None, 417 | use_batch_norm=False, 418 | use_bias=True, 419 | name='channel_compress') 420 | self.long_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 421 | kernel_shape=[2, 1, 1], 422 | stride=(1, 1, 1), 423 | padding=0, 424 | activation_fn=None, 425 | use_batch_norm=False, 426 | use_bias=True, 427 | name='long_range_depen') 428 | self.middle_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 429 | kernel_shape=[2, 1, 1], 430 | stride=(1, 1, 1), 431 | padding=0, 432 | activation_fn=None, 433 | use_batch_norm=False, 434 | use_bias=True, 435 | name='middle_range_depen') 436 | self.small_range_depen = Unit3D(in_channels=out_channel, output_channels=out_channel, 437 | kernel_shape=[2, 1, 1], 438 | stride=(1, 1, 1), 439 | padding=0, 440 | activation_fn=None, 441 | use_batch_norm=False, 442 | use_bias=True, 443 | name='small_range_depen') 444 | self.tpp_1 = TemporalPyramidPool3D((1,2,4)) 445 | self.fusion_1 = Unit3D(in_channels=out_channel, output_channels=out_channel, 446 | kernel_shape=[7, 1, 1], 447 | stride=(7, 1, 1), 448 | padding=0, 449 | activation_fn=None, 450 | use_batch_norm=False, 451 | use_bias=True, 452 | name='long_range_depen') 453 | self.tpp_2 = TemporalPyramidPool3D((1,2,4)) 454 | self.fusion_2 = Unit3D(in_channels=out_channel, output_channels=out_channel, 455 | kernel_shape=[7, 1, 1], 456 | stride=(7, 1, 1), 457 | padding=0, 458 | activation_fn=None, 459 | use_batch_norm=False, 460 | use_bias=True, 461 | name='middle_range_depen') 462 | self.tpp_3 = TemporalPyramidPool3D((1,2,4)) 463 | self.fusion_3 = Unit3D(in_channels=out_channel, output_channels=out_channel, 464 | kernel_shape=[7, 1, 1], 465 | stride=(7, 1, 1), 466 | padding=0, 467 | activation_fn=None, 468 | use_batch_norm=False, 469 | use_bias=True, 470 | name='small_range_depen') 471 | def forward(self, x): 472 | b, c, t, h, w = x.size() 473 | spatial_pool_x = nn.AdaptiveAvgPool3d((t,1,1))(x)/2 + nn.AdaptiveMaxPool3d((t,1,1))(x)/2 474 | spatial_pool_x = self.channel_compress(spatial_pool_x) 475 | long_range_depen = self.long_range_depen(spatial_pool_x[:,:,::4,:,:]) 476 | middle_range_depen = self.middle_range_depen(spatial_pool_x[:,:,::2,:,:]) 477 | small_range_depen = self.small_range_depen(spatial_pool_x[:,:,::1,:,:]) 478 | long_range_depen = self.tpp_1(long_range_depen) 479 | middle_range_depen = self.tpp_2(middle_range_depen) 480 | small_range_depen = self.tpp_3(small_range_depen) 481 | return self.fusion_1(long_range_depen).squeeze(2).squeeze(2).squeeze(2) + self.fusion_2(middle_range_depen).squeeze(2).squeeze(2).squeeze(2) + self.fusion_3(small_range_depen).squeeze(2).squeeze(2).squeeze(2) 482 | 483 | class InceptionModule(nn.Module): 484 | def __init__(self, in_channels, out_channels, name): 485 | super(InceptionModule, self).__init__() 486 | 487 | self.b0 = Unit3D(in_channels=in_channels, output_channels=out_channels[0], kernel_shape=[1, 1, 1], padding=0, 488 | name=name + '/Branch_0/Conv3d_0a_1x1') 489 | 
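        # Inception-style branch layout: 1x1, 1x1 -> 3x3x3, 1x1 -> 3x3x3, and max-pool -> 1x1;
        # forward() concatenates the four branches on the channel dim, so the output width is
        # out_channels[0] + out_channels[2] + out_channels[4] + out_channels[5]
        # (e.g. [64, 96, 128, 16, 32, 32] -> 64 + 128 + 32 + 32 = 256, which is why
        # Mixed_3c below is built with 256 input channels).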
self.b1a = Unit3D(in_channels=in_channels, output_channels=out_channels[1], kernel_shape=[1, 1, 1], padding=0, 490 | name=name + '/Branch_1/Conv3d_0a_1x1') 491 | self.b1b = Unit3D(in_channels=out_channels[1], output_channels=out_channels[2], kernel_shape=[3, 3, 3], 492 | name=name + '/Branch_1/Conv3d_0b_3x3') 493 | self.b2a = Unit3D(in_channels=in_channels, output_channels=out_channels[3], kernel_shape=[1, 1, 1], padding=0, 494 | name=name + '/Branch_2/Conv3d_0a_1x1') 495 | self.b2b = Unit3D(in_channels=out_channels[3], output_channels=out_channels[4], kernel_shape=[3, 3, 3], 496 | name=name + '/Branch_2/Conv3d_0b_3x3') 497 | self.b3a = MaxPool3dSamePadding(kernel_size=[3, 3, 3], 498 | stride=(1, 1, 1), padding=0) 499 | self.b3b = Unit3D(in_channels=in_channels, output_channels=out_channels[5], kernel_shape=[1, 1, 1], padding=0, 500 | name=name + '/Branch_3/Conv3d_0b_1x1') 501 | #self.temporal_shift = TemporalShuffle(fold_div=16) 502 | self.name = name 503 | 504 | def forward(self, x): 505 | b0 = self.b0(x) 506 | b1 = self.b1b(self.b1a(x)) 507 | b2 = self.b2b(self.b2a(x)) 508 | b3 = self.b3b(self.b3a(x)) 509 | return torch.cat([b0, b1, b2, b3], dim=1) 510 | """ 511 | out = torch.cat([b0, b1, b2, b3], dim=1) 512 | b, c, t, h, w = x.size() 513 | if t > 16: 514 | ts_1 = self.temporal_shift(out) 515 | return out + ts_1 516 | else: 517 | ''' 518 | tb0 = self.tba(x) 519 | tb1 = self.tbb(tb0) 520 | tb2 = self.tbc(tb1) 521 | ''' 522 | return out 523 | """ 524 | class TemporalInceptionModule(nn.Module): 525 | def __init__(self, in_channels, out_channels, name): 526 | super(TemporalInceptionModule, self).__init__() 527 | 528 | self.b0 = Unit3D(in_channels=in_channels, output_channels=out_channels[0], kernel_shape=[1, 1, 1], padding=0, 529 | name=name + '/Branch_0/Conv3d_0a_1x1') 530 | self.b1a = Unit3D(in_channels=in_channels, output_channels=out_channels[1], kernel_shape=[1, 1, 1], padding=0, 531 | name=name + '/Branch_1/Conv3d_0a_1x1') 532 | self.b1b = Unit3D(in_channels=out_channels[1], output_channels=out_channels[2], kernel_shape=[1, 3, 3], 533 | name=name + '/Branch_1/Conv3d_0b_3x3') 534 | self.b2a = Unit3D(in_channels=in_channels, output_channels=out_channels[3], kernel_shape=[1, 1, 1], padding=0, 535 | name=name + '/Branch_2/Conv3d_0a_1x1') 536 | self.b2b = Unit3D(in_channels=out_channels[3], output_channels=out_channels[4], kernel_shape=[1, 3, 3], 537 | name=name + '/Branch_2/Conv3d_0b_3x3') 538 | self.b3a = MaxPool3dSamePadding(kernel_size=[1, 3, 3], 539 | stride=(1, 1, 1), padding=0) 540 | self.b3b = Unit3D(in_channels=in_channels, output_channels=out_channels[5], kernel_shape=[1, 1, 1], padding=0, 541 | name=name + '/Branch_3/Conv3d_0b_1x1') 542 | self.name = name 543 | 544 | def forward(self, x): 545 | b0 = self.b0(x) 546 | b1 = self.b1b(self.b1a(x)) 547 | b2 = self.b2b(self.b2a(x)) 548 | b3 = self.b3b(self.b3a(x)) 549 | return torch.cat([b0, b1, b2, b3], dim=1) 550 | 551 | class MultiPathI3d(nn.Module): 552 | def __init__(self, num_classes=400, spatial_squeeze=True, in_channels=3, dropout_prob=0.5): 553 | 554 | super(MultiPathI3d, self).__init__() 555 | self._num_classes = num_classes 556 | self._spatial_squeeze = spatial_squeeze 557 | self.logits = None 558 | 559 | self.Conv3d_1a_7x7 = Unit3D(in_channels=in_channels, output_channels=64, kernel_shape=[7, 7, 7], 560 | stride=(2, 2, 2), padding=(3, 3, 3), name='conv3d_1a_7_7') 561 | 562 | self.MaxPool3d_2a_3x3 = MaxPool3dSamePadding(kernel_size=[1, 3, 3], stride=(1, 2, 2), 563 | padding=0) 564 | self.Conv3d_2b_1x1 = 
Unit3D(in_channels=64, output_channels=64, kernel_shape=[1, 1, 1], padding=0,name='Conv3d_2b_1x1') 565 | self.Conv3d_2c_3x3 = Unit3D(in_channels=64, output_channels=192, kernel_shape=[3, 3, 3], padding=1, name='Conv3d_2c_3x3') 566 | self.maxpool_1 = MaxPool3dSamePadding(kernel_size=[1, 3, 3], stride=(1, 2, 2), padding=0) 567 | self.Mixed_3b = InceptionModule(192, [64, 96, 128, 16, 32, 32], 'Mixed_3b') 568 | self.Mixed_3c = InceptionModule(256, [128, 128, 192, 32, 96, 64], 'Mixed_3c') 569 | self.maxpool_2 = MaxPool3dSamePadding(kernel_size=[3, 3, 3], stride=(2, 2, 2), padding=0) 570 | self.Mixed_4b = InceptionModule(128 + 192 + 96 + 64, [192, 96, 208, 16, 48, 64], 'Mixed_4b') 571 | self.Mixed_4c = InceptionModule(192 + 208 + 48 + 64, [160, 112, 224, 24, 64, 64], 'Mixed_4c') 572 | self.Mixed_4d = InceptionModule(160 + 224 + 64 + 64, [128, 128, 256, 24, 64, 64], 'Mixed_4d') 573 | self.Mixed_4e = InceptionModule(128 + 256 + 64 + 64, [112, 144, 288, 32, 64, 64], 'Mixed_4e') 574 | self.Mixed_4f = InceptionModule(112 + 288 + 64 + 64, [256, 160, 320, 32, 128, 128], 'Mixed_4f') 575 | self.maxpool_3 = MaxPool3dSamePadding(kernel_size=[2, 2, 2], stride=(2, 2, 2), padding=0) 576 | self.Mixed_5b = InceptionModule(256 + 320 + 128 + 128, [256, 160, 320, 32, 128, 128], 'Mixed_5b') 577 | self.Mixed_5c = InceptionModule(256 + 320 + 128 + 128, [384, 192, 384, 48, 128, 128], 'Mixed_5c') 578 | self.avg_pool = nn.AvgPool3d(kernel_size=[2, 7, 7], 579 | stride=(1, 1, 1)) 580 | self.dropout = nn.Dropout(dropout_prob) 581 | self.dropout_probality = dropout_prob 582 | 583 | 584 | #=================================Multi Stride Multi Path Compress Network====================== 585 | self.s_depend = MultiDependBlock(480, self._num_classes, concat=False, fc=False) 586 | self.m_depend = MultiDependBlock(832, self._num_classes, concat=False, fc=False) 587 | self.l_depend = MultiDependBlock(1024, self._num_classes, concat=False, fc=False) 588 | self.concat = False 589 | self.fc_fusion = False 590 | if self.concat: 591 | self.fc = nn.Linear(self._num_classes*9, self._num_classes) 592 | def _upsample_add(self, x, y): 593 | '''Upsample and add two feature maps. 594 | Args: 595 | x: (Variable) top feature map to be upsampled. 596 | y: (Variable) lateral feature map. 597 | Returns: 598 | (Variable) added feature map. 599 | Note in PyTorch, when input size is odd, the upsampled feature map 600 | with `F.upsample(..., scale_factor=2, mode='nearest')` 601 | maybe not equal to the lateral feature map size. 602 | e.g. 603 | original input size: [N,_,15,15] -> 604 | conv2d feature map size: [N,_,8,8] -> 605 | upsampled feature map size: [N,_,16,16] 606 | So we choose bilinear upsample which supports arbitrary output sizes. 
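        Here the top map x is trilinearly interpolated to y's (T, H, W) size and the two
        maps are averaged (each scaled by 1/2) rather than simply added; e.g. x of shape
        [N, C, 4, 7, 7] and y of shape [N, C, 8, 14, 14] give a result of shape
        [N, C, 8, 14, 14].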
607 | ''' 608 | _, _, T, H, W = y.size() 609 | return F.interpolate(x, size=(T, H, W), mode='trilinear', align_corners=True)/2 + y/2 610 | #return F.upsample(x, size=(T, H, W), mode='trilinear') + y 611 | 612 | def constrain(self, x): 613 | alpha = 0.2 614 | beta = 5 615 | return 1/(beta+exp(-x)) + alpha 616 | 617 | def forward(self, x): 618 | x = self.Conv3d_1a_7x7(x) 619 | x = self.MaxPool3d_2a_3x3(x) 620 | x = self.Conv3d_2b_1x1(x) 621 | x = self.Conv3d_2c_3x3(x) 622 | x = self.maxpool_1(x) 623 | x = self.Mixed_3b(x) 624 | x = self.Mixed_3c(x) 625 | path_s = x 626 | x = self.maxpool_2(x) 627 | x = self.Mixed_4b(x) 628 | x = self.Mixed_4c(x) 629 | x = self.Mixed_4d(x) 630 | x = self.Mixed_4e(x) 631 | x = self.Mixed_4f(x) 632 | path_m = x 633 | x = self.maxpool_3(x) 634 | x = self.Mixed_5b(x) 635 | x = self.Mixed_5c(x) 636 | path_l = x 637 | plot_s = path_s 638 | plot_m = path_m 639 | plot_l = path_l 640 | path_s = self.s_depend(path_s) 641 | path_m = self.m_depend(path_m) 642 | path_l = self.l_depend(path_l) 643 | #main_path = self.main_depend(x) 644 | main_path = path_m + path_l + path_s 645 | if self.concat: 646 | out = torch.cat((self.s_depend(path_s), self.m_depend(path_m), self.l_depend(path_l)), dim=1) 647 | return self.fc(out) #+ temporal_path 648 | else: 649 | return main_path, plot_s, plot_m, plot_l, path_s, path_m, path_l 650 | -------------------------------------------------------------------------------- /net/r3d.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2020-09-17 15:59 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : amax_Action_Video_Visualization 8 | # @File : r3d.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | 16 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 17 | 18 | """C3D""" 19 | import torch 20 | import torch.nn as nn 21 | import torch.nn.functional as F 22 | from torch.autograd import Variable 23 | import math 24 | from functools import partial 25 | from net.model import Flatten, Normalize 26 | 27 | __all__ = [ 28 | 'ResNet', 'resnet10', 'resnet18', 'resnet34', 'resnet50', 'resnet101', 29 | 'resnet152', 'resnet200' 30 | ] 31 | 32 | 33 | def conv3x3x3(in_planes, out_planes, stride=1): 34 | # 3x3x3 convolution with padding 35 | return nn.Conv3d( 36 | in_planes, 37 | out_planes, 38 | kernel_size=3, 39 | stride=stride, 40 | padding=1, 41 | bias=False) 42 | 43 | 44 | def downsample_basic_block(x, planes, stride): 45 | out = F.avg_pool3d(x, kernel_size=1, stride=stride) 46 | zero_pads = torch.Tensor( 47 | out.size(0), planes - out.size(1), out.size(2), out.size(3), 48 | out.size(4)).zero_() 49 | if isinstance(out.data, torch.cuda.FloatTensor): 50 | zero_pads = zero_pads.cuda() 51 | 52 | out = Variable(torch.cat([out.data, zero_pads], dim=1)) 53 | 54 | return out 55 | 56 | 57 | class BasicBlock(nn.Module): 58 | expansion = 1 59 | 60 | def __init__(self, inplanes, planes, stride=1, downsample=None): 61 | super(BasicBlock, self).__init__() 62 | self.conv1 = conv3x3x3(inplanes, planes, stride) 63 | self.bn1 = nn.BatchNorm3d(planes) 64 | self.relu = nn.ReLU(inplace=True) 65 | self.conv2 = conv3x3x3(planes, planes) 66 | self.bn2 = nn.BatchNorm3d(planes) 67 | self.downsample = downsample 68 | self.stride = stride 69 | 70 | def forward(self, x): 71 | residual = x 72 | 73 | out = self.conv1(x) 74 | out = self.bn1(out) 75 | out 
= self.relu(out) 76 | 77 | out = self.conv2(out) 78 | out = self.bn2(out) 79 | 80 | if self.downsample is not None: 81 | residual = self.downsample(x) 82 | 83 | out += residual 84 | out = self.relu(out) 85 | 86 | return out 87 | 88 | 89 | class Bottleneck(nn.Module): 90 | expansion = 4 91 | 92 | def __init__(self, inplanes, planes, stride=1, downsample=None): 93 | super(Bottleneck, self).__init__() 94 | self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False) 95 | self.bn1 = nn.BatchNorm3d(planes) 96 | self.conv2 = nn.Conv3d( 97 | planes, planes, kernel_size=3, stride=stride, padding=1, bias=False) 98 | self.bn2 = nn.BatchNorm3d(planes) 99 | self.conv3 = nn.Conv3d(planes, planes * 4, kernel_size=1, bias=False) 100 | self.bn3 = nn.BatchNorm3d(planes * 4) 101 | self.relu = nn.ReLU(inplace=True) 102 | self.downsample = downsample 103 | self.stride = stride 104 | 105 | def forward(self, x): 106 | residual = x 107 | 108 | out = self.conv1(x) 109 | out = self.bn1(out) 110 | out = self.relu(out) 111 | 112 | out = self.conv2(out) 113 | out = self.bn2(out) 114 | out = self.relu(out) 115 | 116 | out = self.conv3(out) 117 | out = self.bn3(out) 118 | 119 | if self.downsample is not None: 120 | residual = self.downsample(x) 121 | 122 | out += residual 123 | out = self.relu(out) 124 | 125 | return out 126 | 127 | 128 | class ResNet(nn.Module): 129 | def __init__(self, 130 | block, 131 | layers, 132 | sample_size=224, 133 | sample_duration=16, 134 | shortcut_type='B', 135 | num_classes=400, 136 | with_classifier=True): 137 | self.inplanes = 64 138 | super(ResNet, self).__init__() 139 | self.conv1 = nn.Conv3d( 140 | 3, 141 | 64, 142 | kernel_size=7, 143 | stride=(1, 2, 2), 144 | padding=(3, 3, 3), 145 | bias=False) 146 | self.bn1 = nn.BatchNorm3d(64) 147 | self.relu = nn.ReLU(inplace=True) 148 | self.maxpool = nn.MaxPool3d(kernel_size=(3, 3, 3), stride=2, padding=1) 149 | self.layer1 = self._make_layer(block, 64, layers[0], shortcut_type) 150 | self.layer2 = self._make_layer( 151 | block, 128, layers[1], shortcut_type, stride=2) 152 | self.layer3 = self._make_layer( 153 | block, 256, layers[2], shortcut_type, stride=2) 154 | self.layer4 = self._make_layer( 155 | block, 512, layers[3], shortcut_type, stride=2) 156 | last_duration = int(math.ceil(sample_duration / 16)) 157 | last_size = int(math.ceil(sample_size / 32)) 158 | self.with_classifier = with_classifier 159 | if with_classifier: 160 | # self.avgpool_custom = nn.AvgPool3d( 161 | # (1, last_size, last_size), stride=1) 162 | # self.cp = nn.Conv3d(in_channels=512 * block.expansion, out_channels=num_classes, 163 | # kernel_size=(last_duration, 1, 1), bias=False) 164 | self.avgpool = nn.AdaptiveAvgPool3d(1) 165 | self.fc = nn.Linear(512 * block.expansion, num_classes) 166 | else: 167 | self.id_head = nn.Sequential( 168 | torch.nn.AdaptiveAvgPool3d((1, 1, 1)), 169 | Flatten(), 170 | torch.nn.Linear(512, 128), 171 | Normalize(2) 172 | ) 173 | self.cls_head = nn.Sequential(torch.nn.AdaptiveAvgPool3d(1), 174 | Flatten(), 175 | torch.nn.Linear(512, 200) 176 | ) 177 | self.feature_head = nn.Sequential(torch.nn.AdaptiveAvgPool3d(1), 178 | Flatten(), 179 | Normalize(2) 180 | ) 181 | for m in self.modules(): 182 | if isinstance(m, nn.Conv3d): 183 | m.weight = nn.init.kaiming_normal_(m.weight, mode='fan_out') 184 | elif isinstance(m, nn.BatchNorm3d): 185 | m.weight.data.fill_(1) 186 | m.bias.data.zero_() 187 | 188 | def _make_layer(self, block, planes, blocks, shortcut_type, stride=1): 189 | downsample = None 190 | if stride != 1 or self.inplanes != 
planes * block.expansion: 191 | if shortcut_type == 'A': 192 | downsample = partial( 193 | downsample_basic_block, 194 | planes=planes * block.expansion, 195 | stride=stride) 196 | else: 197 | downsample = nn.Sequential( 198 | nn.Conv3d( 199 | self.inplanes, 200 | planes * block.expansion, 201 | kernel_size=1, 202 | stride=stride, 203 | bias=False), nn.BatchNorm3d(planes * block.expansion)) 204 | 205 | layers = [] 206 | layers.append(block(self.inplanes, planes, stride, downsample)) 207 | self.inplanes = planes * block.expansion 208 | for i in range(1, blocks): 209 | layers.append(block(self.inplanes, planes)) 210 | 211 | return nn.Sequential(*layers) 212 | 213 | def forward(self, x, return_conv=False): 214 | x = self.conv1(x) 215 | x = self.bn1(x) 216 | x = self.relu(x) 217 | x = self.maxpool(x) 218 | 219 | x = self.layer1(x) 220 | x = self.layer2(x) 221 | x = self.layer3(x) 222 | x = self.layer4(x) 223 | feature = x 224 | x = self.avgpool(x) 225 | # x = self.cp(self.avgpool_custom(x)) 226 | x = x.squeeze(3).squeeze(3).mean(2) 227 | x = self.fc(x) 228 | return F.log_softmax(x, dim=1), feature 229 | 230 | 231 | def get_fine_tuning_parameters(model, ft_begin_index): 232 | if ft_begin_index == 0: 233 | return model.parameters() 234 | 235 | ft_module_names = [] 236 | for i in range(ft_begin_index, 5): 237 | ft_module_names.append('layer{}'.format(i)) 238 | ft_module_names.append('fc') 239 | 240 | parameters = [] 241 | for k, v in model.named_parameters(): 242 | for ft_module in ft_module_names: 243 | if ft_module in k: 244 | parameters.append({'params': v}) 245 | break 246 | else: 247 | parameters.append({'params': v, 'lr': 0.0}) 248 | 249 | return parameters 250 | 251 | 252 | def resnet10(**kwargs): 253 | """Constructs a ResNet-18 model. 254 | """ 255 | model = ResNet(BasicBlock, [1, 1, 1, 1], **kwargs) 256 | return model 257 | 258 | 259 | def resnet18(**kwargs): 260 | """Constructs a ResNet-18 model. 261 | """ 262 | model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs) 263 | return model 264 | 265 | 266 | def resnet34(**kwargs): 267 | """Constructs a ResNet-34 model. 268 | """ 269 | model = ResNet(BasicBlock, [3, 4, 6, 3], **kwargs) 270 | return model 271 | 272 | 273 | def resnet50(**kwargs): 274 | """Constructs a ResNet-50 model. 275 | """ 276 | model = ResNet(Bottleneck, [3, 4, 6, 3], **kwargs) 277 | return model 278 | 279 | 280 | def resnet101(**kwargs): 281 | """Constructs a ResNet-101 model. 282 | """ 283 | model = ResNet(Bottleneck, [3, 4, 23, 3], **kwargs) 284 | return model 285 | 286 | 287 | def resnet152(**kwargs): 288 | """Constructs a ResNet-101 model. 289 | """ 290 | model = ResNet(Bottleneck, [3, 8, 36, 3], **kwargs) 291 | return model 292 | 293 | 294 | def resnet200(**kwargs): 295 | """Constructs a ResNet-101 model. 
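    Example (illustrative): model = resnet200(num_classes=101); calling model(clip) on a
    clip tensor such as torch.randn(1, 3, 16, 112, 112) returns a tuple of per-class
    log-probabilities and the feature map of the last residual stage.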
296 | """ 297 | model = ResNet(Bottleneck, [3, 24, 36, 3], **kwargs) 298 | return model -------------------------------------------------------------------------------- /output/imgs/79/focusmap_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/focusmap_000.png -------------------------------------------------------------------------------- /output/imgs/79/focusmap_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/focusmap_001.png -------------------------------------------------------------------------------- /output/imgs/79/focusmap_002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/focusmap_002.png -------------------------------------------------------------------------------- /output/imgs/79/focusmap_003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/focusmap_003.png -------------------------------------------------------------------------------- /output/imgs/79/focusmap_004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/focusmap_004.png -------------------------------------------------------------------------------- /output/imgs/79/focusmap_005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/focusmap_005.png -------------------------------------------------------------------------------- /output/imgs/79/focusmap_006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/focusmap_006.png -------------------------------------------------------------------------------- /output/imgs/79/heatmap_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/heatmap_000.png -------------------------------------------------------------------------------- /output/imgs/79/heatmap_001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/heatmap_001.png -------------------------------------------------------------------------------- /output/imgs/79/heatmap_002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/heatmap_002.png -------------------------------------------------------------------------------- /output/imgs/79/heatmap_003.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/heatmap_003.png -------------------------------------------------------------------------------- /output/imgs/79/heatmap_004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/heatmap_004.png -------------------------------------------------------------------------------- /output/imgs/79/heatmap_005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/heatmap_005.png -------------------------------------------------------------------------------- /output/imgs/79/heatmap_006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/imgs/79/heatmap_006.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/000.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/001.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/002.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/003.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/004.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/005.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/006.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/006.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/007.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/008.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/009.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/010.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/011.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/012.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/013.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/014.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/focusmap/015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/focusmap/015.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/000.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/000.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/001.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/001.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/002.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/002.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/003.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/004.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/005.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/006.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/007.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/008.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/009.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/010.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/010.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/011.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/012.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/013.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/013.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/014.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/014.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/heatmap/015.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/ucf101_test_1/0/heatmap/015.png -------------------------------------------------------------------------------- /output/ucf101_test_1/0/info.txt: -------------------------------------------------------------------------------- 1 | Visualizing for class 0 2 | Predicted class 0 3 | Visualizing for class 0 4 | Predicted class 0 5 | -------------------------------------------------------------------------------- /output/video/label_0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/video/label_0.mp4 -------------------------------------------------------------------------------- /output/video/label_28.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/video/label_28.mp4 -------------------------------------------------------------------------------- /output/video/label_471.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/output/video/label_471.mp4 -------------------------------------------------------------------------------- /process_all_hmdb51_videos.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-04-27 23:25 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : amax_Action_Video_Visualization 8 | # @File : 
process_all_hmdb51_videos.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | from main import heat_map_api 16 | import time 17 | import datetime 18 | 19 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 20 | videos_dir = "/data1/DataSet/Hmdb51/hmdb51_mpeg/" 21 | output_dirs = "output/self_supervised_fine_tune/" 22 | frames_num = 16 23 | clip_steps = 8 24 | classes_list = "resources/hmdb51_classInd.txt" 25 | 26 | 27 | classes = {} 28 | with open(classes_list) as f: 29 | for line in f.readlines(): 30 | info = line.strip().split(' ') 31 | classes[info[1]] = int(info[0]) 32 | count = 0 33 | videos_num = 7000 34 | begin =time.time() 35 | for dir in os.listdir(videos_dir): 36 | for video in os.listdir(os.path.join(videos_dir,dir)): 37 | count += 1 38 | video_path = os.path.join(videos_dir, dir, video) 39 | label = classes[dir] 40 | output_dir = os.path.join(output_dirs, dir, video.split('.')[0]) 41 | if not os.path.exists(os.path.join(output_dirs, dir)): 42 | os.mkdir(os.path.join(output_dirs, dir)) 43 | if not os.path.exists(output_dir): 44 | os.mkdir(output_dir) 45 | else: 46 | continue 47 | try: 48 | heat_map_api(video_path, frames_num, clip_steps, output_dir, label, classes_list) 49 | except TypeError: 50 | print("video not found ") 51 | continue 52 | end = time.time() 53 | # datetime.datetime.fromtimestamp(1421077403.0) 54 | # print("have processed {}/{} videos, left time: {}".format(count, videos_num, (end-begin)/count*(videos_num-count))) 55 | print("have processed {}/{} videos, will be finished in: {}".format(count, videos_num, 56 | datetime.datetime.fromtimestamp(time.time() + (end - begin) / count * (videos_num - count)))) 57 | -------------------------------------------------------------------------------- /resources/HMDB_snapshot1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/HMDB_snapshot1.png -------------------------------------------------------------------------------- /resources/HMDB_snapshot2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/HMDB_snapshot2.png -------------------------------------------------------------------------------- /resources/classInd.txt: -------------------------------------------------------------------------------- 1 | 1 ApplyEyeMakeup 2 | 2 ApplyLipstick 3 | 3 Archery 4 | 4 BabyCrawling 5 | 5 BalanceBeam 6 | 6 BandMarching 7 | 7 BaseballPitch 8 | 8 Basketball 9 | 9 BasketballDunk 10 | 10 BenchPress 11 | 11 Biking 12 | 12 Billiards 13 | 13 BlowDryHair 14 | 14 BlowingCandles 15 | 15 BodyWeightSquats 16 | 16 Bowling 17 | 17 BoxingPunchingBag 18 | 18 BoxingSpeedBag 19 | 19 BreastStroke 20 | 20 BrushingTeeth 21 | 21 CleanAndJerk 22 | 22 CliffDiving 23 | 23 CricketBowling 24 | 24 CricketShot 25 | 25 CuttingInKitchen 26 | 26 Diving 27 | 27 Drumming 28 | 28 Fencing 29 | 29 FieldHockeyPenalty 30 | 30 FloorGymnastics 31 | 31 FrisbeeCatch 32 | 32 FrontCrawl 33 | 33 GolfSwing 34 | 34 Haircut 35 | 35 Hammering 36 | 36 HammerThrow 37 | 37 HandstandPushups 38 | 38 HandstandWalking 39 | 39 HeadMassage 40 | 40 HighJump 41 | 41 HorseRace 42 | 42 HorseRiding 43 | 43 HulaHoop 44 | 44 IceDancing 45 | 45 JavelinThrow 46 | 46 JugglingBalls 47 | 
47 JumpingJack 48 | 48 JumpRope 49 | 49 Kayaking 50 | 50 Knitting 51 | 51 LongJump 52 | 52 Lunges 53 | 53 MilitaryParade 54 | 54 Mixing 55 | 55 MoppingFloor 56 | 56 Nunchucks 57 | 57 ParallelBars 58 | 58 PizzaTossing 59 | 59 PlayingCello 60 | 60 PlayingDaf 61 | 61 PlayingDhol 62 | 62 PlayingFlute 63 | 63 PlayingGuitar 64 | 64 PlayingPiano 65 | 65 PlayingSitar 66 | 66 PlayingTabla 67 | 67 PlayingViolin 68 | 68 PoleVault 69 | 69 PommelHorse 70 | 70 PullUps 71 | 71 Punch 72 | 72 PushUps 73 | 73 Rafting 74 | 74 RockClimbingIndoor 75 | 75 RopeClimbing 76 | 76 Rowing 77 | 77 SalsaSpin 78 | 78 ShavingBeard 79 | 79 Shotput 80 | 80 SkateBoarding 81 | 81 Skiing 82 | 82 Skijet 83 | 83 SkyDiving 84 | 84 SoccerJuggling 85 | 85 SoccerPenalty 86 | 86 StillRings 87 | 87 SumoWrestling 88 | 88 Surfing 89 | 89 Swing 90 | 90 TableTennisShot 91 | 91 TaiChi 92 | 92 TennisSwing 93 | 93 ThrowDiscus 94 | 94 TrampolineJumping 95 | 95 Typing 96 | 96 UnevenBars 97 | 97 VolleyballSpiking 98 | 98 WalkingWithDog 99 | 99 WallPushups 100 | 100 WritingOnBoard 101 | 101 YoYo 102 | -------------------------------------------------------------------------------- /resources/focusimg_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/focusimg_1.png -------------------------------------------------------------------------------- /resources/heatmap_000.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/heatmap_000.png -------------------------------------------------------------------------------- /resources/heatmap_000_sc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/heatmap_000_sc.png -------------------------------------------------------------------------------- /resources/heatmap_003.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/heatmap_003.png -------------------------------------------------------------------------------- /resources/heatmap_003_sc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/heatmap_003_sc.png -------------------------------------------------------------------------------- /resources/heatmap_007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/heatmap_007.png -------------------------------------------------------------------------------- /resources/heatmap_007_sc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/heatmap_007_sc.png -------------------------------------------------------------------------------- /resources/heatmap_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/heatmap_1.png -------------------------------------------------------------------------------- /resources/hmdb51_classInd.txt: -------------------------------------------------------------------------------- 1 | 1 brush_hair 2 | 2 cartwheel 3 | 3 catch 4 | 4 chew 5 | 5 clap 6 | 6 climb 7 | 7 climb_stairs 8 | 8 dive 9 | 9 draw_sword 10 | 10 dribble 11 | 11 drink 12 | 12 eat 13 | 13 fall_floor 14 | 14 fencing 15 | 15 flic_flac 16 | 16 golf 17 | 17 handstand 18 | 18 hit 19 | 19 hug 20 | 20 jump 21 | 21 kick 22 | 22 kick_ball 23 | 23 kiss 24 | 24 laugh 25 | 25 pick 26 | 26 pour 27 | 27 pullup 28 | 28 punch 29 | 29 push 30 | 30 pushup 31 | 31 ride_bike 32 | 32 ride_horse 33 | 33 run 34 | 34 shake_hands 35 | 35 shoot_ball 36 | 36 shoot_bow 37 | 37 shoot_gun 38 | 38 sit 39 | 39 situp 40 | 40 smile 41 | 41 smoke 42 | 42 somersault 43 | 43 stand 44 | 44 swing_baseball 45 | 45 sword_exercise 46 | 46 sword 47 | 47 talk 48 | 48 throw 49 | 49 turn 50 | 50 walk 51 | 51 wave 52 | -------------------------------------------------------------------------------- /resources/supervised.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/supervised.gif -------------------------------------------------------------------------------- /resources/unsupervised.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/resources/unsupervised.gif -------------------------------------------------------------------------------- /scripts/c3d_unsupervised_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python main.py --num_classes 51 \ 3 | --arch c3d \ 4 | --classes_list resources/hmdb51_classInd.txt \ 5 | --model_weights pretrained_model/c3d-pretrained.pth \ 6 | --video test_videos/punch_28.mp4 \ 7 | --frames_num 16 --label 28 --clip_steps 8 \ 8 | --output_dir output --gpus 1 --supervised unsupervised -------------------------------------------------------------------------------- /scripts/demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python main.py --num_classes 101 \ 3 | --arch mf_net \ 4 | --classes_list resources/classInd.txt \ 5 | --model_weights pretrained_model/MFNet3D_UCF-101_Split-1_96.3.pth \ 6 | --video test_videos/v_ApplyEyeMakeup_g01_c01.avi \ 7 | --frames_num 16 --label 0 --clip_steps 16 \ 8 | --output_dir output -------------------------------------------------------------------------------- /scripts/i3d_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python main.py --num_classes 51 \ 3 | --arch i3d \ 4 | --classes_list resources/hmdb51_classInd.txt \ 5 | --model_weights pretrained_model/hmdb51_rgb_gl_randomrotation_3flip_mixup_way2_1loss_stride_1_12_26_checkpoint_37.77.pth.tar \ 6 | --video test_videos/punch_28.mp4 \ 7 | --frames_num 16 --label 28 --clip_steps 16 \ 8 | --output_dir output --gpus 1 -------------------------------------------------------------------------------- /scripts/i3d_mixup_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 
2 | for MIXUP_TYPE in {1..9..2} 3 | do 4 | python main.py --num_classes 51 \ 5 | --arch i3d \ 6 | --classes_list resources/hmdb51_classInd.txt \ 7 | --model_weights pretrained_model/31.372_i3dpt_rgb_model_best.pth.tar \ 8 | --video test_videos/drive_0.$MIXUP_TYPE.mp4 \ 9 | --frames_num 16 --label 111$MIXUP_TYPE --clip_steps 4 \ 10 | --output_dir output --gpus 1 --supervised self_supervised 11 | done 12 | for MIXUP_TYPE in {1..9..2} 13 | do 14 | python main.py --num_classes 51 \ 15 | --arch i3d \ 16 | --classes_list resources/hmdb51_classInd.txt \ 17 | --model_weights pretrained_model/36.209_i3dpt_rgb_model_best.pth.tar \ 18 | --video test_videos/drive_0.$MIXUP_TYPE.mp4 \ 19 | --frames_num 16 --label 112$MIXUP_TYPE --clip_steps 4 \ 20 | --output_dir output --gpus 1 --supervised self_supervised 21 | done -------------------------------------------------------------------------------- /scripts/i3d_rotate_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # scratch 3 | for ROTATE_TYPE in {0..15} 4 | do 5 | echo "$ROTATE_TYPE / 16 finished" 6 | python main.py --num_classes 51 \ 7 | --arch i3d \ 8 | --classes_list resources/hmdb51_classInd.txt \ 9 | --model_weights pretrained_model/31.372_i3dpt_rgb_model_best.pth.tar \ 10 | --video test_videos/shoot_gun_r_type$ROTATE_TYPE.mp4 \ 11 | --frames_num 16 --label 52$ROTATE_TYPE --clip_steps 8 \ 12 | --output_dir output --gpus 1 --supervised self_supervised 13 | done 14 | 15 | # self-supervised 16 | for ROTATE_TYPE in {0..15} 17 | do 18 | echo "$ROTATE_TYPE / 16 finished" 19 | python main.py --num_classes 51 \ 20 | --arch i3d \ 21 | --classes_list resources/hmdb51_classInd.txt \ 22 | --model_weights pretrained_model/36.209_i3dpt_rgb_model_best.pth.tar \ 23 | --video test_videos/shoot_gun_r_type$ROTATE_TYPE.mp4 \ 24 | --frames_num 16 --label 53$ROTATE_TYPE --clip_steps 8 \ 25 | --output_dir output --gpus 1 --supervised self_supervised 26 | done -------------------------------------------------------------------------------- /scripts/i3d_unsupervised_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python main.py --num_classes 51 \ 3 | --arch i3d \ 4 | --classes_list resources/hmdb51_classInd.txt \ 5 | --model_weights pretrained_model/77.254_mpi3d_rgb_model_best.pth.tar \ 6 | --video test_videos/punch_28.mp4 \ 7 | --frames_num 16 --label 28 --clip_steps 16 \ 8 | --output_dir output --gpus 1 --supervised unsupervised -------------------------------------------------------------------------------- /scripts/mpi3d_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python main.py --num_classes 51 \ 3 | --arch mpi3d \ 4 | --classes_list resources/hmdb51_classInd.txt \ 5 | --model_weights pretrained_model/77.254_mpi3d_rgb_model_best.pth.tar \ 6 | --video test_videos/punch_28.mp4 \ 7 | --frames_num 64 --label 28 --clip_steps 1 \ 8 | --output_dir output --gpus 2 -------------------------------------------------------------------------------- /scripts/r3d_unsupervised_demo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | python main.py --num_classes 51 \ 3 | --arch r3d \ 4 | --classes_list resources/hmdb51_classInd.txt \ 5 | --model_weights pretrained_model/r3d50_K_200ep.pth \ 6 | --video test_videos/punch_28.mp4 \ 7 | --frames_num 16 --label 28 --clip_steps 8 \ 8 | --output_dir output --gpus 1 
--supervised unsupervised -------------------------------------------------------------------------------- /test_videos/50_FIRST_DATES_drink_u_nm_np1_fr_goo_29.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/50_FIRST_DATES_drink_u_nm_np1_fr_goo_29.mp4 -------------------------------------------------------------------------------- /test_videos/BASE_Jumping_Compilation_-_Brilliant_dive_f_cm_np1_le_bad_3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/BASE_Jumping_Compilation_-_Brilliant_dive_f_cm_np1_le_bad_3.mp4 -------------------------------------------------------------------------------- /test_videos/BaseballSwingAnalysis_swing_baseball_u_nm_np1_ba_med_0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/BaseballSwingAnalysis_swing_baseball_u_nm_np1_ba_med_0.mp4 -------------------------------------------------------------------------------- /test_videos/Bodenturnen_im_sportunterricht_handstand_f_cm_np1_le_med_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/Bodenturnen_im_sportunterricht_handstand_f_cm_np1_le_med_1.mp4 -------------------------------------------------------------------------------- /test_videos/Bruno_Walks_up_Stairs_-_Chicago_Dog_Training_-_We_can_teach_ANYTHING_to_a_dog!!!!_climb_stairs_f_cm_np1_fr_med_0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/Bruno_Walks_up_Stairs_-_Chicago_Dog_Training_-_We_can_teach_ANYTHING_to_a_dog!!!!_climb_stairs_f_cm_np1_fr_med_0.mp4 -------------------------------------------------------------------------------- /test_videos/DefensivePistolShootingTechniques_shoot_gun_f_nm_np1_fr_med_3.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/DefensivePistolShootingTechniques_shoot_gun_f_nm_np1_fr_med_3.mp4 -------------------------------------------------------------------------------- /test_videos/Documentario_Le_Parkour_Londrina_jump_f_cm_np1_ri_bad_6.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/Documentario_Le_Parkour_Londrina_jump_f_cm_np1_ri_bad_6.mp4 -------------------------------------------------------------------------------- /test_videos/v_ApplyEyeMakeup_g01_c01.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/v_ApplyEyeMakeup_g01_c01.avi -------------------------------------------------------------------------------- /test_videos/v_HeadMassage_g02_c05.avi: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FingerRec/3DNet_Visualization/974aef9536cd89b6fc15feba8d73dc54ad193114/test_videos/v_HeadMassage_g02_c05.avi -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2019-03-17 11:55 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : Action_Video_Visualization 8 | # @File : util.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | 16 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 17 | 18 | import cv2 19 | import numpy as np 20 | import torch 21 | import skvideo.io 22 | 23 | def video_frame_count(video_path): 24 | cap = cv2.VideoCapture(video_path) 25 | if not cap.isOpened(): 26 | print("could not open: ", video_path) 27 | return -1 28 | length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) 29 | width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH) ) 30 | height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT) ) 31 | return length, width, height 32 | 33 | 34 | def visualization(video_path, fps=30): 35 | cap = cv2.VideoCapture(video_path) 36 | while cap.isOpened(): 37 | ret, frame = cap.read() 38 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 39 | 40 | cv2.imshow('frame', gray) 41 | if cv2.waitKey(1000 / fps) & 0xFF == ord('q'): 42 | break 43 | 44 | cap.release() 45 | cv2.destroyAllWindows() 46 | 47 | 48 | def save_as_video(output_dir, frames, label): 49 | # save video 50 | if not os.path.exists(output_dir + '/video'): 51 | os.makedirs(output_dir + '/video') 52 | # print(output_dir + '/video') 53 | # print(skvideo._FFMPEG_SUPPORTED_ENCODERS) 54 | output_path = '{}/video/label_{}.mp4'.format(output_dir, label) 55 | writer = skvideo.io.FFmpegWriter(output_path, 56 | outputdict={'-b': '300000000'}) 57 | # writer.set(cv2.CAP_PROP_FRAME_WIDTH, 224) 58 | # writer.set(cv2.CAP_PROP_FRAME_HEIGHT, 448) 59 | for frame in frames: 60 | new_frame = cv2.cvtColor(np.uint8(frame), cv2.COLOR_BGR2RGB) 61 | writer.writeFrame(new_frame) 62 | writer.close() 63 | print('The video result has been saved in {}.'.format(output_dir+'/video')) 64 | return output_dir + '/video' 65 | 66 | def save_as_imgs(output_dir, frames, frames_num, label, prefix='heatmap_'): 67 | #save imgs 68 | if not os.path.exists(output_dir + '/imgs/' + str(label)): 69 | os.makedirs(output_dir + '/imgs/' + str(label)) 70 | for i in range(frames_num): 71 | cv2.imwrite(os.path.join(output_dir + '/imgs/' + str(label), prefix + '{:03d}.png'.format(i)), frames[i]) 72 | print('These images has been saved in {}.'.format(output_dir + '/imgs')) 73 | return output_dir + '/imgs' 74 | 75 | 76 | def center_crop(data, tw=224, th=224): 77 | h, w, c = data.shape 78 | x1 = int(round((w - tw) / 2.)) 79 | y1 = int(round((h - th) / 2.)) 80 | cropped_data = data[y1:(y1 + th), x1:(x1 + tw), :] 81 | return cropped_data 82 | 83 | 84 | def load_images(frame_dir, selected_frames): 85 | images = np.zeros((16, 224, 224, 3)) 86 | orig_imgs = np.zeros_like(images) 87 | for i, frame_name in enumerate(selected_frames): 88 | im_name = os.path.join(frame_dir, frame_name) 89 | next_image = cv2.imread(im_name, cv2.IMREAD_COLOR) 90 | scaled_img = cv2.resize(next_image, (256, 256), interpolation=cv2.INTER_LINEAR) # resize to 256x256 91 | cropped_img = center_crop(scaled_img) 
# center crop 224x224 92 | final_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB) 93 | images[i] = final_img 94 | orig_imgs[i] = cropped_img 95 | 96 | torch_imgs = torch.from_numpy(images.transpose(3, 0, 1, 2)) 97 | torch_imgs = torch_imgs.float() / 255.0 98 | mean_3d = [124 / 255, 117 / 255, 104 / 255] 99 | std_3d = [0.229, 0.224, 0.225] 100 | for t, m, s in zip(torch_imgs, mean_3d, std_3d): 101 | t.sub_(m).div_(s) 102 | return np.expand_dims(orig_imgs, 0), torch_imgs.unsqueeze(0) 103 | 104 | def put_text(img, text, position, scale_factor=0.4): 105 | t_w, t_h = cv2.getTextSize( 106 | text, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, thickness=1)[0] 107 | H, W, _ = img.shape 108 | position = (int(W * position[1] - t_w * 0.5), int(H * position[0] - t_h * 0.5)) 109 | params = (position, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, 110 | (255,255,255)) 111 | cv2.putText(img, text, *params) -------------------------------------------------------------------------------- /utils/gen_new_video.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import skvideo.io 3 | import numpy as np 4 | 5 | 6 | def mixup_data(x, loss_prob): 7 | num = len(x) 8 | img_index = np.random.randint(num) 9 | mixed_x = x 10 | for i in range(num): 11 | mixed_x[i] = (1-loss_prob) * x[i] + loss_prob * x[img_index] 12 | return mixed_x 13 | 14 | 15 | def read_video(video): 16 | cap = cv2.VideoCapture(video) 17 | frames = list() 18 | while True: 19 | ret, frame = cap.read() 20 | if type(frame) is type(None): 21 | break 22 | else: 23 | frames.append(frame) 24 | return frames 25 | 26 | 27 | def write_video(name, frames): 28 | writer = skvideo.io.FFmpegWriter(name, 29 | outputdict={'-b': '300000000'}) 30 | for frame in frames: 31 | writer.writeFrame(frame) 32 | writer.close() 33 | return 1 34 | 35 | 36 | if __name__ == '__main__': 37 | video = 'test_videos/drive.mp4' 38 | for i in range(1, 11, 2): 39 | prob = i / 10 40 | seqs = read_video(video) 41 | seqs = mixup_data(seqs, prob) 42 | name = 'test_videos/drive_{}.mp4'.format(prob) 43 | write_video(name, seqs) 44 | -------------------------------------------------------------------------------- /utils/gen_rotation_data.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import skvideo.io 3 | import numpy as np 4 | 5 | 6 | def rotation_data(x, r_type): 7 | """ 8 | 9 | :param x: 10 | :param r_type: 0: no rotate 1: up-down flip 2: left-right flip 11 | :return: 12 | """ 13 | num = len(x) 14 | x = np.array(x) 15 | mixed_x = list() 16 | f_type = r_type // 4 17 | rota_type = r_type % 4 18 | if f_type == 1: 19 | # print(x[i].shape) 20 | for i in range(num): 21 | mixed_x.append(np.flip(x[i], 0)) 22 | elif f_type == 2: 23 | for i in range(num): 24 | mixed_x.append(x[num-i-1]) 25 | elif f_type == 3: 26 | for i in range(num): 27 | mixed_x.append(np.flip(x[num - i - 1], 0)) 28 | else: 29 | for i in range(num): 30 | mixed_x.append(x[i]) 31 | 32 | if rota_type == 1: 33 | for i in range(num): 34 | mixed_x[i] = np.rot90(mixed_x[i], 1) 35 | elif rota_type == 2: 36 | for i in range(num): 37 | mixed_x[i] = np.rot90(mixed_x[i], 2) 38 | elif rota_type == 3: 39 | for i in range(num): 40 | mixed_x[i] = np.rot90(mixed_x[i], 3) 41 | else: 42 | for i in range(num): 43 | mixed_x[i] = mixed_x[i] 44 | return mixed_x 45 | 46 | 47 | def read_video(video): 48 | cap = cv2.VideoCapture(video) 49 | frames = list() 50 | while True: 51 | ret, frame = cap.read() 52 | if type(frame) is type(None): 53 | break 54 | else: 55 | 
frames.append(frame) 56 | return frames 57 | 58 | 59 | def write_video(name, frames): 60 | writer = skvideo.io.FFmpegWriter(name, 61 | outputdict={'-b': '300000000'}) 62 | for frame in frames: 63 | writer.writeFrame(frame) 64 | writer.close() 65 | return 1 66 | 67 | 68 | if __name__ == '__main__': 69 | video = 'test_videos/shoot_gun.mp4' 70 | for r_type in range(16): 71 | seqs = read_video(video) 72 | seqs = rotation_data(seqs, r_type) 73 | name = 'test_videos/shoot_gun_r_type{}.mp4'.format(r_type) 74 | write_video(name, seqs) 75 | -------------------------------------------------------------------------------- /utils/video_cat.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | # @Time : 2020-09-17 10:17 5 | # @Author : Awiny 6 | # @Site : 7 | # @Project : amax_Action_Video_Visualization 8 | # @File : video_cat.py 9 | # @Software: PyCharm 10 | # @Github : https://github.com/FingerRec 11 | # @Blog : http://fingerrec.github.io 12 | """ 13 | import scipy.io 14 | import os 15 | import sys 16 | sys.path.append("../") 17 | import cv2 18 | from util import save_as_video, video_frame_count 19 | 20 | os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' #close the warning 21 | 22 | 23 | videos_path = '../output/concat_videos' 24 | frames = list() 25 | 26 | for video in os.listdir(videos_path): 27 | try: 28 | length, width, height = video_frame_count(os.path.join(videos_path, video)) 29 | except TypeError: 30 | print("video {} not available".format(os.path.join(videos_path, video))) 31 | continue 32 | cap = cv2.VideoCapture(os.path.join(videos_path, video)) 33 | # q = queue.Queue(self.frames_num) 34 | count = 0 35 | while count < length: 36 | ret, frame = cap.read() 37 | if type(frame) == type(None): 38 | break 39 | else: 40 | count += 1 41 | # print(frame.shape[0]//2) 42 | save_frame = cv2.cvtColor(frame[:frame.shape[0]//2, :, :], cv2.COLOR_BGR2RGB) 43 | cv2.putText(save_frame, 'DSM no label pretrain', (224, 20), cv2.FONT_HERSHEY_COMPLEX, 0.5,(0,255,0), 1) 44 | frames.append(save_frame) 45 | 46 | save_as_video('../output', frames, 'concated') --------------------------------------------------------------------------------