├── .gitignore ├── 3DCNN ├── ARTNet │ ├── ARTNet.py │ └── README.md ├── FstCN │ ├── FstCN.py │ └── README.md ├── I3D │ ├── I3D.py │ └── README.md ├── LTC │ ├── LTC.py │ └── README.md ├── P3D │ ├── P3D.py │ └── README.md ├── R21D_34 │ ├── R21D_34.py │ └── README.md ├── Res3D │ ├── README.md │ └── Res3D.py ├── S3D │ ├── Fast_S3D.py │ ├── README.md │ └── S3D_G.py └── c3d │ ├── README.md │ └── c3d.py ├── CNN+LSTM ├── ALSTM │ ├── ALSTM.py │ └── README.md ├── LRCNs │ ├── LRCNs.py │ └── README.md └── convpooling_LSTM │ ├── README.md │ └── convpooling_LSTM.py ├── LICENSE └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /3DCNN/ARTNet/ARTNet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: ARTNet 3 | @author: Zhimeng Zhang 4 | ''' 5 | import torch.nn as nn 6 | import torch 7 | 8 | class SMART_block(nn.Module): 9 | 10 | def __init__(self, in_channel,out_channel,kernel_size=(3,3,3), stride=(1,1,1), padding=(1,1,1)): 11 | super(SMART_block, self).__init__() 12 | 13 | self.appearance_conv=nn.Conv3d(in_channel, out_channel, kernel_size=(1,kernel_size[1],kernel_size[2]),stride= stride,padding=(0, padding[1], padding[2]),bias=False) 14 | self.appearance_bn=nn.BatchNorm3d(out_channel) 15 | 16 | self.relation_conv=nn.Conv3d(in_channel, out_channel,kernel_size=kernel_size, stride=stride, padding=padding, bias=False) 17 | self.relation_bn1=nn.BatchNorm3d(out_channel) 18 | self.relation_pooling=nn.Conv3d(out_channel,out_channel//2,kernel_size=1,stride=1,groups=out_channel//2,bias=False) 19 | nn.init.constant_(self.relation_pooling.weight,0.5) 20 | self.relation_pooling.weight.requires_grad=False 21 | self.relation_bn2 = 
nn.BatchNorm3d(out_channel//2) 22 | 23 | self.reduce=nn.Conv3d(out_channel+out_channel//2,out_channel,kernel_size=1,bias=False) 24 | self.reduce_bn=nn.BatchNorm3d(out_channel) 25 | 26 | self.relu = nn.ReLU() 27 | if in_channel != out_channel or stride[0] != 1 or stride[1] != 1: 28 | self.down_sample = nn.Sequential(nn.Conv3d(in_channel, out_channel, kernel_size=1, 29 | stride=stride, 30 | bias=False), 31 | nn.BatchNorm3d(out_channel)) 32 | else: 33 | self.down_sample = None 34 | 35 | def forward(self, x): 36 | appearance=x 37 | relation=x 38 | appearance=self.appearance_conv(appearance) 39 | appearance=self.appearance_bn(appearance) 40 | relation=self.relation_conv(relation) 41 | relation=self.relation_bn1(relation) 42 | relation=torch.pow(relation,2) 43 | relation=self.relation_pooling(relation) 44 | relation=self.relation_bn2(relation) 45 | stream=self.relu(torch.cat([appearance,relation],1)) 46 | stream=self.reduce(stream) 47 | stream=self.reduce_bn(stream) 48 | if self.down_sample is not None: 49 | x=self.down_sample(x) 50 | 51 | return self.relu(stream+x) 52 | 53 | 54 | class ARTNet(nn.Module): 55 | # Input size: 16x112x112 56 | def __init__(self, num_class): 57 | super(ARTNet, self).__init__() 58 | 59 | self.conv1=SMART_block(3,64,kernel_size=(3,7,7),stride=(2,2,2),padding=(1,3,3)) 60 | self.conv2=nn.Sequential(SMART_block(64,64), 61 | SMART_block(64, 64)) 62 | self.conv3=nn.Sequential(SMART_block(64,128,stride=(2,2,2)), 63 | SMART_block(128, 128)) 64 | self.conv4 = nn.Sequential(SMART_block(128, 256, stride=(2,2,2)), 65 | SMART_block(256, 256)) 66 | self.conv5 = nn.Sequential(SMART_block(256, 512, stride=(2,2,2)), 67 | SMART_block(512, 512)) 68 | self.avg_pool=nn.AvgPool3d(kernel_size=(1,7,7)) 69 | self.linear=nn.Linear(512,num_class) 70 | 71 | def forward(self, x): 72 | x=self.conv1(x) 73 | x=self.conv2(x) 74 | x=self.conv3(x) 75 | x=self.conv4(x) 76 | x=self.conv5(x) 77 | x=self.avg_pool(x) 78 | return self.linear(x.view(x.size(0),-1)) -------------------------------------------------------------------------------- /3DCNN/ARTNet/README.md: -------------------------------------------------------------------------------- 1 | # Appearance-and-Relation Networks for Video Classification 2 | This paper can be downloaded [here](http://openaccess.thecvf.com/content_cvpr_2018/papers/Wang_Appearance-and-Relation_Networks_for_CVPR_2018_paper.pdf). 3 | 4 | 5 | ## Note 6 | I reproduce the model based on the original caffe code [here](https://github.com/wanglimin/ARTNet.git). 
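## Usage example
A minimal smoke test, assuming `ARTNet.py` is on your `PYTHONPATH`; the 16x112x112 clip size follows the comment in the code, and the class count is only an example:

```python
import torch
from ARTNet import ARTNet

model = ARTNet(num_class=101)           # 101 classes is an example (e.g. UCF101)
clip = torch.randn(2, 3, 16, 112, 112)  # (batch, channel, time, height, width)
logits = model(clip)
print(logits.shape)                     # expected: torch.Size([2, 101])
```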
7 | 8 | 9 | -------------------------------------------------------------------------------- /3DCNN/FstCN/FstCN.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: FstCN 3 | @author: MRzzm 4 | @E-mail: zhangzhimeng1@gmail.com 5 | @github: https://github.com/MRzzm/action-recognition-models-pytorch.git 6 | ''' 7 | 8 | import torch 9 | import torch.nn as nn 10 | from torch.nn import init 11 | 12 | class TCL(nn.Module): 13 | def __init__(self, in_channels,init_weights): 14 | super(TCL, self).__init__() 15 | self.branch1=nn.Sequential(nn.Conv3d(in_channels,32,kernel_size=(3,1,1),stride=(1,1,1),padding=(1,0,0)), 16 | nn.ReLU(True), 17 | nn.MaxPool3d(kernel_size=(2, 1, 1), stride=(2, 1, 1)) 18 | ) 19 | self.branch2=nn.Sequential(nn.Conv3d(in_channels,32,kernel_size=(5,1,1),stride=(1,1,1),padding=(2,0,0)), 20 | nn.ReLU(True), 21 | nn.MaxPool3d(kernel_size=(2,1,1),stride=(2,1,1)) 22 | ) 23 | if init_weights: 24 | self._initialize_weights() 25 | 26 | def forward(self, x): 27 | res1=self.branch1(x) 28 | res2=self.branch2(x) 29 | return torch.cat([res1,res2],1) 30 | 31 | def _initialize_weights(self): 32 | for m in self.modules(): 33 | if isinstance(m, nn.Sequential): 34 | for n in m: 35 | if isinstance(n,nn.Conv3d): 36 | init.xavier_uniform_(n.weight) 37 | init.constant_(n.bias, 0) 38 | 39 | 40 | 41 | # input_size: 16x204x204 42 | class FstCN(nn.Module): 43 | def __init__(self, num_class, init_weights=True): 44 | super(FstCN, self).__init__() 45 | 46 | self.SCL1 = nn.Sequential(nn.Conv3d(3, 96, kernel_size=(1,7,7), stride=(1,2,2),padding=(0,3,3)), 47 | nn.ReLU(True), 48 | nn.MaxPool3d((1,3,3),stride=(1,2,2))) 49 | self.SCL2=nn.Sequential(nn.Conv3d(96, 256, kernel_size=(1,5,5), stride=(1,2,2),padding=(0,2,2)), 50 | nn.ReLU(True), 51 | nn.MaxPool3d((1,3,3),stride=(1,2,2))) 52 | self.SCL3 = nn.Sequential(nn.Conv3d(256, 512, kernel_size=(1,3,3),stride=(1,1,1),padding=(0,1,1)), 53 | nn.ReLU(True) 54 | ) 55 | self.SCL4 = nn.Sequential(nn.Conv3d(512, 512, kernel_size=(1,3,3),stride=(1,1,1),padding=(0,1,1)), 56 | nn.ReLU(True) 57 | ) 58 | 59 | self.Parallel_temporal = nn.Sequential( nn.Conv3d(512,128,kernel_size=(1,3,3),stride=(1,1,1),padding=(0,1,1)), 60 | nn.MaxPool3d((1,3,3),stride=(1,3,3)), 61 | TCL(in_channels=128,init_weights=init_weights) 62 | ) 63 | self.Parallel_spatial = nn.Sequential( nn.Conv2d(512,128,kernel_size=(3,3),stride=(1,1),padding=(1,1)), 64 | nn.MaxPool2d((3, 3), stride=(3, 3)) 65 | ) 66 | self.tem_fc=nn.Sequential(nn.Linear(8192, 4096), 67 | nn.Dropout(), 68 | nn.Linear(4096, 2048)) 69 | self.spa_fc = nn.Sequential(nn.Linear(2048, 4096), 70 | nn.Dropout(), 71 | nn.Linear(4096, 2048)) 72 | self.fc=nn.Linear(4096,2048) 73 | self.out=nn.Linear(2048,num_class) 74 | 75 | if init_weights: 76 | self._initialize_weights() 77 | 78 | def forward(self,clip,clip_diff): 79 | clip_all=torch.cat([clip,clip_diff],2) 80 | clip_len=clip.size(2) 81 | clip_all = self.SCL1(clip_all) 82 | clip_all = self.SCL2(clip_all) 83 | clip_all = self.SCL3(clip_all) 84 | clip_all = self.SCL4(clip_all) 85 | clip=clip_all[:,:,:clip_len,:,:] 86 | clip_diff=clip_all[:,:,clip_len:,:,:] 87 | clip=torch.squeeze(clip[:,:,clip.size(2)//2,:,:]) 88 | clip = self.Parallel_spatial(clip) 89 | clip=self.spa_fc(clip.view(clip.size(0),-1)) 90 | clip_diff = self.Parallel_temporal(clip_diff) 91 | clip_diff=self.tem_fc(clip_diff.view(clip_diff.size(0),-1)) 92 | res = torch.cat([clip,clip_diff],1) 93 | res=self.fc(res) 94 | res=self.out(res) 95 | return res 96 | 97 | def 
_initialize_weights(self): 98 | for m in self.modules(): 99 | if isinstance(m, nn.Sequential): 100 | for n in m: 101 | if isinstance(n,nn.Conv3d): 102 | init.xavier_uniform_(n.weight) 103 | if n.bias is not None: 104 | init.constant_(n.bias, 0) 105 | elif isinstance(n,nn.Conv2d): 106 | init.xavier_uniform_(n.weight) 107 | if n.bias is not None: 108 | init.constant_(n.bias, 0) 109 | elif isinstance(m, nn.Linear): 110 | init.xavier_uniform_(m.weight) 111 | init.constant_(m.bias, 0) 112 | -------------------------------------------------------------------------------- /3DCNN/FstCN/README.md: -------------------------------------------------------------------------------- 1 | # Human Action Recognition using Factorized Spatio-Temporal Convolutional Networks 2 | This paper can be downloaded [here](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Sun_Human_Action_Recognition_ICCV_2015_paper.pdf). 3 | 4 | ## Detailed introduction of the paper 5 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/84880257). 6 | 7 | 8 | -------------------------------------------------------------------------------- /3DCNN/I3D/I3D.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: I3D 3 | @author: Zhimeng Zhang 4 | @E-mail: zhangzhimeng1@gmail.com 5 | @github: https://github.com/MRzzm/action-recognition-models-pytorch.git 6 | ''' 7 | import torch 8 | import torch.nn as nn 9 | 10 | class BasicConv3d(nn.Module): 11 | def __init__(self, in_channel, out_channel, kernel_size, stride, padding=0): 12 | super(BasicConv3d, self).__init__() 13 | self.conv = nn.Conv3d(in_channel, out_channel, 14 | kernel_size=kernel_size, stride=stride, 15 | padding=padding) 16 | self.bn = nn.BatchNorm3d(out_channel, 17 | eps=0.001, # value found in tensorflow 18 | ) 19 | self.relu = nn.ReLU() 20 | 21 | def forward(self, x): 22 | x = self.conv(x) 23 | x = self.bn(x) 24 | x = self.relu(x) 25 | return x 26 | 27 | class Inception_block(nn.Module): 28 | 29 | def __init__(self,in_channel,out_channel): 30 | super(Inception_block, self).__init__() 31 | # out_channel=[1x1x1,3x3x3_reduce,3x3x3,3x3x3_reduce,3x3x3,pooling_reduce] 32 | 33 | self.branch1 = BasicConv3d(in_channel,out_channel[0], kernel_size=1, stride=1) 34 | self.branch2 = nn.Sequential( 35 | BasicConv3d(in_channel, out_channel[1], kernel_size=1, stride=1), 36 | BasicConv3d(out_channel[1], out_channel[2],kernel_size=3, stride=1, padding=1) 37 | ) 38 | self.branch3 = nn.Sequential( 39 | BasicConv3d(in_channel, out_channel[3], kernel_size=1, stride=1), 40 | BasicConv3d(out_channel[3], out_channel[4], kernel_size=3, stride=1, padding=1) 41 | ) 42 | self.branch4 = nn.Sequential( 43 | nn.MaxPool3d(kernel_size=3,stride=1,padding=1), 44 | BasicConv3d(in_channel, out_channel[5], kernel_size=1, stride=1), 45 | ) 46 | 47 | def forward(self, x): 48 | x1 = self.branch1(x) 49 | x2 = self.branch2(x) 50 | x3 = self.branch3(x) 51 | x4 = self.branch4(x) 52 | return torch.cat([x1,x2,x3,x4], 1) 53 | 54 | 55 | class I3D(nn.Module): 56 | # Input size: 64x224x224 57 | def __init__(self, num_class): 58 | super(I3D, self).__init__() 59 | 60 | self.conv1=BasicConv3d(3,64,kernel_size=7,stride=2,padding=3) 61 | self.pool1=nn.MaxPool3d(kernel_size=(1,3,3),stride=(1,2,2),padding=(0,1,1)) 62 | self.conv2=BasicConv3d(64,64,kernel_size=1,stride=1) 63 | self.conv3=BasicConv3d(64,192,kernel_size=3,stride=1,padding=1) 64 | self.pool2=nn.MaxPool3d(kernel_size=(1,3,3),stride=(1,2,2),padding=(0,1,1)) 65 | 
self.Inception1=nn.Sequential(Inception_block(192, [64,96,128,16,32,32]), 66 | Inception_block(256, [128, 128, 192, 32, 96, 64])) 67 | self.pool3=nn.MaxPool3d(kernel_size=(3,3,3),stride=(2,2,2),padding=(1,1,1)) 68 | self.Inception2=nn.Sequential(Inception_block(480,[192,96,208,16,48,64]), 69 | Inception_block(512, [160, 112, 224, 24, 64, 64]), 70 | Inception_block(512, [128, 128, 256, 24, 64, 64]), 71 | Inception_block(512, [112, 144, 288, 32, 64, 64]), 72 | Inception_block(528, [256, 160, 320, 32, 128, 128])) 73 | self.pool4=nn.MaxPool3d(kernel_size=(2,2,2),stride=2) 74 | self.Inception3=nn.Sequential(Inception_block(832,[256,160,320,32,128,128]), 75 | Inception_block(832, [384, 192, 384, 48, 128, 128])) 76 | self.avg_pool=nn.AvgPool3d(kernel_size=(8,7,7)) 77 | self.dropout = nn.Dropout(0.4) 78 | self.linear=nn.Linear(1024,num_class) 79 | 80 | def forward(self, x): 81 | x = self.conv1(x) 82 | x = self.pool1(x) 83 | x = self.conv2(x) 84 | x = self.conv3(x) 85 | x = self.pool2(x) 86 | x = self.Inception1(x) 87 | x = self.pool3(x) 88 | x = self.Inception2(x) 89 | x = self.pool4(x) 90 | x = self.Inception3(x) 91 | x = self.avg_pool(x) 92 | x = self.dropout(x.view(x.size(0),-1)) 93 | return self.linear(x) -------------------------------------------------------------------------------- /3DCNN/I3D/README.md: -------------------------------------------------------------------------------- 1 | # Quo Vadis, Action Recognition? A New Model and the Kinetics Dataset 2 | This paper can be downloaded [here](https://arxiv.org/pdf/1705.07750.pdf). 3 | Due to time constraints, I only provide the code of I3D without pre-trained ImageNet parameters. 4 | 5 | ## Detailed introduction of the paper 6 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/84936338). 
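## Usage example
A minimal sketch, assuming `I3D.py` is importable; the 64x224x224 clip size follows the comment in the code, and 400 classes (as in Kinetics) is only an example:

```python
import torch
from I3D import I3D

model = I3D(num_class=400)              # 400 classes is an example (Kinetics)
clip = torch.randn(2, 3, 64, 224, 224)  # (batch, channel, time, height, width)
logits = model(clip)
print(logits.shape)                     # expected: torch.Size([2, 400])
```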
7 | 8 | 9 | -------------------------------------------------------------------------------- /3DCNN/LTC/LTC.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: LTC 3 | @author: Zhimeng Zhang 4 | ''' 5 | 6 | import torch.nn as nn 7 | from torch.nn import init 8 | 9 | class LTC(nn.Module): 10 | # input size: 100x71x71 11 | def __init__(self, num_class, init_weights=True): 12 | super(LTC, self).__init__() 13 | 14 | self.conv1 = nn.Conv3d(3, 64, kernel_size=3, padding=1) 15 | self.conv2 = nn.Conv3d(64, 128, kernel_size=3, padding=1) 16 | self.conv3 = nn.Conv3d(128, 256, kernel_size=3, padding=1) 17 | self.conv4 = nn.Conv3d(256, 256, kernel_size=3, padding=1) 18 | self.conv5 = nn.Conv3d(256, 256, kernel_size=3, padding=1) 19 | 20 | self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)) 21 | self.pool2 = nn.MaxPool3d(kernel_size=2, stride=2) 22 | self.pool3 = nn.MaxPool3d(kernel_size=2, stride=2) 23 | self.pool4 = nn.MaxPool3d(kernel_size=2, stride=2) 24 | self.pool5 = nn.MaxPool3d(kernel_size=2, stride=2) 25 | 26 | self.fc1 = nn.Linear(6144, 2048) 27 | self.fc2 = nn.Linear(2048, 2048) 28 | self.out = nn.Linear(2048, num_class) 29 | 30 | self.relu = nn.ReLU() 31 | self.dropout=nn.Dropout(0.9) 32 | if init_weights: 33 | self._initialize_weights() 34 | 35 | def forward(self, x): 36 | 37 | x = self.conv1(x) 38 | x = self.relu(x) 39 | x = self.pool1(x) 40 | 41 | x = self.conv2(x) 42 | x = self.relu(x) 43 | x = self.pool2(x) 44 | 45 | x = self.conv3(x) 46 | x = self.relu(x) 47 | x = self.pool3(x) 48 | 49 | x = self.conv4(x) 50 | x = self.relu(x) 51 | x = self.pool4(x) 52 | 53 | x = self.conv5(x) 54 | x = self.relu(x) 55 | x = self.pool5(x) 56 | 57 | x = x.view(x.size(0), -1) 58 | x = self.relu(self.fc1(x)) 59 | x=self.dropout(x) 60 | x = self.relu(self.fc2(x)) 61 | x = self.dropout(x) 62 | res = self.out(x) 63 | 64 | return res 65 | 66 | def _initialize_weights(self): 67 | for m in self.modules(): 68 | if isinstance(m, nn.Conv3d): 69 | init.xavier_uniform_(m.weight) 70 | if m.bias is not None: 71 | init.constant_(m.bias, 0) 72 | elif isinstance(m, nn.Linear): 73 | init.xavier_uniform_(m.weight) 74 | init.constant_(m.bias, 0) 75 | -------------------------------------------------------------------------------- /3DCNN/LTC/README.md: -------------------------------------------------------------------------------- 1 | # Long-term Temporal Convolutions for Action Recognition 2 | This paper can be downloaded [here](https://arxiv.org/pdf/1604.04494.pdf). 3 | 4 | 5 | ## Detailed introduction of the paper 6 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/85051850). 
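## Usage example
A minimal smoke test, assuming `LTC.py` is importable; the 100x71x71 clip size follows the comment in the code:

```python
import torch
from LTC import LTC

model = LTC(num_class=101)             # the class count is an example
clip = torch.randn(2, 3, 100, 71, 71)  # (batch, channel, time, height, width)
logits = model(clip)
print(logits.shape)                    # expected: torch.Size([2, 101])
```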
7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /3DCNN/P3D/P3D.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: P3D 3 | @author: Zhimeng Zhang 4 | ''' 5 | import torch.nn as nn 6 | 7 | class P3D_Block(nn.Module): 8 | 9 | def __init__(self, blockType, inplanes, planes, stride=1): 10 | super(P3D_Block, self).__init__() 11 | self.expansion = 4 12 | self.blockType=blockType 13 | self.conv1 = nn.Conv3d(inplanes, planes, kernel_size=1, bias=False) 14 | self.bn1 = nn.BatchNorm3d(planes) 15 | if self.blockType=='A': 16 | self.conv2D = nn.Conv3d(planes, planes, kernel_size=(1,3,3), stride=(1,stride,stride), 17 | padding=(0,1,1), bias=False) 18 | self.conv1D = nn.Conv3d(planes, planes, kernel_size=(3,1,1), stride=(stride,1,1), 19 | padding=(1,0,0), bias=False) 20 | elif self.blockType == 'B': 21 | self.conv2D = nn.Conv3d(planes, planes, kernel_size=(1, 3, 3), stride=stride, 22 | padding=(0, 1, 1), bias=False) 23 | self.conv1D = nn.Conv3d(planes, planes, kernel_size=(3, 1, 1), stride=stride, 24 | padding=(1, 0, 0), bias=False) 25 | else: 26 | self.conv2D = nn.Conv3d(planes, planes, kernel_size=(1, 3, 3), stride=stride, 27 | padding=(0, 1, 1), bias=False) 28 | self.conv1D = nn.Conv3d(planes, planes, kernel_size=(3, 1, 1), stride=1, 29 | padding=(1, 0, 0), bias=False) 30 | self.bn2D = nn.BatchNorm3d(planes) 31 | self.bn1D = nn.BatchNorm3d(planes) 32 | self.conv3 = nn.Conv3d(planes, planes * self.expansion, kernel_size=1, bias=False) 33 | self.bn3 = nn.BatchNorm3d(planes * self.expansion) 34 | self.relu = nn.ReLU() 35 | self.stride = stride 36 | 37 | if self.stride != 1 or inplanes!= planes * self.expansion: 38 | self.downsample = nn.Sequential( 39 | nn.Conv3d(inplanes, planes * self.expansion, 40 | kernel_size=1, stride=stride, bias=False), 41 | nn.BatchNorm3d(planes * self.expansion), 42 | ) 43 | else: 44 | self.downsample=None 45 | 46 | 47 | def forward(self, x): 48 | x_branch = self.conv1(x) 49 | x_branch = self.bn1(x_branch) 50 | x_branch = self.relu(x_branch) 51 | 52 | if self.blockType=='A': 53 | x_branch = self.conv2D(x_branch) 54 | x_branch = self.bn2D(x_branch) 55 | x_branch = self.relu(x_branch) 56 | x_branch = self.conv1D(x_branch) 57 | x_branch = self.bn1D(x_branch) 58 | x_branch = self.relu(x_branch) 59 | elif self.blockType=='B': 60 | x_branch2D = self.conv2D(x_branch) 61 | x_branch2D = self.bn2D(x_branch2D) 62 | x_branch2D = self.relu(x_branch2D) 63 | x_branch1D = self.conv1D(x_branch) 64 | x_branch1D = self.bn1D(x_branch1D) 65 | x_branch=x_branch1D+x_branch2D 66 | x_branch=self.relu(x_branch) 67 | else: 68 | x_branch = self.conv2D(x_branch) 69 | x_branch = self.bn2D(x_branch) 70 | x_branch = self.relu(x_branch) 71 | x_branch1D = self.conv1D(x_branch) 72 | x_branch1D = self.bn1D(x_branch1D) 73 | x_branch=x_branch+x_branch1D 74 | x_branch=self.relu(x_branch) 75 | 76 | x_branch = self.conv3(x_branch) 77 | x_branch = self.bn3(x_branch) 78 | 79 | if self.downsample is not None: 80 | x = self.downsample(x) 81 | 82 | x =x+ x_branch 83 | x = self.relu(x) 84 | return x 85 | 86 | class P3D (nn.Module): 87 | # input size: 16 x 160 x 160 88 | def __init__(self, num_class): 89 | super(P3D, self).__init__() 90 | self.expansion = 4 91 | self.conv1 = nn.Conv3d(3, 64, kernel_size=(1,7,7), stride=(1,2,2), padding=(0,3,3), 92 | bias=False) 93 | self.bn1 = nn.BatchNorm3d(64) 94 | self.relu = nn.ReLU() 95 | self.maxpool = nn.MaxPool3d(kernel_size=(1,3,3), stride=(1,2,2), padding=(0,1,1)) 96 | 
self.conv2 = nn.Sequential(P3D_Block('A',64,64,2), 97 | P3D_Block('B', 64 * self.expansion, 64), 98 | P3D_Block('C', 64 * self.expansion, 64)) 99 | self.conv3 = nn.Sequential(P3D_Block('A', 64 * self.expansion, 128, 2), 100 | P3D_Block('B', 128 * self.expansion, 128), 101 | P3D_Block('C', 128 * self.expansion, 128), 102 | P3D_Block('A', 128 * self.expansion, 128)) 103 | self.conv4 = nn.Sequential(P3D_Block('B', 128 * self.expansion, 256, 2), 104 | P3D_Block('C', 256 * self.expansion, 256), 105 | P3D_Block('A', 256 * self.expansion, 256), 106 | P3D_Block('B', 256 * self.expansion, 256), 107 | P3D_Block('C', 256 * self.expansion, 256), 108 | P3D_Block('A', 256 * self.expansion, 256)) 109 | self.conv5 = nn.Sequential(P3D_Block('B', 256 * self.expansion, 512, 2), 110 | P3D_Block('C', 512 * self.expansion, 512), 111 | P3D_Block('A', 512 * self.expansion, 512)) 112 | self.average_pool=nn.AvgPool3d((1,3,3)) 113 | self.fc=nn.Linear(512 * self.expansion,num_class) 114 | 115 | def forward(self, x): 116 | x=self.conv1(x) 117 | x=self.bn1(x) 118 | x=self.relu(x) 119 | x=self.maxpool(x) 120 | x=self.conv2(x) 121 | x=self.conv3(x) 122 | x=self.conv4(x) 123 | x=self.conv5(x) 124 | x=self.average_pool(x) 125 | x=x.view(x.size(0),-1) 126 | x = self.fc(x) 127 | return x -------------------------------------------------------------------------------- /3DCNN/P3D/README.md: -------------------------------------------------------------------------------- 1 | # Learning Spatio-Temporal Representation with Pseudo-3D Residual Networks 2 | This paper can be downloaded [here](http://openaccess.thecvf.com/content_ICCV_2017/papers/Qiu_Learning_Spatio-Temporal_Representation_ICCV_2017_paper.pdf). 3 | 4 | ## Note 5 | The structure of the network is not described in detail in this paper, so my code may not match the author's exactly. 6 | 7 | ## Detailed introduction of the paper 8 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/85099886). 
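## Usage example
A minimal sketch, assuming `P3D.py` is importable; the 16x160x160 clip size follows the comment in the code:

```python
import torch
from P3D import P3D

model = P3D(num_class=101)              # the class count is an example
clip = torch.randn(2, 3, 16, 160, 160)  # (batch, channel, time, height, width)
logits = model(clip)
print(logits.shape)                     # expected: torch.Size([2, 101])
```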
9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /3DCNN/R21D_34/R21D_34.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: R21D_34 3 | @author: Zhimeng Zhang 4 | ''' 5 | import torch.nn as nn 6 | 7 | class Res21D_Block(nn.Module): 8 | def __init__(self, in_channel,out_channel, spatial_stride=1,temporal_stride=1): 9 | super(Res21D_Block, self).__init__() 10 | self.MidChannel1=int((27*in_channel*out_channel)/(9*in_channel+3*out_channel)) 11 | self.MidChannel2 = int((27 * out_channel * out_channel) / ( 12 * out_channel)) 12 | self.conv1_2D = nn.Conv3d(in_channel,self.MidChannel1 , kernel_size=(1, 3, 3), stride=(1, spatial_stride, spatial_stride), 13 | padding=(0, 1, 1)) 14 | self.bn1_2D = nn.BatchNorm3d(self.MidChannel1) 15 | self.conv1_1D=nn.Conv3d(self.MidChannel1, out_channel, kernel_size=(3, 1, 1), stride=(temporal_stride, 1, 1), 16 | padding=(1, 0, 0)) 17 | self.bn1_1D = nn.BatchNorm3d(out_channel) 18 | 19 | self.conv2_2D = nn.Conv3d(out_channel, self.MidChannel2, kernel_size=(1, 3, 3), stride=1, 20 | padding=(0, 1, 1)) 21 | self.bn2_2D = nn.BatchNorm3d(self.MidChannel2) 22 | self.conv2_1D = nn.Conv3d(self.MidChannel2, out_channel, kernel_size=(3, 1, 1), stride=1, 23 | padding=(1, 0, 0)) 24 | self.bn2_1D = nn.BatchNorm3d(out_channel) 25 | 26 | self.relu = nn.ReLU() 27 | if in_channel != out_channel or spatial_stride != 1 or temporal_stride != 1: 28 | self.down_sample=nn.Sequential(nn.Conv3d(in_channel, out_channel,kernel_size=1,stride=(temporal_stride, spatial_stride, spatial_stride),bias=False), 29 | nn.BatchNorm3d(out_channel)) 30 | else: 31 | self.down_sample=None 32 | 33 | def forward(self, x): 34 | 35 | x_branch = self.conv1_2D(x) 36 | x_branch=self.bn1_2D(x_branch) 37 | x_branch = self.relu(x_branch) 38 | x_branch=self.conv1_1D(x_branch) 39 | x_branch=self.bn1_1D(x_branch) 40 | x_branch = self.relu(x_branch) 41 | 42 | x_branch = self.conv2_2D(x_branch) 43 | x_branch = self.bn2_2D(x_branch) 44 | x_branch = self.relu(x_branch) 45 | x_branch = self.conv2_1D(x_branch) 46 | x_branch = self.bn2_1D(x_branch) 47 | 48 | if self.down_sample is not None: 49 | x=self.down_sample(x) 50 | return self.relu(x_branch+x) 51 | 52 | class Res21D(nn.Module): 53 | # Input size: 8 x 112 x 112 54 | def __init__(self, num_class): 55 | super(Res21D, self).__init__() 56 | 57 | self.conv1=nn.Conv3d(3,64,kernel_size=(3,7,7),stride=(1,2,2),padding=(1,3,3)) 58 | self.conv2=nn.Sequential(Res21D_Block(64, 64, spatial_stride=2), 59 | Res21D_Block(64, 64), 60 | Res21D_Block(64, 64)) 61 | self.conv3=nn.Sequential(Res21D_Block(64,128,spatial_stride=2,temporal_stride=2), 62 | Res21D_Block(128, 128), 63 | Res21D_Block(128, 128), 64 | Res21D_Block(128, 128),) 65 | self.conv4 = nn.Sequential(Res21D_Block(128, 256, spatial_stride=2,temporal_stride=2), 66 | Res21D_Block(256, 256), 67 | Res21D_Block(256, 256), 68 | Res21D_Block(256, 256), 69 | Res21D_Block(256, 256), 70 | Res21D_Block(256, 256)) 71 | self.conv5 = nn.Sequential(Res21D_Block(256, 512, spatial_stride=2,temporal_stride=2), 72 | Res21D_Block(512, 512), 73 | Res21D_Block(512, 512)) 74 | self.avg_pool=nn.AvgPool3d(kernel_size=(1,4,4)) 75 | self.linear=nn.Linear(512,num_class) 76 | 77 | def forward(self, x): 78 | x=self.conv1(x) 79 | x=self.conv2(x) 80 | x=self.conv3(x) 81 | x=self.conv4(x) 82 | x=self.conv5(x) 83 | x=self.avg_pool(x) 84 | return self.linear(x.view(x.size(0),-1)) 
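
# A minimal smoke test sketch; the 8x112x112 clip size follows the comment on the
# class above, and the class count is just an example.
if __name__ == '__main__':
    import torch
    model = Res21D(num_class=101)
    clip = torch.randn(2, 3, 8, 112, 112)  # (batch, channel, time, height, width)
    print(model(clip).shape)               # expected: torch.Size([2, 101])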
-------------------------------------------------------------------------------- /3DCNN/R21D_34/README.md: -------------------------------------------------------------------------------- 1 | # A Closer Look at Spatiotemporal Convolutions for Action Recognition 2 | This paper can be downloaded [here](http://openaccess.thecvf.com/content_cvpr_2018/CameraReady/2648.pdf). 3 | 4 | ## Detailed introduction of the paper 5 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/85143711). 6 | 7 | 8 | -------------------------------------------------------------------------------- /3DCNN/Res3D/README.md: -------------------------------------------------------------------------------- 1 | # ConvNet Architecture Search for Spatiotemporal Feature Learning 2 | This paper can be downloaded [here](https://arxiv.org/pdf/1708.05038.pdf). 3 | 4 | ## Detailed introduction of the paper 5 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/84962135). 6 | 7 | 8 | -------------------------------------------------------------------------------- /3DCNN/Res3D/Res3D.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: Res3D 3 | @author: Zhimeng Zhang 4 | @E-mail: zhangzhimeng1@gmail.com 5 | @github: https://github.com/MRzzm/action-recognition-models-pytorch.git 6 | ''' 7 | 8 | import torch.nn as nn 9 | import torch.nn.init as init 10 | 11 | class ResBlock(nn.Module): 12 | def __init__(self, in_channel,out_channel, spatial_stride=1,temporal_stride=1): 13 | super(ResBlock, self).__init__() 14 | 15 | self.conv1 = nn.Conv3d(in_channel, out_channel,kernel_size=(3,3,3),stride=(temporal_stride,spatial_stride,spatial_stride),padding=(1,1,1)) 16 | self.conv2 = nn.Conv3d(out_channel, out_channel,kernel_size=(3, 3, 3),stride=(1, 1, 1),padding=(1, 1, 1)) 17 | self.bn1 = nn.BatchNorm3d(out_channel) 18 | self.bn2 = nn.BatchNorm3d(out_channel) 19 | self.relu = nn.ReLU() 20 | if in_channel != out_channel or spatial_stride != 1 or temporal_stride != 1: 21 | self.down_sample=nn.Sequential(nn.Conv3d(in_channel, out_channel,kernel_size=1,stride=(temporal_stride,spatial_stride,spatial_stride),bias=False), 22 | nn.BatchNorm3d(out_channel)) 23 | else: 24 | self.down_sample=None 25 | 26 | def forward(self, x): 27 | x_branch = self.conv1(x) 28 | x_branch = self.bn1(x_branch) 29 | x_branch = self.relu(x_branch) 30 | x_branch = self.conv2(x_branch) 31 | x_branch = self.bn2(x_branch) 32 | if self.down_sample is not None: 33 | x=self.down_sample(x) 34 | return self.relu(x_branch+x) 35 | 36 | class Res3D(nn.Module): 37 | # Input size: 8x224x224 38 | def __init__(self, num_class): 39 | super(Res3D, self).__init__() 40 | 41 | self.conv1=nn.Conv3d(3,64,kernel_size=(3,7,7),stride=(1,2,2),padding=(1,3,3)) 42 | self.conv2=nn.Sequential(ResBlock(64,64,spatial_stride=2), 43 | ResBlock(64, 64)) 44 | self.conv3=nn.Sequential(ResBlock(64,128,spatial_stride=2,temporal_stride=2), 45 | ResBlock(128, 128)) 46 | self.conv4 = nn.Sequential(ResBlock(128, 256, spatial_stride=2,temporal_stride=2), 47 | ResBlock(256, 256)) 48 | self.conv5 = nn.Sequential(ResBlock(256, 512, spatial_stride=2,temporal_stride=2), 49 | ResBlock(512, 512)) 50 | self.avg_pool=nn.AvgPool3d(kernel_size=(1,7,7)) 51 | self.linear=nn.Linear(512,num_class) 52 | 53 | def forward(self, x): 54 | x=self.conv1(x) 55 | x=self.conv2(x) 56 | x=self.conv3(x) 57 | x=self.conv4(x) 58 | x=self.conv5(x) 59 | x=self.avg_pool(x) 60 | return self.linear(x.view(x.size(0),-1)) 
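
# A minimal smoke test sketch; the 8x224x224 clip size follows the comment on the
# class above, and the class count is just an example.
if __name__ == '__main__':
    import torch
    model = Res3D(num_class=101)
    clip = torch.randn(2, 3, 8, 224, 224)  # (batch, channel, time, height, width)
    print(model(clip).shape)               # expected: torch.Size([2, 101])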
61 | -------------------------------------------------------------------------------- /3DCNN/S3D/Fast_S3D.py: -------------------------------------------------------------------------------- 1 | ''' 2 | @project: Fast_S3D 3 | @author: Zhimeng Zhang 4 | ''' 5 | import torch 6 | import torch.nn as nn 7 | 8 | class BasicConv3d(nn.Module): 9 | def __init__(self, in_channel, out_channel, kernel_size, stride, padding=(0,0,0)): 10 | super(BasicConv3d, self).__init__() 11 | self.conv = nn.Conv3d(in_channel, out_channel, 12 | kernel_size=kernel_size, stride=stride, 13 | padding=padding) 14 | self.bn = nn.BatchNorm3d(out_channel) 15 | self.relu = nn.ReLU() 16 | 17 | def forward(self, x): 18 | x = self.conv(x) 19 | x = self.bn(x) 20 | x = self.relu(x) 21 | return x 22 | 23 | class Inception_block(nn.Module): 24 | 25 | def __init__(self,in_channel,out_channel): 26 | super(Inception_block, self).__init__() 27 | # out_channel=[1x1x1,3x3x3_reduce,3x3x3,3x3x3_reduce,3x3x3,pooling_reduce] 28 | 29 | self.branch1 = BasicConv3d(in_channel,out_channel[0], kernel_size=(3,1,1), stride=1, padding=(1,0,0)) 30 | self.branch2 = nn.Sequential( 31 | BasicConv3d(in_channel, out_channel[1], kernel_size=1, stride=1), 32 | BasicConv3d(out_channel[1], out_channel[2],kernel_size=(1,3,3), stride=1, padding=(0,1,1)) 33 | ) 34 | self.branch3 = nn.Sequential( 35 | BasicConv3d(in_channel, out_channel[3], kernel_size=1, stride=1), 36 | BasicConv3d(out_channel[3], out_channel[4], kernel_size=(1, 3, 3), stride=1, padding= (0, 1, 1)) 37 | ) 38 | self.branch4 = nn.Sequential( 39 | nn.MaxPool3d(kernel_size=(1,3,3),stride=1,padding=(0,1,1)), 40 | BasicConv3d(in_channel, out_channel[5], kernel_size=(3,1,1), stride=1,padding=(1,0,0)) 41 | ) 42 | 43 | def forward(self, x): 44 | x1 = self.branch1(x) 45 | x2 = self.branch2(x) 46 | x3 = self.branch3(x) 47 | x4 = self.branch4(x) 48 | return torch.cat([x1,x2,x3,x4], 1) 49 | 50 | class S3D_block(nn.Module): 51 | 52 | def __init__(self,in_channel,out_channel): 53 | super(S3D_block, self).__init__() 54 | # out_channel=[1x1x1,3x3x3_reduce,3x3x3,3x3x3_reduce,3x3x3,pooling_reduce] 55 | 56 | self.branch1 = BasicConv3d(in_channel,out_channel[0], kernel_size=(3,1,1), stride=1,padding=(1,0,0)) 57 | self.branch2 = nn.Sequential( 58 | BasicConv3d(in_channel, out_channel[1], kernel_size=1, stride=1), 59 | BasicConv3d(out_channel[1], out_channel[1],kernel_size=(1,3,3), stride=1, padding=(0,1,1)), 60 | BasicConv3d(out_channel[1], out_channel[2], kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0)) 61 | ) 62 | self.branch3 = nn.Sequential( 63 | BasicConv3d(in_channel, out_channel[3], kernel_size=1, stride=1), 64 | BasicConv3d(out_channel[3], out_channel[3], kernel_size=(1, 3, 3), stride=1, padding= (0, 1, 1)), 65 | BasicConv3d(out_channel[3], out_channel[4], kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0)) 66 | ) 67 | self.branch4 = nn.Sequential( 68 | nn.MaxPool3d(kernel_size=3,stride=1,padding=1), 69 | BasicConv3d(in_channel, out_channel[5], kernel_size=(3,1,1), stride=1,padding=(1,0,0)) 70 | ) 71 | 72 | def forward(self, x): 73 | x1 = self.branch1(x) 74 | x2 = self.branch2(x) 75 | x3 = self.branch3(x) 76 | x4 = self.branch4(x) 77 | return torch.cat([x1,x2,x3,x4], 1) 78 | 79 | class fast_S3D(nn.Module): 80 | # Input size: 64x224x224 81 | def __init__(self, num_class): 82 | super(fast_S3D, self).__init__() 83 | 84 | self.conv1=BasicConv3d(3,64,kernel_size=(1,7,7),stride=2,padding=(0,3,3)) 85 | self.pool1=nn.MaxPool3d(kernel_size=(1,3,3),stride=(1,2,2),padding=(0,1,1)) 86 | 
self.conv2=BasicConv3d(64,64,kernel_size=1,stride=1) 87 | self.conv3=BasicConv3d(64,192,kernel_size=(1,3,3),stride=1,padding=(0,1,1)) 88 | self.pool2=nn.MaxPool3d(kernel_size=(1,3,3),stride=(1,2,2),padding=(0,1,1)) 89 | self.Inception1=nn.Sequential(Inception_block(192, [64,96,128,16,32,32]), 90 | Inception_block(256, [128, 128, 192, 32, 96, 64])) 91 | self.pool3=nn.MaxPool3d(kernel_size=3,stride=2,padding=1) 92 | self.Inception2=nn.Sequential(Inception_block(480,[192,96,208,16,48,64]), 93 | Inception_block(512, [160, 112, 224, 24, 64, 64]), 94 | Inception_block(512, [128, 128, 256, 24, 64, 64]), 95 | Inception_block(512, [112, 144, 288, 32, 64, 64]), 96 | Inception_block(528, [256, 160, 320, 32, 128, 128])) 97 | self.pool4=nn.MaxPool3d(kernel_size=2,stride=2) 98 | self.Inception3=nn.Sequential(S3D_block(832,[256,160,320,32,128,128]), 99 | S3D_block(832, [384, 192, 384, 48, 128, 128])) 100 | self.avg_pool=nn.AvgPool3d(kernel_size=(8,7,7)) 101 | self.dropout = nn.Dropout(0.4) 102 | self.linear=nn.Linear(1024,num_class) 103 | 104 | def forward(self, x): 105 | x = self.conv1(x) 106 | x = self.pool1(x) 107 | x = self.conv2(x) 108 | x = self.conv3(x) 109 | x = self.pool2(x) 110 | x = self.Inception1(x) 111 | x = self.pool3(x) 112 | x = self.Inception2(x) 113 | x = self.pool4(x) 114 | x = self.Inception3(x) 115 | x = self.avg_pool(x) 116 | x = self.dropout(x.view(x.size(0),-1)) 117 | return self.linear(x) -------------------------------------------------------------------------------- /3DCNN/S3D/README.md: -------------------------------------------------------------------------------- 1 | # Rethinking Spatiotemporal Feature Learning For Video Understanding 2 | This paper can be downloaded [here](http://chensun.me/files/xie_s3d.pdf). 3 | 4 | ## Detailed introduction of the paper 5 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/85235239). 
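## Usage example
A minimal sketch for the faster variant, assuming `Fast_S3D.py` is importable; the 64x224x224 clip size follows the comment in the code. `S3D_G` from `S3D_G.py` is called the same way:

```python
import torch
from Fast_S3D import fast_S3D

model = fast_S3D(num_class=400)         # the class count is an example
clip = torch.randn(2, 3, 64, 224, 224)  # (batch, channel, time, height, width)
logits = model(clip)
print(logits.shape)                     # expected: torch.Size([2, 400])
```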
6 | 7 | 8 | -------------------------------------------------------------------------------- /3DCNN/S3D/S3D_G.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | import torch.nn as nn 4 | 5 | class BasicConv3d(nn.Module): 6 | def __init__(self, in_channel, out_channel, kernel_size, stride, padding=(0, 0, 0)): 7 | super(BasicConv3d, self).__init__() 8 | self.conv = nn.Conv3d(in_channel, out_channel, 9 | kernel_size=kernel_size, stride=stride, 10 | padding=padding) 11 | self.bn = nn.BatchNorm3d(out_channel, 12 | eps=0.001, # value found in tensorflow 13 | ) 14 | self.relu = nn.ReLU() 15 | 16 | def forward(self, x): 17 | x = self.conv(x) 18 | x = self.bn(x) 19 | x = self.relu(x) 20 | return x 21 | 22 | class S3D_G_block(nn.Module): 23 | 24 | def __init__(self,in_channel,out_channel): 25 | super(S3D_G_block, self).__init__() 26 | # out_channel=[1x1x1,3x3x3_reduce,3x3x3,3x3x3_reduce,3x3x3,pooling_reduce] 27 | 28 | 29 | self.branch1 = BasicConv3d(in_channel,out_channel[0], kernel_size=(3,1,1), stride=1, padding=(1,0,0)) 30 | self.branch2 = nn.Sequential( 31 | BasicConv3d(in_channel, out_channel[1], kernel_size=1, stride=1), 32 | BasicConv3d(out_channel[1], out_channel[1],kernel_size=(1,3,3), stride=1, padding=(0,1,1)), 33 | BasicConv3d(out_channel[1], out_channel[2], kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0)) 34 | ) 35 | self.branch3 = nn.Sequential( 36 | BasicConv3d(in_channel, out_channel[3], kernel_size=1, stride=1), 37 | BasicConv3d(out_channel[3], out_channel[3], kernel_size=(1, 3, 3), stride=1, padding= (0, 1, 1)), 38 | BasicConv3d(out_channel[3], out_channel[4], kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0)) 39 | ) 40 | self.branch4 = nn.Sequential( 41 | nn.MaxPool3d(kernel_size=3,stride=1,padding=1), 42 | BasicConv3d(in_channel, out_channel[5], kernel_size=(3,1,1), stride=1,padding=(1,0,0)) 43 | ) 44 | self.squeeze = nn.AdaptiveAvgPool3d(1) 45 | # we replace the weight matrix with a cheap conv over the channel axis to reduce parameters (Conv3d, not Conv1d: the permuted input in forward is 5D) 46 | self.excitation = nn.Conv3d(1, 1, (3,1,1), stride=1,padding=(1,0,0)) 47 | self.sigmoid=nn.Sigmoid() 48 | def forward(self, x): 49 | x1 = self.branch1(x) 50 | x2 = self.branch2(x) 51 | x3 = self.branch3(x) 52 | x4 = self.branch4(x) 53 | x=torch.cat([x1,x2,x3,x4], 1) 54 | input = x 55 | x=self.squeeze(x) 56 | x=self.excitation(x.permute(0,2,1,3,4)) 57 | x=self.sigmoid(x) 58 | return x.permute(0,2,1,3,4)*input 59 | 60 | 61 | 62 | class S3D_G(nn.Module): 63 | # Input size: 64x224x224 64 | def __init__(self, num_class): 65 | super(S3D_G, self).__init__() 66 | 67 | self.conv1=BasicConv3d(3,64,kernel_size=7,stride=2,padding=3) 68 | self.pool1=nn.MaxPool3d(kernel_size=(1,3,3),stride=(1,2,2),padding=(0,1,1)) 69 | self.conv2=BasicConv3d(64,64,kernel_size=1,stride=1) 70 | self.conv3=BasicConv3d(64,192,kernel_size=3,stride=1,padding=1) 71 | self.pool2=nn.MaxPool3d(kernel_size=(1,3,3),stride=(1,2,2),padding=(0,1,1)) 72 | self.Inception1=nn.Sequential(S3D_G_block(192, [64,96,128,16,32,32]), 73 | S3D_G_block(256, [128, 128, 192, 32, 96, 64])) 74 | self.pool3=nn.MaxPool3d(kernel_size=(3,3,3),stride=(2,2,2),padding=(1,1,1)) 75 | self.Inception2=nn.Sequential(S3D_G_block(480,[192,96,208,16,48,64]), 76 | S3D_G_block(512, [160, 112, 224, 24, 64, 64]), 77 | S3D_G_block(512, [128, 128, 256, 24, 64, 64]), 78 | S3D_G_block(512, [112, 144, 288, 32, 64, 64]), 79 | S3D_G_block(528, [256, 160, 320, 32, 128, 128])) 80 | self.pool4=nn.MaxPool3d(kernel_size=(2,2,2),stride=2) 81 | 
self.Inception3=nn.Sequential(S3D_G_block(832,[256,160,320,32,128,128]), 82 | S3D_G_block(832, [384, 192, 384, 48, 128, 128])) 83 | self.avg_pool=nn.AvgPool3d(kernel_size=(8,7,7)) 84 | self.dropout = nn.Dropout(0.4) 85 | self.linear=nn.Linear(1024,num_class) 86 | 87 | def forward(self, x): 88 | x = self.conv1(x) 89 | x = self.pool1(x) 90 | x = self.conv2(x) 91 | x = self.conv3(x) 92 | x = self.pool2(x) 93 | x = self.Inception1(x) 94 | x = self.pool3(x) 95 | x = self.Inception2(x) 96 | x = self.pool4(x) 97 | x = self.Inception3(x) 98 | x = self.avg_pool(x) 99 | x = self.dropout(x.view(x.size(0),-1)) 100 | return self.linear(x) -------------------------------------------------------------------------------- /3DCNN/c3d/README.md: -------------------------------------------------------------------------------- 1 | # Learning Spatiotemporal Features with 3D Convolutional Networks 2 | This paper can be downloaded [here](https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Tran_Learning_Spatiotemporal_Features_ICCV_2015_paper.pdf). 3 | 4 | ## Code 5 | We do not provide a pre-trained model on Sports-1M; if you need one, please go [here](https://github.com/DavideA/c3d-pytorch.git). 6 | 7 | ## Detailed introduction of the paper 8 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/84866514#comments). 9 | 10 | 11 | -------------------------------------------------------------------------------- /3DCNN/c3d/c3d.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from torch.nn import init 3 | class c3d(nn.Module): 4 | def __init__(self,num_class,init_weights=True): 5 | super(c3d, self).__init__() 6 | 7 | self.conv1a = nn.Conv3d(3, 64, kernel_size=3, padding=1) 8 | self.conv2a = nn.Conv3d(64, 128, kernel_size=3, padding=1) 9 | self.conv3a = nn.Conv3d(128, 256, kernel_size=3, padding=1) 10 | self.conv3b = nn.Conv3d(256, 256, kernel_size=3, padding=1) 11 | self.conv4a = nn.Conv3d(256, 512, kernel_size=3, padding=1) 12 | self.conv4b = nn.Conv3d(512, 512, kernel_size=3, padding=1) 13 | self.conv5a = nn.Conv3d(512, 512, kernel_size=3, padding=1) 14 | self.conv5b = nn.Conv3d(512, 512, kernel_size=3, padding=1) 15 | 16 | self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2)) 17 | self.pool2 = nn.MaxPool3d(kernel_size=2, stride=2) 18 | self.pool3 = nn.MaxPool3d(kernel_size=2, stride=2) 19 | self.pool4 = nn.MaxPool3d(kernel_size=2, stride=2) 20 | self.pool5 = nn.MaxPool3d(kernel_size=2, stride=2)#, padding=(0, 1, 1) 21 | 22 | self.fc6 = nn.Linear(4608, 4096) 23 | self.fc7 = nn.Linear(4096, 4096) 24 | self.out = nn.Linear(4096, num_class) 25 | 26 | self.relu = nn.ReLU() 27 | self.softmax = nn.Softmax() 28 | if init_weights: 29 | self._initialize_weights() 30 | 31 | def forward(self, x): 32 | 33 | x = self.conv1a(x) 34 | x = self.relu(x) 35 | x = self.pool1(x) 36 | 37 | x = self.conv2a(x) 38 | x = self.relu(x) 39 | x = self.pool2(x) 40 | 41 | x = self.conv3a(x) 42 | x = self.relu(x) 43 | x = self.conv3b(x) 44 | x = self.relu(x) 45 | x = self.pool3(x) 46 | 47 | x = self.conv4a(x) 48 | x = self.relu(x) 49 | x = self.conv4b(x) 50 | x = self.relu(x) 51 | x = self.pool4(x) 52 | 53 | x = self.conv5a(x) 54 | x = self.relu(x) 55 | x = self.conv5b(x) 56 | x = self.relu(x) 57 | x = self.pool5(x) 58 | 59 | x = x.view(x.size(0), -1) 60 | x = self.fc6(x) 61 | x = self.relu(x) 62 | x = self.fc7(x) 63 | x = self.relu(x) 64 | res = self.out(x) 65 | # if you use CrossEntropyLoss, you don't need to add softmax in 
network 66 | # res = self.softmax(x) 67 | 68 | return res 69 | 70 | def _initialize_weights(self): 71 | for m in self.modules(): 72 | if isinstance(m, nn.Conv3d): 73 | init.xavier_uniform_(m.weight) 74 | if m.bias is not None: 75 | init.constant_(m.bias,0) 76 | elif isinstance(m, nn.Linear): 77 | init.xavier_uniform_(m.weight) 78 | init.constant_(m.bias, 0) 79 | -------------------------------------------------------------------------------- /CNN+LSTM/ALSTM/ALSTM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class lstm_cell(nn.Module): 6 | def __init__(self, input_num, hidden_num): 7 | super(lstm_cell, self).__init__() 8 | 9 | self.input_num = input_num 10 | self.hidden_num = hidden_num 11 | 12 | self.Wxi = nn.Linear(self.input_num, self.hidden_num, bias=True) 13 | self.Whi = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 14 | self.Wxf = nn.Linear(self.input_num, self.hidden_num, bias=True) 15 | self.Whf = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 16 | self.Wxc = nn.Linear(self.input_num, self.hidden_num, bias=True) 17 | self.Whc = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 18 | self.Wxo = nn.Linear(self.input_num, self.hidden_num, bias=True) 19 | self.Who = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 20 | 21 | def forward(self, xt, ht_1, ct_1): 22 | it = torch.sigmoid(self.Wxi(xt) + self.Whi(ht_1)) 23 | ft = torch.sigmoid(self.Wxf(xt) + self.Whf(ht_1)) 24 | ot = torch.sigmoid(self.Wxo(xt) + self.Who(ht_1)) 25 | ct = ft * ct_1 + it * torch.tanh(self.Wxc(xt) + self.Whc(ht_1)) 26 | ht = ot * torch.tanh(ct) 27 | return ht, ct 28 | 29 | 30 | class ALSTM(nn.Module): 31 | 32 | def __init__(self, input_num, hidden_num, num_layers,out_num ): 33 | super(ALSTM, self).__init__() 34 | 35 | # Make sure that `hidden_num` are lists having len == num_layers 36 | hidden_num = self._extend_for_multilayer(hidden_num, num_layers) 37 | if not len(hidden_num) == num_layers: 38 | raise ValueError('The length of hidden_num is not consistent with num_layers.') 39 | 40 | self.input_num = input_num 41 | self.hidden_num = hidden_num 42 | self.num_layers = num_layers 43 | self.out_num = out_num 44 | 45 | cell_list = [] 46 | for i in range(0, self.num_layers): 47 | cur_input_num = self.input_num if i == 0 else self.hidden_num[i - 1] 48 | cell_list.append(lstm_cell(cur_input_num,self.hidden_num[i])) 49 | 50 | self.cell_list = nn.ModuleList(cell_list) 51 | self.conv=nn.Sequential(*list(torchvision.models.resnet101().children())[:-2]) 52 | self.Wha=nn.Linear(self.hidden_num[-1],49) 53 | self.fc=nn.Linear(self.hidden_num[-1],self.out_num) 54 | self.softmax=nn.Softmax(dim=1) 55 | 56 | def forward(self, x, hidden_state=None): 57 | #input model: batch x channel x time x height x width 58 | #input size: 30 x 224 x 224 59 | 60 | # init -1 time hidden units 61 | if hidden_state is not None: 62 | raise NotImplementedError() 63 | else: 64 | hidden_state = self._init_hidden(batch_size=x.size(0)) 65 | out_list=[] 66 | seq_len = x.size(2) 67 | 68 | for t in range(seq_len): 69 | output_t = [] 70 | for layer_idx in range(self.num_layers): 71 | if 0==t: 72 | ht_1, ct_1 = hidden_state[layer_idx][0],hidden_state[layer_idx][1] 73 | attention_h=hidden_state[-1][0] 74 | else: 75 | ht_1, ct_1 = hct_1[layer_idx][0],hct_1[layer_idx][1] 76 | if 0==layer_idx: 77 | feature_map=self.conv(x[:, :, t, :, :]) 78 | feature_map=feature_map.view(feature_map.size(0),feature_map.size(1),-1) 79 | 
attention_map=self.Wha(attention_h) 80 | attention_map=torch.unsqueeze(self.softmax(attention_map),1) 81 | attention_feature=attention_map*feature_map 82 | attention_feature=torch.sum(attention_feature,2) 83 | ht, ct = self.cell_list[layer_idx](attention_feature,ht_1, ct_1) 84 | output_t.append([ht,ct]) 85 | else: 86 | ht, ct = self.cell_list[layer_idx](output_t[layer_idx-1][0], ht_1, ct_1) 87 | output_t.append([ht,ct]) 88 | attention_h=output_t[-1][0] 89 | hct_1=output_t 90 | out_list.append(self.fc(output_t[-1][0])) 91 | 92 | 93 | return torch.stack(out_list,1) 94 | 95 | 96 | def _init_hidden(self, batch_size): 97 | init_states = [] 98 | for i in range(self.num_layers): 99 | init_states.append([torch.zeros(batch_size, self.hidden_num[i]),torch.zeros(batch_size, self.hidden_num[i])]) 100 | return init_states 101 | 102 | 103 | @staticmethod 104 | def _extend_for_multilayer(param, num_layers): 105 | if not isinstance(param, list): 106 | param = [param] * num_layers 107 | return param 108 | 109 | -------------------------------------------------------------------------------- /CNN+LSTM/ALSTM/README.md: -------------------------------------------------------------------------------- 1 | # Action Recognition Using Visual Attention 2 | This paper can be downloaded [here](https://arxiv.org/pdf/1511.04119.pdf). 3 | 4 | ## Detailed introduction of the paper 5 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/86063410). 6 | 7 | ## Note 8 | The 2D CNN in my code is resnet101 instead of GoogLeNet. 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CNN+LSTM/LRCNs/LRCNs.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class lstm_cell(nn.Module): 6 | def __init__(self, input_num, hidden_num): 7 | super(lstm_cell, self).__init__() 8 | 9 | self.input_num = input_num 10 | self.hidden_num = hidden_num 11 | 12 | self.Wxi = nn.Linear(self.input_num, self.hidden_num, bias=True) 13 | self.Whi = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 14 | self.Wxf = nn.Linear(self.input_num, self.hidden_num, bias=True) 15 | self.Whf = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 16 | self.Wxc = nn.Linear(self.input_num, self.hidden_num, bias=True) 17 | self.Whc = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 18 | self.Wxo = nn.Linear(self.input_num, self.hidden_num, bias=True) 19 | self.Who = nn.Linear(self.hidden_num, self.hidden_num, bias=False) 20 | 21 | def forward(self, xt, ht_1, ct_1): 22 | it = torch.sigmoid(self.Wxi(xt) + self.Whi(ht_1)) 23 | ft = torch.sigmoid(self.Wxf(xt) + self.Whf(ht_1)) 24 | ot = torch.sigmoid(self.Wxo(xt) + self.Who(ht_1)) 25 | ct = ft * ct_1 + it * torch.tanh(self.Wxc(xt) + self.Whc(ht_1)) 26 | ht = ot * torch.tanh(ct) 27 | return ht, ct 28 | 29 | 30 | class LRCNs(nn.Module): 31 | 32 | def __init__(self, input_num, hidden_num, num_layers,out_num ): 33 | super(LRCNs, self).__init__() 34 | 35 | # Make sure that `hidden_num` are lists having len == num_layers 36 | hidden_num = self._extend_for_multilayer(hidden_num, num_layers) 37 | if not len(hidden_num) == num_layers: 38 | raise ValueError('The length of hidden_num is not consistent with num_layers.') 39 | 40 | self.input_num = input_num 41 | self.hidden_num = hidden_num 42 | self.num_layers = num_layers 43 | self.out_num=out_num 44 | cell_list = [] 45 | for i in range(0, self.num_layers): 46 | 
cur_input_num = self.input_num if i == 0 else self.hidden_num[i - 1] 47 | cell_list.append(lstm_cell(input_num=cur_input_num,hidden_num=self.hidden_num[i])) 48 | 49 | self.cell_list = nn.ModuleList(cell_list) 50 | self.conv=nn.Sequential(*list(torchvision.models.resnet101().children())[:-1]) 51 | self.fc = nn.Linear(self.hidden_num[-1],self.out_num) 52 | 53 | def forward(self, x, hidden_state=None): 54 | #input size: batch x channel x time x height x width 55 | 56 | # init the -1 time hidden units 57 | if hidden_state is not None: 58 | raise NotImplementedError() 59 | else: 60 | hidden_state = self._init_hidden(batch_size=x.size(0)) 61 | 62 | seq_len = x.size(2) 63 | cur_layer_input = x 64 | 65 | for layer_idx in range(self.num_layers): 66 | h, c = hidden_state[layer_idx][0],hidden_state[layer_idx][1] 67 | output_inner = [] 68 | for t in range(seq_len): 69 | if layer_idx==0: 70 | cnn_feature=torch.squeeze(self.conv(cur_layer_input[:, :, t, :, :])) 71 | h, c = self.cell_list[layer_idx](cnn_feature,h, c) 72 | else: 73 | h, c = self.cell_list[layer_idx](cur_layer_input[:, t, :], h, c) 74 | 75 | if self.num_layers==layer_idx+1: 76 | output_inner.append(self.fc(h)) 77 | else: 78 | output_inner.append(h) 79 | layer_output = torch.stack(output_inner, dim=1) 80 | cur_layer_input = layer_output 81 | 82 | return layer_output 83 | 84 | 85 | def _init_hidden(self, batch_size): 86 | init_states = [] 87 | for i in range(self.num_layers): 88 | init_states.append([torch.zeros(batch_size, self.hidden_num[i]),torch.zeros(batch_size, self.hidden_num[i])]) 89 | return init_states 90 | 91 | 92 | @staticmethod 93 | def _extend_for_multilayer(param, num_layers): 94 | if not isinstance(param, list): 95 | param = [param] * num_layers 96 | return param 97 | 98 | -------------------------------------------------------------------------------- /CNN+LSTM/LRCNs/README.md: -------------------------------------------------------------------------------- 1 | # Long-Term Recurrent Convolutional Networks for Visual Recognition and Description 2 | This paper can be downloaded [here](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Donahue_Long-Term_Recurrent_Convolutional_2015_CVPR_paper.pdf). 3 | 4 | ## Detailed introduction of the paper 5 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/85989394). 6 | 7 | ## Note 8 | The 2D CNN in my code is resnet101 instead of GoogLeNet. 9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CNN+LSTM/convpooling_LSTM/README.md: -------------------------------------------------------------------------------- 1 | # Beyond Short Snippets: Deep Networks for Video Classification 2 | This paper can be downloaded [here](https://www.cv-foundation.org/openaccess/content_cvpr_2015/papers/Ng_Beyond_Short_Snippets_2015_CVPR_paper.pdf). 3 | 4 | ## Detailed introduction of the paper 5 | I introduced the paper in detail in my [blog](https://blog.csdn.net/zzmshuai/article/details/85762257). 6 | 7 | ## Note 8 | The 2D CNN in my code is resnet101 instead of GoogLeNet. 
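## Usage example
A minimal sketch, assuming `convpooling_LSTM.py` is importable; the clip lengths follow the comments in the code (120 frames for `conv_pooling`, 30 for `cnn_lstm`), and the batch size should be greater than 1 because the per-frame features are squeezed:

```python
import torch
from convpooling_LSTM import conv_pooling, cnn_lstm

pool_model = conv_pooling(num_class=101)  # the class count is an example
clips = torch.randn(2, 3, 120, 224, 224)  # (batch, channel, time, height, width)
print(pool_model(clips).shape)            # expected: torch.Size([2, 101])

lstm_model = cnn_lstm(num_class=101)
out, hidden = lstm_model(torch.randn(2, 3, 30, 224, 224))
print(out.shape)                          # expected: torch.Size([2, 30, 101])
```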
9 | 10 | 11 | 12 | 13 | -------------------------------------------------------------------------------- /CNN+LSTM/convpooling_LSTM/convpooling_LSTM.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torchvision 4 | 5 | class conv_pooling(nn.Module): 6 | # Input size: 120x224x224 7 | # The CNN structure is first trained from single frame, then the FC layers are fine-tuned from scratch. 8 | def __init__(self, num_class): 9 | super(conv_pooling, self).__init__() 10 | 11 | self.conv=nn.Sequential(* list(torchvision.models.resnet101().children())[:-2]) 12 | self.time_pooling=nn.MaxPool3d(kernel_size=(120,1,1)) 13 | self.average_pool=nn.AvgPool3d(kernel_size=(1,7,7)) 14 | self.linear1=nn.Linear(2048,2048) 15 | self.linear2=nn.Linear(2048, num_class) 16 | def forward(self, x): 17 | t_len=x.size(2) 18 | conv_out_list=[] 19 | for i in range(t_len): 20 | conv_out_list.append(self.conv(torch.squeeze(x[:,:,i,:,:]))) 21 | conv_out=self.time_pooling(torch.stack(conv_out_list,2)) 22 | conv_out = self.average_pool(conv_out) 23 | conv_out=self.linear1(conv_out.view(conv_out.size(0),-1)) 24 | conv_out=self.linear2(conv_out) 25 | return conv_out 26 | 27 | class cnn_lstm(nn.Module): 28 | # Input size: 30x224x224 29 | # The CNN structure is first trained from single frame, then the lstm is fine-tuned from scratch. 30 | def __init__(self, num_class): 31 | super(cnn_lstm, self).__init__() 32 | 33 | self.conv = nn.Sequential(*list(torchvision.models.resnet101().children())[:-1]) 34 | self.lstm = nn.LSTM(2048,512,5,batch_first=True) 35 | self.fc=nn.Linear(512,num_class) 36 | 37 | def forward(self, x): 38 | t_len = x.size(2) 39 | conv_out_list = [] 40 | for i in range(t_len): 41 | conv_out_list.append(self.conv(torch.squeeze(x[:, :, i, :, :]))) 42 | conv_out=torch.stack(conv_out_list,1) 43 | conv_out,hidden=self.lstm(conv_out.view(conv_out.size(0),conv_out.size(1),-1)) 44 | lstm_out=[] 45 | for j in range (conv_out.size(1)): 46 | lstm_out.append(self.fc(torch.squeeze(conv_out[:,j,:]))) 47 | return torch.stack(lstm_out,1),hidden -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 MRzzm 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # action-recognition-models-pytorch (update paused) 2 | **I'm working as an intern at a company now, so the project is suspended!** 3 | 4 | I'm trying to reproduce action recognition models with PyTorch to deepen my understanding of the papers. I follow the taxonomy of deep-learning-based action recognition models shown below. 5 | ![The taxonomy of deep learning based models](http://m.qpic.cn/psb?/V146Uaoq2KWgA7/.rlEuCIe*T1BTj3MN*HcI0UG7.LRuqX9G1nKxi7HBAQ!/b/dDcBAAAAAAAA&bo=tAY8AwAAAAARB70!&rf=viewer_4) 6 | --------------------------------------------------------------------------------