├── .gitignore
├── ISSUE_TEMPLATE.md
├── LICENSE
├── OLD_README.md
├── README.md
├── config
│   ├── st_gcn.twostream
│   │   ├── ntu-xsub
│   │   │   └── train.yaml
│   │   └── ntu-xview
│   │       └── train.yaml
│   └── st_gcn
│       ├── kinetics-skeleton
│       │   ├── demo_offline.yaml
│       │   ├── demo_old.yaml
│       │   ├── demo_realtime.yaml
│       │   ├── test.yaml
│       │   └── train.yaml
│       ├── ntu-xsub
│       │   ├── test.yaml
│       │   └── train.yaml
│       └── ntu-xview
│           ├── test.yaml
│           └── train.yaml
├── feeder
│   ├── __init__.py
│   ├── feeder.py
│   ├── feeder_kinetics.py
│   └── tools.py
├── main.py
├── models
│   └── pose
│       └── coco
│           └── pose_deploy_linevec.prototxt
├── net
│   ├── __init__.py
│   ├── st_gcn.py
│   ├── st_gcn_twostream.py
│   └── utils
│       ├── __init__.py
│       ├── graph.py
│       └── tgcn.py
├── processor
│   ├── __init__.py
│   ├── demo_offline.py
│   ├── demo_old.py
│   ├── demo_realtime.py
│   ├── io.py
│   ├── processor.py
│   └── recognition.py
├── requirements.txt
├── resource
│   ├── NTU-RGB-D
│   │   └── samples_with_missing_skeletons.txt
│   ├── demo_asset
│   │   ├── attention+prediction.png
│   │   ├── attention+rgb.png
│   │   ├── original_video.png
│   │   └── pose_estimation.png
│   ├── info
│   │   ├── S001C001P001R001A044_w.gif
│   │   ├── S001C001P001R001A051_w.gif
│   │   ├── S002C001P010R001A017_w.gif
│   │   ├── S003C001P008R001A002_w.gif
│   │   ├── S003C001P008R001A008_w.gif
│   │   ├── clean_and_jerk_w.gif
│   │   ├── demo_video.gif
│   │   ├── hammer_throw_w.gif
│   │   ├── juggling_balls_w.gif
│   │   ├── pipeline.png
│   │   ├── pull_ups_w.gif
│   │   └── tai_chi_w.gif
│   ├── kinetics-motion.txt
│   ├── kinetics_skeleton
│   │   └── label_name.txt
│   ├── media
│   │   ├── clean_and_jerk.mp4
│   │   ├── skateboarding.mp4
│   │   └── ta_chi.mp4
│   └── reference_model.txt
├── tools
│   ├── __init__.py
│   ├── get_models.sh
│   ├── kinetics_gendata.py
│   ├── ntu_gendata.py
│   └── utils
│       ├── __init__.py
│       ├── ntu_read_skeleton.py
│       ├── openpose.py
│       ├── video.py
│       └── visualization.py
└── torchlight
    ├── setup.py
    └── torchlight
        ├── __init__.py
        ├── gpu.py
        └── io.py
/.gitignore:
--------------------------------------------------------------------------------
1 | #custom
2 | tmp*
3 | work_dir/*
4 | data
5 | config_v0/*
6 | backup/*
7 | .vscode
8 | model/*
9 | *.pt
10 | *.caffemodel
11 | cache/
12 |
13 | # Byte-compiled / optimized / DLL files
14 | __pycache__/
15 | *.py[cod]
16 | *$py.class
17 |
18 | # C extensions
19 | *.so
20 |
21 | # Distribution / packaging
22 | .Python
23 | build/
24 | develop-eggs/
25 | dist/
26 | downloads/
27 | eggs/
28 | .eggs/
29 | lib/
30 | lib64/
31 | parts/
32 | sdist/
33 | var/
34 | wheels/
35 | *.egg-info/
36 | .installed.cfg
37 | *.egg
38 |
39 | # PyInstaller
40 | # Usually these files are written by a python script from a template
41 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
42 | *.manifest
43 | *.spec
44 |
45 | # Installer logs
46 | pip-log.txt
47 | pip-delete-this-directory.txt
48 |
49 | # Unit test / coverage reports
50 | htmlcov/
51 | .tox/
52 | .coverage
53 | .coverage.*
54 | .cache
55 | nosetests.xml
56 | coverage.xml
57 | *.cover
58 | .hypothesis/
59 |
60 | # Translations
61 | *.mo
62 | *.pot
63 |
64 | # Django stuff:
65 | *.log
66 | local_settings.py
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # celery beat schedule file
88 | celerybeat-schedule
89 |
90 | # SageMath parsed files
91 | *.sage.py
92 |
93 | # Environments
94 | .env
95 | .venv
96 | env/
97 | venv/
98 | ENV/
99 |
100 | # Spyder project settings
101 | .spyderproject
102 | .spyproject
103 |
104 | # Rope project settings
105 | .ropeproject
106 |
107 | # mkdocs documentation
108 | /site
109 |
110 | # mypy
111 | .mypy_cache/
112 |
--------------------------------------------------------------------------------
/ISSUE_TEMPLATE.md:
--------------------------------------------------------------------------------
1 | ### Code version (Git Hash) and PyTorch version
2 |
3 | ### Dataset used
4 |
5 | ### Expected behavior
6 |
7 | ### Actual behavior
8 |
9 | ### Steps to reproduce the behavior
10 |
11 | ### Other comments
12 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2018, Multimedia Laboratory, The Chinese University of Hong Kong
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 |
7 | * Redistributions of source code must retain the above copyright notice, this
8 | list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 | this list of conditions and the following disclaimer in the documentation
12 | and/or other materials provided with the distribution.
13 |
14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 |
--------------------------------------------------------------------------------
/OLD_README.md:
--------------------------------------------------------------------------------
1 | # ST-GCN
2 |
3 | ## Introduction
4 | This repository holds the codebase, dataset and models for the paper:
5 |
6 | **Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition** Sijie Yan, Yuanjun Xiong and Dahua Lin, AAAI 2018. [[Arxiv Preprint]](https://arxiv.org/abs/1801.07455)
7 |
8 |
9 |
10 |
11 |
12 | ## Visualization of ST-GCN in Action
13 | Our demo for skeleton-based action recognition:
14 |
15 |
16 |
17 |
18 |
19 | ST-GCN is able to exploit local patterns and correlations from human skeletons.
20 | The figures below show the neural response magnitude of each node in the last layer of our ST-GCN.
21 |
22 |
23 |
24 |
25 | | Touch head | Sitting down | Take off a shoe | Eat meal/snack | Kick other person |
26 | | :------: | :------: | :------: | :------: | :------: |
27 | | Hammer throw | Clean and jerk | Pull ups | Tai chi | Juggling ball |
28 |
29 | (The corresponding response visualizations are provided as animated GIFs under `resource/info/`.)
52 |
53 |
54 | The first row of the above results is from the **NTU-RGB+D** dataset, and the second row is from **Kinetics-skeleton**.
55 |
56 |
57 | ## Prerequisites
58 | - Python3 (>3.5)
59 | - [PyTorch](http://pytorch.org/)
60 | - [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) **with** [Python API](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/installation.md#python-api). (Optional: for demo only)
61 | - Other Python libraries can be installed by `pip install -r requirements.txt`
62 |
63 |
64 |
65 | ### Installation
66 | ``` shell
67 | git clone https://github.com/yysijie/st-gcn.git; cd st-gcn
68 | cd torchlight; python setup.py install; cd ..
69 | ```
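
A quick, optional sanity check that the `torchlight` helper package was installed correctly (it only imports the names used by `main.py`; nothing here depends on a GPU setup):

```shell
python -c "import torchlight; from torchlight import import_class; print('torchlight OK')"
```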
70 |
71 | ### Get pretrained models
72 | We provide the pretrained model weights of our **ST-GCN**. The model weights can be downloaded by running the script
73 | ```
74 | bash tools/get_models.sh
75 | ```
76 |
77 | You can also obtain models from [GoogleDrive](https://drive.google.com/drive/folders/1IYKoSrjeI3yYJ9bO0_z_eDo92i7ob_aF) or [BaiduYun](https://pan.baidu.com/s/1dwKG2TLvG-R1qeIiE4MjeA#list/path=%2FShare%2FAAAI18%2Fst-gcn%2Fmodels&parentPath=%2FShare), and manually put them into ```./models```.
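
After downloading, `./models` is expected to contain at least the checkpoints referenced by the configuration files in this repository (a sketch; the exact archive contents may differ):

```
models/
├── pose/coco/pose_deploy_linevec.prototxt   # OpenPose model definition (already in the repo)
├── st_gcn.kinetics.pt
├── st_gcn.ntu-xsub.pt
└── st_gcn.ntu-xview.pt
```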
78 |
79 | ## Demo
80 |
81 |
82 |
83 | You can use the following commands to run the demo.
84 |
85 | ```shell
86 | # with offline pose estimation
87 | python main.py demo_offline [--video ${PATH_TO_VIDEO}] [--openpose ${PATH_TO_OPENPOSE}]
88 |
89 | # with realtime pose estimation
90 | python main.py demo [--video ${PATH_TO_VIDEO}] [--openpose ${PATH_TO_OPENPOSE}]
91 | ```
92 |
93 | Optional arguments:
94 |
95 | - `PATH_TO_OPENPOSE`: It is required if the Openpose Python API is not in `PYTHONPATH`.
96 | - `PATH_TO_VIDEO`: Filename of the input video.
97 |
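For example, a hypothetical offline-demo run on the bundled sample clip (the video ships with the repository under `resource/media/`; the OpenPose path is a placeholder for your own build):

```shell
python main.py demo_offline \
    --video resource/media/clean_and_jerk.mp4 \
    --openpose <path to openpose build folder>
```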
98 |
102 |
103 |
104 |
105 | ## Data Preparation
106 |
107 | We experimented on two skeleton-based action recognition datasets: **Kinetics-skeleton** and **NTU RGB+D**.
108 | Before training and testing, for convenience of fast data loading,
109 | the datasets should be converted to the proper file structure.
110 | You can download the pre-processed data from
111 | [GoogleDrive](https://drive.google.com/open?id=103NOL9YYZSW1hLoWmYnv5Fs8mK-Ij7qb)
112 | and extract files with
113 | ```
114 | cd st-gcn
115 | unzip <path to the downloaded zip file>
116 | ```
117 | Otherwise, to process the raw data yourself,
118 | please refer to the guidance below.
119 |
120 | #### Kinetics-skeleton
121 | [Kinetics](https://deepmind.com/research/open-source/open-source-datasets/kinetics/) is a video-based dataset for action recognition which only provides raw video clips without skeleton data. To obtain the joint locations, we first resized all videos to a resolution of 340x256 and converted the frame rate to 30 fps. Then, we extracted skeletons from each frame with [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose). The extracted skeleton data, which we call **Kinetics-skeleton** (7.5GB), can be directly downloaded from [GoogleDrive](https://drive.google.com/open?id=1SPQ6FmFsjGg3f59uCWfdUWI-5HJM_YhZ) or [BaiduYun](https://pan.baidu.com/s/1dwKG2TLvG-R1qeIiE4MjeA#list/path=%2FShare%2FAAAI18%2Fkinetics-skeleton&parentPath=%2FShare).
122 |
123 | After uncompressing, rebuild the database by this command:
124 | ```
125 | python tools/kinetics_gendata.py --data_path <path to kinetics-skeleton>
126 | ```
127 |
128 | #### NTU RGB+D
129 | NTU RGB+D can be downloaded from [their website](http://rose1.ntu.edu.sg/datasets/actionrecognition.asp).
130 | Only the **3D skeletons**(5.8GB) modality is required in our experiments. After that, this command should be used to build the database for training or evaluation:
131 | ```
132 | python tools/ntu_gendata.py --data_path
133 | ```
134 | where the `````` points to the 3D skeletons modality of NTU RGB+D dataset you download.
135 |
136 |
137 | ## Testing Pretrained Models
138 |
139 |
141 |
142 | To evaluate the ST-GCN model pretrained on **Kinetics-skeleton**, run
143 | ```
144 | python main.py recognition -c config/st_gcn/kinetics-skeleton/test.yaml
145 | ```
146 | For **cross-view** evaluation in **NTU RGB+D**, run
147 | ```
148 | python main.py recognition -c config/st_gcn/ntu-xview/test.yaml
149 | ```
150 | For **cross-subject** evaluation in **NTU RGB+D**, run
151 | ```
152 | python main.py recognition -c config/st_gcn/ntu-xsub/test.yaml
153 | ```
154 |
155 |
156 |
157 | To speed up evaluation by multi-GPU inference, or to modify the batch size to reduce the memory cost, set ```--test_batch_size``` and ```--device``` like:
158 | ```
159 | python main.py recognition -c <config file> --test_batch_size <batch size> --device <gpu0> <gpu1> ...
160 | ```
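
For instance, to evaluate the Kinetics-skeleton model on four GPUs with a larger test batch (the numbers are illustrative):

```shell
python main.py recognition -c config/st_gcn/kinetics-skeleton/test.yaml \
    --test_batch_size 128 --device 0 1 2 3
```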
161 |
162 | ### Results
163 | The expected **Top-1 accuracy** of the provided models is shown here:
164 |
165 | | Model | Kinetics-skeleton (%) | NTU RGB+D Cross View (%) | NTU RGB+D Cross Subject (%) |
166 | | :------| :------: | :------: | :------: |
167 | |Baseline[1]| 20.3 | 83.1 | 74.3 |
168 | |**ST-GCN** (Ours)| **31.6**| **88.8** | **81.6** |
169 |
170 | [1] Kim, T. S., and Reiter, A. 2017. Interpretable 3d human action analysis with temporal convolutional networks. In BNMW CVPRW.
171 |
172 | ## Training
173 | To train a new ST-GCN model, run
174 |
175 | ```
176 | python main.py recognition -c config/st_gcn/<dataset>/train.yaml [--work_dir <work folder>]
177 | ```
178 | where `<dataset>` must be ```ntu-xsub```, ```ntu-xview``` or ```kinetics-skeleton```, depending on the dataset you want to use.
179 | The training results, including **model weights**, configurations and logging files, will be saved under ```./work_dir``` by default, or under `<work folder>` if you specify one.
180 |
181 | You can modify the training parameters such as ```work_dir```, ```batch_size```, ```step```, ```base_lr``` and ```device``` in the command line or configuration files. The order of priority is: command line > config file > default parameter. For more information, use ```main.py -h```.
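
For example, a hypothetical single-GPU training run that overrides a few settings of the cross-subject configuration from the command line:

```shell
python main.py recognition -c config/st_gcn/ntu-xsub/train.yaml \
    --work_dir ./work_dir/my_ntu_xsub --batch_size 32 --device 0
```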
182 |
183 | Finally, custom model evaluation can be achieved by this command as we mentioned above:
184 | ```
185 | python main.py recognition -c config/st_gcn/<dataset>/test.yaml --weights <path to model weights>
186 | ```
187 |
188 | ## Citation
189 | Please cite the following paper if you use this repository in your research.
190 | ```
191 | @inproceedings{stgcn2018aaai,
192 | title = {Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition},
193 | author = {Sijie Yan and Yuanjun Xiong and Dahua Lin},
194 | booktitle = {AAAI},
195 | year = {2018},
196 | }
197 | ```
198 |
199 | ## Contact
200 | For any questions, feel free to contact
201 | ```
202 | Sijie Yan : ys016@ie.cuhk.edu.hk
203 | Yuanjun Xiong : bitxiong@gmail.com
204 | ```
205 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Reminder
2 |
3 | ST-GCN has been transferred to [MMSkeleton](https://github.com/open-mmlab/mmskeleton),
4 | and keeps developing as a flexible, open-source toolbox for skeleton-based human understanding.
5 | You are welcome to migrate to the new MMSkeleton.
6 | Custom networks, data loaders and checkpoints of the old ST-GCN are compatible with MMSkeleton.
7 | If you want to use the old ST-GCN, please refer to [OLD_README.md](./OLD_README.md).
8 |
9 | This codebase will soon no longer be maintained; it exists as a historical artifact to supplement our AAAI paper:
10 |
11 | > **Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition**, Sijie Yan, Yuanjun Xiong and Dahua Lin, AAAI 2018. [[Arxiv Preprint]](https://arxiv.org/abs/1801.07455)
12 |
13 | For more recent works, please check out MMSkeleton.
14 |
15 |
--------------------------------------------------------------------------------
/config/st_gcn.twostream/ntu-xsub/train.yaml:
--------------------------------------------------------------------------------
1 | work_dir: ./work_dir/recognition/ntu-xsub/ST_GCN_TWO_STREAM
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | train_feeder_args:
6 | data_path: ./data/NTU-RGB-D/xsub/train_data.npy
7 | label_path: ./data/NTU-RGB-D/xsub/train_label.pkl
8 | test_feeder_args:
9 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy
10 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl
11 |
12 | # model
13 | model: net.st_gcn_twostream.Model
14 | model_args:
15 | in_channels: 3
16 | num_class: 60
17 | dropout: 0.5
18 | edge_importance_weighting: True
19 | graph_args:
20 | layout: 'ntu-rgb+d'
21 | strategy: 'spatial'
22 |
23 | #optim
24 | weight_decay: 0.0001
25 | base_lr: 0.1
26 | step: [10, 50]
27 |
28 | # training
29 | device: [0,1,2,3]
30 | batch_size: 32
31 | test_batch_size: 32
32 | num_epoch: 80
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config/st_gcn.twostream/ntu-xview/train.yaml:
--------------------------------------------------------------------------------
1 | work_dir: ./work_dir/recognition/ntu-xview/ST_GCN_TWO_STREAM
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | train_feeder_args:
6 | data_path: ./data/NTU-RGB-D/xview/train_data.npy
7 | label_path: ./data/NTU-RGB-D/xview/train_label.pkl
8 | test_feeder_args:
9 | data_path: ./data/NTU-RGB-D/xview/val_data.npy
10 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl
11 |
12 | # model
13 | model: net.st_gcn_twostream.Model
14 | model_args:
15 | in_channels: 3
16 | num_class: 60
17 | dropout: 0.5
18 | edge_importance_weighting: True
19 | graph_args:
20 | layout: 'ntu-rgb+d'
21 | strategy: 'spatial'
22 |
23 | #optim
24 | weight_decay: 0.0001
25 | base_lr: 0.1
26 | step: [10, 50]
27 |
28 | # training
29 | device: [0,1,2,3]
30 | batch_size: 32
31 | test_batch_size: 32
32 | num_epoch: 80
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config/st_gcn/kinetics-skeleton/demo_offline.yaml:
--------------------------------------------------------------------------------
1 | weights: ./models/st_gcn.kinetics.pt
2 | model_fps: 30
3 |
4 | # model
5 | model: net.st_gcn.Model
6 | model_args:
7 | in_channels: 3
8 | num_class: 400
9 | edge_importance_weighting: True
10 | graph_args:
11 | layout: 'openpose'
12 | strategy: 'spatial'
13 |
14 | # training
15 | device: [0]
--------------------------------------------------------------------------------
/config/st_gcn/kinetics-skeleton/demo_old.yaml:
--------------------------------------------------------------------------------
1 | weights: ./models/st_gcn.kinetics.pt
2 |
3 | # model
4 | model: net.st_gcn.Model
5 | model_args:
6 | in_channels: 3
7 | num_class: 400
8 | edge_importance_weighting: True
9 | graph_args:
10 | layout: 'openpose'
11 | strategy: 'spatial'
12 |
13 | # training
14 | device: [0]
--------------------------------------------------------------------------------
/config/st_gcn/kinetics-skeleton/demo_realtime.yaml:
--------------------------------------------------------------------------------
1 | weights: ./models/st_gcn.kinetics.pt
2 | model_fps: 30
3 |
4 | # model
5 | model: net.st_gcn.Model
6 | model_args:
7 | in_channels: 3
8 | num_class: 400
9 | edge_importance_weighting: True
10 | graph_args:
11 | layout: 'openpose'
12 | strategy: 'spatial'
13 |
14 | # training
15 | device: [0]
--------------------------------------------------------------------------------
/config/st_gcn/kinetics-skeleton/test.yaml:
--------------------------------------------------------------------------------
1 | weights: ./models/st_gcn.kinetics.pt
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | test_feeder_args:
6 | data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy
7 | label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl
8 |
9 | # model
10 | model: net.st_gcn.Model
11 | model_args:
12 | in_channels: 3
13 | num_class: 400
14 | edge_importance_weighting: True
15 | graph_args:
16 | layout: 'openpose'
17 | strategy: 'spatial'
18 |
19 | # test
20 | phase: test
21 | device: 0
22 | test_batch_size: 64
23 |
--------------------------------------------------------------------------------
/config/st_gcn/kinetics-skeleton/train.yaml:
--------------------------------------------------------------------------------
1 | work_dir: ./work_dir/recognition/kinetics_skeleton/ST_GCN
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | train_feeder_args:
6 | random_choose: True
7 | random_move: True
8 | window_size: 150
9 | data_path: ./data/Kinetics/kinetics-skeleton/train_data.npy
10 | label_path: ./data/Kinetics/kinetics-skeleton/train_label.pkl
11 | test_feeder_args:
12 | data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy
13 | label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl
14 |
15 | # model
16 | model: net.st_gcn.Model
17 | model_args:
18 | in_channels: 3
19 | num_class: 400
20 | edge_importance_weighting: True
21 | graph_args:
22 | layout: 'openpose'
23 | strategy: 'spatial'
24 |
25 | # training
26 | device: [0,1,2,3]
27 | batch_size: 256
28 | test_batch_size: 256
29 |
30 | #optim
31 | base_lr: 0.1
32 | step: [20, 30, 40, 50]
33 | num_epoch: 50
34 |
35 |
36 |
37 |
--------------------------------------------------------------------------------
/config/st_gcn/ntu-xsub/test.yaml:
--------------------------------------------------------------------------------
1 | weights: ./models/st_gcn.ntu-xsub.pt
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | test_feeder_args:
6 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy
7 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl
8 |
9 | # model
10 | model: net.st_gcn.Model
11 | model_args:
12 | in_channels: 3
13 | num_class: 60
14 | dropout: 0.5
15 | edge_importance_weighting: True
16 | graph_args:
17 | layout: 'ntu-rgb+d'
18 | strategy: 'spatial'
19 |
20 | # test
21 | phase: test
22 | device: 0
23 | test_batch_size: 64
24 |
25 |
--------------------------------------------------------------------------------
/config/st_gcn/ntu-xsub/train.yaml:
--------------------------------------------------------------------------------
1 | work_dir: ./work_dir/recognition/ntu-xsub/ST_GCN
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | train_feeder_args:
6 | data_path: ./data/NTU-RGB-D/xsub/train_data.npy
7 | label_path: ./data/NTU-RGB-D/xsub/train_label.pkl
8 | test_feeder_args:
9 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy
10 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl
11 |
12 | # model
13 | model: net.st_gcn.Model
14 | model_args:
15 | in_channels: 3
16 | num_class: 60
17 | dropout: 0.5
18 | edge_importance_weighting: True
19 | graph_args:
20 | layout: 'ntu-rgb+d'
21 | strategy: 'spatial'
22 |
23 | #optim
24 | weight_decay: 0.0001
25 | base_lr: 0.1
26 | step: [10, 50]
27 |
28 | # training
29 | device: [0,1,2,3]
30 | batch_size: 64
31 | test_batch_size: 64
32 | num_epoch: 80
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/config/st_gcn/ntu-xview/test.yaml:
--------------------------------------------------------------------------------
1 | weights: ./models/st_gcn.ntu-xview.pt
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | test_feeder_args:
6 | data_path: ./data/NTU-RGB-D/xview/val_data.npy
7 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl
8 |
9 | # model
10 | model: net.st_gcn.Model
11 | model_args:
12 | in_channels: 3
13 | num_class: 60
14 | dropout: 0.5
15 | edge_importance_weighting: True
16 | graph_args:
17 | layout: 'ntu-rgb+d'
18 | strategy: 'spatial'
19 |
20 | # test
21 | phase: test
22 | device: 0
23 | test_batch_size: 64
24 |
25 |
--------------------------------------------------------------------------------
/config/st_gcn/ntu-xview/train.yaml:
--------------------------------------------------------------------------------
1 | work_dir: ./work_dir/recognition/ntu-xview/ST_GCN
2 |
3 | # feeder
4 | feeder: feeder.feeder.Feeder
5 | train_feeder_args:
6 | data_path: ./data/NTU-RGB-D/xview/train_data.npy
7 | label_path: ./data/NTU-RGB-D/xview/train_label.pkl
8 | test_feeder_args:
9 | data_path: ./data/NTU-RGB-D/xview/val_data.npy
10 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl
11 |
12 | # model
13 | model: net.st_gcn.Model
14 | model_args:
15 | in_channels: 3
16 | num_class: 60
17 | dropout: 0.5
18 | edge_importance_weighting: True
19 | graph_args:
20 | layout: 'ntu-rgb+d'
21 | strategy: 'spatial'
22 |
23 | #optim
24 | weight_decay: 0.0001
25 | base_lr: 0.1
26 | step: [10, 50]
27 |
28 | # training
29 | device: [0,1,2,3]
30 | batch_size: 64
31 | test_batch_size: 64
32 | num_epoch: 80
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/feeder/__init__.py:
--------------------------------------------------------------------------------
1 | from . import tools
--------------------------------------------------------------------------------
/feeder/feeder.py:
--------------------------------------------------------------------------------
1 | # sys
2 | import os
3 | import sys
4 | import numpy as np
5 | import random
6 | import pickle
7 |
8 | # torch
9 | import torch
10 | import torch.nn as nn
11 | import torch.optim as optim
12 | import torch.nn.functional as F
13 | from torchvision import datasets, transforms
14 |
15 | # visualization
16 | import time
17 |
18 | # operation
19 | from . import tools
20 |
21 | class Feeder(torch.utils.data.Dataset):
22 | """ Feeder for skeleton-based action recognition
23 | Arguments:
24 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M)
25 | label_path: the path to label
26 | random_choose: If true, randomly choose a portion of the input sequence
27 | random_shift: If true, randomly pad zeros at the beginning or end of sequence
28 | window_size: The length of the output sequence
29 | normalization: If true, normalize input sequence
30 | debug: If true, only use the first 100 samples
31 | """
32 |
33 | def __init__(self,
34 | data_path,
35 | label_path,
36 | random_choose=False,
37 | random_move=False,
38 | window_size=-1,
39 | debug=False,
40 | mmap=True):
41 | self.debug = debug
42 | self.data_path = data_path
43 | self.label_path = label_path
44 | self.random_choose = random_choose
45 | self.random_move = random_move
46 | self.window_size = window_size
47 |
48 | self.load_data(mmap)
49 |
50 | def load_data(self, mmap):
51 | # data: N, C, T, V, M
52 |
53 | # load label
54 | with open(self.label_path, 'rb') as f:
55 | self.sample_name, self.label = pickle.load(f)
56 |
57 | # load data
58 | if mmap:
59 | self.data = np.load(self.data_path, mmap_mode='r')
60 | else:
61 | self.data = np.load(self.data_path)
62 |
63 | if self.debug:
64 | self.label = self.label[0:100]
65 | self.data = self.data[0:100]
66 | self.sample_name = self.sample_name[0:100]
67 |
68 | self.N, self.C, self.T, self.V, self.M = self.data.shape
69 |
70 | def __len__(self):
71 | return len(self.label)
72 |
73 | def __getitem__(self, index):
74 | # get data
75 | data_numpy = np.array(self.data[index])
76 | label = self.label[index]
77 |
78 | # processing
79 | if self.random_choose:
80 | data_numpy = tools.random_choose(data_numpy, self.window_size)
81 | elif self.window_size > 0:
82 | data_numpy = tools.auto_pading(data_numpy, self.window_size)
83 | if self.random_move:
84 | data_numpy = tools.random_move(data_numpy)
85 |
86 | return data_numpy, label
--------------------------------------------------------------------------------
/feeder/feeder_kinetics.py:
--------------------------------------------------------------------------------
1 | # sys
2 | import os
3 | import sys
4 | import numpy as np
5 | import random
6 | import pickle
7 | import json
8 | # torch
9 | import torch
10 | import torch.nn as nn
11 | from torchvision import datasets, transforms
12 |
13 | # operation
14 | from . import tools
15 |
16 |
17 | class Feeder_kinetics(torch.utils.data.Dataset):
18 | """ Feeder for skeleton-based action recognition in kinetics-skeleton dataset
19 | Arguments:
20 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M)
21 | label_path: the path to label
22 | random_choose: If true, randomly choose a portion of the input sequence
23 | random_shift: If true, randomly pad zeros at the beginning or end of sequence
24 | random_move: If true, apply a random but temporally continuous transformation to the input sequence
25 | window_size: The length of the output sequence
26 | pose_matching: If true, match the poses between two frames
27 | num_person_in: The number of people the feeder can observe in the input sequence
28 | num_person_out: The number of people kept in the output sequence
29 | debug: If true, only use the first 100 samples
30 | """
31 |
32 | def __init__(self,
33 | data_path,
34 | label_path,
35 | ignore_empty_sample=True,
36 | random_choose=False,
37 | random_shift=False,
38 | random_move=False,
39 | window_size=-1,
40 | pose_matching=False,
41 | num_person_in=5,
42 | num_person_out=2,
43 | debug=False):
44 | self.debug = debug
45 | self.data_path = data_path
46 | self.label_path = label_path
47 | self.random_choose = random_choose
48 | self.random_shift = random_shift
49 | self.random_move = random_move
50 | self.window_size = window_size
51 | self.num_person_in = num_person_in
52 | self.num_person_out = num_person_out
53 | self.pose_matching = pose_matching
54 | self.ignore_empty_sample = ignore_empty_sample
55 |
56 | self.load_data()
57 |
58 | def load_data(self):
59 | # load file list
60 | self.sample_name = os.listdir(self.data_path)
61 |
62 | if self.debug:
63 | self.sample_name = self.sample_name[0:2]
64 |
65 | # load label
66 | label_path = self.label_path
67 | with open(label_path) as f:
68 | label_info = json.load(f)
69 |
70 | sample_id = [name.split('.')[0] for name in self.sample_name]
71 | self.label = np.array(
72 | [label_info[id]['label_index'] for id in sample_id])
73 | has_skeleton = np.array(
74 | [label_info[id]['has_skeleton'] for id in sample_id])
75 |
76 | # ignore the samples which do not have a skeleton sequence
77 | if self.ignore_empty_sample:
78 | self.sample_name = [
79 | s for h, s in zip(has_skeleton, self.sample_name) if h
80 | ]
81 | self.label = self.label[has_skeleton]
82 |
83 | # output data shape (N, C, T, V, M)
84 | self.N = len(self.sample_name) #sample
85 | self.C = 3 #channel
86 | self.T = 300 #frame
87 | self.V = 18 #joint
88 | self.M = self.num_person_out #person
89 |
90 | def __len__(self):
91 | return len(self.sample_name)
92 |
93 | def __iter__(self):
94 | return self
95 |
96 | def __getitem__(self, index):
97 |
98 | # output shape (C, T, V, M)
99 | # get data
100 | sample_name = self.sample_name[index]
101 | sample_path = os.path.join(self.data_path, sample_name)
102 | with open(sample_path, 'r') as f:
103 | video_info = json.load(f)
104 |
105 | # fill data_numpy
106 | data_numpy = np.zeros((self.C, self.T, self.V, self.num_person_in))
107 | for frame_info in video_info['data']:
108 | frame_index = frame_info['frame_index']
109 | for m, skeleton_info in enumerate(frame_info["skeleton"]):
110 | if m >= self.num_person_in:
111 | break
112 | pose = skeleton_info['pose']
113 | score = skeleton_info['score']
114 | data_numpy[0, frame_index, :, m] = pose[0::2]
115 | data_numpy[1, frame_index, :, m] = pose[1::2]
116 | data_numpy[2, frame_index, :, m] = score
117 |
118 | # centralization
119 | data_numpy[0:2] = data_numpy[0:2] - 0.5
120 | data_numpy[0][data_numpy[2] == 0] = 0
121 | data_numpy[1][data_numpy[2] == 0] = 0
122 |
123 | # get & check label index
124 | label = video_info['label_index']
125 | assert (self.label[index] == label)
126 |
127 | # data augmentation
128 | if self.random_shift:
129 | data_numpy = tools.random_shift(data_numpy)
130 | if self.random_choose:
131 | data_numpy = tools.random_choose(data_numpy, self.window_size)
132 | elif self.window_size > 0:
133 | data_numpy = tools.auto_pading(data_numpy, self.window_size)
134 | if self.random_move:
135 | data_numpy = tools.random_move(data_numpy)
136 |
137 | # sort by score
138 | sort_index = (-data_numpy[2, :, :, :].sum(axis=1)).argsort(axis=1)
139 | for t, s in enumerate(sort_index):
140 | data_numpy[:, t, :, :] = data_numpy[:, t, :, s].transpose((1, 2,
141 | 0))
142 | data_numpy = data_numpy[:, :, :, 0:self.num_person_out]
143 |
144 | # match poses between 2 frames
145 | if self.pose_matching:
146 | data_numpy = tools.openpose_match(data_numpy)
147 |
148 | return data_numpy, label
149 |
150 | def top_k(self, score, top_k):
151 | assert (all(self.label >= 0))
152 |
153 | rank = score.argsort()
154 | hit_top_k = [l in rank[i, -top_k:] for i, l in enumerate(self.label)]
155 | return sum(hit_top_k) * 1.0 / len(hit_top_k)
156 |
157 | def top_k_by_category(self, score, top_k):
158 | assert (all(self.label >= 0))
159 | return tools.top_k_by_category(self.label, score, top_k)
160 |
161 | def calculate_recall_precision(self, score):
162 | assert (all(self.label >= 0))
163 | return tools.calculate_recall_precision(self.label, score)
164 |
--------------------------------------------------------------------------------
/feeder/tools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
4 |
5 | def downsample(data_numpy, step, random_sample=True):
6 | # input: C,T,V,M
7 | begin = np.random.randint(step) if random_sample else 0
8 | return data_numpy[:, begin::step, :, :]
9 |
10 |
11 | def temporal_slice(data_numpy, step):
12 | # input: C,T,V,M
13 | C, T, V, M = data_numpy.shape
14 | return data_numpy.reshape(C, T // step, step, V, M).transpose(
15 | (0, 1, 3, 2, 4)).reshape(C, T // step, V, step * M)
16 |
17 |
18 | def mean_subtractor(data_numpy, mean):
19 | # input: C,T,V,M
20 | # naive version
21 | if mean == 0:
22 | return
23 | C, T, V, M = data_numpy.shape
24 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
25 | begin = valid_frame.argmax()
26 | end = len(valid_frame) - valid_frame[::-1].argmax()
27 | data_numpy[:, :end, :, :] = data_numpy[:, :end, :, :] - mean
28 | return data_numpy
29 |
30 |
31 | def auto_pading(data_numpy, size, random_pad=False):
32 | C, T, V, M = data_numpy.shape
33 | if T < size:
34 | begin = random.randint(0, size - T) if random_pad else 0
35 | data_numpy_paded = np.zeros((C, size, V, M))
36 | data_numpy_paded[:, begin:begin + T, :, :] = data_numpy
37 | return data_numpy_paded
38 | else:
39 | return data_numpy
40 |
41 |
42 | def random_choose(data_numpy, size, auto_pad=True):
43 | # input: C,T,V,M
44 | C, T, V, M = data_numpy.shape
45 | if T == size:
46 | return data_numpy
47 | elif T < size:
48 | if auto_pad:
49 | return auto_pading(data_numpy, size, random_pad=True)
50 | else:
51 | return data_numpy
52 | else:
53 | begin = random.randint(0, T - size)
54 | return data_numpy[:, begin:begin + size, :, :]
55 |
56 |
57 | def random_move(data_numpy,
58 | angle_candidate=[-10., -5., 0., 5., 10.],
59 | scale_candidate=[0.9, 1.0, 1.1],
60 | transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2],
61 | move_time_candidate=[1]):
62 | # input: C,T,V,M
63 | C, T, V, M = data_numpy.shape
64 | move_time = random.choice(move_time_candidate)
65 | node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
66 | node = np.append(node, T)
67 | num_node = len(node)
68 |
69 | A = np.random.choice(angle_candidate, num_node)
70 | S = np.random.choice(scale_candidate, num_node)
71 | T_x = np.random.choice(transform_candidate, num_node)
72 | T_y = np.random.choice(transform_candidate, num_node)
73 |
74 | a = np.zeros(T)
75 | s = np.zeros(T)
76 | t_x = np.zeros(T)
77 | t_y = np.zeros(T)
78 |
79 | # linspace
80 | for i in range(num_node - 1):
81 | a[node[i]:node[i + 1]] = np.linspace(
82 | A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180
83 | s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1],
84 | node[i + 1] - node[i])
85 | t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1],
86 | node[i + 1] - node[i])
87 | t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1],
88 | node[i + 1] - node[i])
89 |
90 | theta = np.array([[np.cos(a) * s, -np.sin(a) * s],
91 | [np.sin(a) * s, np.cos(a) * s]])
92 |
93 | # perform transformation
94 | for i_frame in range(T):
95 | xy = data_numpy[0:2, i_frame, :, :]
96 | new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
97 | new_xy[0] += t_x[i_frame]
98 | new_xy[1] += t_y[i_frame]
99 | data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)
100 |
101 | return data_numpy
102 |
103 |
104 | def random_shift(data_numpy):
105 | # input: C,T,V,M
106 | C, T, V, M = data_numpy.shape
107 | data_shift = np.zeros(data_numpy.shape)
108 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
109 | begin = valid_frame.argmax()
110 | end = len(valid_frame) - valid_frame[::-1].argmax()
111 |
112 | size = end - begin
113 | bias = random.randint(0, T - size)
114 | data_shift[:, bias:bias + size, :, :] = data_numpy[:, begin:end, :, :]
115 |
116 | return data_shift
117 |
118 |
119 | def openpose_match(data_numpy):
120 | C, T, V, M = data_numpy.shape
121 | assert (C == 3)
122 | score = data_numpy[2, :, :, :].sum(axis=1)
123 | # the rank of body confidence in each frame (shape: T-1, M)
124 | rank = (-score[0:T - 1]).argsort(axis=1).reshape(T - 1, M)
125 |
126 | # data of frame 1
127 | xy1 = data_numpy[0:2, 0:T - 1, :, :].reshape(2, T - 1, V, M, 1)
128 | # data of frame 2
129 | xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M)
130 | # square of distance between frame 1&2 (shape: T-1, M, M)
131 | distance = ((xy2 - xy1)**2).sum(axis=2).sum(axis=0)
132 |
133 | # match pose
134 | forward_map = np.zeros((T, M), dtype=int) - 1
135 | forward_map[0] = range(M)
136 | for m in range(M):
137 | choose = (rank == m)
138 | forward = distance[choose].argmin(axis=1)
139 | for t in range(T - 1):
140 | distance[t, :, forward[t]] = np.inf
141 | forward_map[1:][choose] = forward
142 | assert (np.all(forward_map >= 0))
143 |
144 | # string data
145 | for t in range(T - 1):
146 | forward_map[t + 1] = forward_map[t + 1][forward_map[t]]
147 |
148 | # generate data
149 | new_data_numpy = np.zeros(data_numpy.shape)
150 | for t in range(T):
151 | new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[
152 | t]].transpose(1, 2, 0)
153 | data_numpy = new_data_numpy
154 |
155 | # score sort
156 | trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0)
157 | rank = (-trace_score).argsort()
158 | data_numpy = data_numpy[:, :, :, rank]
159 |
160 | return data_numpy
161 |
162 |
163 | def top_k_by_category(label, score, top_k):
164 | instance_num, class_num = score.shape
165 | rank = score.argsort()
166 | hit_top_k = [[] for i in range(class_num)]
167 | for i in range(instance_num):
168 | l = label[i]
169 | hit_top_k[l].append(l in rank[i, -top_k:])
170 |
171 | accuracy_list = []
172 | for hit_per_category in hit_top_k:
173 | if hit_per_category:
174 | accuracy_list.append(sum(hit_per_category) * 1.0 / len(hit_per_category))
175 | else:
176 | accuracy_list.append(0.0)
177 | return accuracy_list
178 |
179 |
180 | def calculate_recall_precision(label, score):
181 | instance_num, class_num = score.shape
182 | rank = score.argsort()
183 | confusion_matrix = np.zeros([class_num, class_num])
184 |
185 | for i in range(instance_num):
186 | true_l = label[i]
187 | pred_l = rank[i, -1]
188 | confusion_matrix[true_l][pred_l] += 1
189 |
190 | precision = []
191 | recall = []
192 |
193 | for i in range(class_num):
194 | true_p = confusion_matrix[i][i]
195 | false_n = sum(confusion_matrix[i, :]) - true_p
196 | false_p = sum(confusion_matrix[:, i]) - true_p
197 | precision.append(true_p * 1.0 / (true_p + false_p))
198 | recall.append(true_p * 1.0 / (true_p + false_n))
199 |
200 | return precision, recall
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import sys
4 |
5 | # torchlight
6 | import torchlight
7 | from torchlight import import_class
8 |
9 | if __name__ == '__main__':
10 |
11 | parser = argparse.ArgumentParser(description='Processor collection')
12 |
13 | # region register processor yapf: disable
14 | processors = dict()
15 | processors['recognition'] = import_class('processor.recognition.REC_Processor')
16 | processors['demo_old'] = import_class('processor.demo_old.Demo')
17 | processors['demo'] = import_class('processor.demo_realtime.DemoRealtime')
18 | processors['demo_offline'] = import_class('processor.demo_offline.DemoOffline')
19 | #endregion yapf: enable
20 |
21 | # add sub-parser
22 | subparsers = parser.add_subparsers(dest='processor')
23 | for k, p in processors.items():
24 | subparsers.add_parser(k, parents=[p.get_parser()])
25 |
26 | # read arguments
27 | arg = parser.parse_args()
28 |
29 | # start
30 | Processor = processors[arg.processor]
31 | p = Processor(sys.argv[2:])
32 |
33 | p.start()
34 |
--------------------------------------------------------------------------------
/models/pose/coco/pose_deploy_linevec.prototxt:
--------------------------------------------------------------------------------
1 | input: "image"
2 | input_dim: 1
3 | input_dim: 3
4 | input_dim: 1 # This value will be defined at runtime
5 | input_dim: 1 # This value will be defined at runtime
6 | layer {
7 | name: "conv1_1"
8 | type: "Convolution"
9 | bottom: "image"
10 | top: "conv1_1"
11 | param {
12 | lr_mult: 1.0
13 | decay_mult: 1
14 | }
15 | param {
16 | lr_mult: 2.0
17 | decay_mult: 0
18 | }
19 | convolution_param {
20 | num_output: 64
21 | pad: 1
22 | kernel_size: 3
23 | weight_filler {
24 | type: "gaussian"
25 | std: 0.01
26 | }
27 | bias_filler {
28 | type: "constant"
29 | }
30 | }
31 | }
32 | layer {
33 | name: "relu1_1"
34 | type: "ReLU"
35 | bottom: "conv1_1"
36 | top: "conv1_1"
37 | }
38 | layer {
39 | name: "conv1_2"
40 | type: "Convolution"
41 | bottom: "conv1_1"
42 | top: "conv1_2"
43 | param {
44 | lr_mult: 1.0
45 | decay_mult: 1
46 | }
47 | param {
48 | lr_mult: 2.0
49 | decay_mult: 0
50 | }
51 | convolution_param {
52 | num_output: 64
53 | pad: 1
54 | kernel_size: 3
55 | weight_filler {
56 | type: "gaussian"
57 | std: 0.01
58 | }
59 | bias_filler {
60 | type: "constant"
61 | }
62 | }
63 | }
64 | layer {
65 | name: "relu1_2"
66 | type: "ReLU"
67 | bottom: "conv1_2"
68 | top: "conv1_2"
69 | }
70 | layer {
71 | name: "pool1_stage1"
72 | type: "Pooling"
73 | bottom: "conv1_2"
74 | top: "pool1_stage1"
75 | pooling_param {
76 | pool: MAX
77 | kernel_size: 2
78 | stride: 2
79 | }
80 | }
81 | layer {
82 | name: "conv2_1"
83 | type: "Convolution"
84 | bottom: "pool1_stage1"
85 | top: "conv2_1"
86 | param {
87 | lr_mult: 1.0
88 | decay_mult: 1
89 | }
90 | param {
91 | lr_mult: 2.0
92 | decay_mult: 0
93 | }
94 | convolution_param {
95 | num_output: 128
96 | pad: 1
97 | kernel_size: 3
98 | weight_filler {
99 | type: "gaussian"
100 | std: 0.01
101 | }
102 | bias_filler {
103 | type: "constant"
104 | }
105 | }
106 | }
107 | layer {
108 | name: "relu2_1"
109 | type: "ReLU"
110 | bottom: "conv2_1"
111 | top: "conv2_1"
112 | }
113 | layer {
114 | name: "conv2_2"
115 | type: "Convolution"
116 | bottom: "conv2_1"
117 | top: "conv2_2"
118 | param {
119 | lr_mult: 1.0
120 | decay_mult: 1
121 | }
122 | param {
123 | lr_mult: 2.0
124 | decay_mult: 0
125 | }
126 | convolution_param {
127 | num_output: 128
128 | pad: 1
129 | kernel_size: 3
130 | weight_filler {
131 | type: "gaussian"
132 | std: 0.01
133 | }
134 | bias_filler {
135 | type: "constant"
136 | }
137 | }
138 | }
139 | layer {
140 | name: "relu2_2"
141 | type: "ReLU"
142 | bottom: "conv2_2"
143 | top: "conv2_2"
144 | }
145 | layer {
146 | name: "pool2_stage1"
147 | type: "Pooling"
148 | bottom: "conv2_2"
149 | top: "pool2_stage1"
150 | pooling_param {
151 | pool: MAX
152 | kernel_size: 2
153 | stride: 2
154 | }
155 | }
156 | layer {
157 | name: "conv3_1"
158 | type: "Convolution"
159 | bottom: "pool2_stage1"
160 | top: "conv3_1"
161 | param {
162 | lr_mult: 1.0
163 | decay_mult: 1
164 | }
165 | param {
166 | lr_mult: 2.0
167 | decay_mult: 0
168 | }
169 | convolution_param {
170 | num_output: 256
171 | pad: 1
172 | kernel_size: 3
173 | weight_filler {
174 | type: "gaussian"
175 | std: 0.01
176 | }
177 | bias_filler {
178 | type: "constant"
179 | }
180 | }
181 | }
182 | layer {
183 | name: "relu3_1"
184 | type: "ReLU"
185 | bottom: "conv3_1"
186 | top: "conv3_1"
187 | }
188 | layer {
189 | name: "conv3_2"
190 | type: "Convolution"
191 | bottom: "conv3_1"
192 | top: "conv3_2"
193 | param {
194 | lr_mult: 1.0
195 | decay_mult: 1
196 | }
197 | param {
198 | lr_mult: 2.0
199 | decay_mult: 0
200 | }
201 | convolution_param {
202 | num_output: 256
203 | pad: 1
204 | kernel_size: 3
205 | weight_filler {
206 | type: "gaussian"
207 | std: 0.01
208 | }
209 | bias_filler {
210 | type: "constant"
211 | }
212 | }
213 | }
214 | layer {
215 | name: "relu3_2"
216 | type: "ReLU"
217 | bottom: "conv3_2"
218 | top: "conv3_2"
219 | }
220 | layer {
221 | name: "conv3_3"
222 | type: "Convolution"
223 | bottom: "conv3_2"
224 | top: "conv3_3"
225 | param {
226 | lr_mult: 1.0
227 | decay_mult: 1
228 | }
229 | param {
230 | lr_mult: 2.0
231 | decay_mult: 0
232 | }
233 | convolution_param {
234 | num_output: 256
235 | pad: 1
236 | kernel_size: 3
237 | weight_filler {
238 | type: "gaussian"
239 | std: 0.01
240 | }
241 | bias_filler {
242 | type: "constant"
243 | }
244 | }
245 | }
246 | layer {
247 | name: "relu3_3"
248 | type: "ReLU"
249 | bottom: "conv3_3"
250 | top: "conv3_3"
251 | }
252 | layer {
253 | name: "conv3_4"
254 | type: "Convolution"
255 | bottom: "conv3_3"
256 | top: "conv3_4"
257 | param {
258 | lr_mult: 1.0
259 | decay_mult: 1
260 | }
261 | param {
262 | lr_mult: 2.0
263 | decay_mult: 0
264 | }
265 | convolution_param {
266 | num_output: 256
267 | pad: 1
268 | kernel_size: 3
269 | weight_filler {
270 | type: "gaussian"
271 | std: 0.01
272 | }
273 | bias_filler {
274 | type: "constant"
275 | }
276 | }
277 | }
278 | layer {
279 | name: "relu3_4"
280 | type: "ReLU"
281 | bottom: "conv3_4"
282 | top: "conv3_4"
283 | }
284 | layer {
285 | name: "pool3_stage1"
286 | type: "Pooling"
287 | bottom: "conv3_4"
288 | top: "pool3_stage1"
289 | pooling_param {
290 | pool: MAX
291 | kernel_size: 2
292 | stride: 2
293 | }
294 | }
295 | layer {
296 | name: "conv4_1"
297 | type: "Convolution"
298 | bottom: "pool3_stage1"
299 | top: "conv4_1"
300 | param {
301 | lr_mult: 1.0
302 | decay_mult: 1
303 | }
304 | param {
305 | lr_mult: 2.0
306 | decay_mult: 0
307 | }
308 | convolution_param {
309 | num_output: 512
310 | pad: 1
311 | kernel_size: 3
312 | weight_filler {
313 | type: "gaussian"
314 | std: 0.01
315 | }
316 | bias_filler {
317 | type: "constant"
318 | }
319 | }
320 | }
321 | layer {
322 | name: "relu4_1"
323 | type: "ReLU"
324 | bottom: "conv4_1"
325 | top: "conv4_1"
326 | }
327 | layer {
328 | name: "conv4_2"
329 | type: "Convolution"
330 | bottom: "conv4_1"
331 | top: "conv4_2"
332 | param {
333 | lr_mult: 1.0
334 | decay_mult: 1
335 | }
336 | param {
337 | lr_mult: 2.0
338 | decay_mult: 0
339 | }
340 | convolution_param {
341 | num_output: 512
342 | pad: 1
343 | kernel_size: 3
344 | weight_filler {
345 | type: "gaussian"
346 | std: 0.01
347 | }
348 | bias_filler {
349 | type: "constant"
350 | }
351 | }
352 | }
353 | layer {
354 | name: "relu4_2"
355 | type: "ReLU"
356 | bottom: "conv4_2"
357 | top: "conv4_2"
358 | }
359 | layer {
360 | name: "conv4_3_CPM"
361 | type: "Convolution"
362 | bottom: "conv4_2"
363 | top: "conv4_3_CPM"
364 | param {
365 | lr_mult: 1.0
366 | decay_mult: 1
367 | }
368 | param {
369 | lr_mult: 2.0
370 | decay_mult: 0
371 | }
372 | convolution_param {
373 | num_output: 256
374 | pad: 1
375 | kernel_size: 3
376 | weight_filler {
377 | type: "gaussian"
378 | std: 0.01
379 | }
380 | bias_filler {
381 | type: "constant"
382 | }
383 | }
384 | }
385 | layer {
386 | name: "relu4_3_CPM"
387 | type: "ReLU"
388 | bottom: "conv4_3_CPM"
389 | top: "conv4_3_CPM"
390 | }
391 | layer {
392 | name: "conv4_4_CPM"
393 | type: "Convolution"
394 | bottom: "conv4_3_CPM"
395 | top: "conv4_4_CPM"
396 | param {
397 | lr_mult: 1.0
398 | decay_mult: 1
399 | }
400 | param {
401 | lr_mult: 2.0
402 | decay_mult: 0
403 | }
404 | convolution_param {
405 | num_output: 128
406 | pad: 1
407 | kernel_size: 3
408 | weight_filler {
409 | type: "gaussian"
410 | std: 0.01
411 | }
412 | bias_filler {
413 | type: "constant"
414 | }
415 | }
416 | }
417 | layer {
418 | name: "relu4_4_CPM"
419 | type: "ReLU"
420 | bottom: "conv4_4_CPM"
421 | top: "conv4_4_CPM"
422 | }
423 | layer {
424 | name: "conv5_1_CPM_L1"
425 | type: "Convolution"
426 | bottom: "conv4_4_CPM"
427 | top: "conv5_1_CPM_L1"
428 | param {
429 | lr_mult: 1.0
430 | decay_mult: 1
431 | }
432 | param {
433 | lr_mult: 2.0
434 | decay_mult: 0
435 | }
436 | convolution_param {
437 | num_output: 128
438 | pad: 1
439 | kernel_size: 3
440 | weight_filler {
441 | type: "gaussian"
442 | std: 0.01
443 | }
444 | bias_filler {
445 | type: "constant"
446 | }
447 | }
448 | }
449 | layer {
450 | name: "relu5_1_CPM_L1"
451 | type: "ReLU"
452 | bottom: "conv5_1_CPM_L1"
453 | top: "conv5_1_CPM_L1"
454 | }
455 | layer {
456 | name: "conv5_1_CPM_L2"
457 | type: "Convolution"
458 | bottom: "conv4_4_CPM"
459 | top: "conv5_1_CPM_L2"
460 | param {
461 | lr_mult: 1.0
462 | decay_mult: 1
463 | }
464 | param {
465 | lr_mult: 2.0
466 | decay_mult: 0
467 | }
468 | convolution_param {
469 | num_output: 128
470 | pad: 1
471 | kernel_size: 3
472 | weight_filler {
473 | type: "gaussian"
474 | std: 0.01
475 | }
476 | bias_filler {
477 | type: "constant"
478 | }
479 | }
480 | }
481 | layer {
482 | name: "relu5_1_CPM_L2"
483 | type: "ReLU"
484 | bottom: "conv5_1_CPM_L2"
485 | top: "conv5_1_CPM_L2"
486 | }
487 | layer {
488 | name: "conv5_2_CPM_L1"
489 | type: "Convolution"
490 | bottom: "conv5_1_CPM_L1"
491 | top: "conv5_2_CPM_L1"
492 | param {
493 | lr_mult: 1.0
494 | decay_mult: 1
495 | }
496 | param {
497 | lr_mult: 2.0
498 | decay_mult: 0
499 | }
500 | convolution_param {
501 | num_output: 128
502 | pad: 1
503 | kernel_size: 3
504 | weight_filler {
505 | type: "gaussian"
506 | std: 0.01
507 | }
508 | bias_filler {
509 | type: "constant"
510 | }
511 | }
512 | }
513 | layer {
514 | name: "relu5_2_CPM_L1"
515 | type: "ReLU"
516 | bottom: "conv5_2_CPM_L1"
517 | top: "conv5_2_CPM_L1"
518 | }
519 | layer {
520 | name: "conv5_2_CPM_L2"
521 | type: "Convolution"
522 | bottom: "conv5_1_CPM_L2"
523 | top: "conv5_2_CPM_L2"
524 | param {
525 | lr_mult: 1.0
526 | decay_mult: 1
527 | }
528 | param {
529 | lr_mult: 2.0
530 | decay_mult: 0
531 | }
532 | convolution_param {
533 | num_output: 128
534 | pad: 1
535 | kernel_size: 3
536 | weight_filler {
537 | type: "gaussian"
538 | std: 0.01
539 | }
540 | bias_filler {
541 | type: "constant"
542 | }
543 | }
544 | }
545 | layer {
546 | name: "relu5_2_CPM_L2"
547 | type: "ReLU"
548 | bottom: "conv5_2_CPM_L2"
549 | top: "conv5_2_CPM_L2"
550 | }
551 | layer {
552 | name: "conv5_3_CPM_L1"
553 | type: "Convolution"
554 | bottom: "conv5_2_CPM_L1"
555 | top: "conv5_3_CPM_L1"
556 | param {
557 | lr_mult: 1.0
558 | decay_mult: 1
559 | }
560 | param {
561 | lr_mult: 2.0
562 | decay_mult: 0
563 | }
564 | convolution_param {
565 | num_output: 128
566 | pad: 1
567 | kernel_size: 3
568 | weight_filler {
569 | type: "gaussian"
570 | std: 0.01
571 | }
572 | bias_filler {
573 | type: "constant"
574 | }
575 | }
576 | }
577 | layer {
578 | name: "relu5_3_CPM_L1"
579 | type: "ReLU"
580 | bottom: "conv5_3_CPM_L1"
581 | top: "conv5_3_CPM_L1"
582 | }
583 | layer {
584 | name: "conv5_3_CPM_L2"
585 | type: "Convolution"
586 | bottom: "conv5_2_CPM_L2"
587 | top: "conv5_3_CPM_L2"
588 | param {
589 | lr_mult: 1.0
590 | decay_mult: 1
591 | }
592 | param {
593 | lr_mult: 2.0
594 | decay_mult: 0
595 | }
596 | convolution_param {
597 | num_output: 128
598 | pad: 1
599 | kernel_size: 3
600 | weight_filler {
601 | type: "gaussian"
602 | std: 0.01
603 | }
604 | bias_filler {
605 | type: "constant"
606 | }
607 | }
608 | }
609 | layer {
610 | name: "relu5_3_CPM_L2"
611 | type: "ReLU"
612 | bottom: "conv5_3_CPM_L2"
613 | top: "conv5_3_CPM_L2"
614 | }
615 | layer {
616 | name: "conv5_4_CPM_L1"
617 | type: "Convolution"
618 | bottom: "conv5_3_CPM_L1"
619 | top: "conv5_4_CPM_L1"
620 | param {
621 | lr_mult: 1.0
622 | decay_mult: 1
623 | }
624 | param {
625 | lr_mult: 2.0
626 | decay_mult: 0
627 | }
628 | convolution_param {
629 | num_output: 512
630 | pad: 0
631 | kernel_size: 1
632 | weight_filler {
633 | type: "gaussian"
634 | std: 0.01
635 | }
636 | bias_filler {
637 | type: "constant"
638 | }
639 | }
640 | }
641 | layer {
642 | name: "relu5_4_CPM_L1"
643 | type: "ReLU"
644 | bottom: "conv5_4_CPM_L1"
645 | top: "conv5_4_CPM_L1"
646 | }
647 | layer {
648 | name: "conv5_4_CPM_L2"
649 | type: "Convolution"
650 | bottom: "conv5_3_CPM_L2"
651 | top: "conv5_4_CPM_L2"
652 | param {
653 | lr_mult: 1.0
654 | decay_mult: 1
655 | }
656 | param {
657 | lr_mult: 2.0
658 | decay_mult: 0
659 | }
660 | convolution_param {
661 | num_output: 512
662 | pad: 0
663 | kernel_size: 1
664 | weight_filler {
665 | type: "gaussian"
666 | std: 0.01
667 | }
668 | bias_filler {
669 | type: "constant"
670 | }
671 | }
672 | }
673 | layer {
674 | name: "relu5_4_CPM_L2"
675 | type: "ReLU"
676 | bottom: "conv5_4_CPM_L2"
677 | top: "conv5_4_CPM_L2"
678 | }
679 | layer {
680 | name: "conv5_5_CPM_L1"
681 | type: "Convolution"
682 | bottom: "conv5_4_CPM_L1"
683 | top: "conv5_5_CPM_L1"
684 | param {
685 | lr_mult: 1.0
686 | decay_mult: 1
687 | }
688 | param {
689 | lr_mult: 2.0
690 | decay_mult: 0
691 | }
692 | convolution_param {
693 | num_output: 38
694 | pad: 0
695 | kernel_size: 1
696 | weight_filler {
697 | type: "gaussian"
698 | std: 0.01
699 | }
700 | bias_filler {
701 | type: "constant"
702 | }
703 | }
704 | }
705 | layer {
706 | name: "conv5_5_CPM_L2"
707 | type: "Convolution"
708 | bottom: "conv5_4_CPM_L2"
709 | top: "conv5_5_CPM_L2"
710 | param {
711 | lr_mult: 1.0
712 | decay_mult: 1
713 | }
714 | param {
715 | lr_mult: 2.0
716 | decay_mult: 0
717 | }
718 | convolution_param {
719 | num_output: 19
720 | pad: 0
721 | kernel_size: 1
722 | weight_filler {
723 | type: "gaussian"
724 | std: 0.01
725 | }
726 | bias_filler {
727 | type: "constant"
728 | }
729 | }
730 | }
731 | layer {
732 | name: "concat_stage2"
733 | type: "Concat"
734 | bottom: "conv5_5_CPM_L1"
735 | bottom: "conv5_5_CPM_L2"
736 | bottom: "conv4_4_CPM"
737 | top: "concat_stage2"
738 | concat_param {
739 | axis: 1
740 | }
741 | }
742 | layer {
743 | name: "Mconv1_stage2_L1"
744 | type: "Convolution"
745 | bottom: "concat_stage2"
746 | top: "Mconv1_stage2_L1"
747 | param {
748 | lr_mult: 4.0
749 | decay_mult: 1
750 | }
751 | param {
752 | lr_mult: 8.0
753 | decay_mult: 0
754 | }
755 | convolution_param {
756 | num_output: 128
757 | pad: 3
758 | kernel_size: 7
759 | weight_filler {
760 | type: "gaussian"
761 | std: 0.01
762 | }
763 | bias_filler {
764 | type: "constant"
765 | }
766 | }
767 | }
768 | layer {
769 | name: "Mrelu1_stage2_L1"
770 | type: "ReLU"
771 | bottom: "Mconv1_stage2_L1"
772 | top: "Mconv1_stage2_L1"
773 | }
774 | layer {
775 | name: "Mconv1_stage2_L2"
776 | type: "Convolution"
777 | bottom: "concat_stage2"
778 | top: "Mconv1_stage2_L2"
779 | param {
780 | lr_mult: 4.0
781 | decay_mult: 1
782 | }
783 | param {
784 | lr_mult: 8.0
785 | decay_mult: 0
786 | }
787 | convolution_param {
788 | num_output: 128
789 | pad: 3
790 | kernel_size: 7
791 | weight_filler {
792 | type: "gaussian"
793 | std: 0.01
794 | }
795 | bias_filler {
796 | type: "constant"
797 | }
798 | }
799 | }
800 | layer {
801 | name: "Mrelu1_stage2_L2"
802 | type: "ReLU"
803 | bottom: "Mconv1_stage2_L2"
804 | top: "Mconv1_stage2_L2"
805 | }
806 | layer {
807 | name: "Mconv2_stage2_L1"
808 | type: "Convolution"
809 | bottom: "Mconv1_stage2_L1"
810 | top: "Mconv2_stage2_L1"
811 | param {
812 | lr_mult: 4.0
813 | decay_mult: 1
814 | }
815 | param {
816 | lr_mult: 8.0
817 | decay_mult: 0
818 | }
819 | convolution_param {
820 | num_output: 128
821 | pad: 3
822 | kernel_size: 7
823 | weight_filler {
824 | type: "gaussian"
825 | std: 0.01
826 | }
827 | bias_filler {
828 | type: "constant"
829 | }
830 | }
831 | }
832 | layer {
833 | name: "Mrelu2_stage2_L1"
834 | type: "ReLU"
835 | bottom: "Mconv2_stage2_L1"
836 | top: "Mconv2_stage2_L1"
837 | }
838 | layer {
839 | name: "Mconv2_stage2_L2"
840 | type: "Convolution"
841 | bottom: "Mconv1_stage2_L2"
842 | top: "Mconv2_stage2_L2"
843 | param {
844 | lr_mult: 4.0
845 | decay_mult: 1
846 | }
847 | param {
848 | lr_mult: 8.0
849 | decay_mult: 0
850 | }
851 | convolution_param {
852 | num_output: 128
853 | pad: 3
854 | kernel_size: 7
855 | weight_filler {
856 | type: "gaussian"
857 | std: 0.01
858 | }
859 | bias_filler {
860 | type: "constant"
861 | }
862 | }
863 | }
864 | layer {
865 | name: "Mrelu2_stage2_L2"
866 | type: "ReLU"
867 | bottom: "Mconv2_stage2_L2"
868 | top: "Mconv2_stage2_L2"
869 | }
870 | layer {
871 | name: "Mconv3_stage2_L1"
872 | type: "Convolution"
873 | bottom: "Mconv2_stage2_L1"
874 | top: "Mconv3_stage2_L1"
875 | param {
876 | lr_mult: 4.0
877 | decay_mult: 1
878 | }
879 | param {
880 | lr_mult: 8.0
881 | decay_mult: 0
882 | }
883 | convolution_param {
884 | num_output: 128
885 | pad: 3
886 | kernel_size: 7
887 | weight_filler {
888 | type: "gaussian"
889 | std: 0.01
890 | }
891 | bias_filler {
892 | type: "constant"
893 | }
894 | }
895 | }
896 | layer {
897 | name: "Mrelu3_stage2_L1"
898 | type: "ReLU"
899 | bottom: "Mconv3_stage2_L1"
900 | top: "Mconv3_stage2_L1"
901 | }
902 | layer {
903 | name: "Mconv3_stage2_L2"
904 | type: "Convolution"
905 | bottom: "Mconv2_stage2_L2"
906 | top: "Mconv3_stage2_L2"
907 | param {
908 | lr_mult: 4.0
909 | decay_mult: 1
910 | }
911 | param {
912 | lr_mult: 8.0
913 | decay_mult: 0
914 | }
915 | convolution_param {
916 | num_output: 128
917 | pad: 3
918 | kernel_size: 7
919 | weight_filler {
920 | type: "gaussian"
921 | std: 0.01
922 | }
923 | bias_filler {
924 | type: "constant"
925 | }
926 | }
927 | }
928 | layer {
929 | name: "Mrelu3_stage2_L2"
930 | type: "ReLU"
931 | bottom: "Mconv3_stage2_L2"
932 | top: "Mconv3_stage2_L2"
933 | }
934 | layer {
935 | name: "Mconv4_stage2_L1"
936 | type: "Convolution"
937 | bottom: "Mconv3_stage2_L1"
938 | top: "Mconv4_stage2_L1"
939 | param {
940 | lr_mult: 4.0
941 | decay_mult: 1
942 | }
943 | param {
944 | lr_mult: 8.0
945 | decay_mult: 0
946 | }
947 | convolution_param {
948 | num_output: 128
949 | pad: 3
950 | kernel_size: 7
951 | weight_filler {
952 | type: "gaussian"
953 | std: 0.01
954 | }
955 | bias_filler {
956 | type: "constant"
957 | }
958 | }
959 | }
960 | layer {
961 | name: "Mrelu4_stage2_L1"
962 | type: "ReLU"
963 | bottom: "Mconv4_stage2_L1"
964 | top: "Mconv4_stage2_L1"
965 | }
966 | layer {
967 | name: "Mconv4_stage2_L2"
968 | type: "Convolution"
969 | bottom: "Mconv3_stage2_L2"
970 | top: "Mconv4_stage2_L2"
971 | param {
972 | lr_mult: 4.0
973 | decay_mult: 1
974 | }
975 | param {
976 | lr_mult: 8.0
977 | decay_mult: 0
978 | }
979 | convolution_param {
980 | num_output: 128
981 | pad: 3
982 | kernel_size: 7
983 | weight_filler {
984 | type: "gaussian"
985 | std: 0.01
986 | }
987 | bias_filler {
988 | type: "constant"
989 | }
990 | }
991 | }
992 | layer {
993 | name: "Mrelu4_stage2_L2"
994 | type: "ReLU"
995 | bottom: "Mconv4_stage2_L2"
996 | top: "Mconv4_stage2_L2"
997 | }
998 | layer {
999 | name: "Mconv5_stage2_L1"
1000 | type: "Convolution"
1001 | bottom: "Mconv4_stage2_L1"
1002 | top: "Mconv5_stage2_L1"
1003 | param {
1004 | lr_mult: 4.0
1005 | decay_mult: 1
1006 | }
1007 | param {
1008 | lr_mult: 8.0
1009 | decay_mult: 0
1010 | }
1011 | convolution_param {
1012 | num_output: 128
1013 | pad: 3
1014 | kernel_size: 7
1015 | weight_filler {
1016 | type: "gaussian"
1017 | std: 0.01
1018 | }
1019 | bias_filler {
1020 | type: "constant"
1021 | }
1022 | }
1023 | }
1024 | layer {
1025 | name: "Mrelu5_stage2_L1"
1026 | type: "ReLU"
1027 | bottom: "Mconv5_stage2_L1"
1028 | top: "Mconv5_stage2_L1"
1029 | }
1030 | layer {
1031 | name: "Mconv5_stage2_L2"
1032 | type: "Convolution"
1033 | bottom: "Mconv4_stage2_L2"
1034 | top: "Mconv5_stage2_L2"
1035 | param {
1036 | lr_mult: 4.0
1037 | decay_mult: 1
1038 | }
1039 | param {
1040 | lr_mult: 8.0
1041 | decay_mult: 0
1042 | }
1043 | convolution_param {
1044 | num_output: 128
1045 | pad: 3
1046 | kernel_size: 7
1047 | weight_filler {
1048 | type: "gaussian"
1049 | std: 0.01
1050 | }
1051 | bias_filler {
1052 | type: "constant"
1053 | }
1054 | }
1055 | }
1056 | layer {
1057 | name: "Mrelu5_stage2_L2"
1058 | type: "ReLU"
1059 | bottom: "Mconv5_stage2_L2"
1060 | top: "Mconv5_stage2_L2"
1061 | }
1062 | layer {
1063 | name: "Mconv6_stage2_L1"
1064 | type: "Convolution"
1065 | bottom: "Mconv5_stage2_L1"
1066 | top: "Mconv6_stage2_L1"
1067 | param {
1068 | lr_mult: 4.0
1069 | decay_mult: 1
1070 | }
1071 | param {
1072 | lr_mult: 8.0
1073 | decay_mult: 0
1074 | }
1075 | convolution_param {
1076 | num_output: 128
1077 | pad: 0
1078 | kernel_size: 1
1079 | weight_filler {
1080 | type: "gaussian"
1081 | std: 0.01
1082 | }
1083 | bias_filler {
1084 | type: "constant"
1085 | }
1086 | }
1087 | }
1088 | layer {
1089 | name: "Mrelu6_stage2_L1"
1090 | type: "ReLU"
1091 | bottom: "Mconv6_stage2_L1"
1092 | top: "Mconv6_stage2_L1"
1093 | }
1094 | layer {
1095 | name: "Mconv6_stage2_L2"
1096 | type: "Convolution"
1097 | bottom: "Mconv5_stage2_L2"
1098 | top: "Mconv6_stage2_L2"
1099 | param {
1100 | lr_mult: 4.0
1101 | decay_mult: 1
1102 | }
1103 | param {
1104 | lr_mult: 8.0
1105 | decay_mult: 0
1106 | }
1107 | convolution_param {
1108 | num_output: 128
1109 | pad: 0
1110 | kernel_size: 1
1111 | weight_filler {
1112 | type: "gaussian"
1113 | std: 0.01
1114 | }
1115 | bias_filler {
1116 | type: "constant"
1117 | }
1118 | }
1119 | }
1120 | layer {
1121 | name: "Mrelu6_stage2_L2"
1122 | type: "ReLU"
1123 | bottom: "Mconv6_stage2_L2"
1124 | top: "Mconv6_stage2_L2"
1125 | }
1126 | layer {
1127 | name: "Mconv7_stage2_L1"
1128 | type: "Convolution"
1129 | bottom: "Mconv6_stage2_L1"
1130 | top: "Mconv7_stage2_L1"
1131 | param {
1132 | lr_mult: 4.0
1133 | decay_mult: 1
1134 | }
1135 | param {
1136 | lr_mult: 8.0
1137 | decay_mult: 0
1138 | }
1139 | convolution_param {
1140 | num_output: 38
1141 | pad: 0
1142 | kernel_size: 1
1143 | weight_filler {
1144 | type: "gaussian"
1145 | std: 0.01
1146 | }
1147 | bias_filler {
1148 | type: "constant"
1149 | }
1150 | }
1151 | }
1152 | layer {
1153 | name: "Mconv7_stage2_L2"
1154 | type: "Convolution"
1155 | bottom: "Mconv6_stage2_L2"
1156 | top: "Mconv7_stage2_L2"
1157 | param {
1158 | lr_mult: 4.0
1159 | decay_mult: 1
1160 | }
1161 | param {
1162 | lr_mult: 8.0
1163 | decay_mult: 0
1164 | }
1165 | convolution_param {
1166 | num_output: 19
1167 | pad: 0
1168 | kernel_size: 1
1169 | weight_filler {
1170 | type: "gaussian"
1171 | std: 0.01
1172 | }
1173 | bias_filler {
1174 | type: "constant"
1175 | }
1176 | }
1177 | }
1178 | layer {
1179 | name: "concat_stage3"
1180 | type: "Concat"
1181 | bottom: "Mconv7_stage2_L1"
1182 | bottom: "Mconv7_stage2_L2"
1183 | bottom: "conv4_4_CPM"
1184 | top: "concat_stage3"
1185 | concat_param {
1186 | axis: 1
1187 | }
1188 | }
1189 | layer {
1190 | name: "Mconv1_stage3_L1"
1191 | type: "Convolution"
1192 | bottom: "concat_stage3"
1193 | top: "Mconv1_stage3_L1"
1194 | param {
1195 | lr_mult: 4.0
1196 | decay_mult: 1
1197 | }
1198 | param {
1199 | lr_mult: 8.0
1200 | decay_mult: 0
1201 | }
1202 | convolution_param {
1203 | num_output: 128
1204 | pad: 3
1205 | kernel_size: 7
1206 | weight_filler {
1207 | type: "gaussian"
1208 | std: 0.01
1209 | }
1210 | bias_filler {
1211 | type: "constant"
1212 | }
1213 | }
1214 | }
1215 | layer {
1216 | name: "Mrelu1_stage3_L1"
1217 | type: "ReLU"
1218 | bottom: "Mconv1_stage3_L1"
1219 | top: "Mconv1_stage3_L1"
1220 | }
1221 | layer {
1222 | name: "Mconv1_stage3_L2"
1223 | type: "Convolution"
1224 | bottom: "concat_stage3"
1225 | top: "Mconv1_stage3_L2"
1226 | param {
1227 | lr_mult: 4.0
1228 | decay_mult: 1
1229 | }
1230 | param {
1231 | lr_mult: 8.0
1232 | decay_mult: 0
1233 | }
1234 | convolution_param {
1235 | num_output: 128
1236 | pad: 3
1237 | kernel_size: 7
1238 | weight_filler {
1239 | type: "gaussian"
1240 | std: 0.01
1241 | }
1242 | bias_filler {
1243 | type: "constant"
1244 | }
1245 | }
1246 | }
1247 | layer {
1248 | name: "Mrelu1_stage3_L2"
1249 | type: "ReLU"
1250 | bottom: "Mconv1_stage3_L2"
1251 | top: "Mconv1_stage3_L2"
1252 | }
1253 | layer {
1254 | name: "Mconv2_stage3_L1"
1255 | type: "Convolution"
1256 | bottom: "Mconv1_stage3_L1"
1257 | top: "Mconv2_stage3_L1"
1258 | param {
1259 | lr_mult: 4.0
1260 | decay_mult: 1
1261 | }
1262 | param {
1263 | lr_mult: 8.0
1264 | decay_mult: 0
1265 | }
1266 | convolution_param {
1267 | num_output: 128
1268 | pad: 3
1269 | kernel_size: 7
1270 | weight_filler {
1271 | type: "gaussian"
1272 | std: 0.01
1273 | }
1274 | bias_filler {
1275 | type: "constant"
1276 | }
1277 | }
1278 | }
1279 | layer {
1280 | name: "Mrelu2_stage3_L1"
1281 | type: "ReLU"
1282 | bottom: "Mconv2_stage3_L1"
1283 | top: "Mconv2_stage3_L1"
1284 | }
1285 | layer {
1286 | name: "Mconv2_stage3_L2"
1287 | type: "Convolution"
1288 | bottom: "Mconv1_stage3_L2"
1289 | top: "Mconv2_stage3_L2"
1290 | param {
1291 | lr_mult: 4.0
1292 | decay_mult: 1
1293 | }
1294 | param {
1295 | lr_mult: 8.0
1296 | decay_mult: 0
1297 | }
1298 | convolution_param {
1299 | num_output: 128
1300 | pad: 3
1301 | kernel_size: 7
1302 | weight_filler {
1303 | type: "gaussian"
1304 | std: 0.01
1305 | }
1306 | bias_filler {
1307 | type: "constant"
1308 | }
1309 | }
1310 | }
1311 | layer {
1312 | name: "Mrelu2_stage3_L2"
1313 | type: "ReLU"
1314 | bottom: "Mconv2_stage3_L2"
1315 | top: "Mconv2_stage3_L2"
1316 | }
1317 | layer {
1318 | name: "Mconv3_stage3_L1"
1319 | type: "Convolution"
1320 | bottom: "Mconv2_stage3_L1"
1321 | top: "Mconv3_stage3_L1"
1322 | param {
1323 | lr_mult: 4.0
1324 | decay_mult: 1
1325 | }
1326 | param {
1327 | lr_mult: 8.0
1328 | decay_mult: 0
1329 | }
1330 | convolution_param {
1331 | num_output: 128
1332 | pad: 3
1333 | kernel_size: 7
1334 | weight_filler {
1335 | type: "gaussian"
1336 | std: 0.01
1337 | }
1338 | bias_filler {
1339 | type: "constant"
1340 | }
1341 | }
1342 | }
1343 | layer {
1344 | name: "Mrelu3_stage3_L1"
1345 | type: "ReLU"
1346 | bottom: "Mconv3_stage3_L1"
1347 | top: "Mconv3_stage3_L1"
1348 | }
1349 | layer {
1350 | name: "Mconv3_stage3_L2"
1351 | type: "Convolution"
1352 | bottom: "Mconv2_stage3_L2"
1353 | top: "Mconv3_stage3_L2"
1354 | param {
1355 | lr_mult: 4.0
1356 | decay_mult: 1
1357 | }
1358 | param {
1359 | lr_mult: 8.0
1360 | decay_mult: 0
1361 | }
1362 | convolution_param {
1363 | num_output: 128
1364 | pad: 3
1365 | kernel_size: 7
1366 | weight_filler {
1367 | type: "gaussian"
1368 | std: 0.01
1369 | }
1370 | bias_filler {
1371 | type: "constant"
1372 | }
1373 | }
1374 | }
1375 | layer {
1376 | name: "Mrelu3_stage3_L2"
1377 | type: "ReLU"
1378 | bottom: "Mconv3_stage3_L2"
1379 | top: "Mconv3_stage3_L2"
1380 | }
1381 | layer {
1382 | name: "Mconv4_stage3_L1"
1383 | type: "Convolution"
1384 | bottom: "Mconv3_stage3_L1"
1385 | top: "Mconv4_stage3_L1"
1386 | param {
1387 | lr_mult: 4.0
1388 | decay_mult: 1
1389 | }
1390 | param {
1391 | lr_mult: 8.0
1392 | decay_mult: 0
1393 | }
1394 | convolution_param {
1395 | num_output: 128
1396 | pad: 3
1397 | kernel_size: 7
1398 | weight_filler {
1399 | type: "gaussian"
1400 | std: 0.01
1401 | }
1402 | bias_filler {
1403 | type: "constant"
1404 | }
1405 | }
1406 | }
1407 | layer {
1408 | name: "Mrelu4_stage3_L1"
1409 | type: "ReLU"
1410 | bottom: "Mconv4_stage3_L1"
1411 | top: "Mconv4_stage3_L1"
1412 | }
1413 | layer {
1414 | name: "Mconv4_stage3_L2"
1415 | type: "Convolution"
1416 | bottom: "Mconv3_stage3_L2"
1417 | top: "Mconv4_stage3_L2"
1418 | param {
1419 | lr_mult: 4.0
1420 | decay_mult: 1
1421 | }
1422 | param {
1423 | lr_mult: 8.0
1424 | decay_mult: 0
1425 | }
1426 | convolution_param {
1427 | num_output: 128
1428 | pad: 3
1429 | kernel_size: 7
1430 | weight_filler {
1431 | type: "gaussian"
1432 | std: 0.01
1433 | }
1434 | bias_filler {
1435 | type: "constant"
1436 | }
1437 | }
1438 | }
1439 | layer {
1440 | name: "Mrelu4_stage3_L2"
1441 | type: "ReLU"
1442 | bottom: "Mconv4_stage3_L2"
1443 | top: "Mconv4_stage3_L2"
1444 | }
1445 | layer {
1446 | name: "Mconv5_stage3_L1"
1447 | type: "Convolution"
1448 | bottom: "Mconv4_stage3_L1"
1449 | top: "Mconv5_stage3_L1"
1450 | param {
1451 | lr_mult: 4.0
1452 | decay_mult: 1
1453 | }
1454 | param {
1455 | lr_mult: 8.0
1456 | decay_mult: 0
1457 | }
1458 | convolution_param {
1459 | num_output: 128
1460 | pad: 3
1461 | kernel_size: 7
1462 | weight_filler {
1463 | type: "gaussian"
1464 | std: 0.01
1465 | }
1466 | bias_filler {
1467 | type: "constant"
1468 | }
1469 | }
1470 | }
1471 | layer {
1472 | name: "Mrelu5_stage3_L1"
1473 | type: "ReLU"
1474 | bottom: "Mconv5_stage3_L1"
1475 | top: "Mconv5_stage3_L1"
1476 | }
1477 | layer {
1478 | name: "Mconv5_stage3_L2"
1479 | type: "Convolution"
1480 | bottom: "Mconv4_stage3_L2"
1481 | top: "Mconv5_stage3_L2"
1482 | param {
1483 | lr_mult: 4.0
1484 | decay_mult: 1
1485 | }
1486 | param {
1487 | lr_mult: 8.0
1488 | decay_mult: 0
1489 | }
1490 | convolution_param {
1491 | num_output: 128
1492 | pad: 3
1493 | kernel_size: 7
1494 | weight_filler {
1495 | type: "gaussian"
1496 | std: 0.01
1497 | }
1498 | bias_filler {
1499 | type: "constant"
1500 | }
1501 | }
1502 | }
1503 | layer {
1504 | name: "Mrelu5_stage3_L2"
1505 | type: "ReLU"
1506 | bottom: "Mconv5_stage3_L2"
1507 | top: "Mconv5_stage3_L2"
1508 | }
1509 | layer {
1510 | name: "Mconv6_stage3_L1"
1511 | type: "Convolution"
1512 | bottom: "Mconv5_stage3_L1"
1513 | top: "Mconv6_stage3_L1"
1514 | param {
1515 | lr_mult: 4.0
1516 | decay_mult: 1
1517 | }
1518 | param {
1519 | lr_mult: 8.0
1520 | decay_mult: 0
1521 | }
1522 | convolution_param {
1523 | num_output: 128
1524 | pad: 0
1525 | kernel_size: 1
1526 | weight_filler {
1527 | type: "gaussian"
1528 | std: 0.01
1529 | }
1530 | bias_filler {
1531 | type: "constant"
1532 | }
1533 | }
1534 | }
1535 | layer {
1536 | name: "Mrelu6_stage3_L1"
1537 | type: "ReLU"
1538 | bottom: "Mconv6_stage3_L1"
1539 | top: "Mconv6_stage3_L1"
1540 | }
1541 | layer {
1542 | name: "Mconv6_stage3_L2"
1543 | type: "Convolution"
1544 | bottom: "Mconv5_stage3_L2"
1545 | top: "Mconv6_stage3_L2"
1546 | param {
1547 | lr_mult: 4.0
1548 | decay_mult: 1
1549 | }
1550 | param {
1551 | lr_mult: 8.0
1552 | decay_mult: 0
1553 | }
1554 | convolution_param {
1555 | num_output: 128
1556 | pad: 0
1557 | kernel_size: 1
1558 | weight_filler {
1559 | type: "gaussian"
1560 | std: 0.01
1561 | }
1562 | bias_filler {
1563 | type: "constant"
1564 | }
1565 | }
1566 | }
1567 | layer {
1568 | name: "Mrelu6_stage3_L2"
1569 | type: "ReLU"
1570 | bottom: "Mconv6_stage3_L2"
1571 | top: "Mconv6_stage3_L2"
1572 | }
1573 | layer {
1574 | name: "Mconv7_stage3_L1"
1575 | type: "Convolution"
1576 | bottom: "Mconv6_stage3_L1"
1577 | top: "Mconv7_stage3_L1"
1578 | param {
1579 | lr_mult: 4.0
1580 | decay_mult: 1
1581 | }
1582 | param {
1583 | lr_mult: 8.0
1584 | decay_mult: 0
1585 | }
1586 | convolution_param {
1587 | num_output: 38
1588 | pad: 0
1589 | kernel_size: 1
1590 | weight_filler {
1591 | type: "gaussian"
1592 | std: 0.01
1593 | }
1594 | bias_filler {
1595 | type: "constant"
1596 | }
1597 | }
1598 | }
1599 | layer {
1600 | name: "Mconv7_stage3_L2"
1601 | type: "Convolution"
1602 | bottom: "Mconv6_stage3_L2"
1603 | top: "Mconv7_stage3_L2"
1604 | param {
1605 | lr_mult: 4.0
1606 | decay_mult: 1
1607 | }
1608 | param {
1609 | lr_mult: 8.0
1610 | decay_mult: 0
1611 | }
1612 | convolution_param {
1613 | num_output: 19
1614 | pad: 0
1615 | kernel_size: 1
1616 | weight_filler {
1617 | type: "gaussian"
1618 | std: 0.01
1619 | }
1620 | bias_filler {
1621 | type: "constant"
1622 | }
1623 | }
1624 | }
1625 | layer {
1626 | name: "concat_stage4"
1627 | type: "Concat"
1628 | bottom: "Mconv7_stage3_L1"
1629 | bottom: "Mconv7_stage3_L2"
1630 | bottom: "conv4_4_CPM"
1631 | top: "concat_stage4"
1632 | concat_param {
1633 | axis: 1
1634 | }
1635 | }
1636 | layer {
1637 | name: "Mconv1_stage4_L1"
1638 | type: "Convolution"
1639 | bottom: "concat_stage4"
1640 | top: "Mconv1_stage4_L1"
1641 | param {
1642 | lr_mult: 4.0
1643 | decay_mult: 1
1644 | }
1645 | param {
1646 | lr_mult: 8.0
1647 | decay_mult: 0
1648 | }
1649 | convolution_param {
1650 | num_output: 128
1651 | pad: 3
1652 | kernel_size: 7
1653 | weight_filler {
1654 | type: "gaussian"
1655 | std: 0.01
1656 | }
1657 | bias_filler {
1658 | type: "constant"
1659 | }
1660 | }
1661 | }
1662 | layer {
1663 | name: "Mrelu1_stage4_L1"
1664 | type: "ReLU"
1665 | bottom: "Mconv1_stage4_L1"
1666 | top: "Mconv1_stage4_L1"
1667 | }
1668 | layer {
1669 | name: "Mconv1_stage4_L2"
1670 | type: "Convolution"
1671 | bottom: "concat_stage4"
1672 | top: "Mconv1_stage4_L2"
1673 | param {
1674 | lr_mult: 4.0
1675 | decay_mult: 1
1676 | }
1677 | param {
1678 | lr_mult: 8.0
1679 | decay_mult: 0
1680 | }
1681 | convolution_param {
1682 | num_output: 128
1683 | pad: 3
1684 | kernel_size: 7
1685 | weight_filler {
1686 | type: "gaussian"
1687 | std: 0.01
1688 | }
1689 | bias_filler {
1690 | type: "constant"
1691 | }
1692 | }
1693 | }
1694 | layer {
1695 | name: "Mrelu1_stage4_L2"
1696 | type: "ReLU"
1697 | bottom: "Mconv1_stage4_L2"
1698 | top: "Mconv1_stage4_L2"
1699 | }
1700 | layer {
1701 | name: "Mconv2_stage4_L1"
1702 | type: "Convolution"
1703 | bottom: "Mconv1_stage4_L1"
1704 | top: "Mconv2_stage4_L1"
1705 | param {
1706 | lr_mult: 4.0
1707 | decay_mult: 1
1708 | }
1709 | param {
1710 | lr_mult: 8.0
1711 | decay_mult: 0
1712 | }
1713 | convolution_param {
1714 | num_output: 128
1715 | pad: 3
1716 | kernel_size: 7
1717 | weight_filler {
1718 | type: "gaussian"
1719 | std: 0.01
1720 | }
1721 | bias_filler {
1722 | type: "constant"
1723 | }
1724 | }
1725 | }
1726 | layer {
1727 | name: "Mrelu2_stage4_L1"
1728 | type: "ReLU"
1729 | bottom: "Mconv2_stage4_L1"
1730 | top: "Mconv2_stage4_L1"
1731 | }
1732 | layer {
1733 | name: "Mconv2_stage4_L2"
1734 | type: "Convolution"
1735 | bottom: "Mconv1_stage4_L2"
1736 | top: "Mconv2_stage4_L2"
1737 | param {
1738 | lr_mult: 4.0
1739 | decay_mult: 1
1740 | }
1741 | param {
1742 | lr_mult: 8.0
1743 | decay_mult: 0
1744 | }
1745 | convolution_param {
1746 | num_output: 128
1747 | pad: 3
1748 | kernel_size: 7
1749 | weight_filler {
1750 | type: "gaussian"
1751 | std: 0.01
1752 | }
1753 | bias_filler {
1754 | type: "constant"
1755 | }
1756 | }
1757 | }
1758 | layer {
1759 | name: "Mrelu2_stage4_L2"
1760 | type: "ReLU"
1761 | bottom: "Mconv2_stage4_L2"
1762 | top: "Mconv2_stage4_L2"
1763 | }
1764 | layer {
1765 | name: "Mconv3_stage4_L1"
1766 | type: "Convolution"
1767 | bottom: "Mconv2_stage4_L1"
1768 | top: "Mconv3_stage4_L1"
1769 | param {
1770 | lr_mult: 4.0
1771 | decay_mult: 1
1772 | }
1773 | param {
1774 | lr_mult: 8.0
1775 | decay_mult: 0
1776 | }
1777 | convolution_param {
1778 | num_output: 128
1779 | pad: 3
1780 | kernel_size: 7
1781 | weight_filler {
1782 | type: "gaussian"
1783 | std: 0.01
1784 | }
1785 | bias_filler {
1786 | type: "constant"
1787 | }
1788 | }
1789 | }
1790 | layer {
1791 | name: "Mrelu3_stage4_L1"
1792 | type: "ReLU"
1793 | bottom: "Mconv3_stage4_L1"
1794 | top: "Mconv3_stage4_L1"
1795 | }
1796 | layer {
1797 | name: "Mconv3_stage4_L2"
1798 | type: "Convolution"
1799 | bottom: "Mconv2_stage4_L2"
1800 | top: "Mconv3_stage4_L2"
1801 | param {
1802 | lr_mult: 4.0
1803 | decay_mult: 1
1804 | }
1805 | param {
1806 | lr_mult: 8.0
1807 | decay_mult: 0
1808 | }
1809 | convolution_param {
1810 | num_output: 128
1811 | pad: 3
1812 | kernel_size: 7
1813 | weight_filler {
1814 | type: "gaussian"
1815 | std: 0.01
1816 | }
1817 | bias_filler {
1818 | type: "constant"
1819 | }
1820 | }
1821 | }
1822 | layer {
1823 | name: "Mrelu3_stage4_L2"
1824 | type: "ReLU"
1825 | bottom: "Mconv3_stage4_L2"
1826 | top: "Mconv3_stage4_L2"
1827 | }
1828 | layer {
1829 | name: "Mconv4_stage4_L1"
1830 | type: "Convolution"
1831 | bottom: "Mconv3_stage4_L1"
1832 | top: "Mconv4_stage4_L1"
1833 | param {
1834 | lr_mult: 4.0
1835 | decay_mult: 1
1836 | }
1837 | param {
1838 | lr_mult: 8.0
1839 | decay_mult: 0
1840 | }
1841 | convolution_param {
1842 | num_output: 128
1843 | pad: 3
1844 | kernel_size: 7
1845 | weight_filler {
1846 | type: "gaussian"
1847 | std: 0.01
1848 | }
1849 | bias_filler {
1850 | type: "constant"
1851 | }
1852 | }
1853 | }
1854 | layer {
1855 | name: "Mrelu4_stage4_L1"
1856 | type: "ReLU"
1857 | bottom: "Mconv4_stage4_L1"
1858 | top: "Mconv4_stage4_L1"
1859 | }
1860 | layer {
1861 | name: "Mconv4_stage4_L2"
1862 | type: "Convolution"
1863 | bottom: "Mconv3_stage4_L2"
1864 | top: "Mconv4_stage4_L2"
1865 | param {
1866 | lr_mult: 4.0
1867 | decay_mult: 1
1868 | }
1869 | param {
1870 | lr_mult: 8.0
1871 | decay_mult: 0
1872 | }
1873 | convolution_param {
1874 | num_output: 128
1875 | pad: 3
1876 | kernel_size: 7
1877 | weight_filler {
1878 | type: "gaussian"
1879 | std: 0.01
1880 | }
1881 | bias_filler {
1882 | type: "constant"
1883 | }
1884 | }
1885 | }
1886 | layer {
1887 | name: "Mrelu4_stage4_L2"
1888 | type: "ReLU"
1889 | bottom: "Mconv4_stage4_L2"
1890 | top: "Mconv4_stage4_L2"
1891 | }
1892 | layer {
1893 | name: "Mconv5_stage4_L1"
1894 | type: "Convolution"
1895 | bottom: "Mconv4_stage4_L1"
1896 | top: "Mconv5_stage4_L1"
1897 | param {
1898 | lr_mult: 4.0
1899 | decay_mult: 1
1900 | }
1901 | param {
1902 | lr_mult: 8.0
1903 | decay_mult: 0
1904 | }
1905 | convolution_param {
1906 | num_output: 128
1907 | pad: 3
1908 | kernel_size: 7
1909 | weight_filler {
1910 | type: "gaussian"
1911 | std: 0.01
1912 | }
1913 | bias_filler {
1914 | type: "constant"
1915 | }
1916 | }
1917 | }
1918 | layer {
1919 | name: "Mrelu5_stage4_L1"
1920 | type: "ReLU"
1921 | bottom: "Mconv5_stage4_L1"
1922 | top: "Mconv5_stage4_L1"
1923 | }
1924 | layer {
1925 | name: "Mconv5_stage4_L2"
1926 | type: "Convolution"
1927 | bottom: "Mconv4_stage4_L2"
1928 | top: "Mconv5_stage4_L2"
1929 | param {
1930 | lr_mult: 4.0
1931 | decay_mult: 1
1932 | }
1933 | param {
1934 | lr_mult: 8.0
1935 | decay_mult: 0
1936 | }
1937 | convolution_param {
1938 | num_output: 128
1939 | pad: 3
1940 | kernel_size: 7
1941 | weight_filler {
1942 | type: "gaussian"
1943 | std: 0.01
1944 | }
1945 | bias_filler {
1946 | type: "constant"
1947 | }
1948 | }
1949 | }
1950 | layer {
1951 | name: "Mrelu5_stage4_L2"
1952 | type: "ReLU"
1953 | bottom: "Mconv5_stage4_L2"
1954 | top: "Mconv5_stage4_L2"
1955 | }
1956 | layer {
1957 | name: "Mconv6_stage4_L1"
1958 | type: "Convolution"
1959 | bottom: "Mconv5_stage4_L1"
1960 | top: "Mconv6_stage4_L1"
1961 | param {
1962 | lr_mult: 4.0
1963 | decay_mult: 1
1964 | }
1965 | param {
1966 | lr_mult: 8.0
1967 | decay_mult: 0
1968 | }
1969 | convolution_param {
1970 | num_output: 128
1971 | pad: 0
1972 | kernel_size: 1
1973 | weight_filler {
1974 | type: "gaussian"
1975 | std: 0.01
1976 | }
1977 | bias_filler {
1978 | type: "constant"
1979 | }
1980 | }
1981 | }
1982 | layer {
1983 | name: "Mrelu6_stage4_L1"
1984 | type: "ReLU"
1985 | bottom: "Mconv6_stage4_L1"
1986 | top: "Mconv6_stage4_L1"
1987 | }
1988 | layer {
1989 | name: "Mconv6_stage4_L2"
1990 | type: "Convolution"
1991 | bottom: "Mconv5_stage4_L2"
1992 | top: "Mconv6_stage4_L2"
1993 | param {
1994 | lr_mult: 4.0
1995 | decay_mult: 1
1996 | }
1997 | param {
1998 | lr_mult: 8.0
1999 | decay_mult: 0
2000 | }
2001 | convolution_param {
2002 | num_output: 128
2003 | pad: 0
2004 | kernel_size: 1
2005 | weight_filler {
2006 | type: "gaussian"
2007 | std: 0.01
2008 | }
2009 | bias_filler {
2010 | type: "constant"
2011 | }
2012 | }
2013 | }
2014 | layer {
2015 | name: "Mrelu6_stage4_L2"
2016 | type: "ReLU"
2017 | bottom: "Mconv6_stage4_L2"
2018 | top: "Mconv6_stage4_L2"
2019 | }
2020 | layer {
2021 | name: "Mconv7_stage4_L1"
2022 | type: "Convolution"
2023 | bottom: "Mconv6_stage4_L1"
2024 | top: "Mconv7_stage4_L1"
2025 | param {
2026 | lr_mult: 4.0
2027 | decay_mult: 1
2028 | }
2029 | param {
2030 | lr_mult: 8.0
2031 | decay_mult: 0
2032 | }
2033 | convolution_param {
2034 | num_output: 38
2035 | pad: 0
2036 | kernel_size: 1
2037 | weight_filler {
2038 | type: "gaussian"
2039 | std: 0.01
2040 | }
2041 | bias_filler {
2042 | type: "constant"
2043 | }
2044 | }
2045 | }
2046 | layer {
2047 | name: "Mconv7_stage4_L2"
2048 | type: "Convolution"
2049 | bottom: "Mconv6_stage4_L2"
2050 | top: "Mconv7_stage4_L2"
2051 | param {
2052 | lr_mult: 4.0
2053 | decay_mult: 1
2054 | }
2055 | param {
2056 | lr_mult: 8.0
2057 | decay_mult: 0
2058 | }
2059 | convolution_param {
2060 | num_output: 19
2061 | pad: 0
2062 | kernel_size: 1
2063 | weight_filler {
2064 | type: "gaussian"
2065 | std: 0.01
2066 | }
2067 | bias_filler {
2068 | type: "constant"
2069 | }
2070 | }
2071 | }
2072 | layer {
2073 | name: "concat_stage5"
2074 | type: "Concat"
2075 | bottom: "Mconv7_stage4_L1"
2076 | bottom: "Mconv7_stage4_L2"
2077 | bottom: "conv4_4_CPM"
2078 | top: "concat_stage5"
2079 | concat_param {
2080 | axis: 1
2081 | }
2082 | }
2083 | layer {
2084 | name: "Mconv1_stage5_L1"
2085 | type: "Convolution"
2086 | bottom: "concat_stage5"
2087 | top: "Mconv1_stage5_L1"
2088 | param {
2089 | lr_mult: 4.0
2090 | decay_mult: 1
2091 | }
2092 | param {
2093 | lr_mult: 8.0
2094 | decay_mult: 0
2095 | }
2096 | convolution_param {
2097 | num_output: 128
2098 | pad: 3
2099 | kernel_size: 7
2100 | weight_filler {
2101 | type: "gaussian"
2102 | std: 0.01
2103 | }
2104 | bias_filler {
2105 | type: "constant"
2106 | }
2107 | }
2108 | }
2109 | layer {
2110 | name: "Mrelu1_stage5_L1"
2111 | type: "ReLU"
2112 | bottom: "Mconv1_stage5_L1"
2113 | top: "Mconv1_stage5_L1"
2114 | }
2115 | layer {
2116 | name: "Mconv1_stage5_L2"
2117 | type: "Convolution"
2118 | bottom: "concat_stage5"
2119 | top: "Mconv1_stage5_L2"
2120 | param {
2121 | lr_mult: 4.0
2122 | decay_mult: 1
2123 | }
2124 | param {
2125 | lr_mult: 8.0
2126 | decay_mult: 0
2127 | }
2128 | convolution_param {
2129 | num_output: 128
2130 | pad: 3
2131 | kernel_size: 7
2132 | weight_filler {
2133 | type: "gaussian"
2134 | std: 0.01
2135 | }
2136 | bias_filler {
2137 | type: "constant"
2138 | }
2139 | }
2140 | }
2141 | layer {
2142 | name: "Mrelu1_stage5_L2"
2143 | type: "ReLU"
2144 | bottom: "Mconv1_stage5_L2"
2145 | top: "Mconv1_stage5_L2"
2146 | }
2147 | layer {
2148 | name: "Mconv2_stage5_L1"
2149 | type: "Convolution"
2150 | bottom: "Mconv1_stage5_L1"
2151 | top: "Mconv2_stage5_L1"
2152 | param {
2153 | lr_mult: 4.0
2154 | decay_mult: 1
2155 | }
2156 | param {
2157 | lr_mult: 8.0
2158 | decay_mult: 0
2159 | }
2160 | convolution_param {
2161 | num_output: 128
2162 | pad: 3
2163 | kernel_size: 7
2164 | weight_filler {
2165 | type: "gaussian"
2166 | std: 0.01
2167 | }
2168 | bias_filler {
2169 | type: "constant"
2170 | }
2171 | }
2172 | }
2173 | layer {
2174 | name: "Mrelu2_stage5_L1"
2175 | type: "ReLU"
2176 | bottom: "Mconv2_stage5_L1"
2177 | top: "Mconv2_stage5_L1"
2178 | }
2179 | layer {
2180 | name: "Mconv2_stage5_L2"
2181 | type: "Convolution"
2182 | bottom: "Mconv1_stage5_L2"
2183 | top: "Mconv2_stage5_L2"
2184 | param {
2185 | lr_mult: 4.0
2186 | decay_mult: 1
2187 | }
2188 | param {
2189 | lr_mult: 8.0
2190 | decay_mult: 0
2191 | }
2192 | convolution_param {
2193 | num_output: 128
2194 | pad: 3
2195 | kernel_size: 7
2196 | weight_filler {
2197 | type: "gaussian"
2198 | std: 0.01
2199 | }
2200 | bias_filler {
2201 | type: "constant"
2202 | }
2203 | }
2204 | }
2205 | layer {
2206 | name: "Mrelu2_stage5_L2"
2207 | type: "ReLU"
2208 | bottom: "Mconv2_stage5_L2"
2209 | top: "Mconv2_stage5_L2"
2210 | }
2211 | layer {
2212 | name: "Mconv3_stage5_L1"
2213 | type: "Convolution"
2214 | bottom: "Mconv2_stage5_L1"
2215 | top: "Mconv3_stage5_L1"
2216 | param {
2217 | lr_mult: 4.0
2218 | decay_mult: 1
2219 | }
2220 | param {
2221 | lr_mult: 8.0
2222 | decay_mult: 0
2223 | }
2224 | convolution_param {
2225 | num_output: 128
2226 | pad: 3
2227 | kernel_size: 7
2228 | weight_filler {
2229 | type: "gaussian"
2230 | std: 0.01
2231 | }
2232 | bias_filler {
2233 | type: "constant"
2234 | }
2235 | }
2236 | }
2237 | layer {
2238 | name: "Mrelu3_stage5_L1"
2239 | type: "ReLU"
2240 | bottom: "Mconv3_stage5_L1"
2241 | top: "Mconv3_stage5_L1"
2242 | }
2243 | layer {
2244 | name: "Mconv3_stage5_L2"
2245 | type: "Convolution"
2246 | bottom: "Mconv2_stage5_L2"
2247 | top: "Mconv3_stage5_L2"
2248 | param {
2249 | lr_mult: 4.0
2250 | decay_mult: 1
2251 | }
2252 | param {
2253 | lr_mult: 8.0
2254 | decay_mult: 0
2255 | }
2256 | convolution_param {
2257 | num_output: 128
2258 | pad: 3
2259 | kernel_size: 7
2260 | weight_filler {
2261 | type: "gaussian"
2262 | std: 0.01
2263 | }
2264 | bias_filler {
2265 | type: "constant"
2266 | }
2267 | }
2268 | }
2269 | layer {
2270 | name: "Mrelu3_stage5_L2"
2271 | type: "ReLU"
2272 | bottom: "Mconv3_stage5_L2"
2273 | top: "Mconv3_stage5_L2"
2274 | }
2275 | layer {
2276 | name: "Mconv4_stage5_L1"
2277 | type: "Convolution"
2278 | bottom: "Mconv3_stage5_L1"
2279 | top: "Mconv4_stage5_L1"
2280 | param {
2281 | lr_mult: 4.0
2282 | decay_mult: 1
2283 | }
2284 | param {
2285 | lr_mult: 8.0
2286 | decay_mult: 0
2287 | }
2288 | convolution_param {
2289 | num_output: 128
2290 | pad: 3
2291 | kernel_size: 7
2292 | weight_filler {
2293 | type: "gaussian"
2294 | std: 0.01
2295 | }
2296 | bias_filler {
2297 | type: "constant"
2298 | }
2299 | }
2300 | }
2301 | layer {
2302 | name: "Mrelu4_stage5_L1"
2303 | type: "ReLU"
2304 | bottom: "Mconv4_stage5_L1"
2305 | top: "Mconv4_stage5_L1"
2306 | }
2307 | layer {
2308 | name: "Mconv4_stage5_L2"
2309 | type: "Convolution"
2310 | bottom: "Mconv3_stage5_L2"
2311 | top: "Mconv4_stage5_L2"
2312 | param {
2313 | lr_mult: 4.0
2314 | decay_mult: 1
2315 | }
2316 | param {
2317 | lr_mult: 8.0
2318 | decay_mult: 0
2319 | }
2320 | convolution_param {
2321 | num_output: 128
2322 | pad: 3
2323 | kernel_size: 7
2324 | weight_filler {
2325 | type: "gaussian"
2326 | std: 0.01
2327 | }
2328 | bias_filler {
2329 | type: "constant"
2330 | }
2331 | }
2332 | }
2333 | layer {
2334 | name: "Mrelu4_stage5_L2"
2335 | type: "ReLU"
2336 | bottom: "Mconv4_stage5_L2"
2337 | top: "Mconv4_stage5_L2"
2338 | }
2339 | layer {
2340 | name: "Mconv5_stage5_L1"
2341 | type: "Convolution"
2342 | bottom: "Mconv4_stage5_L1"
2343 | top: "Mconv5_stage5_L1"
2344 | param {
2345 | lr_mult: 4.0
2346 | decay_mult: 1
2347 | }
2348 | param {
2349 | lr_mult: 8.0
2350 | decay_mult: 0
2351 | }
2352 | convolution_param {
2353 | num_output: 128
2354 | pad: 3
2355 | kernel_size: 7
2356 | weight_filler {
2357 | type: "gaussian"
2358 | std: 0.01
2359 | }
2360 | bias_filler {
2361 | type: "constant"
2362 | }
2363 | }
2364 | }
2365 | layer {
2366 | name: "Mrelu5_stage5_L1"
2367 | type: "ReLU"
2368 | bottom: "Mconv5_stage5_L1"
2369 | top: "Mconv5_stage5_L1"
2370 | }
2371 | layer {
2372 | name: "Mconv5_stage5_L2"
2373 | type: "Convolution"
2374 | bottom: "Mconv4_stage5_L2"
2375 | top: "Mconv5_stage5_L2"
2376 | param {
2377 | lr_mult: 4.0
2378 | decay_mult: 1
2379 | }
2380 | param {
2381 | lr_mult: 8.0
2382 | decay_mult: 0
2383 | }
2384 | convolution_param {
2385 | num_output: 128
2386 | pad: 3
2387 | kernel_size: 7
2388 | weight_filler {
2389 | type: "gaussian"
2390 | std: 0.01
2391 | }
2392 | bias_filler {
2393 | type: "constant"
2394 | }
2395 | }
2396 | }
2397 | layer {
2398 | name: "Mrelu5_stage5_L2"
2399 | type: "ReLU"
2400 | bottom: "Mconv5_stage5_L2"
2401 | top: "Mconv5_stage5_L2"
2402 | }
2403 | layer {
2404 | name: "Mconv6_stage5_L1"
2405 | type: "Convolution"
2406 | bottom: "Mconv5_stage5_L1"
2407 | top: "Mconv6_stage5_L1"
2408 | param {
2409 | lr_mult: 4.0
2410 | decay_mult: 1
2411 | }
2412 | param {
2413 | lr_mult: 8.0
2414 | decay_mult: 0
2415 | }
2416 | convolution_param {
2417 | num_output: 128
2418 | pad: 0
2419 | kernel_size: 1
2420 | weight_filler {
2421 | type: "gaussian"
2422 | std: 0.01
2423 | }
2424 | bias_filler {
2425 | type: "constant"
2426 | }
2427 | }
2428 | }
2429 | layer {
2430 | name: "Mrelu6_stage5_L1"
2431 | type: "ReLU"
2432 | bottom: "Mconv6_stage5_L1"
2433 | top: "Mconv6_stage5_L1"
2434 | }
2435 | layer {
2436 | name: "Mconv6_stage5_L2"
2437 | type: "Convolution"
2438 | bottom: "Mconv5_stage5_L2"
2439 | top: "Mconv6_stage5_L2"
2440 | param {
2441 | lr_mult: 4.0
2442 | decay_mult: 1
2443 | }
2444 | param {
2445 | lr_mult: 8.0
2446 | decay_mult: 0
2447 | }
2448 | convolution_param {
2449 | num_output: 128
2450 | pad: 0
2451 | kernel_size: 1
2452 | weight_filler {
2453 | type: "gaussian"
2454 | std: 0.01
2455 | }
2456 | bias_filler {
2457 | type: "constant"
2458 | }
2459 | }
2460 | }
2461 | layer {
2462 | name: "Mrelu6_stage5_L2"
2463 | type: "ReLU"
2464 | bottom: "Mconv6_stage5_L2"
2465 | top: "Mconv6_stage5_L2"
2466 | }
2467 | layer {
2468 | name: "Mconv7_stage5_L1"
2469 | type: "Convolution"
2470 | bottom: "Mconv6_stage5_L1"
2471 | top: "Mconv7_stage5_L1"
2472 | param {
2473 | lr_mult: 4.0
2474 | decay_mult: 1
2475 | }
2476 | param {
2477 | lr_mult: 8.0
2478 | decay_mult: 0
2479 | }
2480 | convolution_param {
2481 | num_output: 38
2482 | pad: 0
2483 | kernel_size: 1
2484 | weight_filler {
2485 | type: "gaussian"
2486 | std: 0.01
2487 | }
2488 | bias_filler {
2489 | type: "constant"
2490 | }
2491 | }
2492 | }
2493 | layer {
2494 | name: "Mconv7_stage5_L2"
2495 | type: "Convolution"
2496 | bottom: "Mconv6_stage5_L2"
2497 | top: "Mconv7_stage5_L2"
2498 | param {
2499 | lr_mult: 4.0
2500 | decay_mult: 1
2501 | }
2502 | param {
2503 | lr_mult: 8.0
2504 | decay_mult: 0
2505 | }
2506 | convolution_param {
2507 | num_output: 19
2508 | pad: 0
2509 | kernel_size: 1
2510 | weight_filler {
2511 | type: "gaussian"
2512 | std: 0.01
2513 | }
2514 | bias_filler {
2515 | type: "constant"
2516 | }
2517 | }
2518 | }
2519 | layer {
2520 | name: "concat_stage6"
2521 | type: "Concat"
2522 | bottom: "Mconv7_stage5_L1"
2523 | bottom: "Mconv7_stage5_L2"
2524 | bottom: "conv4_4_CPM"
2525 | top: "concat_stage6"
2526 | concat_param {
2527 | axis: 1
2528 | }
2529 | }
2530 | layer {
2531 | name: "Mconv1_stage6_L1"
2532 | type: "Convolution"
2533 | bottom: "concat_stage6"
2534 | top: "Mconv1_stage6_L1"
2535 | param {
2536 | lr_mult: 4.0
2537 | decay_mult: 1
2538 | }
2539 | param {
2540 | lr_mult: 8.0
2541 | decay_mult: 0
2542 | }
2543 | convolution_param {
2544 | num_output: 128
2545 | pad: 3
2546 | kernel_size: 7
2547 | weight_filler {
2548 | type: "gaussian"
2549 | std: 0.01
2550 | }
2551 | bias_filler {
2552 | type: "constant"
2553 | }
2554 | }
2555 | }
2556 | layer {
2557 | name: "Mrelu1_stage6_L1"
2558 | type: "ReLU"
2559 | bottom: "Mconv1_stage6_L1"
2560 | top: "Mconv1_stage6_L1"
2561 | }
2562 | layer {
2563 | name: "Mconv1_stage6_L2"
2564 | type: "Convolution"
2565 | bottom: "concat_stage6"
2566 | top: "Mconv1_stage6_L2"
2567 | param {
2568 | lr_mult: 4.0
2569 | decay_mult: 1
2570 | }
2571 | param {
2572 | lr_mult: 8.0
2573 | decay_mult: 0
2574 | }
2575 | convolution_param {
2576 | num_output: 128
2577 | pad: 3
2578 | kernel_size: 7
2579 | weight_filler {
2580 | type: "gaussian"
2581 | std: 0.01
2582 | }
2583 | bias_filler {
2584 | type: "constant"
2585 | }
2586 | }
2587 | }
2588 | layer {
2589 | name: "Mrelu1_stage6_L2"
2590 | type: "ReLU"
2591 | bottom: "Mconv1_stage6_L2"
2592 | top: "Mconv1_stage6_L2"
2593 | }
2594 | layer {
2595 | name: "Mconv2_stage6_L1"
2596 | type: "Convolution"
2597 | bottom: "Mconv1_stage6_L1"
2598 | top: "Mconv2_stage6_L1"
2599 | param {
2600 | lr_mult: 4.0
2601 | decay_mult: 1
2602 | }
2603 | param {
2604 | lr_mult: 8.0
2605 | decay_mult: 0
2606 | }
2607 | convolution_param {
2608 | num_output: 128
2609 | pad: 3
2610 | kernel_size: 7
2611 | weight_filler {
2612 | type: "gaussian"
2613 | std: 0.01
2614 | }
2615 | bias_filler {
2616 | type: "constant"
2617 | }
2618 | }
2619 | }
2620 | layer {
2621 | name: "Mrelu2_stage6_L1"
2622 | type: "ReLU"
2623 | bottom: "Mconv2_stage6_L1"
2624 | top: "Mconv2_stage6_L1"
2625 | }
2626 | layer {
2627 | name: "Mconv2_stage6_L2"
2628 | type: "Convolution"
2629 | bottom: "Mconv1_stage6_L2"
2630 | top: "Mconv2_stage6_L2"
2631 | param {
2632 | lr_mult: 4.0
2633 | decay_mult: 1
2634 | }
2635 | param {
2636 | lr_mult: 8.0
2637 | decay_mult: 0
2638 | }
2639 | convolution_param {
2640 | num_output: 128
2641 | pad: 3
2642 | kernel_size: 7
2643 | weight_filler {
2644 | type: "gaussian"
2645 | std: 0.01
2646 | }
2647 | bias_filler {
2648 | type: "constant"
2649 | }
2650 | }
2651 | }
2652 | layer {
2653 | name: "Mrelu2_stage6_L2"
2654 | type: "ReLU"
2655 | bottom: "Mconv2_stage6_L2"
2656 | top: "Mconv2_stage6_L2"
2657 | }
2658 | layer {
2659 | name: "Mconv3_stage6_L1"
2660 | type: "Convolution"
2661 | bottom: "Mconv2_stage6_L1"
2662 | top: "Mconv3_stage6_L1"
2663 | param {
2664 | lr_mult: 4.0
2665 | decay_mult: 1
2666 | }
2667 | param {
2668 | lr_mult: 8.0
2669 | decay_mult: 0
2670 | }
2671 | convolution_param {
2672 | num_output: 128
2673 | pad: 3
2674 | kernel_size: 7
2675 | weight_filler {
2676 | type: "gaussian"
2677 | std: 0.01
2678 | }
2679 | bias_filler {
2680 | type: "constant"
2681 | }
2682 | }
2683 | }
2684 | layer {
2685 | name: "Mrelu3_stage6_L1"
2686 | type: "ReLU"
2687 | bottom: "Mconv3_stage6_L1"
2688 | top: "Mconv3_stage6_L1"
2689 | }
2690 | layer {
2691 | name: "Mconv3_stage6_L2"
2692 | type: "Convolution"
2693 | bottom: "Mconv2_stage6_L2"
2694 | top: "Mconv3_stage6_L2"
2695 | param {
2696 | lr_mult: 4.0
2697 | decay_mult: 1
2698 | }
2699 | param {
2700 | lr_mult: 8.0
2701 | decay_mult: 0
2702 | }
2703 | convolution_param {
2704 | num_output: 128
2705 | pad: 3
2706 | kernel_size: 7
2707 | weight_filler {
2708 | type: "gaussian"
2709 | std: 0.01
2710 | }
2711 | bias_filler {
2712 | type: "constant"
2713 | }
2714 | }
2715 | }
2716 | layer {
2717 | name: "Mrelu3_stage6_L2"
2718 | type: "ReLU"
2719 | bottom: "Mconv3_stage6_L2"
2720 | top: "Mconv3_stage6_L2"
2721 | }
2722 | layer {
2723 | name: "Mconv4_stage6_L1"
2724 | type: "Convolution"
2725 | bottom: "Mconv3_stage6_L1"
2726 | top: "Mconv4_stage6_L1"
2727 | param {
2728 | lr_mult: 4.0
2729 | decay_mult: 1
2730 | }
2731 | param {
2732 | lr_mult: 8.0
2733 | decay_mult: 0
2734 | }
2735 | convolution_param {
2736 | num_output: 128
2737 | pad: 3
2738 | kernel_size: 7
2739 | weight_filler {
2740 | type: "gaussian"
2741 | std: 0.01
2742 | }
2743 | bias_filler {
2744 | type: "constant"
2745 | }
2746 | }
2747 | }
2748 | layer {
2749 | name: "Mrelu4_stage6_L1"
2750 | type: "ReLU"
2751 | bottom: "Mconv4_stage6_L1"
2752 | top: "Mconv4_stage6_L1"
2753 | }
2754 | layer {
2755 | name: "Mconv4_stage6_L2"
2756 | type: "Convolution"
2757 | bottom: "Mconv3_stage6_L2"
2758 | top: "Mconv4_stage6_L2"
2759 | param {
2760 | lr_mult: 4.0
2761 | decay_mult: 1
2762 | }
2763 | param {
2764 | lr_mult: 8.0
2765 | decay_mult: 0
2766 | }
2767 | convolution_param {
2768 | num_output: 128
2769 | pad: 3
2770 | kernel_size: 7
2771 | weight_filler {
2772 | type: "gaussian"
2773 | std: 0.01
2774 | }
2775 | bias_filler {
2776 | type: "constant"
2777 | }
2778 | }
2779 | }
2780 | layer {
2781 | name: "Mrelu4_stage6_L2"
2782 | type: "ReLU"
2783 | bottom: "Mconv4_stage6_L2"
2784 | top: "Mconv4_stage6_L2"
2785 | }
2786 | layer {
2787 | name: "Mconv5_stage6_L1"
2788 | type: "Convolution"
2789 | bottom: "Mconv4_stage6_L1"
2790 | top: "Mconv5_stage6_L1"
2791 | param {
2792 | lr_mult: 4.0
2793 | decay_mult: 1
2794 | }
2795 | param {
2796 | lr_mult: 8.0
2797 | decay_mult: 0
2798 | }
2799 | convolution_param {
2800 | num_output: 128
2801 | pad: 3
2802 | kernel_size: 7
2803 | weight_filler {
2804 | type: "gaussian"
2805 | std: 0.01
2806 | }
2807 | bias_filler {
2808 | type: "constant"
2809 | }
2810 | }
2811 | }
2812 | layer {
2813 | name: "Mrelu5_stage6_L1"
2814 | type: "ReLU"
2815 | bottom: "Mconv5_stage6_L1"
2816 | top: "Mconv5_stage6_L1"
2817 | }
2818 | layer {
2819 | name: "Mconv5_stage6_L2"
2820 | type: "Convolution"
2821 | bottom: "Mconv4_stage6_L2"
2822 | top: "Mconv5_stage6_L2"
2823 | param {
2824 | lr_mult: 4.0
2825 | decay_mult: 1
2826 | }
2827 | param {
2828 | lr_mult: 8.0
2829 | decay_mult: 0
2830 | }
2831 | convolution_param {
2832 | num_output: 128
2833 | pad: 3
2834 | kernel_size: 7
2835 | weight_filler {
2836 | type: "gaussian"
2837 | std: 0.01
2838 | }
2839 | bias_filler {
2840 | type: "constant"
2841 | }
2842 | }
2843 | }
2844 | layer {
2845 | name: "Mrelu5_stage6_L2"
2846 | type: "ReLU"
2847 | bottom: "Mconv5_stage6_L2"
2848 | top: "Mconv5_stage6_L2"
2849 | }
2850 | layer {
2851 | name: "Mconv6_stage6_L1"
2852 | type: "Convolution"
2853 | bottom: "Mconv5_stage6_L1"
2854 | top: "Mconv6_stage6_L1"
2855 | param {
2856 | lr_mult: 4.0
2857 | decay_mult: 1
2858 | }
2859 | param {
2860 | lr_mult: 8.0
2861 | decay_mult: 0
2862 | }
2863 | convolution_param {
2864 | num_output: 128
2865 | pad: 0
2866 | kernel_size: 1
2867 | weight_filler {
2868 | type: "gaussian"
2869 | std: 0.01
2870 | }
2871 | bias_filler {
2872 | type: "constant"
2873 | }
2874 | }
2875 | }
2876 | layer {
2877 | name: "Mrelu6_stage6_L1"
2878 | type: "ReLU"
2879 | bottom: "Mconv6_stage6_L1"
2880 | top: "Mconv6_stage6_L1"
2881 | }
2882 | layer {
2883 | name: "Mconv6_stage6_L2"
2884 | type: "Convolution"
2885 | bottom: "Mconv5_stage6_L2"
2886 | top: "Mconv6_stage6_L2"
2887 | param {
2888 | lr_mult: 4.0
2889 | decay_mult: 1
2890 | }
2891 | param {
2892 | lr_mult: 8.0
2893 | decay_mult: 0
2894 | }
2895 | convolution_param {
2896 | num_output: 128
2897 | pad: 0
2898 | kernel_size: 1
2899 | weight_filler {
2900 | type: "gaussian"
2901 | std: 0.01
2902 | }
2903 | bias_filler {
2904 | type: "constant"
2905 | }
2906 | }
2907 | }
2908 | layer {
2909 | name: "Mrelu6_stage6_L2"
2910 | type: "ReLU"
2911 | bottom: "Mconv6_stage6_L2"
2912 | top: "Mconv6_stage6_L2"
2913 | }
2914 | layer {
2915 | name: "Mconv7_stage6_L1"
2916 | type: "Convolution"
2917 | bottom: "Mconv6_stage6_L1"
2918 | top: "Mconv7_stage6_L1"
2919 | param {
2920 | lr_mult: 4.0
2921 | decay_mult: 1
2922 | }
2923 | param {
2924 | lr_mult: 8.0
2925 | decay_mult: 0
2926 | }
2927 | convolution_param {
2928 | num_output: 38
2929 | pad: 0
2930 | kernel_size: 1
2931 | weight_filler {
2932 | type: "gaussian"
2933 | std: 0.01
2934 | }
2935 | bias_filler {
2936 | type: "constant"
2937 | }
2938 | }
2939 | }
2940 | layer {
2941 | name: "Mconv7_stage6_L2"
2942 | type: "Convolution"
2943 | bottom: "Mconv6_stage6_L2"
2944 | top: "Mconv7_stage6_L2"
2945 | param {
2946 | lr_mult: 4.0
2947 | decay_mult: 1
2948 | }
2949 | param {
2950 | lr_mult: 8.0
2951 | decay_mult: 0
2952 | }
2953 | convolution_param {
2954 | num_output: 19
2955 | pad: 0
2956 | kernel_size: 1
2957 | weight_filler {
2958 | type: "gaussian"
2959 | std: 0.01
2960 | }
2961 | bias_filler {
2962 | type: "constant"
2963 | }
2964 | }
2965 | }
2966 | layer {
2967 | name: "concat_stage7"
2968 | type: "Concat"
2969 | bottom: "Mconv7_stage6_L2"
2970 | bottom: "Mconv7_stage6_L1"
2971 | # top: "concat_stage7"
2972 | top: "net_output"
2973 | concat_param {
2974 | axis: 1
2975 | }
2976 | }
2977 |
--------------------------------------------------------------------------------
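Stages 2 through 6 of the prototxt above all follow the same refinement pattern: the two predictions of the previous stage are concatenated with the shared feature map conv4_4_CPM, then each branch applies five 7x7 convolutions and two 1x1 convolutions, with the L1 branch emitting 38 part-affinity-field channels and the L2 branch emitting 19 heatmap channels. The final concat_stage7 stacks the stage-6 outputs (L2 first, then L1) into the 57-channel blob net_output. Below is a minimal sketch of driving this prototxt with OpenCV's dnn module; it is not part of this repository, and the weights filename, input size and scaling are assumptions (only the prototxt is checked in):

    import cv2

    proto = 'models/pose/coco/pose_deploy_linevec.prototxt'
    weights = 'pose_iter_440000.caffemodel'          # assumed filename; weights are not in the repo

    net = cv2.dnn.readNetFromCaffe(proto, weights)

    frame = cv2.imread('person.jpg')                 # any test image
    blob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (368, 368),
                                 (0, 0, 0), swapRB=False, crop=False)
    net.setInput(blob)
    out = net.forward()                              # (1, 57, h, w): 19 heatmaps, then 38 PAF maps
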
/net/__init__.py:
--------------------------------------------------------------------------------
1 | from . import utils
--------------------------------------------------------------------------------
/net/st_gcn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 |
6 | from net.utils.tgcn import ConvTemporalGraphical
7 | from net.utils.graph import Graph
8 |
9 | class Model(nn.Module):
10 | r"""Spatial temporal graph convolutional networks.
11 |
12 | Args:
13 | in_channels (int): Number of channels in the input data
14 | num_class (int): Number of classes for the classification task
15 | graph_args (dict): The arguments for building the graph
16 | edge_importance_weighting (bool): If ``True``, adds a learnable
17 | importance weighting to the edges of the graph
18 | **kwargs (optional): Other parameters for graph convolution units
19 |
20 | Shape:
21 | - Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})`
22 | - Output: :math:`(N, num_class)` where
23 | :math:`N` is a batch size,
24 | :math:`T_{in}` is a length of input sequence,
25 | :math:`V_{in}` is the number of graph nodes,
26 |         :math:`M_{in}` is the number of instances in a frame.
27 | """
28 |
29 | def __init__(self, in_channels, num_class, graph_args,
30 | edge_importance_weighting, **kwargs):
31 | super().__init__()
32 |
33 | # load graph
34 | self.graph = Graph(**graph_args)
35 | A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False)
36 | self.register_buffer('A', A)
37 |
38 | # build networks
39 | spatial_kernel_size = A.size(0)
40 | temporal_kernel_size = 9
41 | kernel_size = (temporal_kernel_size, spatial_kernel_size)
42 | self.data_bn = nn.BatchNorm1d(in_channels * A.size(1))
43 | kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'}
44 | self.st_gcn_networks = nn.ModuleList((
45 | st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0),
46 | st_gcn(64, 64, kernel_size, 1, **kwargs),
47 | st_gcn(64, 64, kernel_size, 1, **kwargs),
48 | st_gcn(64, 64, kernel_size, 1, **kwargs),
49 | st_gcn(64, 128, kernel_size, 2, **kwargs),
50 | st_gcn(128, 128, kernel_size, 1, **kwargs),
51 | st_gcn(128, 128, kernel_size, 1, **kwargs),
52 | st_gcn(128, 256, kernel_size, 2, **kwargs),
53 | st_gcn(256, 256, kernel_size, 1, **kwargs),
54 | st_gcn(256, 256, kernel_size, 1, **kwargs),
55 | ))
56 |
57 | # initialize parameters for edge importance weighting
58 | if edge_importance_weighting:
59 | self.edge_importance = nn.ParameterList([
60 | nn.Parameter(torch.ones(self.A.size()))
61 | for i in self.st_gcn_networks
62 | ])
63 | else:
64 | self.edge_importance = [1] * len(self.st_gcn_networks)
65 |
66 | # fcn for prediction
67 | self.fcn = nn.Conv2d(256, num_class, kernel_size=1)
68 |
69 | def forward(self, x):
70 |
71 | # data normalization
72 | N, C, T, V, M = x.size()
73 | x = x.permute(0, 4, 3, 1, 2).contiguous()
74 | x = x.view(N * M, V * C, T)
75 | x = self.data_bn(x)
76 | x = x.view(N, M, V, C, T)
77 | x = x.permute(0, 1, 3, 4, 2).contiguous()
78 | x = x.view(N * M, C, T, V)
79 |
80 |         # forward
81 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
82 | x, _ = gcn(x, self.A * importance)
83 |
84 | # global pooling
85 | x = F.avg_pool2d(x, x.size()[2:])
86 | x = x.view(N, M, -1, 1, 1).mean(dim=1)
87 |
88 | # prediction
89 | x = self.fcn(x)
90 | x = x.view(x.size(0), -1)
91 |
92 | return x
93 |
94 | def extract_feature(self, x):
95 |
96 | # data normalization
97 | N, C, T, V, M = x.size()
98 | x = x.permute(0, 4, 3, 1, 2).contiguous()
99 | x = x.view(N * M, V * C, T)
100 | x = self.data_bn(x)
101 | x = x.view(N, M, V, C, T)
102 | x = x.permute(0, 1, 3, 4, 2).contiguous()
103 | x = x.view(N * M, C, T, V)
104 |
105 |         # forward
106 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
107 | x, _ = gcn(x, self.A * importance)
108 |
109 | _, c, t, v = x.size()
110 | feature = x.view(N, M, c, t, v).permute(0, 2, 3, 4, 1)
111 |
112 | # prediction
113 | x = self.fcn(x)
114 | output = x.view(N, M, -1, t, v).permute(0, 2, 3, 4, 1)
115 |
116 | return output, feature
117 |
118 | class st_gcn(nn.Module):
119 | r"""Applies a spatial temporal graph convolution over an input graph sequence.
120 |
121 | Args:
122 | in_channels (int): Number of channels in the input sequence data
123 | out_channels (int): Number of channels produced by the convolution
124 | kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel
125 | stride (int, optional): Stride of the temporal convolution. Default: 1
126 |         dropout (float, optional): Dropout rate of the final output. Default: 0
127 | residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True``
128 |
129 | Shape:
130 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
131 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
132 |         - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
133 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
134 |
135 | where
136 | :math:`N` is a batch size,
137 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
138 | :math:`T_{in}/T_{out}` is a length of input/output sequence,
139 | :math:`V` is the number of graph nodes.
140 |
141 | """
142 |
143 | def __init__(self,
144 | in_channels,
145 | out_channels,
146 | kernel_size,
147 | stride=1,
148 | dropout=0,
149 | residual=True):
150 | super().__init__()
151 |
152 | assert len(kernel_size) == 2
153 | assert kernel_size[0] % 2 == 1
154 | padding = ((kernel_size[0] - 1) // 2, 0)
155 |
156 | self.gcn = ConvTemporalGraphical(in_channels, out_channels,
157 | kernel_size[1])
158 |
159 | self.tcn = nn.Sequential(
160 | nn.BatchNorm2d(out_channels),
161 | nn.ReLU(inplace=True),
162 | nn.Conv2d(
163 | out_channels,
164 | out_channels,
165 | (kernel_size[0], 1),
166 | (stride, 1),
167 | padding,
168 | ),
169 | nn.BatchNorm2d(out_channels),
170 | nn.Dropout(dropout, inplace=True),
171 | )
172 |
173 | if not residual:
174 | self.residual = lambda x: 0
175 |
176 | elif (in_channels == out_channels) and (stride == 1):
177 | self.residual = lambda x: x
178 |
179 | else:
180 | self.residual = nn.Sequential(
181 | nn.Conv2d(
182 | in_channels,
183 | out_channels,
184 | kernel_size=1,
185 | stride=(stride, 1)),
186 | nn.BatchNorm2d(out_channels),
187 | )
188 |
189 | self.relu = nn.ReLU(inplace=True)
190 |
191 | def forward(self, x, A):
192 |
193 | res = self.residual(x)
194 | x, A = self.gcn(x, A)
195 | x = self.tcn(x) + res
196 |
197 | return self.relu(x), A
--------------------------------------------------------------------------------
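A minimal usage sketch for the interface documented above (not repo code): the class count, dropout value and clip length are placeholders, and the input follows the documented (N, in_channels, T, V, M) layout with the 18-joint openpose graph from net/utils/graph.py.

    import torch
    from net.st_gcn import Model

    model = Model(in_channels=3,
                  num_class=400,        # placeholder label count
                  graph_args={'layout': 'openpose', 'strategy': 'spatial'},
                  edge_importance_weighting=True,
                  dropout=0.5)

    x = torch.randn(8, 3, 150, 18, 2)   # N=8 clips, C=3, T=150 frames, V=18 joints, M=2 persons
    logits = model(x)                   # -> torch.Size([8, 400])
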
/net/st_gcn_twostream.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.autograd import Variable
5 |
6 | from net.utils.tgcn import ConvTemporalGraphical
7 | from net.utils.graph import Graph
8 |
9 | from .st_gcn import Model as ST_GCN
10 |
11 | class Model(nn.Module):
12 |
13 | def __init__(self, *args, **kwargs):
14 | super().__init__()
15 |
16 | self.origin_stream = ST_GCN(*args, **kwargs)
17 | self.motion_stream = ST_GCN(*args, **kwargs)
18 |
19 | def forward(self, x):
20 | N, C, T, V, M = x.size()
21 | m = torch.cat((torch.cuda.FloatTensor(N, C, 1, V, M).zero_(),
22 | x[:, :, 1:-1] - 0.5 * x[:, :, 2:] - 0.5 * x[:, :, :-2],
23 | torch.cuda.FloatTensor(N, C, 1, V, M).zero_()), 2)
24 |
25 | res = self.origin_stream(x) + self.motion_stream(m)
26 | return res
--------------------------------------------------------------------------------
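The motion stream above feeds the same backbone a temporal-difference signal: for each interior frame it computes x_t - (x_{t+1} + x_{t-1}) / 2 and zero-pads the first and last frame so the length stays T. Because the forward pass builds the padding with torch.cuda.FloatTensor, it assumes CUDA input; the device-agnostic sketch below (not repo code) computes the same feature:

    import torch

    def motion_feature(x):                  # x: (N, C, T, V, M)
        pad = x.new_zeros(x.size(0), x.size(1), 1, x.size(3), x.size(4))
        inner = x[:, :, 1:-1] - 0.5 * x[:, :, 2:] - 0.5 * x[:, :, :-2]
        return torch.cat((pad, inner, pad), dim=2)
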
/net/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/net/utils/__init__.py
--------------------------------------------------------------------------------
/net/utils/graph.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | class Graph():
4 | """ The Graph to model the skeletons extracted by the openpose
5 |
6 | Args:
7 |         strategy (string): must be one of the following candidates
8 | - uniform: Uniform Labeling
9 | - distance: Distance Partitioning
10 | - spatial: Spatial Configuration
11 | For more information, please refer to the section 'Partition Strategies'
12 | in our paper (https://arxiv.org/abs/1801.07455).
13 |
14 |         layout (string): must be one of the following candidates
15 |         - openpose: Consists of 18 joints. For more information, please
16 |           refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose#output
17 |         - ntu-rgb+d: Consists of 25 joints. For more information, please
18 |           refer to https://github.com/shahroudy/NTURGB-D
19 |
20 | max_hop (int): the maximal distance between two connected nodes
21 | dilation (int): controls the spacing between the kernel points
22 |
23 | """
24 |
25 | def __init__(self,
26 | layout='openpose',
27 | strategy='uniform',
28 | max_hop=1,
29 | dilation=1):
30 | self.max_hop = max_hop
31 | self.dilation = dilation
32 |
33 | self.get_edge(layout)
34 | self.hop_dis = get_hop_distance(
35 | self.num_node, self.edge, max_hop=max_hop)
36 | self.get_adjacency(strategy)
37 |
38 | def __str__(self):
39 |         return str(self.A)
40 |
41 | def get_edge(self, layout):
42 | if layout == 'openpose':
43 | self.num_node = 18
44 | self_link = [(i, i) for i in range(self.num_node)]
45 |             neighbor_link = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12),
46 |                              (12, 11), (10, 9), (9, 8), (11, 5), (8, 2),
47 |                              (5, 1), (2, 1), (0, 1), (15, 0), (14, 0),
48 |                              (17, 15), (16, 14)]
49 | self.edge = self_link + neighbor_link
50 | self.center = 1
51 | elif layout == 'ntu-rgb+d':
52 | self.num_node = 25
53 | self_link = [(i, i) for i in range(self.num_node)]
54 | neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21),
55 | (6, 5), (7, 6), (8, 7), (9, 21), (10, 9),
56 | (11, 10), (12, 11), (13, 1), (14, 13), (15, 14),
57 | (16, 15), (17, 1), (18, 17), (19, 18), (20, 19),
58 | (22, 23), (23, 8), (24, 25), (25, 12)]
59 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
60 | self.edge = self_link + neighbor_link
61 | self.center = 21 - 1
62 | elif layout == 'ntu_edge':
63 | self.num_node = 24
64 | self_link = [(i, i) for i in range(self.num_node)]
65 | neighbor_1base = [(1, 2), (3, 2), (4, 3), (5, 2), (6, 5), (7, 6),
66 | (8, 7), (9, 2), (10, 9), (11, 10), (12, 11),
67 | (13, 1), (14, 13), (15, 14), (16, 15), (17, 1),
68 | (18, 17), (19, 18), (20, 19), (21, 22), (22, 8),
69 | (23, 24), (24, 12)]
70 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base]
71 | self.edge = self_link + neighbor_link
72 | self.center = 2
73 | # elif layout=='customer settings'
74 | # pass
75 | else:
76 |             raise ValueError("This layout does not exist.")
77 |
78 | def get_adjacency(self, strategy):
79 | valid_hop = range(0, self.max_hop + 1, self.dilation)
80 | adjacency = np.zeros((self.num_node, self.num_node))
81 | for hop in valid_hop:
82 | adjacency[self.hop_dis == hop] = 1
83 | normalize_adjacency = normalize_digraph(adjacency)
84 |
85 | if strategy == 'uniform':
86 | A = np.zeros((1, self.num_node, self.num_node))
87 | A[0] = normalize_adjacency
88 | self.A = A
89 | elif strategy == 'distance':
90 | A = np.zeros((len(valid_hop), self.num_node, self.num_node))
91 | for i, hop in enumerate(valid_hop):
92 |                 A[i][self.hop_dis == hop] = (
93 |                     normalize_adjacency[self.hop_dis == hop])
94 | self.A = A
95 | elif strategy == 'spatial':
96 | A = []
97 | for hop in valid_hop:
98 | a_root = np.zeros((self.num_node, self.num_node))
99 | a_close = np.zeros((self.num_node, self.num_node))
100 | a_further = np.zeros((self.num_node, self.num_node))
101 | for i in range(self.num_node):
102 | for j in range(self.num_node):
103 | if self.hop_dis[j, i] == hop:
104 | if self.hop_dis[j, self.center] == self.hop_dis[
105 | i, self.center]:
106 | a_root[j, i] = normalize_adjacency[j, i]
107 |                             elif (self.hop_dis[j, self.center] >
108 |                                   self.hop_dis[i, self.center]):
109 |                                 # j is farther from the center joint than i
110 |                                 a_close[j, i] = normalize_adjacency[j, i]
111 | else:
112 | a_further[j, i] = normalize_adjacency[j, i]
113 | if hop == 0:
114 | A.append(a_root)
115 | else:
116 | A.append(a_root + a_close)
117 | A.append(a_further)
118 | A = np.stack(A)
119 | self.A = A
120 | else:
121 |             raise ValueError("This strategy does not exist.")
122 |
123 |
124 | def get_hop_distance(num_node, edge, max_hop=1):
125 | A = np.zeros((num_node, num_node))
126 | for i, j in edge:
127 | A[j, i] = 1
128 | A[i, j] = 1
129 |
130 | # compute hop steps
131 | hop_dis = np.zeros((num_node, num_node)) + np.inf
132 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)]
133 | arrive_mat = (np.stack(transfer_mat) > 0)
134 | for d in range(max_hop, -1, -1):
135 | hop_dis[arrive_mat[d]] = d
136 | return hop_dis
137 |
138 |
139 | def normalize_digraph(A):
140 | Dl = np.sum(A, 0)
141 | num_node = A.shape[0]
142 | Dn = np.zeros((num_node, num_node))
143 | for i in range(num_node):
144 | if Dl[i] > 0:
145 | Dn[i, i] = Dl[i]**(-1)
146 | AD = np.dot(A, Dn)
147 | return AD
148 |
149 |
150 | def normalize_undigraph(A):
151 | Dl = np.sum(A, 0)
152 | num_node = A.shape[0]
153 | Dn = np.zeros((num_node, num_node))
154 | for i in range(num_node):
155 | if Dl[i] > 0:
156 | Dn[i, i] = Dl[i]**(-0.5)
157 | DAD = np.dot(np.dot(Dn, A), Dn)
158 | return DAD
--------------------------------------------------------------------------------
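The partition strategy only changes the leading dimension of the adjacency stack A, which later becomes the spatial kernel size of the graph convolution. A small sketch (not repo code), assuming the default max_hop=1 and dilation=1:

    from net.utils.graph import Graph

    for strategy in ('uniform', 'distance', 'spatial'):
        g = Graph(layout='openpose', strategy=strategy)
        print(strategy, g.A.shape)
    # uniform  -> (1, 18, 18): one normalized adjacency
    # distance -> (2, 18, 18): one matrix per hop distance (0 and 1)
    # spatial  -> (3, 18, 18): root subset plus the two center-distance subsets
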
/net/utils/tgcn.py:
--------------------------------------------------------------------------------
1 | # The basic unit of graph convolutional networks.
2 |
3 | import torch
4 | import torch.nn as nn
5 |
6 | class ConvTemporalGraphical(nn.Module):
7 |
8 | r"""The basic module for applying a graph convolution.
9 |
10 | Args:
11 | in_channels (int): Number of channels in the input sequence data
12 | out_channels (int): Number of channels produced by the convolution
13 | kernel_size (int): Size of the graph convolving kernel
14 | t_kernel_size (int): Size of the temporal convolving kernel
15 | t_stride (int, optional): Stride of the temporal convolution. Default: 1
16 | t_padding (int, optional): Temporal zero-padding added to both sides of
17 | the input. Default: 0
18 | t_dilation (int, optional): Spacing between temporal kernel elements.
19 | Default: 1
20 | bias (bool, optional): If ``True``, adds a learnable bias to the output.
21 | Default: ``True``
22 |
23 | Shape:
24 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format
25 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
26 |         - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format
27 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format
28 |
29 | where
30 | :math:`N` is a batch size,
31 |         :math:`K` is the spatial kernel size, as :math:`K == kernel_size`,
32 | :math:`T_{in}/T_{out}` is a length of input/output sequence,
33 | :math:`V` is the number of graph nodes.
34 | """
35 |
36 | def __init__(self,
37 | in_channels,
38 | out_channels,
39 | kernel_size,
40 | t_kernel_size=1,
41 | t_stride=1,
42 | t_padding=0,
43 | t_dilation=1,
44 | bias=True):
45 | super().__init__()
46 |
47 | self.kernel_size = kernel_size
48 | self.conv = nn.Conv2d(
49 | in_channels,
50 | out_channels * kernel_size,
51 | kernel_size=(t_kernel_size, 1),
52 | padding=(t_padding, 0),
53 | stride=(t_stride, 1),
54 | dilation=(t_dilation, 1),
55 | bias=bias)
56 |
57 | def forward(self, x, A):
58 | assert A.size(0) == self.kernel_size
59 |
60 | x = self.conv(x)
61 |
62 | n, kc, t, v = x.size()
63 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v)
64 | x = torch.einsum('nkctv,kvw->nctw', (x, A))
65 |
66 | return x.contiguous(), A
67 |
--------------------------------------------------------------------------------
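The graph convolution above first expands the channels by the spatial kernel size K with a temporal convolution, then contracts over the K adjacency matrices via the einsum 'nkctv,kvw->nctw'. A shape walk-through (a sketch, not repo code; the sizes are arbitrary and a random A stands in for the normalized partitions produced by Graph):

    import torch
    from net.utils.tgcn import ConvTemporalGraphical

    N, C_in, C_out, T, V, K = 4, 3, 64, 30, 18, 3
    gcn = ConvTemporalGraphical(C_in, C_out, kernel_size=K)

    x = torch.randn(N, C_in, T, V)       # input graph sequence
    A = torch.randn(K, V, V)             # stands in for Graph(...).A
    y, A = gcn(x, A)
    print(y.shape)                       # torch.Size([4, 64, 30, 18])
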
/processor/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/processor/__init__.py
--------------------------------------------------------------------------------
/processor/demo_offline.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 | import argparse
5 | import json
6 | import shutil
7 | import time
8 |
9 | import numpy as np
10 | import torch
11 | import skvideo.io
12 |
13 | from .io import IO
14 | import tools
15 | import tools.utils as utils
16 |
17 | import cv2
18 |
19 | class DemoOffline(IO):
20 |
21 | def start(self):
22 |
23 | # initiate
24 | label_name_path = './resource/kinetics_skeleton/label_name.txt'
25 | with open(label_name_path) as f:
26 | label_name = f.readlines()
27 | label_name = [line.rstrip() for line in label_name]
28 | self.label_name = label_name
29 |
30 | # pose estimation
31 | video, data_numpy = self.pose_estimation()
32 |
33 | # action recognition
34 | data = torch.from_numpy(data_numpy)
35 | data = data.unsqueeze(0)
36 | data = data.float().to(self.dev).detach() # (1, channel, frame, joint, person)
37 |
38 | # model predict
39 | voting_label_name, video_label_name, output, intensity = self.predict(data)
40 |
41 | # render the video
42 | images = self.render_video(data_numpy, voting_label_name,
43 | video_label_name, intensity, video)
44 |
45 | # visualize
46 | for image in images:
47 | image = image.astype(np.uint8)
48 | cv2.imshow("ST-GCN", image)
49 | if cv2.waitKey(1) & 0xFF == ord('q'):
50 | break
51 |
52 | def predict(self, data):
53 | # forward
54 | output, feature = self.model.extract_feature(data)
55 | output = output[0]
56 | feature = feature[0]
57 | intensity = (feature*feature).sum(dim=0)**0.5
58 | intensity = intensity.cpu().detach().numpy()
59 |
60 | # get result
61 | # classification result of the full sequence
62 | voting_label = output.sum(dim=3).sum(
63 | dim=2).sum(dim=1).argmax(dim=0)
64 | voting_label_name = self.label_name[voting_label]
65 | # classification result for each person of the latest frame
66 | num_person = data.size(4)
67 | latest_frame_label = [output[:, :, :, m].sum(
68 | dim=2)[:, -1].argmax(dim=0) for m in range(num_person)]
69 | latest_frame_label_name = [self.label_name[l]
70 | for l in latest_frame_label]
71 |
72 | num_person = output.size(3)
73 | num_frame = output.size(1)
74 | video_label_name = list()
75 | for t in range(num_frame):
76 | frame_label_name = list()
77 | for m in range(num_person):
78 | person_label = output[:, t, :, m].sum(dim=1).argmax(dim=0)
79 | person_label_name = self.label_name[person_label]
80 | frame_label_name.append(person_label_name)
81 | video_label_name.append(frame_label_name)
82 | return voting_label_name, video_label_name, output, intensity
83 |
84 | def render_video(self, data_numpy, voting_label_name, video_label_name, intensity, video):
85 | images = utils.visualization.stgcn_visualize(
86 | data_numpy,
87 | self.model.graph.edge,
88 | intensity, video,
89 | voting_label_name,
90 | video_label_name,
91 | self.arg.height)
92 | return images
93 |
94 | def pose_estimation(self):
95 | # load openpose python api
96 | if self.arg.openpose is not None:
97 | sys.path.append('{}/python'.format(self.arg.openpose))
98 | sys.path.append('{}/build/python'.format(self.arg.openpose))
99 | try:
100 | from openpose import pyopenpose as op
101 | except:
102 | print('Cannot find the OpenPose Python API.')
103 | return
104 |
105 |
106 | video_name = self.arg.video.split('/')[-1].split('.')[0]
107 |
108 | # initiate
109 | opWrapper = op.WrapperPython()
110 | params = dict(model_folder='./models', model_pose='COCO')
111 | opWrapper.configure(params)
112 | opWrapper.start()
113 | self.model.eval()
114 | video_capture = cv2.VideoCapture(self.arg.video)
115 | video_length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT))
116 | pose_tracker = naive_pose_tracker(data_frame=video_length)
117 |
118 | # pose estimation
119 | start_time = time.time()
120 | frame_index = 0
121 | video = list()
122 | while(True):
123 |
124 | # get image
125 | ret, orig_image = video_capture.read()
126 | if orig_image is None:
127 | break
128 | source_H, source_W, _ = orig_image.shape
129 | orig_image = cv2.resize(
130 | orig_image, (256 * source_W // source_H, 256))
131 | H, W, _ = orig_image.shape
132 | video.append(orig_image)
133 |
134 | # pose estimation
135 | datum = op.Datum()
136 | datum.cvInputData = orig_image
137 | opWrapper.emplaceAndPop([datum])
138 | multi_pose = datum.poseKeypoints # (num_person, num_joint, 3)
139 | if len(multi_pose.shape) != 3:
140 | continue
141 |
142 | # normalization
143 | multi_pose[:, :, 0] = multi_pose[:, :, 0]/W
144 | multi_pose[:, :, 1] = multi_pose[:, :, 1]/H
145 | multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
146 | multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
147 | multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0
148 |
149 | # pose tracking
150 | pose_tracker.update(multi_pose, frame_index)
151 | frame_index += 1
152 |
153 | print('Pose estimation ({}/{}).'.format(frame_index, video_length))
154 |
155 | data_numpy = pose_tracker.get_skeleton_sequence()
156 | return video, data_numpy
157 |
158 | @staticmethod
159 | def get_parser(add_help=False):
160 |
161 | # parameter priority: command line > config > default
162 | parent_parser = IO.get_parser(add_help=False)
163 | parser = argparse.ArgumentParser(
164 | add_help=add_help,
165 | parents=[parent_parser],
166 | description='Demo for Spatial Temporal Graph Convolution Network')
167 |
168 | # region arguments yapf: disable
169 | parser.add_argument('--video',
170 | default='./resource/media/skateboarding.mp4',
171 | help='Path to video')
172 | parser.add_argument('--openpose',
173 | default=None,
174 | help='Path to openpose')
175 | parser.add_argument('--model_input_frame',
176 | default=128,
177 | type=int)
178 | parser.add_argument('--model_fps',
179 | default=30,
180 | type=int)
181 | parser.add_argument('--height',
182 | default=1080,
183 | type=int,
184 | help='height of frame in the output video.')
185 | parser.set_defaults(
186 | config='./config/st_gcn/kinetics-skeleton/demo_offline.yaml')
187 | parser.set_defaults(print_log=False)
188 | # endregion yapf: enable
189 |
190 | return parser
191 |
192 | class naive_pose_tracker():
193 | """ A simple tracker for recording person poses and generating skeleton sequences.
194 | For practical applications, we recommend implementing a more robust tracker.
195 | Pull requests are welcome.
196 | """
197 |
198 | def __init__(self, data_frame=128, num_joint=18, max_frame_dis=np.inf):
199 | self.data_frame = data_frame
200 | self.num_joint = num_joint
201 | self.max_frame_dis = max_frame_dis
202 | self.latest_frame = 0
203 | self.trace_info = list()
204 |
205 | def update(self, multi_pose, current_frame):
206 | # multi_pose.shape: (num_person, num_joint, 3)
207 |
208 | if current_frame <= self.latest_frame:
209 | return
210 |
211 | if len(multi_pose.shape) != 3:
212 | return
213 |
214 | score_order = (-multi_pose[:, :, 2].sum(axis=1)).argsort(axis=0)
215 | for p in multi_pose[score_order]:
216 |
217 | # match existing traces
218 | matching_trace = None
219 | matching_dis = None
220 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
221 | # trace.shape: (num_frame, num_joint, 3)
222 | if current_frame <= latest_frame:
223 | continue
224 | mean_dis, is_close = self.get_dis(trace, p)
225 | if is_close:
226 | if matching_trace is None:
227 | matching_trace = trace_index
228 | matching_dis = mean_dis
229 | elif matching_dis > mean_dis:
230 | matching_trace = trace_index
231 | matching_dis = mean_dis
232 |
233 | # update trace information
234 | if matching_trace is not None:
235 | trace, latest_frame = self.trace_info[matching_trace]
236 |
237 | # pad the gap (zeros or interpolation) if the trace is fractured
238 | pad_mode = 'interp' if latest_frame == self.latest_frame else 'zero'
239 | pad = current_frame-latest_frame-1
240 | new_trace = self.cat_pose(trace, p, pad, pad_mode)
241 | self.trace_info[matching_trace] = (new_trace, current_frame)
242 |
243 | else:
244 | new_trace = np.array([p])
245 | self.trace_info.append((new_trace, current_frame))
246 |
247 | self.latest_frame = current_frame
248 |
249 | def get_skeleton_sequence(self):
250 |
251 | # remove old traces
252 | valid_trace_index = []
253 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
254 | if self.latest_frame - latest_frame < self.data_frame:
255 | valid_trace_index.append(trace_index)
256 | self.trace_info = [self.trace_info[v] for v in valid_trace_index]
257 |
258 | num_trace = len(self.trace_info)
259 | if num_trace == 0:
260 | return None
261 |
262 | data = np.zeros((3, self.data_frame, self.num_joint, num_trace))
263 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
264 | end = self.data_frame - (self.latest_frame - latest_frame)
265 | d = trace[-end:]
266 | beg = end - len(d)
267 | data[:, beg:end, :, trace_index] = d.transpose((2, 0, 1))
268 |
269 | return data
270 |
271 | # concatenate pose to a trace
272 | def cat_pose(self, trace, pose, pad, pad_mode):
273 | # trace.shape: (num_frame, num_joint, 3)
274 | num_joint = pose.shape[0]
275 | num_channel = pose.shape[1]
276 | if pad != 0:
277 | if pad_mode == 'zero':
278 | trace = np.concatenate(
279 | (trace, np.zeros((pad, num_joint, 3))), 0)
280 | elif pad_mode == 'interp':
281 | last_pose = trace[-1]
282 | coeff = [(p+1)/(pad+1) for p in range(pad)]
283 | interp_pose = [(1-c)*last_pose + c*pose for c in coeff]
284 | trace = np.concatenate((trace, interp_pose), 0)
285 | new_trace = np.concatenate((trace, [pose]), 0)
286 | return new_trace
287 |
288 | # calculate the distance between an existing trace and the input pose
289 |
290 | def get_dis(self, trace, pose):
291 | last_pose_xy = trace[-1, :, 0:2]
292 | curr_pose_xy = pose[:, 0:2]
293 |
294 | mean_dis = ((((last_pose_xy - curr_pose_xy)**2).sum(1))**0.5).mean()
295 | wh = last_pose_xy.max(0) - last_pose_xy.min(0)
296 | scale = (wh[0] * wh[1]) ** 0.5 + 0.0001
297 | is_close = mean_dis < scale * self.max_frame_dis
298 | return mean_dis, is_close
299 |
--------------------------------------------------------------------------------
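
A minimal usage sketch for naive_pose_tracker (not part of the repository; assumes the repository root is on PYTHONPATH and that the bundled torchlight package plus the requirements, including OpenCV, are installed, since importing the demo module pulls them in). Fake keypoints stand in for OpenPose output; the tracker pads and stacks them into the (C, T, V, M) layout expected by the model.

import numpy as np
from processor.demo_offline import naive_pose_tracker

tracker = naive_pose_tracker(data_frame=16, num_joint=18)

for frame in range(1, 11):
    # one person, 18 joints, (x, y, confidence); x and y already centred
    multi_pose = np.random.rand(1, 18, 3).astype(np.float32) - 0.5
    multi_pose[:, :, 2] += 0.5          # keep confidences strictly positive
    tracker.update(multi_pose, frame)

data = tracker.get_skeleton_sequence()
print(data.shape)   # -> (3, 16, 18, 1); the 10 observed frames fill the last 10 slots
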
/processor/demo_old.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import argparse
4 | import json
5 | import shutil
6 |
7 | import numpy as np
8 | import torch
9 | import skvideo.io
10 |
11 | from .io import IO
12 | import tools
13 | import tools.utils as utils
14 |
15 | class Demo(IO):
16 | """
17 | Demo for Skeleton-based Action Recognition
18 | """
19 | def start(self):
20 |
21 | openpose = '{}/examples/openpose/openpose.bin'.format(self.arg.openpose)
22 | video_name = self.arg.video.split('/')[-1].split('.')[0]
23 | output_snippets_dir = './data/openpose_estimation/snippets/{}'.format(video_name)
24 | output_sequence_dir = './data/openpose_estimation/data'
25 | output_sequence_path = '{}/{}.json'.format(output_sequence_dir, video_name)
26 | output_result_dir = self.arg.output_dir
27 | output_result_path = '{}/{}.mp4'.format(output_result_dir, video_name)
28 | label_name_path = './resource/kinetics_skeleton/label_name.txt'
29 | with open(label_name_path) as f:
30 | label_name = f.readlines()
31 | label_name = [line.rstrip() for line in label_name]
32 |
33 | # pose estimation
34 | openpose_args = dict(
35 | video=self.arg.video,
36 | write_json=output_snippets_dir,
37 | display=0,
38 | render_pose=0,
39 | model_pose='COCO')
40 | command_line = openpose + ' '
41 | command_line += ' '.join(['--{} {}'.format(k, v) for k, v in openpose_args.items()])
42 | shutil.rmtree(output_snippets_dir, ignore_errors=True)
43 | os.makedirs(output_snippets_dir)
44 | os.system(command_line)
45 |
46 | # pack openpose outputs
47 | video = utils.video.get_video_frames(self.arg.video)
48 | height, width, _ = video[0].shape
49 | video_info = utils.openpose.json_pack(
50 | output_snippets_dir, video_name, width, height)
51 | if not os.path.exists(output_sequence_dir):
52 | os.makedirs(output_sequence_dir)
53 | with open(output_sequence_path, 'w') as outfile:
54 | json.dump(video_info, outfile)
55 | if len(video_info['data']) == 0:
56 | print('Cannot find pose estimation results.')
57 | return
58 | else:
59 | print('Pose estimation complete.')
60 |
61 | # parse skeleton data
62 | pose, _ = utils.video.video_info_parsing(video_info)
63 | data = torch.from_numpy(pose)
64 | data = data.unsqueeze(0)
65 | data = data.float().to(self.dev).detach()
66 |
67 | # extract feature
68 | print('\nNetwork forward...')
69 | self.model.eval()
70 | output, feature = self.model.extract_feature(data)
71 | output = output[0]
72 | feature = feature[0]
73 | intensity = (feature*feature).sum(dim=0)**0.5
74 | intensity = intensity.cpu().detach().numpy()
75 | label = output.sum(dim=3).sum(dim=2).sum(dim=1).argmax(dim=0)
76 | print('Prediction result: {}'.format(label_name[label]))
77 | print('Done.')
78 |
79 | # visualization
80 | print('\nVisualization...')
81 | label_sequence = output.sum(dim=2).argmax(dim=0)
82 | label_name_sequence = [[label_name[p] for p in l] for l in label_sequence]
83 | edge = self.model.graph.edge
84 | images = utils.visualization.stgcn_visualize(
85 | pose, edge, intensity, video, label_name[label], label_name_sequence, self.arg.height)
86 | print('Done.')
87 |
88 | # save video
89 | print('\nSaving...')
90 | if not os.path.exists(output_result_dir):
91 | os.makedirs(output_result_dir)
92 | writer = skvideo.io.FFmpegWriter(output_result_path,
93 | outputdict={'-b': '300000000'})
94 | for img in images:
95 | writer.writeFrame(img)
96 | writer.close()
97 | print('The Demo result has been saved in {}.'.format(output_result_path))
98 |
99 | @staticmethod
100 | def get_parser(add_help=False):
101 |
102 | # parameter priority: command line > config > default
103 | parent_parser = IO.get_parser(add_help=False)
104 | parser = argparse.ArgumentParser(
105 | add_help=add_help,
106 | parents=[parent_parser],
107 | description='Demo for Spatial Temporal Graph Convolution Network')
108 |
109 | # region arguments yapf: disable
110 | parser.add_argument('--video',
111 | default='./resource/media/skateboarding.mp4',
112 | help='Path to video')
113 | parser.add_argument('--openpose',
114 | default='3dparty/openpose/build',
115 | help='Path to openpose')
116 | parser.add_argument('--output_dir',
117 | default='./data/demo_result',
118 | help='Path to save results')
119 | parser.add_argument('--height',
120 | default=1080,
121 | type=int)
122 | parser.set_defaults(config='./config/st_gcn/kinetics-skeleton/demo_old.yaml')
123 | parser.set_defaults(print_log=False)
124 | # endregion yapf: enable
125 |
126 | return parser
127 |
--------------------------------------------------------------------------------
/processor/demo_realtime.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import os
3 | import sys
4 | import argparse
5 | import json
6 | import shutil
7 | import time
8 |
9 | import numpy as np
10 | import torch
11 | import skvideo.io
12 |
13 | from .io import IO
14 | import tools
15 | import tools.utils as utils
16 |
17 | import cv2
18 |
19 | class DemoRealtime(IO):
20 | """ A demo for utilizing st-gcn in the realtime action recognition.
21 | The Openpose python-api is required for this demo.
22 |
23 | Since the pre-trained model is trained on videos with 30fps,
24 | and Openpose is hard to achieve this high speed in the single GPU,
25 | if you want to predict actions by **camera** in realtime,
26 | either data interpolation or new pre-trained model
27 | is required.
28 |
29 | Pull requests are always welcome.
30 | """
31 |
32 | def start(self):
33 | # load openpose python api
34 | if self.arg.openpose is not None:
35 | sys.path.append('{}/python'.format(self.arg.openpose))
36 | sys.path.append('{}/build/python'.format(self.arg.openpose))
37 | try:
38 | from openpose import pyopenpose as op
39 | except:
40 | print('Cannot find the OpenPose Python API.')
41 | return
42 |
43 | video_name = self.arg.video.split('/')[-1].split('.')[0]
44 | label_name_path = './resource/kinetics_skeleton/label_name.txt'
45 | with open(label_name_path) as f:
46 | label_name = f.readlines()
47 | label_name = [line.rstrip() for line in label_name]
48 | self.label_name = label_name
49 |
50 | # initiate
51 | opWrapper = op.WrapperPython()
52 | params = dict(model_folder='./models', model_pose='COCO')
53 | opWrapper.configure(params)
54 | opWrapper.start()
55 | self.model.eval()
56 | pose_tracker = naive_pose_tracker()
57 |
58 | if self.arg.video == 'camera_source':
59 | video_capture = cv2.VideoCapture(0)
60 | else:
61 | video_capture = cv2.VideoCapture(self.arg.video)
62 |
63 | # start recognition
64 | start_time = time.time()
65 | frame_index = 0
66 | while(True):
67 |
68 | tic = time.time()
69 |
70 | # get image
71 | ret, orig_image = video_capture.read()
72 | if orig_image is None:
73 | break
74 | source_H, source_W, _ = orig_image.shape
75 | orig_image = cv2.resize(
76 | orig_image, (256 * source_W // source_H, 256))
77 | H, W, _ = orig_image.shape
78 |
79 | # pose estimation
80 | datum = op.Datum()
81 | datum.cvInputData = orig_image
82 | opWrapper.emplaceAndPop([datum])
83 | multi_pose = datum.poseKeypoints # (num_person, num_joint, 3)
84 | if len(multi_pose.shape) != 3:
85 | continue
86 |
87 | # normalization
88 | multi_pose[:, :, 0] = multi_pose[:, :, 0]/W
89 | multi_pose[:, :, 1] = multi_pose[:, :, 1]/H
90 | multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
91 | multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
92 | multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0
93 |
94 | # pose tracking
95 | if self.arg.video == 'camera_source':
96 | frame_index = int((time.time() - start_time)*self.arg.fps)
97 | else:
98 | frame_index += 1
99 | pose_tracker.update(multi_pose, frame_index)
100 | data_numpy = pose_tracker.get_skeleton_sequence()
101 | data = torch.from_numpy(data_numpy)
102 | data = data.unsqueeze(0)
103 | data = data.float().to(self.dev).detach() # (1, channel, frame, joint, person)
104 |
105 | # model predict
106 | voting_label_name, video_label_name, output, intensity = self.predict(
107 | data)
108 |
109 | # visualization
110 | app_fps = 1 / (time.time() - tic)
111 | image = self.render(data_numpy, voting_label_name,
112 | video_label_name, intensity, orig_image, app_fps)
113 | cv2.imshow("ST-GCN", image)
114 | if cv2.waitKey(1) & 0xFF == ord('q'):
115 | break
116 |
117 | def predict(self, data):
118 | # forward
119 | output, feature = self.model.extract_feature(data)
120 | output = output[0]
121 | feature = feature[0]
122 | intensity = (feature*feature).sum(dim=0)**0.5
123 | intensity = intensity.cpu().detach().numpy()
124 |
125 | # get result
126 | # classification result of the full sequence
127 | voting_label = output.sum(dim=3).sum(
128 | dim=2).sum(dim=1).argmax(dim=0)
129 | voting_label_name = self.label_name[voting_label]
130 | # classification result for each person of the latest frame
131 | num_person = data.size(4)
132 | latest_frame_label = [output[:, :, :, m].sum(
133 | dim=2)[:, -1].argmax(dim=0) for m in range(num_person)]
134 | latest_frame_label_name = [self.label_name[l]
135 | for l in latest_frame_label]
136 |
137 | num_person = output.size(3)
138 | num_frame = output.size(1)
139 | video_label_name = list()
140 | for t in range(num_frame):
141 | frame_label_name = list()
142 | for m in range(num_person):
143 | person_label = output[:, t, :, m].sum(dim=1).argmax(dim=0)
144 | person_label_name = self.label_name[person_label]
145 | frame_label_name.append(person_label_name)
146 | video_label_name.append(frame_label_name)
147 | return voting_label_name, video_label_name, output, intensity
148 |
149 | def render(self, data_numpy, voting_label_name, video_label_name, intensity, orig_image, fps=0):
150 | images = utils.visualization.stgcn_visualize(
151 | data_numpy[:, [-1]],
152 | self.model.graph.edge,
153 | intensity[[-1]], [orig_image],
154 | voting_label_name,
155 | [video_label_name[-1]],
156 | self.arg.height,
157 | fps=fps)
158 | image = next(images)
159 | image = image.astype(np.uint8)
160 | return image
161 |
162 | @staticmethod
163 | def get_parser(add_help=False):
164 |
165 | # parameter priority: command line > config > default
166 | parent_parser = IO.get_parser(add_help=False)
167 | parser = argparse.ArgumentParser(
168 | add_help=add_help,
169 | parents=[parent_parser],
170 | description='Demo for Spatial Temporal Graph Convolution Network')
171 |
172 | # region arguments yapf: disable
173 | parser.add_argument('--video',
174 | default='./resource/media/skateboarding.mp4',
175 | help='Path to video')
176 | parser.add_argument('--openpose',
177 | default=None,
178 | help='Path to openpose')
179 | parser.add_argument('--model_input_frame',
180 | default=128,
181 | type=int)
182 | parser.add_argument('--model_fps',
183 | default=30,
184 | type=int)
185 | parser.add_argument('--height',
186 | default=1080,
187 | type=int,
188 | help='height of frame in the output video.')
189 | parser.set_defaults(
190 | config='./config/st_gcn/kinetics-skeleton/demo_realtime.yaml')
191 | parser.set_defaults(print_log=False)
192 | # endregion yapf: enable
193 |
194 | return parser
195 |
196 | class naive_pose_tracker():
197 | """ A simple tracker for recording person poses and generating skeleton sequences.
198 | For practical applications, we recommend implementing a more robust tracker.
199 | Pull requests are welcome.
200 | """
201 |
202 | def __init__(self, data_frame=128, num_joint=18, max_frame_dis=np.inf):
203 | self.data_frame = data_frame
204 | self.num_joint = num_joint
205 | self.max_frame_dis = max_frame_dis
206 | self.latest_frame = 0
207 | self.trace_info = list()
208 |
209 | def update(self, multi_pose, current_frame):
210 | # multi_pose.shape: (num_person, num_joint, 3)
211 |
212 | if current_frame <= self.latest_frame:
213 | return
214 |
215 | if len(multi_pose.shape) != 3:
216 | return
217 |
218 | score_order = (-multi_pose[:, :, 2].sum(axis=1)).argsort(axis=0)
219 | for p in multi_pose[score_order]:
220 |
221 | # match existing traces
222 | matching_trace = None
223 | matching_dis = None
224 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
225 | # trace.shape: (num_frame, num_joint, 3)
226 | if current_frame <= latest_frame:
227 | continue
228 | mean_dis, is_close = self.get_dis(trace, p)
229 | if is_close:
230 | if matching_trace is None:
231 | matching_trace = trace_index
232 | matching_dis = mean_dis
233 | elif matching_dis > mean_dis:
234 | matching_trace = trace_index
235 | matching_dis = mean_dis
236 |
237 | # update trace information
238 | if matching_trace is not None:
239 | trace, latest_frame = self.trace_info[matching_trace]
240 |
241 | # pad the gap (zeros or interpolation) if the trace is fractured
242 | pad_mode = 'interp' if latest_frame == self.latest_frame else 'zero'
243 | pad = current_frame-latest_frame-1
244 | new_trace = self.cat_pose(trace, p, pad, pad_mode)
245 | self.trace_info[matching_trace] = (new_trace, current_frame)
246 |
247 | else:
248 | new_trace = np.array([p])
249 | self.trace_info.append((new_trace, current_frame))
250 |
251 | self.latest_frame = current_frame
252 |
253 | def get_skeleton_sequence(self):
254 |
255 | # remove old traces
256 | valid_trace_index = []
257 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
258 | if self.latest_frame - latest_frame < self.data_frame:
259 | valid_trace_index.append(trace_index)
260 | self.trace_info = [self.trace_info[v] for v in valid_trace_index]
261 |
262 | num_trace = len(self.trace_info)
263 | if num_trace == 0:
264 | return None
265 |
266 | data = np.zeros((3, self.data_frame, self.num_joint, num_trace))
267 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info):
268 | end = self.data_frame - (self.latest_frame - latest_frame)
269 | d = trace[-end:]
270 | beg = end - len(d)
271 | data[:, beg:end, :, trace_index] = d.transpose((2, 0, 1))
272 |
273 | return data
274 |
275 | # concatenate pose to a trace
276 | def cat_pose(self, trace, pose, pad, pad_mode):
277 | # trace.shape: (num_frame, num_joint, 3)
278 | num_joint = pose.shape[0]
279 | num_channel = pose.shape[1]
280 | if pad != 0:
281 | if pad_mode == 'zero':
282 | trace = np.concatenate(
283 | (trace, np.zeros((pad, num_joint, 3))), 0)
284 | elif pad_mode == 'interp':
285 | last_pose = trace[-1]
286 | coeff = [(p+1)/(pad+1) for p in range(pad)]
287 | interp_pose = [(1-c)*last_pose + c*pose for c in coeff]
288 | trace = np.concatenate((trace, interp_pose), 0)
289 | new_trace = np.concatenate((trace, [pose]), 0)
290 | return new_trace
291 |
292 | # calculate the distance between an existing trace and the input pose
293 |
294 | def get_dis(self, trace, pose):
295 | last_pose_xy = trace[-1, :, 0:2]
296 | curr_pose_xy = pose[:, 0:2]
297 |
298 | mean_dis = ((((last_pose_xy - curr_pose_xy)**2).sum(1))**0.5).mean()
299 | wh = last_pose_xy.max(0) - last_pose_xy.min(0)
300 | scale = (wh[0] * wh[1]) ** 0.5 + 0.0001
301 | is_close = mean_dis < scale * self.max_frame_dis
302 | return mean_dis, is_close
303 |
--------------------------------------------------------------------------------
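
A standalone sketch (illustrative numbers only) of the keypoint normalization used in both demos above: pixel coordinates are divided by the resized frame width and height, shifted so the frame centre becomes (0, 0), and joints reported with zero confidence are zeroed out.

import numpy as np

H, W = 256, 456                               # frame resized to height 256
multi_pose = np.array([[[228., 128., 0.9],    # one person, two joints:
                        [  0.,   0., 0.0]]])  # second joint not detected

multi_pose[:, :, 0] = multi_pose[:, :, 0] / W
multi_pose[:, :, 1] = multi_pose[:, :, 1] / H
multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5
multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0
multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0

print(multi_pose[0, 0])   # -> [0.  0.  0.9]  joint at the frame centre
print(multi_pose[0, 1])   # -> [0.  0.  0. ]  undetected joint stays zero
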
/processor/io.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # pylint: disable=W0201
3 | import sys
4 | import argparse
5 | import yaml
6 | import numpy as np
7 |
8 | # torch
9 | import torch
10 | import torch.nn as nn
11 |
12 | # torchlight
13 | import torchlight
14 | from torchlight import str2bool
15 | from torchlight import DictAction
16 | from torchlight import import_class
17 |
18 | class IO():
19 | """
20 | IO Processor
21 | """
22 |
23 | def __init__(self, argv=None):
24 |
25 | self.load_arg(argv)
26 | self.init_environment()
27 | self.load_model()
28 | self.load_weights()
29 | self.gpu()
30 |
31 | def load_arg(self, argv=None):
32 | parser = self.get_parser()
33 |
34 | # load args from the config file
35 | p = parser.parse_args(argv)
36 | if p.config is not None:
37 | # load config file
38 | with open(p.config, 'r') as f:
39 | default_arg = yaml.load(f, Loader=yaml.FullLoader)
40 |
41 | # update parser from config file
42 | key = vars(p).keys()
43 | for k in default_arg.keys():
44 | if k not in key:
45 | print('Unknown Arguments: {}'.format(k))
46 | assert k in key
47 |
48 | parser.set_defaults(**default_arg)
49 |
50 | self.arg = parser.parse_args(argv)
51 |
52 | def init_environment(self):
53 | self.io = torchlight.IO(
54 | self.arg.work_dir,
55 | save_log=self.arg.save_log,
56 | print_log=self.arg.print_log)
57 | self.io.save_arg(self.arg)
58 |
59 | # gpu
60 | if self.arg.use_gpu:
61 | gpus = torchlight.visible_gpu(self.arg.device)
62 | torchlight.occupy_gpu(gpus)
63 | self.gpus = gpus
64 | self.dev = "cuda:0"
65 | else:
66 | self.dev = "cpu"
67 |
68 | def load_model(self):
69 | self.model = self.io.load_model(self.arg.model,
70 | **(self.arg.model_args))
71 |
72 | def load_weights(self):
73 | if self.arg.weights:
74 | self.model = self.io.load_weights(self.model, self.arg.weights,
75 | self.arg.ignore_weights)
76 |
77 | def gpu(self):
78 | # move modules to gpu
79 | self.model = self.model.to(self.dev)
80 | for name, value in vars(self).items():
81 | cls_name = str(value.__class__)
82 | if cls_name.find('torch.nn.modules') != -1:
83 | setattr(self, name, value.to(self.dev))
84 |
85 | # model parallel
86 | if self.arg.use_gpu and len(self.gpus) > 1:
87 | self.model = nn.DataParallel(self.model, device_ids=self.gpus)
88 |
89 | def start(self):
90 | self.io.print_log('Parameters:\n{}\n'.format(str(vars(self.arg))))
91 |
92 | @staticmethod
93 | def get_parser(add_help=False):
94 |
95 | #region arguments yapf: disable
96 | # parameter priority: command line > config > default
97 | parser = argparse.ArgumentParser( add_help=add_help, description='IO Processor')
98 |
99 | parser.add_argument('-w', '--work_dir', default='./work_dir/tmp', help='the work folder for storing results')
100 | parser.add_argument('-c', '--config', default=None, help='path to the configuration file')
101 |
102 | # processor
103 | parser.add_argument('--use_gpu', type=str2bool, default=True, help='use GPUs or not')
104 | parser.add_argument('--device', type=int, default=0, nargs='+', help='the indexes of GPUs for training or testing')
105 |
106 | # visualize and debug
107 | parser.add_argument('--print_log', type=str2bool, default=True, help='print logging or not')
108 | parser.add_argument('--save_log', type=str2bool, default=True, help='save logging or not')
109 |
110 | # model
111 | parser.add_argument('--model', default=None, help='the model will be used')
112 | parser.add_argument('--model_args', action=DictAction, default=dict(), help='the arguments of model')
113 | parser.add_argument('--weights', default=None, help='the weights for network initialization')
114 | parser.add_argument('--ignore_weights', type=str, default=[], nargs='+', help='the name of weights which will be ignored in the initialization')
115 | #endregion yapf: enable
116 |
117 | return parser
118 |
--------------------------------------------------------------------------------
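
A standalone sketch (names are illustrative, not the project's real options) of the "command line > config > default" priority implemented by IO.load_arg above: values read from the YAML file are injected via parser.set_defaults(), so they override hard-coded defaults but are still overridden by explicit command-line flags.

import argparse
import yaml

parser = argparse.ArgumentParser()
parser.add_argument('--base_lr', type=float, default=0.01)   # hard-coded default

config_text = "base_lr: 0.1\n"               # pretend this came from train.yaml
default_arg = yaml.safe_load(config_text)
parser.set_defaults(**default_arg)           # config values become new defaults

print(parser.parse_args([]).base_lr)                     # -> 0.1  (config > default)
print(parser.parse_args(['--base_lr', '0.5']).base_lr)   # -> 0.5  (command line > config)
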
/processor/processor.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # pylint: disable=W0201
3 | import sys
4 | import argparse
5 | import yaml
6 | import numpy as np
7 |
8 | # torch
9 | import torch
10 | import torch.nn as nn
11 | import torch.optim as optim
12 |
13 | # torchlight
14 | import torchlight
15 | from torchlight import str2bool
16 | from torchlight import DictAction
17 | from torchlight import import_class
18 |
19 | from .io import IO
20 |
21 | class Processor(IO):
22 | """
23 | Base Processor
24 | """
25 |
26 | def __init__(self, argv=None):
27 |
28 | self.load_arg(argv)
29 | self.init_environment()
30 | self.load_model()
31 | self.load_weights()
32 | self.gpu()
33 | self.load_data()
34 | self.load_optimizer()
35 |
36 | def init_environment(self):
37 |
38 | super().init_environment()
39 | self.result = dict()
40 | self.iter_info = dict()
41 | self.epoch_info = dict()
42 | self.meta_info = dict(epoch=0, iter=0)
43 |
44 | def load_optimizer(self):
45 | pass
46 |
47 | def load_data(self):
48 | Feeder = import_class(self.arg.feeder)
49 | if 'debug' not in self.arg.train_feeder_args:
50 | self.arg.train_feeder_args['debug'] = self.arg.debug
51 | self.data_loader = dict()
52 | if self.arg.phase == 'train':
53 | self.data_loader['train'] = torch.utils.data.DataLoader(
54 | dataset=Feeder(**self.arg.train_feeder_args),
55 | batch_size=self.arg.batch_size,
56 | shuffle=True,
57 | num_workers=self.arg.num_worker * torchlight.ngpu(
58 | self.arg.device),
59 | drop_last=True)
60 | if self.arg.test_feeder_args:
61 | self.data_loader['test'] = torch.utils.data.DataLoader(
62 | dataset=Feeder(**self.arg.test_feeder_args),
63 | batch_size=self.arg.test_batch_size,
64 | shuffle=False,
65 | num_workers=self.arg.num_worker * torchlight.ngpu(
66 | self.arg.device))
67 |
68 | def show_epoch_info(self):
69 | for k, v in self.epoch_info.items():
70 | self.io.print_log('\t{}: {}'.format(k, v))
71 | if self.arg.pavi_log:
72 | self.io.log('train', self.meta_info['iter'], self.epoch_info)
73 |
74 | def show_iter_info(self):
75 | if self.meta_info['iter'] % self.arg.log_interval == 0:
76 | info ='\tIter {} Done.'.format(self.meta_info['iter'])
77 | for k, v in self.iter_info.items():
78 | if isinstance(v, float):
79 | info = info + ' | {}: {:.4f}'.format(k, v)
80 | else:
81 | info = info + ' | {}: {}'.format(k, v)
82 |
83 | self.io.print_log(info)
84 |
85 | if self.arg.pavi_log:
86 | self.io.log('train', self.meta_info['iter'], self.iter_info)
87 |
88 | def train(self):
89 | for _ in range(100):
90 | self.iter_info['loss'] = 0
91 | self.show_iter_info()
92 | self.meta_info['iter'] += 1
93 | self.epoch_info['mean loss'] = 0
94 | self.show_epoch_info()
95 |
96 | def test(self):
97 | for _ in range(100):
98 | self.iter_info['loss'] = 1
99 | self.show_iter_info()
100 | self.epoch_info['mean loss'] = 1
101 | self.show_epoch_info()
102 |
103 | def start(self):
104 | self.io.print_log('Parameters:\n{}\n'.format(str(vars(self.arg))))
105 |
106 | # training phase
107 | if self.arg.phase == 'train':
108 | for epoch in range(self.arg.start_epoch, self.arg.num_epoch):
109 | self.meta_info['epoch'] = epoch
110 |
111 | # training
112 | self.io.print_log('Training epoch: {}'.format(epoch))
113 | self.train()
114 | self.io.print_log('Done.')
115 |
116 | # save model
117 | if ((epoch + 1) % self.arg.save_interval == 0) or (
118 | epoch + 1 == self.arg.num_epoch):
119 | filename = 'epoch{}_model.pt'.format(epoch + 1)
120 | self.io.save_model(self.model, filename)
121 |
122 | # evaluation
123 | if ((epoch + 1) % self.arg.eval_interval == 0) or (
124 | epoch + 1 == self.arg.num_epoch):
125 | self.io.print_log('Eval epoch: {}'.format(epoch))
126 | self.test()
127 | self.io.print_log('Done.')
128 | # test phase
129 | elif self.arg.phase == 'test':
130 |
131 | # the path of weights must be specified
132 | if self.arg.weights is None:
133 | raise ValueError('Please specify --weights.')
134 | self.io.print_log('Model: {}.'.format(self.arg.model))
135 | self.io.print_log('Weights: {}.'.format(self.arg.weights))
136 |
137 | # evaluation
138 | self.io.print_log('Evaluation Start:')
139 | self.test()
140 | self.io.print_log('Done.\n')
141 |
142 | # save the output of model
143 | if self.arg.save_result:
144 | result_dict = dict(
145 | zip(self.data_loader['test'].dataset.sample_name,
146 | self.result))
147 | self.io.save_pkl(result_dict, 'test_result.pkl')
148 |
149 | @staticmethod
150 | def get_parser(add_help=False):
151 |
152 | #region arguments yapf: disable
153 | # parameter priority: command line > config > default
154 | parser = argparse.ArgumentParser( add_help=add_help, description='Base Processor')
155 |
156 | parser.add_argument('-w', '--work_dir', default='./work_dir/tmp', help='the work folder for storing results')
157 | parser.add_argument('-c', '--config', default=None, help='path to the configuration file')
158 |
159 | # processor
160 | parser.add_argument('--phase', default='train', help='must be train or test')
161 | parser.add_argument('--save_result', type=str2bool, default=False, help='if true, the output of the model will be stored')
162 | parser.add_argument('--start_epoch', type=int, default=0, help='start training from which epoch')
163 | parser.add_argument('--num_epoch', type=int, default=80, help='stop training in which epoch')
164 | parser.add_argument('--use_gpu', type=str2bool, default=True, help='use GPUs or not')
165 | parser.add_argument('--device', type=int, default=0, nargs='+', help='the indexes of GPUs for training or testing')
166 |
167 | # visualize and debug
168 | parser.add_argument('--log_interval', type=int, default=100, help='the interval for printing messages (#iteration)')
169 | parser.add_argument('--save_interval', type=int, default=10, help='the interval for storing models (#epoch)')
170 | parser.add_argument('--eval_interval', type=int, default=5, help='the interval for evaluating models (#epoch)')
171 | parser.add_argument('--save_log', type=str2bool, default=True, help='save logging or not')
172 | parser.add_argument('--print_log', type=str2bool, default=True, help='print logging or not')
173 | parser.add_argument('--pavi_log', type=str2bool, default=False, help='logging on pavi or not')
174 |
175 | # feeder
176 | parser.add_argument('--feeder', default='feeder.feeder', help='the data loader to be used')
177 | parser.add_argument('--num_worker', type=int, default=4, help='the number of workers per GPU for the data loader')
178 | parser.add_argument('--train_feeder_args', action=DictAction, default=dict(), help='the arguments of data loader for training')
179 | parser.add_argument('--test_feeder_args', action=DictAction, default=dict(), help='the arguments of data loader for test')
180 | parser.add_argument('--batch_size', type=int, default=256, help='training batch size')
181 | parser.add_argument('--test_batch_size', type=int, default=256, help='test batch size')
182 | parser.add_argument('--debug', action="store_true", help='less data, faster loading')
183 |
184 | # model
185 | parser.add_argument('--model', default=None, help='the model will be used')
186 | parser.add_argument('--model_args', action=DictAction, default=dict(), help='the arguments of model')
187 | parser.add_argument('--weights', default=None, help='the weights for network initialization')
188 | parser.add_argument('--ignore_weights', type=str, default=[], nargs='+', help='the name of weights which will be ignored in the initialization')
189 | #endregion yapf: enable
190 |
191 | return parser
192 |
--------------------------------------------------------------------------------
/processor/recognition.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # pylint: disable=W0201
3 | import sys
4 | import argparse
5 | import yaml
6 | import numpy as np
7 |
8 | # torch
9 | import torch
10 | import torch.nn as nn
11 | import torch.optim as optim
12 |
13 | # torchlight
14 | import torchlight
15 | from torchlight import str2bool
16 | from torchlight import DictAction
17 | from torchlight import import_class
18 |
19 | from .processor import Processor
20 |
21 | def weights_init(m):
22 | classname = m.__class__.__name__
23 | if classname.find('Conv1d') != -1:
24 | m.weight.data.normal_(0.0, 0.02)
25 | if m.bias is not None:
26 | m.bias.data.fill_(0)
27 | elif classname.find('Conv2d') != -1:
28 | m.weight.data.normal_(0.0, 0.02)
29 | if m.bias is not None:
30 | m.bias.data.fill_(0)
31 | elif classname.find('BatchNorm') != -1:
32 | m.weight.data.normal_(1.0, 0.02)
33 | m.bias.data.fill_(0)
34 |
35 | class REC_Processor(Processor):
36 | """
37 | Processor for Skeleton-based Action Recognition
38 | """
39 |
40 | def load_model(self):
41 | self.model = self.io.load_model(self.arg.model,
42 | **(self.arg.model_args))
43 | self.model.apply(weights_init)
44 | self.loss = nn.CrossEntropyLoss()
45 |
46 | def load_optimizer(self):
47 | if self.arg.optimizer == 'SGD':
48 | self.optimizer = optim.SGD(
49 | self.model.parameters(),
50 | lr=self.arg.base_lr,
51 | momentum=0.9,
52 | nesterov=self.arg.nesterov,
53 | weight_decay=self.arg.weight_decay)
54 | elif self.arg.optimizer == 'Adam':
55 | self.optimizer = optim.Adam(
56 | self.model.parameters(),
57 | lr=self.arg.base_lr,
58 | weight_decay=self.arg.weight_decay)
59 | else:
60 | raise ValueError()
61 |
62 | def adjust_lr(self):
63 | if self.arg.optimizer == 'SGD' and self.arg.step:
64 | lr = self.arg.base_lr * (
65 | 0.1 ** np.sum(self.meta_info['epoch'] >= np.array(self.arg.step)))
66 | for param_group in self.optimizer.param_groups:
67 | param_group['lr'] = lr
68 | self.lr = lr
69 | else:
70 | self.lr = self.arg.base_lr
71 |
72 | def show_topk(self, k):
73 | rank = self.result.argsort()
74 | hit_top_k = [l in rank[i, -k:] for i, l in enumerate(self.label)]
75 | accuracy = sum(hit_top_k) * 1.0 / len(hit_top_k)
76 | self.io.print_log('\tTop{}: {:.2f}%'.format(k, 100 * accuracy))
77 |
78 | def train(self):
79 | self.model.train()
80 | self.adjust_lr()
81 | loader = self.data_loader['train']
82 | loss_value = []
83 |
84 | for data, label in loader:
85 |
86 | # get data
87 | data = data.float().to(self.dev)
88 | label = label.long().to(self.dev)
89 |
90 | # forward
91 | output = self.model(data)
92 | loss = self.loss(output, label)
93 |
94 | # backward
95 | self.optimizer.zero_grad()
96 | loss.backward()
97 | self.optimizer.step()
98 |
99 | # statistics
100 | self.iter_info['loss'] = loss.data.item()
101 | self.iter_info['lr'] = '{:.6f}'.format(self.lr)
102 | loss_value.append(self.iter_info['loss'])
103 | self.show_iter_info()
104 | self.meta_info['iter'] += 1
105 |
106 | self.epoch_info['mean_loss'] = np.mean(loss_value)
107 | self.show_epoch_info()
108 | self.io.print_timer()
109 |
110 | def test(self, evaluation=True):
111 |
112 | self.model.eval()
113 | loader = self.data_loader['test']
114 | loss_value = []
115 | result_frag = []
116 | label_frag = []
117 |
118 | for data, label in loader:
119 |
120 | # get data
121 | data = data.float().to(self.dev)
122 | label = label.long().to(self.dev)
123 |
124 | # inference
125 | with torch.no_grad():
126 | output = self.model(data)
127 | result_frag.append(output.data.cpu().numpy())
128 |
129 | # get loss
130 | if evaluation:
131 | loss = self.loss(output, label)
132 | loss_value.append(loss.item())
133 | label_frag.append(label.data.cpu().numpy())
134 |
135 | self.result = np.concatenate(result_frag)
136 | if evaluation:
137 | self.label = np.concatenate(label_frag)
138 | self.epoch_info['mean_loss'] = np.mean(loss_value)
139 | self.show_epoch_info()
140 |
141 | # show top-k accuracy
142 | for k in self.arg.show_topk:
143 | self.show_topk(k)
144 |
145 | @staticmethod
146 | def get_parser(add_help=False):
147 |
148 | # parameter priority: command line > config > default
149 | parent_parser = Processor.get_parser(add_help=False)
150 | parser = argparse.ArgumentParser(
151 | add_help=add_help,
152 | parents=[parent_parser],
153 | description='Spatial Temporal Graph Convolution Network')
154 |
155 | # region arguments yapf: disable
156 | # evaluation
157 | parser.add_argument('--show_topk', type=int, default=[1, 5], nargs='+', help='which Top K accuracy will be shown')
158 | # optim
159 | parser.add_argument('--base_lr', type=float, default=0.01, help='initial learning rate')
160 | parser.add_argument('--step', type=int, default=[], nargs='+', help='the epochs at which the optimizer reduces the learning rate')
161 | parser.add_argument('--optimizer', default='SGD', help='type of optimizer')
162 | parser.add_argument('--nesterov', type=str2bool, default=True, help='use nesterov or not')
163 | parser.add_argument('--weight_decay', type=float, default=0.0001, help='weight decay for optimizer')
164 | # endregion yapf: enable
165 |
166 | return parser
167 |
--------------------------------------------------------------------------------
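
A standalone sketch of the step decay computed by REC_Processor.adjust_lr above: the learning rate is base_lr * 0.1^k, where k counts how many milestone epochs in --step the current epoch has already reached.

import numpy as np

base_lr, step = 0.1, [10, 50]
for epoch in [0, 9, 10, 49, 50, 60]:
    lr = base_lr * (0.1 ** np.sum(epoch >= np.array(step)))
    print(epoch, '{:.6f}'.format(lr))
# -> 0.100000 for epochs 0-9, 0.010000 for 10-49, 0.001000 from epoch 50 on
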
/requirements.txt:
--------------------------------------------------------------------------------
1 | pyyaml
2 | argparse
3 | numpy
4 | h5py
5 | opencv-python
6 | imageio
7 | scikit-video
8 | torch
9 | torchvision
--------------------------------------------------------------------------------
/resource/NTU-RGB-D/samples_with_missing_skeletons.txt:
--------------------------------------------------------------------------------
1 | S001C002P005R002A008
2 | S001C002P006R001A008
3 | S001C003P002R001A055
4 | S001C003P002R002A012
5 | S001C003P005R002A004
6 | S001C003P005R002A005
7 | S001C003P005R002A006
8 | S001C003P006R002A008
9 | S002C002P011R002A030
10 | S002C003P008R001A020
11 | S002C003P010R002A010
12 | S002C003P011R002A007
13 | S002C003P011R002A011
14 | S002C003P014R002A007
15 | S003C001P019R001A055
16 | S003C002P002R002A055
17 | S003C002P018R002A055
18 | S003C003P002R001A055
19 | S003C003P016R001A055
20 | S003C003P018R002A024
21 | S004C002P003R001A013
22 | S004C002P008R001A009
23 | S004C002P020R001A003
24 | S004C002P020R001A004
25 | S004C002P020R001A012
26 | S004C002P020R001A020
27 | S004C002P020R001A021
28 | S004C002P020R001A036
29 | S005C002P004R001A001
30 | S005C002P004R001A003
31 | S005C002P010R001A016
32 | S005C002P010R001A017
33 | S005C002P010R001A048
34 | S005C002P010R001A049
35 | S005C002P016R001A009
36 | S005C002P016R001A010
37 | S005C002P018R001A003
38 | S005C002P018R001A028
39 | S005C002P018R001A029
40 | S005C003P016R002A009
41 | S005C003P018R002A013
42 | S005C003P021R002A057
43 | S006C001P001R002A055
44 | S006C002P007R001A005
45 | S006C002P007R001A006
46 | S006C002P016R001A043
47 | S006C002P016R001A051
48 | S006C002P016R001A052
49 | S006C002P022R001A012
50 | S006C002P023R001A020
51 | S006C002P023R001A021
52 | S006C002P023R001A022
53 | S006C002P023R001A023
54 | S006C002P024R001A018
55 | S006C002P024R001A019
56 | S006C003P001R002A013
57 | S006C003P007R002A009
58 | S006C003P007R002A010
59 | S006C003P007R002A025
60 | S006C003P016R001A060
61 | S006C003P017R001A055
62 | S006C003P017R002A013
63 | S006C003P017R002A014
64 | S006C003P017R002A015
65 | S006C003P022R002A013
66 | S007C001P018R002A050
67 | S007C001P025R002A051
68 | S007C001P028R001A050
69 | S007C001P028R001A051
70 | S007C001P028R001A052
71 | S007C002P008R002A008
72 | S007C002P015R002A055
73 | S007C002P026R001A008
74 | S007C002P026R001A009
75 | S007C002P026R001A010
76 | S007C002P026R001A011
77 | S007C002P026R001A012
78 | S007C002P026R001A050
79 | S007C002P027R001A011
80 | S007C002P027R001A013
81 | S007C002P028R002A055
82 | S007C003P007R001A002
83 | S007C003P007R001A004
84 | S007C003P019R001A060
85 | S007C003P027R002A001
86 | S007C003P027R002A002
87 | S007C003P027R002A003
88 | S007C003P027R002A004
89 | S007C003P027R002A005
90 | S007C003P027R002A006
91 | S007C003P027R002A007
92 | S007C003P027R002A008
93 | S007C003P027R002A009
94 | S007C003P027R002A010
95 | S007C003P027R002A011
96 | S007C003P027R002A012
97 | S007C003P027R002A013
98 | S008C002P001R001A009
99 | S008C002P001R001A010
100 | S008C002P001R001A014
101 | S008C002P001R001A015
102 | S008C002P001R001A016
103 | S008C002P001R001A018
104 | S008C002P001R001A019
105 | S008C002P008R002A059
106 | S008C002P025R001A060
107 | S008C002P029R001A004
108 | S008C002P031R001A005
109 | S008C002P031R001A006
110 | S008C002P032R001A018
111 | S008C002P034R001A018
112 | S008C002P034R001A019
113 | S008C002P035R001A059
114 | S008C002P035R002A002
115 | S008C002P035R002A005
116 | S008C003P007R001A009
117 | S008C003P007R001A016
118 | S008C003P007R001A017
119 | S008C003P007R001A018
120 | S008C003P007R001A019
121 | S008C003P007R001A020
122 | S008C003P007R001A021
123 | S008C003P007R001A022
124 | S008C003P007R001A023
125 | S008C003P007R001A025
126 | S008C003P007R001A026
127 | S008C003P007R001A028
128 | S008C003P007R001A029
129 | S008C003P007R002A003
130 | S008C003P008R002A050
131 | S008C003P025R002A002
132 | S008C003P025R002A011
133 | S008C003P025R002A012
134 | S008C003P025R002A016
135 | S008C003P025R002A020
136 | S008C003P025R002A022
137 | S008C003P025R002A023
138 | S008C003P025R002A030
139 | S008C003P025R002A031
140 | S008C003P025R002A032
141 | S008C003P025R002A033
142 | S008C003P025R002A049
143 | S008C003P025R002A060
144 | S008C003P031R001A001
145 | S008C003P031R002A004
146 | S008C003P031R002A014
147 | S008C003P031R002A015
148 | S008C003P031R002A016
149 | S008C003P031R002A017
150 | S008C003P032R002A013
151 | S008C003P033R002A001
152 | S008C003P033R002A011
153 | S008C003P033R002A012
154 | S008C003P034R002A001
155 | S008C003P034R002A012
156 | S008C003P034R002A022
157 | S008C003P034R002A023
158 | S008C003P034R002A024
159 | S008C003P034R002A044
160 | S008C003P034R002A045
161 | S008C003P035R002A016
162 | S008C003P035R002A017
163 | S008C003P035R002A018
164 | S008C003P035R002A019
165 | S008C003P035R002A020
166 | S008C003P035R002A021
167 | S009C002P007R001A001
168 | S009C002P007R001A003
169 | S009C002P007R001A014
170 | S009C002P008R001A014
171 | S009C002P015R002A050
172 | S009C002P016R001A002
173 | S009C002P017R001A028
174 | S009C002P017R001A029
175 | S009C003P017R002A030
176 | S009C003P025R002A054
177 | S010C001P007R002A020
178 | S010C002P016R002A055
179 | S010C002P017R001A005
180 | S010C002P017R001A018
181 | S010C002P017R001A019
182 | S010C002P019R001A001
183 | S010C002P025R001A012
184 | S010C003P007R002A043
185 | S010C003P008R002A003
186 | S010C003P016R001A055
187 | S010C003P017R002A055
188 | S011C001P002R001A008
189 | S011C001P018R002A050
190 | S011C002P008R002A059
191 | S011C002P016R002A055
192 | S011C002P017R001A020
193 | S011C002P017R001A021
194 | S011C002P018R002A055
195 | S011C002P027R001A009
196 | S011C002P027R001A010
197 | S011C002P027R001A037
198 | S011C003P001R001A055
199 | S011C003P002R001A055
200 | S011C003P008R002A012
201 | S011C003P015R001A055
202 | S011C003P016R001A055
203 | S011C003P019R001A055
204 | S011C003P025R001A055
205 | S011C003P028R002A055
206 | S012C001P019R001A060
207 | S012C001P019R002A060
208 | S012C002P015R001A055
209 | S012C002P017R002A012
210 | S012C002P025R001A060
211 | S012C003P008R001A057
212 | S012C003P015R001A055
213 | S012C003P015R002A055
214 | S012C003P016R001A055
215 | S012C003P017R002A055
216 | S012C003P018R001A055
217 | S012C003P018R001A057
218 | S012C003P019R002A011
219 | S012C003P019R002A012
220 | S012C003P025R001A055
221 | S012C003P027R001A055
222 | S012C003P027R002A009
223 | S012C003P028R001A035
224 | S012C003P028R002A055
225 | S013C001P015R001A054
226 | S013C001P017R002A054
227 | S013C001P018R001A016
228 | S013C001P028R001A040
229 | S013C002P015R001A054
230 | S013C002P017R002A054
231 | S013C002P028R001A040
232 | S013C003P008R002A059
233 | S013C003P015R001A054
234 | S013C003P017R002A054
235 | S013C003P025R002A022
236 | S013C003P027R001A055
237 | S013C003P028R001A040
238 | S014C001P027R002A040
239 | S014C002P015R001A003
240 | S014C002P019R001A029
241 | S014C002P025R002A059
242 | S014C002P027R002A040
243 | S014C002P039R001A050
244 | S014C003P007R002A059
245 | S014C003P015R002A055
246 | S014C003P019R002A055
247 | S014C003P025R001A048
248 | S014C003P027R002A040
249 | S015C001P008R002A040
250 | S015C001P016R001A055
251 | S015C001P017R001A055
252 | S015C001P017R002A055
253 | S015C002P007R001A059
254 | S015C002P008R001A003
255 | S015C002P008R001A004
256 | S015C002P008R002A040
257 | S015C002P015R001A002
258 | S015C002P016R001A001
259 | S015C002P016R002A055
260 | S015C003P008R002A007
261 | S015C003P008R002A011
262 | S015C003P008R002A012
263 | S015C003P008R002A028
264 | S015C003P008R002A040
265 | S015C003P025R002A012
266 | S015C003P025R002A017
267 | S015C003P025R002A020
268 | S015C003P025R002A021
269 | S015C003P025R002A030
270 | S015C003P025R002A033
271 | S015C003P025R002A034
272 | S015C003P025R002A036
273 | S015C003P025R002A037
274 | S015C003P025R002A044
275 | S016C001P019R002A040
276 | S016C001P025R001A011
277 | S016C001P025R001A012
278 | S016C001P025R001A060
279 | S016C001P040R001A055
280 | S016C001P040R002A055
281 | S016C002P008R001A011
282 | S016C002P019R002A040
283 | S016C002P025R002A012
284 | S016C003P008R001A011
285 | S016C003P008R002A002
286 | S016C003P008R002A003
287 | S016C003P008R002A004
288 | S016C003P008R002A006
289 | S016C003P008R002A009
290 | S016C003P019R002A040
291 | S016C003P039R002A016
292 | S017C001P016R002A031
293 | S017C002P007R001A013
294 | S017C002P008R001A009
295 | S017C002P015R001A042
296 | S017C002P016R002A031
297 | S017C002P016R002A055
298 | S017C003P007R002A013
299 | S017C003P008R001A059
300 | S017C003P016R002A031
301 | S017C003P017R001A055
302 | S017C003P020R001A059
303 |
--------------------------------------------------------------------------------
/resource/demo_asset/attention+prediction.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/attention+prediction.png
--------------------------------------------------------------------------------
/resource/demo_asset/attention+rgb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/attention+rgb.png
--------------------------------------------------------------------------------
/resource/demo_asset/original_video.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/original_video.png
--------------------------------------------------------------------------------
/resource/demo_asset/pose_estimation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/pose_estimation.png
--------------------------------------------------------------------------------
/resource/info/S001C001P001R001A044_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S001C001P001R001A044_w.gif
--------------------------------------------------------------------------------
/resource/info/S001C001P001R001A051_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S001C001P001R001A051_w.gif
--------------------------------------------------------------------------------
/resource/info/S002C001P010R001A017_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S002C001P010R001A017_w.gif
--------------------------------------------------------------------------------
/resource/info/S003C001P008R001A002_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S003C001P008R001A002_w.gif
--------------------------------------------------------------------------------
/resource/info/S003C001P008R001A008_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S003C001P008R001A008_w.gif
--------------------------------------------------------------------------------
/resource/info/clean_and_jerk_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/clean_and_jerk_w.gif
--------------------------------------------------------------------------------
/resource/info/demo_video.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/demo_video.gif
--------------------------------------------------------------------------------
/resource/info/hammer_throw_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/hammer_throw_w.gif
--------------------------------------------------------------------------------
/resource/info/juggling_balls_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/juggling_balls_w.gif
--------------------------------------------------------------------------------
/resource/info/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/pipeline.png
--------------------------------------------------------------------------------
/resource/info/pull_ups_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/pull_ups_w.gif
--------------------------------------------------------------------------------
/resource/info/tai_chi_w.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/tai_chi_w.gif
--------------------------------------------------------------------------------
/resource/kinetics-motion.txt:
--------------------------------------------------------------------------------
1 | belly dancing
2 | punching bag
3 | capoeira
4 | squat
5 | windsurfing
6 | skipping rope
7 | swimming backstroke
8 | hammer throw
9 | throwing discus
10 | tobogganing
11 | hopscotch
12 | hitting baseball
13 | roller skating
14 | arm wrestling
15 | snatch weight lifting
16 | tai chi
17 | riding mechanical bull
18 | salsa dancing
19 | hurling (sport)
20 | lunge
21 | skateboarding
22 | country line dancing
23 | juggling balls
24 | surfing crowd
25 | deadlifting
26 | clean and jerk
27 | crawling baby
28 | push up
29 | front raises
30 | pull ups
--------------------------------------------------------------------------------
/resource/kinetics_skeleton/label_name.txt:
--------------------------------------------------------------------------------
1 | abseiling
2 | air drumming
3 | answering questions
4 | applauding
5 | applying cream
6 | archery
7 | arm wrestling
8 | arranging flowers
9 | assembling computer
10 | auctioning
11 | baby waking up
12 | baking cookies
13 | balloon blowing
14 | bandaging
15 | barbequing
16 | bartending
17 | beatboxing
18 | bee keeping
19 | belly dancing
20 | bench pressing
21 | bending back
22 | bending metal
23 | biking through snow
24 | blasting sand
25 | blowing glass
26 | blowing leaves
27 | blowing nose
28 | blowing out candles
29 | bobsledding
30 | bookbinding
31 | bouncing on trampoline
32 | bowling
33 | braiding hair
34 | breading or breadcrumbing
35 | breakdancing
36 | brush painting
37 | brushing hair
38 | brushing teeth
39 | building cabinet
40 | building shed
41 | bungee jumping
42 | busking
43 | canoeing or kayaking
44 | capoeira
45 | carrying baby
46 | cartwheeling
47 | carving pumpkin
48 | catching fish
49 | catching or throwing baseball
50 | catching or throwing frisbee
51 | catching or throwing softball
52 | celebrating
53 | changing oil
54 | changing wheel
55 | checking tires
56 | cheerleading
57 | chopping wood
58 | clapping
59 | clay pottery making
60 | clean and jerk
61 | cleaning floor
62 | cleaning gutters
63 | cleaning pool
64 | cleaning shoes
65 | cleaning toilet
66 | cleaning windows
67 | climbing a rope
68 | climbing ladder
69 | climbing tree
70 | contact juggling
71 | cooking chicken
72 | cooking egg
73 | cooking on campfire
74 | cooking sausages
75 | counting money
76 | country line dancing
77 | cracking neck
78 | crawling baby
79 | crossing river
80 | crying
81 | curling hair
82 | cutting nails
83 | cutting pineapple
84 | cutting watermelon
85 | dancing ballet
86 | dancing charleston
87 | dancing gangnam style
88 | dancing macarena
89 | deadlifting
90 | decorating the christmas tree
91 | digging
92 | dining
93 | disc golfing
94 | diving cliff
95 | dodgeball
96 | doing aerobics
97 | doing laundry
98 | doing nails
99 | drawing
100 | dribbling basketball
101 | drinking
102 | drinking beer
103 | drinking shots
104 | driving car
105 | driving tractor
106 | drop kicking
107 | drumming fingers
108 | dunking basketball
109 | dying hair
110 | eating burger
111 | eating cake
112 | eating carrots
113 | eating chips
114 | eating doughnuts
115 | eating hotdog
116 | eating ice cream
117 | eating spaghetti
118 | eating watermelon
119 | egg hunting
120 | exercising arm
121 | exercising with an exercise ball
122 | extinguishing fire
123 | faceplanting
124 | feeding birds
125 | feeding fish
126 | feeding goats
127 | filling eyebrows
128 | finger snapping
129 | fixing hair
130 | flipping pancake
131 | flying kite
132 | folding clothes
133 | folding napkins
134 | folding paper
135 | front raises
136 | frying vegetables
137 | garbage collecting
138 | gargling
139 | getting a haircut
140 | getting a tattoo
141 | giving or receiving award
142 | golf chipping
143 | golf driving
144 | golf putting
145 | grinding meat
146 | grooming dog
147 | grooming horse
148 | gymnastics tumbling
149 | hammer throw
150 | headbanging
151 | headbutting
152 | high jump
153 | high kick
154 | hitting baseball
155 | hockey stop
156 | holding snake
157 | hopscotch
158 | hoverboarding
159 | hugging
160 | hula hooping
161 | hurdling
162 | hurling (sport)
163 | ice climbing
164 | ice fishing
165 | ice skating
166 | ironing
167 | javelin throw
168 | jetskiing
169 | jogging
170 | juggling balls
171 | juggling fire
172 | juggling soccer ball
173 | jumping into pool
174 | jumpstyle dancing
175 | kicking field goal
176 | kicking soccer ball
177 | kissing
178 | kitesurfing
179 | knitting
180 | krumping
181 | laughing
182 | laying bricks
183 | long jump
184 | lunge
185 | making a cake
186 | making a sandwich
187 | making bed
188 | making jewelry
189 | making pizza
190 | making snowman
191 | making sushi
192 | making tea
193 | marching
194 | massaging back
195 | massaging feet
196 | massaging legs
197 | massaging person's head
198 | milking cow
199 | mopping floor
200 | motorcycling
201 | moving furniture
202 | mowing lawn
203 | news anchoring
204 | opening bottle
205 | opening present
206 | paragliding
207 | parasailing
208 | parkour
209 | passing American football (in game)
210 | passing American football (not in game)
211 | peeling apples
212 | peeling potatoes
213 | petting animal (not cat)
214 | petting cat
215 | picking fruit
216 | planting trees
217 | plastering
218 | playing accordion
219 | playing badminton
220 | playing bagpipes
221 | playing basketball
222 | playing bass guitar
223 | playing cards
224 | playing cello
225 | playing chess
226 | playing clarinet
227 | playing controller
228 | playing cricket
229 | playing cymbals
230 | playing didgeridoo
231 | playing drums
232 | playing flute
233 | playing guitar
234 | playing harmonica
235 | playing harp
236 | playing ice hockey
237 | playing keyboard
238 | playing kickball
239 | playing monopoly
240 | playing organ
241 | playing paintball
242 | playing piano
243 | playing poker
244 | playing recorder
245 | playing saxophone
246 | playing squash or racquetball
247 | playing tennis
248 | playing trombone
249 | playing trumpet
250 | playing ukulele
251 | playing violin
252 | playing volleyball
253 | playing xylophone
254 | pole vault
255 | presenting weather forecast
256 | pull ups
257 | pumping fist
258 | pumping gas
259 | punching bag
260 | punching person (boxing)
261 | push up
262 | pushing car
263 | pushing cart
264 | pushing wheelchair
265 | reading book
266 | reading newspaper
267 | recording music
268 | riding a bike
269 | riding camel
270 | riding elephant
271 | riding mechanical bull
272 | riding mountain bike
273 | riding mule
274 | riding or walking with horse
275 | riding scooter
276 | riding unicycle
277 | ripping paper
278 | robot dancing
279 | rock climbing
280 | rock scissors paper
281 | roller skating
282 | running on treadmill
283 | sailing
284 | salsa dancing
285 | sanding floor
286 | scrambling eggs
287 | scuba diving
288 | setting table
289 | shaking hands
290 | shaking head
291 | sharpening knives
292 | sharpening pencil
293 | shaving head
294 | shaving legs
295 | shearing sheep
296 | shining shoes
297 | shooting basketball
298 | shooting goal (soccer)
299 | shot put
300 | shoveling snow
301 | shredding paper
302 | shuffling cards
303 | side kick
304 | sign language interpreting
305 | singing
306 | situp
307 | skateboarding
308 | ski jumping
309 | skiing (not slalom or crosscountry)
310 | skiing crosscountry
311 | skiing slalom
312 | skipping rope
313 | skydiving
314 | slacklining
315 | slapping
316 | sled dog racing
317 | smoking
318 | smoking hookah
319 | snatch weight lifting
320 | sneezing
321 | sniffing
322 | snorkeling
323 | snowboarding
324 | snowkiting
325 | snowmobiling
326 | somersaulting
327 | spinning poi
328 | spray painting
329 | spraying
330 | springboard diving
331 | squat
332 | sticking tongue out
333 | stomping grapes
334 | stretching arm
335 | stretching leg
336 | strumming guitar
337 | surfing crowd
338 | surfing water
339 | sweeping floor
340 | swimming backstroke
341 | swimming breast stroke
342 | swimming butterfly stroke
343 | swing dancing
344 | swinging legs
345 | swinging on something
346 | sword fighting
347 | tai chi
348 | taking a shower
349 | tango dancing
350 | tap dancing
351 | tapping guitar
352 | tapping pen
353 | tasting beer
354 | tasting food
355 | testifying
356 | texting
357 | throwing axe
358 | throwing ball
359 | throwing discus
360 | tickling
361 | tobogganing
362 | tossing coin
363 | tossing salad
364 | training dog
365 | trapezing
366 | trimming or shaving beard
367 | trimming trees
368 | triple jump
369 | tying bow tie
370 | tying knot (not on a tie)
371 | tying tie
372 | unboxing
373 | unloading truck
374 | using computer
375 | using remote controller (not gaming)
376 | using segway
377 | vault
378 | waiting in line
379 | walking the dog
380 | washing dishes
381 | washing feet
382 | washing hair
383 | washing hands
384 | water skiing
385 | water sliding
386 | watering plants
387 | waxing back
388 | waxing chest
389 | waxing eyebrows
390 | waxing legs
391 | weaving basket
392 | welding
393 | whistling
394 | windsurfing
395 | wrapping present
396 | wrestling
397 | writing
398 | yawning
399 | yoga
400 | zumba
401 |
--------------------------------------------------------------------------------
/resource/media/clean_and_jerk.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/clean_and_jerk.mp4
--------------------------------------------------------------------------------
/resource/media/skateboarding.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/skateboarding.mp4
--------------------------------------------------------------------------------
/resource/media/ta_chi.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/ta_chi.mp4
--------------------------------------------------------------------------------
/resource/reference_model.txt:
--------------------------------------------------------------------------------
1 | st_gcn.kinetics.pt
2 | st_gcn.ntu-xview.pt
3 | st_gcn.ntu-xsub.pt
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
1 | from . import utils
--------------------------------------------------------------------------------
/tools/get_models.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | out_path="models/"
4 | link="https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmskeleton/models/st-gcn/"
5 | reference_model="resource/reference_model.txt"
6 |
7 | mkdir -p $out_path
8 | while IFS='' read -r line || [[ -n "$line" ]]; do
9 | wget -c $link$line -O $out_path$line
10 | done < "$reference_model"
11 |
12 |
13 | # Downloading models for pose estimation
14 | OPENPOSE_URL="http://posefs1.perception.cs.cmu.edu/OpenPose/models/"
15 | POSE_FOLDER="pose/"
16 |
17 | # Body (COCO)
18 | COCO_FOLDER=${POSE_FOLDER}"coco/"
19 | OUT_FOLDER="models/${COCO_FOLDER}"
20 | COCO_MODEL=${COCO_FOLDER}"pose_iter_440000.caffemodel"
21 | wget -c ${OPENPOSE_URL}${COCO_MODEL} -P ${OUT_FOLDER}
--------------------------------------------------------------------------------
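
get_models.sh simply walks resource/reference_model.txt, pulls each pretrained weight file from the mmskeleton S3 bucket, and then fetches the OpenPose COCO caffemodel. For environments without wget, here is a minimal Python sketch of the same model-download loop; it assumes the file layout above and omits the OpenPose download and wget's resume behaviour:

import os
import urllib.request

OUT_PATH = "models/"
BASE_URL = "https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmskeleton/models/st-gcn/"
REFERENCE_MODEL = "resource/reference_model.txt"

os.makedirs(OUT_PATH, exist_ok=True)
with open(REFERENCE_MODEL) as f:
    for line in f:
        name = line.strip()
        if not name:
            continue
        # fetch each pretrained weight file listed in reference_model.txt
        urllib.request.urlretrieve(BASE_URL + name, os.path.join(OUT_PATH, name))
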
/tools/kinetics_gendata.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import sys
4 | import pickle
5 | import argparse
6 |
7 | import numpy as np
8 | from numpy.lib.format import open_memmap
9 |
10 | sys.path.append(
11 | os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir)))
12 | from feeder.feeder_kinetics import Feeder_kinetics
13 |
14 | toolbar_width = 30
15 |
16 | def print_toolbar(rate, annotation=''):
17 | # setup toolbar
18 | sys.stdout.write("{}[".format(annotation))
19 | for i in range(toolbar_width):
20 | if i * 1.0 / toolbar_width > rate:
21 | sys.stdout.write(' ')
22 | else:
23 | sys.stdout.write('-')
24 | sys.stdout.flush()
25 | sys.stdout.write(']\r')
26 |
27 |
28 | def end_toolbar():
29 | sys.stdout.write("\n")
30 |
31 |
32 | def gendata(
33 | data_path,
34 | label_path,
35 | data_out_path,
36 | label_out_path,
37 | num_person_in=5, #observe the first 5 persons
38 | num_person_out=2, #then choose 2 persons with the highest score
39 | max_frame=300):
40 |
41 | feeder = Feeder_kinetics(
42 | data_path=data_path,
43 | label_path=label_path,
44 | num_person_in=num_person_in,
45 | num_person_out=num_person_out,
46 | window_size=max_frame)
47 |
48 | sample_name = feeder.sample_name
49 | sample_label = []
50 |
51 | fp = open_memmap(
52 | data_out_path,
53 | dtype='float32',
54 | mode='w+',
55 | shape=(len(sample_name), 3, max_frame, 18, num_person_out))
56 |
57 | for i, s in enumerate(sample_name):
58 | data, label = feeder[i]
59 | print_toolbar(i * 1.0 / len(sample_name),
60 | '({:>5}/{:<5}) Processing data: '.format(
61 | i + 1, len(sample_name)))
62 | fp[i, :, 0:data.shape[1], :, :] = data
63 | sample_label.append(label)
64 |
65 | with open(label_out_path, 'wb') as f:
66 | pickle.dump((sample_name, list(sample_label)), f)
67 |
68 |
69 | if __name__ == '__main__':
70 | parser = argparse.ArgumentParser(
71 | description='Kinetics-skeleton Data Converter.')
72 | parser.add_argument(
73 | '--data_path', default='data/Kinetics/kinetics-skeleton')
74 | parser.add_argument(
75 | '--out_folder', default='data/Kinetics/kinetics-skeleton')
76 | arg = parser.parse_args()
77 |
78 | part = ['train', 'val']
79 | for p in part:
80 | data_path = '{}/kinetics_{}'.format(arg.data_path, p)
81 | label_path = '{}/kinetics_{}_label.json'.format(arg.data_path, p)
82 | data_out_path = '{}/{}_data.npy'.format(arg.out_folder, p)
83 | label_out_path = '{}/{}_label.pkl'.format(arg.out_folder, p)
84 |
85 | if not os.path.exists(arg.out_folder):
86 | os.makedirs(arg.out_folder)
87 | gendata(data_path, label_path, data_out_path, label_out_path)
--------------------------------------------------------------------------------
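
For each split, kinetics_gendata.py writes a float32 memmap of shape (N, C, T, V, M) = (samples, 3, 300, 18, 2) plus a pickled (sample_name, label) pair. A minimal sketch of reading the converted data back, assuming the default --out_folder above:

import pickle
import numpy as np

data = np.load('data/Kinetics/kinetics-skeleton/val_data.npy', mmap_mode='r')
with open('data/Kinetics/kinetics-skeleton/val_label.pkl', 'rb') as f:
    sample_name, label = pickle.load(f)

print(data.shape)                # (N, 3, 300, 18, 2)
print(sample_name[0], label[0])  # one skeleton JSON name and its class index
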
/tools/ntu_gendata.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import pickle
4 |
5 | import argparse
6 | import numpy as np
7 | from numpy.lib.format import open_memmap
8 |
9 | from utils.ntu_read_skeleton import read_xyz
10 |
11 | training_subjects = [
12 | 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38
13 | ]
14 | training_cameras = [2, 3]
15 | max_body = 2
16 | num_joint = 25
17 | max_frame = 300
18 | toolbar_width = 30
19 |
20 | def print_toolbar(rate, annotation=''):
21 | # setup toolbar
22 | sys.stdout.write("{}[".format(annotation))
23 | for i in range(toolbar_width):
24 | if i * 1.0 / toolbar_width > rate:
25 | sys.stdout.write(' ')
26 | else:
27 | sys.stdout.write('-')
28 | sys.stdout.flush()
29 | sys.stdout.write(']\r')
30 |
31 |
32 | def end_toolbar():
33 | sys.stdout.write("\n")
34 |
35 |
36 | def gendata(data_path,
37 | out_path,
38 | ignored_sample_path=None,
39 | benchmark='xview',
40 | part='eval'):
41 |     if ignored_sample_path is not None:
42 | with open(ignored_sample_path, 'r') as f:
43 | ignored_samples = [
44 | line.strip() + '.skeleton' for line in f.readlines()
45 | ]
46 | else:
47 | ignored_samples = []
48 | sample_name = []
49 | sample_label = []
50 | for filename in os.listdir(data_path):
51 | if filename in ignored_samples:
52 | continue
53 | action_class = int(
54 | filename[filename.find('A') + 1:filename.find('A') + 4])
55 | subject_id = int(
56 | filename[filename.find('P') + 1:filename.find('P') + 4])
57 | camera_id = int(
58 | filename[filename.find('C') + 1:filename.find('C') + 4])
59 |
60 | if benchmark == 'xview':
61 | istraining = (camera_id in training_cameras)
62 | elif benchmark == 'xsub':
63 | istraining = (subject_id in training_subjects)
64 | else:
65 | raise ValueError()
66 |
67 | if part == 'train':
68 | issample = istraining
69 | elif part == 'val':
70 | issample = not (istraining)
71 | else:
72 | raise ValueError()
73 |
74 | if issample:
75 | sample_name.append(filename)
76 | sample_label.append(action_class - 1)
77 |
78 | with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f:
79 | pickle.dump((sample_name, list(sample_label)), f)
80 | # np.save('{}/{}_label.npy'.format(out_path, part), sample_label)
81 |
82 | fp = open_memmap(
83 | '{}/{}_data.npy'.format(out_path, part),
84 | dtype='float32',
85 | mode='w+',
86 | shape=(len(sample_label), 3, max_frame, num_joint, max_body))
87 |
88 | for i, s in enumerate(sample_name):
89 | print_toolbar(i * 1.0 / len(sample_label),
90 | '({:>5}/{:<5}) Processing {:>5}-{:<5} data: '.format(
91 | i + 1, len(sample_name), benchmark, part))
92 | data = read_xyz(
93 | os.path.join(data_path, s), max_body=max_body, num_joint=num_joint)
94 | fp[i, :, 0:data.shape[1], :, :] = data
95 | end_toolbar()
96 |
97 |
98 | if __name__ == '__main__':
99 |
100 | parser = argparse.ArgumentParser(description='NTU-RGB-D Data Converter.')
101 | parser.add_argument(
102 | '--data_path', default='data/NTU-RGB-D/nturgb+d_skeletons')
103 | parser.add_argument(
104 | '--ignored_sample_path',
105 | default='resource/NTU-RGB-D/samples_with_missing_skeletons.txt')
106 | parser.add_argument('--out_folder', default='data/NTU-RGB-D')
107 |
108 | benchmark = ['xsub', 'xview']
109 | part = ['train', 'val']
110 | arg = parser.parse_args()
111 |
112 | for b in benchmark:
113 | for p in part:
114 | out_path = os.path.join(arg.out_folder, b)
115 | if not os.path.exists(out_path):
116 | os.makedirs(out_path)
117 | gendata(
118 | arg.data_path,
119 | out_path,
120 | arg.ignored_sample_path,
121 | benchmark=b,
122 | part=p)
123 |
--------------------------------------------------------------------------------
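
The train/val split in ntu_gendata.py is decided entirely by fields parsed out of each skeleton filename (SsssCcccPpppRrrrAaaa.skeleton): the camera id drives the cross-view benchmark and the subject id drives the cross-subject benchmark. A small sketch of that parsing applied to one sample name (the filename is taken from the resource GIFs above):

# the same slicing used by gendata(), applied to a single NTU file name
filename = 'S001C001P001R001A044.skeleton'

action_class = int(filename[filename.find('A') + 1:filename.find('A') + 4])  # 44
subject_id = int(filename[filename.find('P') + 1:filename.find('P') + 4])    # 1
camera_id = int(filename[filename.find('C') + 1:filename.find('C') + 4])     # 1

training_subjects = [1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38]
training_cameras = [2, 3]

print('xsub:', 'train' if subject_id in training_subjects else 'val')   # train
print('xview:', 'train' if camera_id in training_cameras else 'val')    # val
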
/tools/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from . import video
2 | from . import openpose
3 | from . import visualization
--------------------------------------------------------------------------------
/tools/utils/ntu_read_skeleton.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 |
4 |
5 | def read_skeleton(file):
6 | with open(file, 'r') as f:
7 | skeleton_sequence = {}
8 | skeleton_sequence['numFrame'] = int(f.readline())
9 | skeleton_sequence['frameInfo'] = []
10 | for t in range(skeleton_sequence['numFrame']):
11 | frame_info = {}
12 | frame_info['numBody'] = int(f.readline())
13 | frame_info['bodyInfo'] = []
14 | for m in range(frame_info['numBody']):
15 | body_info = {}
16 | body_info_key = [
17 | 'bodyID', 'clipedEdges', 'handLeftConfidence',
18 | 'handLeftState', 'handRightConfidence', 'handRightState',
19 | 'isResticted', 'leanX', 'leanY', 'trackingState'
20 | ]
21 | body_info = {
22 | k: float(v)
23 | for k, v in zip(body_info_key, f.readline().split())
24 | }
25 | body_info['numJoint'] = int(f.readline())
26 | body_info['jointInfo'] = []
27 | for v in range(body_info['numJoint']):
28 | joint_info_key = [
29 | 'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY',
30 | 'orientationW', 'orientationX', 'orientationY',
31 | 'orientationZ', 'trackingState'
32 | ]
33 | joint_info = {
34 | k: float(v)
35 | for k, v in zip(joint_info_key, f.readline().split())
36 | }
37 | body_info['jointInfo'].append(joint_info)
38 | frame_info['bodyInfo'].append(body_info)
39 | skeleton_sequence['frameInfo'].append(frame_info)
40 | return skeleton_sequence
41 |
42 |
43 | def read_xyz(file, max_body=2, num_joint=25):
44 | seq_info = read_skeleton(file)
45 | data = np.zeros((3, seq_info['numFrame'], num_joint, max_body))
46 | for n, f in enumerate(seq_info['frameInfo']):
47 | for m, b in enumerate(f['bodyInfo']):
48 | for j, v in enumerate(b['jointInfo']):
49 | if m < max_body and j < num_joint:
50 | data[:, n, j, m] = [v['x'], v['y'], v['z']]
51 | else:
52 | pass
53 | return data
--------------------------------------------------------------------------------
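
read_xyz parses one NTU .skeleton text file into a (3, numFrame, 25, 2) array of x/y/z joint coordinates. A hypothetical usage, assuming the repository root is on PYTHONPATH and the raw NTU download sits in the default data folder:

from tools.utils.ntu_read_skeleton import read_xyz

data = read_xyz('data/NTU-RGB-D/nturgb+d_skeletons/S001C001P001R001A044.skeleton')
print(data.shape)  # (3, numFrame, 25, 2): xyz channels, frames, joints, bodies
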
/tools/utils/openpose.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import json
3 |
4 | def json_pack(snippets_dir, video_name, frame_width, frame_height, label='unknown', label_index=-1):
5 | sequence_info = []
6 | p = Path(snippets_dir)
7 | for path in p.glob(video_name+'*.json'):
8 | json_path = str(path)
9 | print(path)
10 | frame_id = int(path.stem.split('_')[-2])
11 | frame_data = {'frame_index': frame_id}
12 | data = json.load(open(json_path))
13 | skeletons = []
14 | for person in data['people']:
15 | score, coordinates = [], []
16 | skeleton = {}
17 | keypoints = person['pose_keypoints_2d']
18 | for i in range(0, len(keypoints), 3):
19 | coordinates += [keypoints[i]/frame_width, keypoints[i + 1]/frame_height]
20 | score += [keypoints[i + 2]]
21 | skeleton['pose'] = coordinates
22 | skeleton['score'] = score
23 | skeletons += [skeleton]
24 | frame_data['skeleton'] = skeletons
25 | sequence_info += [frame_data]
26 |
27 | video_info = dict()
28 | video_info['data'] = sequence_info
29 | video_info['label'] = label
30 | video_info['label_index'] = label_index
31 |
32 | return video_info
--------------------------------------------------------------------------------
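
json_pack collects the per-frame OpenPose JSON snippets for one video (the frame index is taken from the second-to-last '_'-separated token of each file stem) and normalizes keypoints by the frame size. The returned dict has the layout sketched below; the numbers are illustrative placeholders, not real keypoints:

video_info = {
    'data': [
        {
            'frame_index': 0,
            'skeleton': [
                {
                    # 18 (x, y) pairs, normalized by frame width/height
                    'pose': [0.51, 0.20, 0.50, 0.31] + [0.0] * 32,
                    # 18 keypoint confidences
                    'score': [0.9, 0.8] + [0.0] * 16,
                },
            ],
        },
        # one such entry per frame follows
    ],
    'label': 'unknown',
    'label_index': -1,
}
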
/tools/utils/video.py:
--------------------------------------------------------------------------------
1 | import skvideo.io
2 | import numpy as np
3 | import cv2
4 |
5 | def video_info_parsing(video_info, num_person_in=5, num_person_out=2):
6 | data_numpy = np.zeros((3, len(video_info['data']), 18, num_person_in))
7 | for frame_info in video_info['data']:
8 | frame_index = frame_info['frame_index']
9 | for m, skeleton_info in enumerate(frame_info["skeleton"]):
10 | if m >= num_person_in:
11 | break
12 | pose = skeleton_info['pose']
13 | score = skeleton_info['score']
14 | data_numpy[0, frame_index, :, m] = pose[0::2]
15 | data_numpy[1, frame_index, :, m] = pose[1::2]
16 | data_numpy[2, frame_index, :, m] = score
17 |
18 | # centralization
19 | data_numpy[0:2] = data_numpy[0:2] - 0.5
20 | data_numpy[0][data_numpy[2] == 0] = 0
21 | data_numpy[1][data_numpy[2] == 0] = 0
22 |
23 | sort_index = (-data_numpy[2, :, :, :].sum(axis=1)).argsort(axis=1)
24 | for t, s in enumerate(sort_index):
25 | data_numpy[:, t, :, :] = data_numpy[:, t, :, s].transpose((1, 2,
26 | 0))
27 | data_numpy = data_numpy[:, :, :, :num_person_out]
28 |
29 | label = video_info['label_index']
30 | return data_numpy, label
31 |
32 | def get_video_frames(video_path):
33 | vread = skvideo.io.vread(video_path)
34 | video = []
35 | for frame in vread:
36 | video.append(frame)
37 | return video
38 |
39 | def video_play(video_path, fps=30):
40 | cap = cv2.VideoCapture(video_path)
41 |
42 | while(cap.isOpened()):
43 | ret, frame = cap.read()
44 |         if not ret: break
45 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
46 |
47 | cv2.imshow('frame',gray)
48 |         if cv2.waitKey(int(1000/fps)) & 0xFF == ord('q'):
49 | break
50 |
51 | cap.release()
52 | cv2.destroyAllWindows()
--------------------------------------------------------------------------------
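
video_info_parsing turns a json_pack-style dict back into the (3, T, 18, M) array the demos feed to the network: it centers coordinates on the frame middle and keeps the num_person_out highest-scoring bodies per frame. A hypothetical call with a one-frame dict in the format shown for tools/utils/openpose.py:

from tools.utils.video import video_info_parsing

video_info = {
    'data': [{
        'frame_index': 0,
        'skeleton': [{'pose': [0.5, 0.5] * 18, 'score': [1.0] * 18}],
    }],
    'label': 'unknown',
    'label_index': -1,
}

data_numpy, label = video_info_parsing(video_info, num_person_in=5, num_person_out=2)
print(data_numpy.shape)  # (3, 1, 18, 2): channels (x, y, score), frames, joints, persons
print(label)             # -1
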
/tools/utils/visualization.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import numpy as np
3 |
4 |
5 | def stgcn_visualize(pose,
6 | edge,
7 | feature,
8 | video,
9 | label=None,
10 | label_sequence=None,
11 | height=1080,
12 | fps=None):
13 |
14 | _, T, V, M = pose.shape
15 | T = len(video)
16 | pos_track = [None] * M
17 | for t in range(T):
18 | frame = video[t]
19 |
20 | # image resize
21 | H, W, c = frame.shape
22 | frame = cv2.resize(frame, (height * W // H // 2, height//2))
23 | H, W, c = frame.shape
24 | scale_factor = 2 * height / 1080
25 |
26 | # draw skeleton
27 | skeleton = frame * 0
28 | text = frame * 0
29 | for m in range(M):
30 |
31 | score = pose[2, t, :, m].max()
32 | if score < 0.3:
33 | continue
34 |
35 | for i, j in edge:
36 | xi = pose[0, t, i, m]
37 | yi = pose[1, t, i, m]
38 | xj = pose[0, t, j, m]
39 | yj = pose[1, t, j, m]
40 | if xi + yi == 0 or xj + yj == 0:
41 | continue
42 | else:
43 | xi = int((xi + 0.5) * W)
44 | yi = int((yi + 0.5) * H)
45 | xj = int((xj + 0.5) * W)
46 | yj = int((yj + 0.5) * H)
47 | cv2.line(skeleton, (xi, yi), (xj, yj), (255, 255, 255),
48 | int(np.ceil(2 * scale_factor)))
49 |
50 | if label_sequence is not None:
51 | body_label = label_sequence[t // 4][m]
52 | else:
53 | body_label = ''
54 | x_nose = int((pose[0, t, 0, m] + 0.5) * W)
55 | y_nose = int((pose[1, t, 0, m] + 0.5) * H)
56 | x_neck = int((pose[0, t, 1, m] + 0.5) * W)
57 | y_neck = int((pose[1, t, 1, m] + 0.5) * H)
58 |
59 | half_head = int(((x_neck - x_nose)**2 + (y_neck - y_nose)**2)**0.5)
60 | pos = (x_nose + half_head, y_nose - half_head)
61 | if pos_track[m] is None:
62 | pos_track[m] = pos
63 | else:
64 | new_x = int(pos_track[m][0] + (pos[0] - pos_track[m][0]) * 0.2)
65 | new_y = int(pos_track[m][1] + (pos[1] - pos_track[m][1]) * 0.2)
66 | pos_track[m] = (new_x, new_y)
67 | cv2.putText(text, body_label, pos_track[m],
68 | cv2.FONT_HERSHEY_TRIPLEX, 0.5 * scale_factor,
69 | (255, 255, 255))
70 |
71 | # generate mask
72 | mask = frame * 0
73 | feature = np.abs(feature)
74 | feature = feature / feature.mean()
75 | for m in range(M):
76 | score = pose[2, t, :, m].max()
77 | if score < 0.3:
78 | continue
79 |
80 | f = feature[t // 4, :, m]**5
81 | if f.mean() != 0:
82 | f = f / f.mean()
83 | for v in range(V):
84 | x = pose[0, t, v, m]
85 | y = pose[1, t, v, m]
86 | if x + y == 0:
87 | continue
88 | else:
89 | x = int((x + 0.5) * W)
90 | y = int((y + 0.5) * H)
91 | cv2.circle(mask, (x, y), 0, (255, 255, 255),
92 | int(np.ceil(f[v]**0.5 * 8 * scale_factor)))
93 | blurred_mask = cv2.blur(mask, (12, 12))
94 |
95 | skeleton_result = blurred_mask.astype(float) * 0.75
96 | skeleton_result += skeleton.astype(float) * 0.25
97 | skeleton_result += text.astype(float)
98 | skeleton_result[skeleton_result > 255] = 255
99 |         skeleton_result = skeleton_result.astype(np.uint8)
100 |
101 | rgb_result = blurred_mask.astype(float) * 0.75
102 | rgb_result += frame.astype(float) * 0.5
103 | rgb_result += skeleton.astype(float) * 0.25
104 | rgb_result[rgb_result > 255] = 255
105 |         rgb_result = rgb_result.astype(np.uint8)
106 |
107 | put_text(skeleton, 'inputs of st-gcn', (0.15, 0.5))
108 |
109 | text_1 = cv2.imread(
110 | './resource/demo_asset/original_video.png', cv2.IMREAD_UNCHANGED)
111 | text_2 = cv2.imread(
112 | './resource/demo_asset/pose_estimation.png', cv2.IMREAD_UNCHANGED)
113 | text_3 = cv2.imread(
114 | './resource/demo_asset/attention+prediction.png', cv2.IMREAD_UNCHANGED)
115 | text_4 = cv2.imread(
116 | './resource/demo_asset/attention+rgb.png', cv2.IMREAD_UNCHANGED)
117 |
118 | try:
119 | blend(frame, text_1)
120 | blend(skeleton, text_2)
121 | blend(skeleton_result, text_3)
122 | blend(rgb_result, text_4)
123 | except:
124 | pass
125 |
126 | if label is not None:
127 | label_name = 'voting result: ' + label
128 | put_text(skeleton_result, label_name, (0.1, 0.5))
129 |
130 | if fps is not None:
131 | put_text(skeleton, 'fps:{:.2f}'.format(fps), (0.9, 0.5))
132 |
133 | img0 = np.concatenate((frame, skeleton), axis=1)
134 | img1 = np.concatenate((skeleton_result, rgb_result), axis=1)
135 | img = np.concatenate((img0, img1), axis=0)
136 |
137 | yield img
138 |
139 |
140 | def put_text(img, text, position, scale_factor=1):
141 | t_w, t_h = cv2.getTextSize(
142 | text, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, thickness=1)[0]
143 | H, W, _ = img.shape
144 | position = (int(W * position[1] - t_w * 0.5),
145 | int(H * position[0] - t_h * 0.5))
146 | params = (position, cv2.FONT_HERSHEY_TRIPLEX, scale_factor,
147 | (255, 255, 255))
148 | cv2.putText(img, text, *params)
149 |
150 |
151 | def blend(background, foreground, dx=20, dy=10, fy=0.7):
152 |
153 | foreground = cv2.resize(foreground, (0, 0), fx=fy, fy=fy)
154 | h, w = foreground.shape[:2]
155 | b, g, r, a = cv2.split(foreground)
156 | mask = np.dstack((a, a, a))
157 | rgb = np.dstack((b, g, r))
158 |
159 | canvas = background[-h-dy:-dy, dx:w+dx]
160 | imask = mask > 0
161 | canvas[imask] = rgb[imask]
162 |
--------------------------------------------------------------------------------
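
stgcn_visualize is a generator: for every input frame it yields one image that tiles the RGB view, the skeleton input, and the two attention overlays into a 2x2 grid. A hedged sketch of draining it into a video file; the inputs below are dummies with the shapes the demo processors pass in (pose (3, T, 18, M), feature roughly (T/4, 18, M), video as a list of HxWx3 uint8 frames, edge as the skeleton graph's edge list):

import numpy as np
import cv2
from tools.utils.visualization import stgcn_visualize

# dummy inputs, only to exercise the generator; the demo processors pass
# OpenPose output and ST-GCN feature maps instead
T, V, M = 8, 18, 1
pose = np.zeros((3, T, V, M))           # x, y, confidence per joint and person
feature = np.ones((T // 4 + 1, V, M))   # per-joint response intensity
video = [np.zeros((360, 640, 3), dtype=np.uint8) for _ in range(T)]
edge = [(0, 1), (1, 2), (2, 3)]         # tiny stand-in for the skeleton graph

writer = None
for frame in stgcn_visualize(pose, edge, feature, video, label='tai chi'):
    frame = frame.astype(np.uint8)
    if writer is None:
        h, w = frame.shape[:2]
        writer = cv2.VideoWriter('demo_out.avi',
                                 cv2.VideoWriter_fourcc(*'XVID'), 30, (w, h))
    writer.write(frame)
if writer is not None:
    writer.release()
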
/torchlight/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | setup(
4 | name='torchlight',
5 | version='1.0',
6 | description='A mini framework for pytorch',
7 | packages=find_packages(),
8 | install_requires=[])
9 |
--------------------------------------------------------------------------------
/torchlight/torchlight/__init__.py:
--------------------------------------------------------------------------------
1 | from .io import IO
2 | from .io import str2bool
3 | from .io import str2dict
4 | from .io import DictAction
5 | from .io import import_class
6 | from .gpu import visible_gpu
7 | from .gpu import occupy_gpu
8 | from .gpu import ngpu
9 |
--------------------------------------------------------------------------------
/torchlight/torchlight/gpu.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 |
4 |
5 | def visible_gpu(gpus):
6 |     """
7 |     Set the visible GPUs via CUDA_VISIBLE_DEVICES.
8 |
9 |     `gpus` can be a single id or a list of ids.
10 |
11 |     Returns the remapped ids (0..n-1) as seen by this process.
12 |     """
13 | gpus = [gpus] if isinstance(gpus, int) else list(gpus)
14 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(list(map(str, gpus)))
15 | return list(range(len(gpus)))
16 |
17 |
18 | def ngpu(gpus):
19 |     """
20 |     Count how many GPUs are in use.
21 |     """
22 | gpus = [gpus] if isinstance(gpus, int) else list(gpus)
23 | return len(gpus)
24 |
25 |
26 | def occupy_gpu(gpus=None):
27 |     """
28 |     Make the program appear on nvidia-smi by allocating a tensor on each GPU.
29 |     """
30 | if gpus is None:
31 | torch.zeros(1).cuda()
32 | else:
33 | gpus = [gpus] if isinstance(gpus, int) else list(gpus)
34 | for g in gpus:
35 | torch.zeros(1).cuda(g)
36 |
--------------------------------------------------------------------------------
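
visible_gpu restricts CUDA to the requested devices through CUDA_VISIBLE_DEVICES and returns the remapped ids, which are always 0..n-1 from the process's point of view. A small sketch of how these helpers fit together, assuming the torchlight package is installed and CUDA devices are present:

from torchlight import visible_gpu, occupy_gpu, ngpu

gpus = visible_gpu([2, 3])   # sets CUDA_VISIBLE_DEVICES=2,3
print(gpus)                  # [0, 1] -- device ids as seen inside this process
print(ngpu([2, 3]))          # 2
occupy_gpu(gpus)             # touch each visible GPU so the process shows up in nvidia-smi
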
/torchlight/torchlight/io.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import argparse
3 | import os
4 | import sys
5 | import traceback
6 | import time
7 | import warnings
8 | import pickle
9 | from collections import OrderedDict
10 | import yaml
11 | import numpy as np
12 | # torch
13 | import torch
14 | import torch.nn as nn
15 | import torch.optim as optim
16 | from torch.autograd import Variable
17 |
18 | with warnings.catch_warnings():
19 | warnings.filterwarnings("ignore",category=FutureWarning)
20 | import h5py
21 |
22 | class IO():
23 | def __init__(self, work_dir, save_log=True, print_log=True):
24 | self.work_dir = work_dir
25 | self.save_log = save_log
26 | self.print_to_screen = print_log
27 | self.cur_time = time.time()
28 | self.split_timer = {}
29 | self.pavi_logger = None
30 | self.session_file = None
31 | self.model_text = ''
32 |
33 | # PaviLogger is removed in this version
34 | def log(self, *args, **kwargs):
35 | pass
36 | # try:
37 | # if self.pavi_logger is None:
38 | # from torchpack.runner.hooks import PaviLogger
39 | # url = 'http://pavi.parrotsdnn.org/log'
40 | # with open(self.session_file, 'r') as f:
41 | # info = dict(
42 | # session_file=self.session_file,
43 | # session_text=f.read(),
44 | # model_text=self.model_text)
45 | # self.pavi_logger = PaviLogger(url)
46 | # self.pavi_logger.connect(self.work_dir, info=info)
47 | # self.pavi_logger.log(*args, **kwargs)
48 | # except: #pylint: disable=W0702
49 | # pass
50 |
51 | def load_model(self, model, **model_args):
52 | Model = import_class(model)
53 | model = Model(**model_args)
54 | self.model_text += '\n\n' + str(model)
55 | return model
56 |
57 | def load_weights(self, model, weights_path, ignore_weights=None):
58 | if ignore_weights is None:
59 | ignore_weights = []
60 | if isinstance(ignore_weights, str):
61 | ignore_weights = [ignore_weights]
62 |
63 | self.print_log('Load weights from {}.'.format(weights_path))
64 | weights = torch.load(weights_path)
65 | weights = OrderedDict([[k.split('module.')[-1],
66 | v.cpu()] for k, v in weights.items()])
67 |
68 | # filter weights
69 | for i in ignore_weights:
70 | ignore_name = list()
71 | for w in weights:
72 | if w.find(i) == 0:
73 | ignore_name.append(w)
74 | for n in ignore_name:
75 | weights.pop(n)
76 | self.print_log('Filter [{}] remove weights [{}].'.format(i,n))
77 |
78 | for w in weights:
79 | self.print_log('Load weights [{}].'.format(w))
80 |
81 | try:
82 | model.load_state_dict(weights)
83 | except (KeyError, RuntimeError):
84 | state = model.state_dict()
85 | diff = list(set(state.keys()).difference(set(weights.keys())))
86 | for d in diff:
87 | self.print_log('Can not find weights [{}].'.format(d))
88 | state.update(weights)
89 | model.load_state_dict(state)
90 | return model
91 |
92 | def save_pkl(self, result, filename):
93 | with open('{}/{}'.format(self.work_dir, filename), 'wb') as f:
94 | pickle.dump(result, f)
95 |
96 | def save_h5(self, result, filename):
97 | with h5py.File('{}/{}'.format(self.work_dir, filename), 'w') as f:
98 | for k in result.keys():
99 | f[k] = result[k]
100 |
101 | def save_model(self, model, name):
102 | model_path = '{}/{}'.format(self.work_dir, name)
103 | state_dict = model.state_dict()
104 | weights = OrderedDict([[''.join(k.split('module.')),
105 | v.cpu()] for k, v in state_dict.items()])
106 | torch.save(weights, model_path)
107 | self.print_log('The model has been saved as {}.'.format(model_path))
108 |
109 | def save_arg(self, arg):
110 |
111 | self.session_file = '{}/config.yaml'.format(self.work_dir)
112 |
113 | # save arg
114 | arg_dict = vars(arg)
115 | if not os.path.exists(self.work_dir):
116 | os.makedirs(self.work_dir)
117 | with open(self.session_file, 'w') as f:
118 | f.write('# command line: {}\n\n'.format(' '.join(sys.argv)))
119 | yaml.dump(arg_dict, f, default_flow_style=False, indent=4)
120 |
121 | def print_log(self, str, print_time=True):
122 | if print_time:
123 | # localtime = time.asctime(time.localtime(time.time()))
124 | str = time.strftime("[%m.%d.%y|%X] ", time.localtime()) + str
125 |
126 | if self.print_to_screen:
127 | print(str)
128 | if self.save_log:
129 | with open('{}/log.txt'.format(self.work_dir), 'a') as f:
130 | print(str, file=f)
131 |
132 | def init_timer(self, *name):
133 | self.record_time()
134 | self.split_timer = {k: 0.0000001 for k in name}
135 |
136 | def check_time(self, name):
137 | self.split_timer[name] += self.split_time()
138 |
139 | def record_time(self):
140 | self.cur_time = time.time()
141 | return self.cur_time
142 |
143 | def split_time(self):
144 | split_time = time.time() - self.cur_time
145 | self.record_time()
146 | return split_time
147 |
148 | def print_timer(self):
149 | proportion = {
150 | k: '{:02d}%'.format(int(round(v * 100 / sum(self.split_timer.values()))))
151 | for k, v in self.split_timer.items()
152 | }
153 | self.print_log('Time consumption:')
154 | for k in proportion:
155 | self.print_log(
156 | '\t[{}][{}]: {:.4f}'.format(k, proportion[k],self.split_timer[k])
157 | )
158 |
159 |
160 | def str2bool(v):
161 | if v.lower() in ('yes', 'true', 't', 'y', '1'):
162 | return True
163 | elif v.lower() in ('no', 'false', 'f', 'n', '0'):
164 | return False
165 | else:
166 | raise argparse.ArgumentTypeError('Boolean value expected.')
167 |
168 |
169 | def str2dict(v):
170 | return eval('dict({})'.format(v)) #pylint: disable=W0123
171 |
172 |
173 | def _import_class_0(name):
174 | components = name.split('.')
175 | mod = __import__(components[0])
176 | for comp in components[1:]:
177 | mod = getattr(mod, comp)
178 | return mod
179 |
180 |
181 | def import_class(import_str):
182 | mod_str, _sep, class_str = import_str.rpartition('.')
183 | __import__(mod_str)
184 | try:
185 | return getattr(sys.modules[mod_str], class_str)
186 | except AttributeError:
187 | raise ImportError('Class %s cannot be found (%s)' %
188 | (class_str,
189 | traceback.format_exception(*sys.exc_info())))
190 |
191 |
192 | class DictAction(argparse.Action):
193 | def __init__(self, option_strings, dest, nargs=None, **kwargs):
194 | if nargs is not None:
195 | raise ValueError("nargs not allowed")
196 | super(DictAction, self).__init__(option_strings, dest, **kwargs)
197 |
198 | def __call__(self, parser, namespace, values, option_string=None):
199 | input_dict = eval('dict({})'.format(values)) #pylint: disable=W0123
200 | output_dict = getattr(namespace, self.dest)
201 | for k in input_dict:
202 | output_dict[k] = input_dict[k]
203 | setattr(namespace, self.dest, output_dict)
204 |
--------------------------------------------------------------------------------
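
str2bool, str2dict, and DictAction exist so that nested model/feeder options from the YAML configs can also be overridden on the command line; DictAction evaluates 'key=value' pairs into a dict and merges them into the argument's default. A hedged sketch of wiring them into argparse; the option names are illustrative, not the exact ones defined in processor/:

import argparse
from torchlight import str2bool, DictAction

parser = argparse.ArgumentParser()
parser.add_argument('--use-gpu', type=str2bool, default=True)
parser.add_argument('--model-args', action=DictAction, default=dict())

arg = parser.parse_args(['--use-gpu', 'false',
                         '--model-args', 'in_channels=3, num_class=400'])
print(arg.use_gpu)      # False
print(arg.model_args)   # {'in_channels': 3, 'num_class': 400}
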