├── .gitignore ├── ISSUE_TEMPLATE.md ├── LICENSE ├── OLD_README.md ├── README.md ├── config ├── st_gcn.twostream │ ├── ntu-xsub │ │ └── train.yaml │ └── ntu-xview │ │ └── train.yaml └── st_gcn │ ├── kinetics-skeleton │ ├── demo_offline.yaml │ ├── demo_old.yaml │ ├── demo_realtime.yaml │ ├── test.yaml │ └── train.yaml │ ├── ntu-xsub │ ├── test.yaml │ └── train.yaml │ └── ntu-xview │ ├── test.yaml │ └── train.yaml ├── feeder ├── __init__.py ├── feeder.py ├── feeder_kinetics.py └── tools.py ├── main.py ├── models └── pose │ └── coco │ └── pose_deploy_linevec.prototxt ├── net ├── __init__.py ├── st_gcn.py ├── st_gcn_twostream.py └── utils │ ├── __init__.py │ ├── graph.py │ └── tgcn.py ├── processor ├── __init__.py ├── demo_offline.py ├── demo_old.py ├── demo_realtime.py ├── io.py ├── processor.py └── recognition.py ├── requirements.txt ├── resource ├── NTU-RGB-D │ └── samples_with_missing_skeletons.txt ├── demo_asset │ ├── attention+prediction.png │ ├── attention+rgb.png │ ├── original_video.png │ └── pose_estimation.png ├── info │ ├── S001C001P001R001A044_w.gif │ ├── S001C001P001R001A051_w.gif │ ├── S002C001P010R001A017_w.gif │ ├── S003C001P008R001A002_w.gif │ ├── S003C001P008R001A008_w.gif │ ├── clean_and_jerk_w.gif │ ├── demo_video.gif │ ├── hammer_throw_w.gif │ ├── juggling_balls_w.gif │ ├── pipeline.png │ ├── pull_ups_w.gif │ └── tai_chi_w.gif ├── kinetics-motion.txt ├── kinetics_skeleton │ └── label_name.txt ├── media │ ├── clean_and_jerk.mp4 │ ├── skateboarding.mp4 │ └── ta_chi.mp4 └── reference_model.txt ├── tools ├── __init__.py ├── get_models.sh ├── kinetics_gendata.py ├── ntu_gendata.py └── utils │ ├── __init__.py │ ├── ntu_read_skeleton.py │ ├── openpose.py │ ├── video.py │ └── visualization.py └── torchlight ├── setup.py └── torchlight ├── __init__.py ├── gpu.py └── io.py /.gitignore: -------------------------------------------------------------------------------- 1 | #custom 2 | tmp* 3 | work_dir/* 4 | data 5 | config_v0/* 6 | backup/* 7 | .vscode 8 | model/* 9 | *.pt 10 | *.caffemodel 11 | cache/ 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Code version (Git Hash) and PyTorch version 2 | 3 | ### Dataset used 4 | 5 | ### Expected behavior 6 | 7 | ### Actual behavior 8 | 9 | ### Steps to reproduce the behavior 10 | 11 | ### Other comments 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Multimedia Laboratary, The Chinese University of Hong Kong 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /OLD_README.md: -------------------------------------------------------------------------------- 1 | # ST-GCN 2 | 3 | ## Introduction 4 | This repository holds the codebase, dataset and models for the paper: 5 | 6 | **Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition** Sijie Yan, Yuanjun Xiong and Dahua Lin, AAAI 2018. 
[[Arxiv Preprint]](https://arxiv.org/abs/1801.07455)

<div align="center">
    <img src="resource/info/pipeline.png">
</div>

## Visualization of ST-GCN in Action
Our demo for skeleton-based action recognition:

<div align="center">
    <img src="resource/info/demo_video.gif">
</div>

ST-GCN is able to exploit local patterns and correlations from human skeletons.
The figures below show the neural response magnitude of each node in the last layer of our ST-GCN
(the corresponding animations are the `*_w.gif` files under `resource/info/`):

| Touch head | Sitting down | Take off a shoe | Eat meal/snack | Kick other person |
| :---: | :---: | :---: | :---: | :---: |
| Hammer throw | Clean and jerk | Pull ups | Tai chi | Juggling ball |
The first row of the above results is from the **NTU-RGB+D** dataset, and the second row is from **Kinetics-skeleton**.

## Prerequisites
- Python3 (>3.5)
- [PyTorch](http://pytorch.org/)
- [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) **with** its [Python API](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/installation.md#python-api) (optional: for the demo only)
- Other Python libraries can be installed by `pip install -r requirements.txt`

### Installation
```shell
git clone https://github.com/yysijie/st-gcn.git; cd st-gcn
cd torchlight; python setup.py install; cd ..
```

### Get pretrained models
We provide the pretrained model weights of our **ST-GCN**. The weights can be downloaded by running the script
```
bash tools/get_models.sh
```

You can also obtain the models from [GoogleDrive](https://drive.google.com/drive/folders/1IYKoSrjeI3yYJ9bO0_z_eDo92i7ob_aF) or [BaiduYun](https://pan.baidu.com/s/1dwKG2TLvG-R1qeIiE4MjeA#list/path=%2FShare%2FAAAI18%2Fst-gcn%2Fmodels&parentPath=%2FShare), and manually put them into ```./models```.

## Demo

You can use the following commands to run the demo.

```shell
# with offline pose estimation
python main.py demo_offline [--video ${PATH_TO_VIDEO}] [--openpose ${PATH_TO_OPENPOSE}]

# with realtime pose estimation
python main.py demo [--video ${PATH_TO_VIDEO}] [--openpose ${PATH_TO_OPENPOSE}]
```

Optional arguments:

- `PATH_TO_OPENPOSE`: Required if the Openpose Python API is not in `PYTHONPATH`.
- `PATH_TO_VIDEO`: Filename of the input video.

## Data Preparation

We experimented on two skeleton-based action recognition datasets: **Kinetics-skeleton** and **NTU RGB+D**.
Before training and testing, for fast data loading, the datasets should be converted to the proper file structure.
You can download the pre-processed data from
[GoogleDrive](https://drive.google.com/open?id=103NOL9YYZSW1hLoWmYnv5Fs8mK-Ij7qb)
and extract the files with
```
cd st-gcn
unzip <path to the downloaded data>
```
Otherwise, to process the raw data yourself, please refer to the guidance below.

#### Kinetics-skeleton
[Kinetics](https://deepmind.com/research/open-source/open-source-datasets/kinetics/) is a video-based dataset for action recognition that only provides raw video clips without skeleton data. To obtain the joint locations, we first resized all videos to a resolution of 340x256 and converted the frame rate to 30 fps. Then, we extracted skeletons from each frame with [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose). The extracted skeleton data, which we call **Kinetics-skeleton** (7.5GB), can be directly downloaded from [GoogleDrive](https://drive.google.com/open?id=1SPQ6FmFsjGg3f59uCWfdUWI-5HJM_YhZ) or [BaiduYun](https://pan.baidu.com/s/1dwKG2TLvG-R1qeIiE4MjeA#list/path=%2FShare%2FAAAI18%2Fkinetics-skeleton&parentPath=%2FShare).

After uncompressing, rebuild the database with this command:
```
python tools/kinetics_gendata.py --data_path <path to kinetics-skeleton>
```
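Before training, it is worth sanity-checking the regenerated database. Below is a minimal sketch, assuming the default data paths used in the provided configs; the expected `(N, C, T, V, M)` layout (samples, channels, frames, joints, persons) is the one documented in `feeder/feeder.py` and `feeder/feeder_kinetics.py`:

```python
# quick sanity check of the generated Kinetics-skeleton database
# (paths are the defaults from config/st_gcn/kinetics-skeleton/*.yaml)
import pickle
import numpy as np

data = np.load('./data/Kinetics/kinetics-skeleton/val_data.npy', mmap_mode='r')
with open('./data/Kinetics/kinetics-skeleton/val_label.pkl', 'rb') as f:
    sample_names, labels = pickle.load(f)

# expected: (N, 3, 300, 18, 2) -> N samples, 3 channels (x, y, score),
# 300 frames, 18 OpenPose joints, 2 persons
print(data.shape)
print(len(sample_names), len(labels))
```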
#### NTU RGB+D
NTU RGB+D can be downloaded from [their website](http://rose1.ntu.edu.sg/datasets/actionrecognition.asp).
Only the **3D skeletons** (5.8GB) modality is required for our experiments. After downloading, build the database for training or evaluation with:
```
python tools/ntu_gendata.py --data_path <path to the 3D skeletons data>
```
where `<path to the 3D skeletons data>` points to the 3D skeletons modality of the NTU RGB+D dataset you downloaded.


## Testing Pretrained Models

To evaluate the ST-GCN model pretrained on **Kinetics-skeleton**, run
```
python main.py recognition -c config/st_gcn/kinetics-skeleton/test.yaml
```
For **cross-view** evaluation on **NTU RGB+D**, run
```
python main.py recognition -c config/st_gcn/ntu-xview/test.yaml
```
For **cross-subject** evaluation on **NTU RGB+D**, run
```
python main.py recognition -c config/st_gcn/ntu-xsub/test.yaml
```

To speed up evaluation with multi-GPU inference, or to reduce memory cost by changing the batch size, set ```--test_batch_size``` and ```--device``` like:
```
python main.py recognition -c <config file> --test_batch_size <batch size> --device <gpu0> <gpu1> ...
```

### Results
The expected **Top-1 accuracy** of the provided models is shown here:

| Model | Kinetics-skeleton (%) | NTU RGB+D Cross View (%) | NTU RGB+D Cross Subject (%) |
| :------ | :------: | :------: | :------: |
| Baseline[1] | 20.3 | 83.1 | 74.3 |
| **ST-GCN** (Ours) | **31.6** | **88.8** | **81.6** |

[1] Kim, T. S., and Reiter, A. 2017. Interpretable 3d human action analysis with temporal convolutional networks. In BNMW CVPRW.

## Training
To train a new ST-GCN model, run

```
python main.py recognition -c config/st_gcn/<dataset>/train.yaml [--work_dir <work folder>]
```
where ```<dataset>``` must be ```ntu-xsub```, ```ntu-xview``` or ```kinetics-skeleton```, depending on the dataset you want to use.
The training results, including **model weights**, configurations and logging files, will be saved under ```./work_dir``` by default, or under ```<work folder>``` if you appoint it.

You can modify training parameters such as ```work_dir```, ```batch_size```, ```step```, ```base_lr``` and ```device``` in the command line or in the configuration files. The order of priority is: command line > config file > default parameter. For more information, use ```main.py -h```.

Finally, a custom model can be evaluated with the same command as mentioned above:
```
python main.py recognition -c config/st_gcn/<dataset>/test.yaml --weights <path to model weights>
```

## Citation
Please cite the following paper if you use this repository in your research.
```
@inproceedings{stgcn2018aaai,
  title     = {Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition},
  author    = {Sijie Yan and Yuanjun Xiong and Dahua Lin},
  booktitle = {AAAI},
  year      = {2018},
}
```

## Contact
For any question, feel free to contact
```
Sijie Yan     : ys016@ie.cuhk.edu.hk
Yuanjun Xiong : bitxiong@gmail.com
```
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
## Reminder

ST-GCN has moved to [MMSkeleton](https://github.com/open-mmlab/mmskeleton),
and keeps developing as a flexible open-source toolbox for skeleton-based human understanding.
You are welcome to migrate to the new MMSkeleton.
Custom networks, data loaders and checkpoints of the old st-gcn are compatible with MMSkeleton.
If you want to use the old ST-GCN, please refer to [OLD_README.md](./OLD_README.md).

This codebase will soon cease to be maintained; it exists as a historical artifact to supplement our AAAI paper:

> **Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition**, Sijie Yan, Yuanjun Xiong and Dahua Lin, AAAI 2018. [[Arxiv Preprint]](https://arxiv.org/abs/1801.07455)

For more recent works please check out MMSkeleton.
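The YAML files that follow are the configurations referenced by the commands above. They are parsed by the processors (the actual loading code lives in `processor/io.py`, which is not reproduced in this dump). The sketch below is only an illustration, assuming PyYAML and argparse, of the "command line > config file > default parameter" priority described in the Training section; the helper name `load_arg` and the particular options are made up for the example:

```python
import argparse
import yaml

def load_arg(argv=None):
    # illustrative sketch: defaults < values from the YAML config < command line
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', default=None)
    parser.add_argument('--work_dir', default='./work_dir')
    parser.add_argument('--base_lr', type=float, default=0.01)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--device', type=int, nargs='+', default=[0])

    arg = parser.parse_args(argv)              # pass 1: defaults + command line
    if arg.config is not None:
        with open(arg.config, 'r') as f:
            config = yaml.safe_load(f)         # e.g. config/st_gcn/ntu-xsub/train.yaml
        known = vars(arg)
        parser.set_defaults(**{k: v for k, v in config.items() if k in known})
        arg = parser.parse_args(argv)          # pass 2: config becomes the new default,
                                               # explicit command-line flags still win
    return arg
```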
14 | 15 | -------------------------------------------------------------------------------- /config/st_gcn.twostream/ntu-xsub/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xsub/ST_GCN_TWO_STREAM 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xsub/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xsub/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn_twostream.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 32 31 | test_batch_size: 32 32 | num_epoch: 80 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /config/st_gcn.twostream/ntu-xview/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xview/ST_GCN_TWO_STREAM 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xview/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xview/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xview/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn_twostream.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 32 31 | test_batch_size: 32 32 | num_epoch: 80 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/demo_offline.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | model_fps: 30 3 | 4 | # model 5 | model: net.st_gcn.Model 6 | model_args: 7 | in_channels: 3 8 | num_class: 400 9 | edge_importance_weighting: True 10 | graph_args: 11 | layout: 'openpose' 12 | strategy: 'spatial' 13 | 14 | # training 15 | device: [0] -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/demo_old.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | 3 | # model 4 | model: net.st_gcn.Model 5 | model_args: 6 | in_channels: 3 7 | num_class: 400 8 | edge_importance_weighting: True 9 | graph_args: 10 | layout: 'openpose' 11 | strategy: 'spatial' 12 | 13 | # training 14 | device: [0] -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/demo_realtime.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | model_fps: 30 3 | 4 | # model 5 | model: net.st_gcn.Model 6 | model_args: 7 | in_channels: 3 8 | num_class: 400 9 | edge_importance_weighting: True 10 | graph_args: 11 | layout: 'openpose' 12 | strategy: 'spatial' 13 | 14 | # training 15 | 
device: [0] -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/test.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | test_feeder_args: 6 | data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy 7 | label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl 8 | 9 | # model 10 | model: net.st_gcn.Model 11 | model_args: 12 | in_channels: 3 13 | num_class: 400 14 | edge_importance_weighting: True 15 | graph_args: 16 | layout: 'openpose' 17 | strategy: 'spatial' 18 | 19 | # test 20 | phase: test 21 | device: 0 22 | test_batch_size: 64 23 | -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/kinetics_skeleton/ST_GCN 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | random_choose: True 7 | random_move: True 8 | window_size: 150 9 | data_path: ./data/Kinetics/kinetics-skeleton/train_data.npy 10 | label_path: ./data/Kinetics/kinetics-skeleton/train_label.pkl 11 | test_feeder_args: 12 | data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy 13 | label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl 14 | 15 | # model 16 | model: net.st_gcn.Model 17 | model_args: 18 | in_channels: 3 19 | num_class: 400 20 | edge_importance_weighting: True 21 | graph_args: 22 | layout: 'openpose' 23 | strategy: 'spatial' 24 | 25 | # training 26 | device: [0,1,2,3] 27 | batch_size: 256 28 | test_batch_size: 256 29 | 30 | #optim 31 | base_lr: 0.1 32 | step: [20, 30, 40, 50] 33 | num_epoch: 50 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /config/st_gcn/ntu-xsub/test.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.ntu-xsub.pt 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | test_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy 7 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl 8 | 9 | # model 10 | model: net.st_gcn.Model 11 | model_args: 12 | in_channels: 3 13 | num_class: 60 14 | dropout: 0.5 15 | edge_importance_weighting: True 16 | graph_args: 17 | layout: 'ntu-rgb+d' 18 | strategy: 'spatial' 19 | 20 | # test 21 | phase: test 22 | device: 0 23 | test_batch_size: 64 24 | 25 | -------------------------------------------------------------------------------- /config/st_gcn/ntu-xsub/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xsub/ST_GCN 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xsub/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xsub/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 64 31 | test_batch_size: 64 32 | num_epoch: 80 33 | 34 | 35 | 36 | 
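In the `train.yaml` files above, `base_lr: 0.1` together with `step: [10, 50]` describes a step-wise learning-rate schedule. The decay itself is applied in `processor/recognition.py` (not included in this dump); the snippet below is a sketch of the usual interpretation, dropping the rate by 10x once each listed epoch has been passed, and should be read as an illustration rather than the repository's exact code:

```python
import numpy as np

def step_lr(epoch, base_lr=0.1, steps=(10, 50), gamma=0.1):
    """Learning rate at a given epoch: base_lr decayed by `gamma`
    once for every boundary in `steps` that has already been reached."""
    return base_lr * gamma ** np.sum(epoch >= np.array(steps))

for epoch in (0, 9, 10, 49, 50, 79):
    print(epoch, step_lr(epoch))   # 0.1 before epoch 10, 0.01 before 50, 0.001 afterwards
```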
-------------------------------------------------------------------------------- /config/st_gcn/ntu-xview/test.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.ntu-xview.pt 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | test_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xview/val_data.npy 7 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl 8 | 9 | # model 10 | model: net.st_gcn.Model 11 | model_args: 12 | in_channels: 3 13 | num_class: 60 14 | dropout: 0.5 15 | edge_importance_weighting: True 16 | graph_args: 17 | layout: 'ntu-rgb+d' 18 | strategy: 'spatial' 19 | 20 | # test 21 | phase: test 22 | device: 0 23 | test_batch_size: 64 24 | 25 | -------------------------------------------------------------------------------- /config/st_gcn/ntu-xview/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xview/ST_GCN 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xview/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xview/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xview/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 64 31 | test_batch_size: 64 32 | num_epoch: 80 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /feeder/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools -------------------------------------------------------------------------------- /feeder/feeder.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import os 3 | import sys 4 | import numpy as np 5 | import random 6 | import pickle 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torch.nn.functional as F 13 | from torchvision import datasets, transforms 14 | 15 | # visualization 16 | import time 17 | 18 | # operation 19 | from . 
import tools 20 | 21 | class Feeder(torch.utils.data.Dataset): 22 | """ Feeder for skeleton-based action recognition 23 | Arguments: 24 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 25 | label_path: the path to label 26 | random_choose: If true, randomly choose a portion of the input sequence 27 | random_shift: If true, randomly pad zeros at the begining or end of sequence 28 | window_size: The length of the output sequence 29 | normalization: If true, normalize input sequence 30 | debug: If true, only use the first 100 samples 31 | """ 32 | 33 | def __init__(self, 34 | data_path, 35 | label_path, 36 | random_choose=False, 37 | random_move=False, 38 | window_size=-1, 39 | debug=False, 40 | mmap=True): 41 | self.debug = debug 42 | self.data_path = data_path 43 | self.label_path = label_path 44 | self.random_choose = random_choose 45 | self.random_move = random_move 46 | self.window_size = window_size 47 | 48 | self.load_data(mmap) 49 | 50 | def load_data(self, mmap): 51 | # data: N C V T M 52 | 53 | # load label 54 | with open(self.label_path, 'rb') as f: 55 | self.sample_name, self.label = pickle.load(f) 56 | 57 | # load data 58 | if mmap: 59 | self.data = np.load(self.data_path, mmap_mode='r') 60 | else: 61 | self.data = np.load(self.data_path) 62 | 63 | if self.debug: 64 | self.label = self.label[0:100] 65 | self.data = self.data[0:100] 66 | self.sample_name = self.sample_name[0:100] 67 | 68 | self.N, self.C, self.T, self.V, self.M = self.data.shape 69 | 70 | def __len__(self): 71 | return len(self.label) 72 | 73 | def __getitem__(self, index): 74 | # get data 75 | data_numpy = np.array(self.data[index]) 76 | label = self.label[index] 77 | 78 | # processing 79 | if self.random_choose: 80 | data_numpy = tools.random_choose(data_numpy, self.window_size) 81 | elif self.window_size > 0: 82 | data_numpy = tools.auto_pading(data_numpy, self.window_size) 83 | if self.random_move: 84 | data_numpy = tools.random_move(data_numpy) 85 | 86 | return data_numpy, label -------------------------------------------------------------------------------- /feeder/feeder_kinetics.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import os 3 | import sys 4 | import numpy as np 5 | import random 6 | import pickle 7 | import json 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | from torchvision import datasets, transforms 12 | 13 | # operation 14 | from . 
import tools 15 | 16 | 17 | class Feeder_kinetics(torch.utils.data.Dataset): 18 | """ Feeder for skeleton-based action recognition in kinetics-skeleton dataset 19 | Arguments: 20 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 21 | label_path: the path to label 22 | random_choose: If true, randomly choose a portion of the input sequence 23 | random_shift: If true, randomly pad zeros at the begining or end of sequence 24 | random_move: If true, perform randomly but continuously changed transformation to input sequence 25 | window_size: The length of the output sequence 26 | pose_matching: If ture, match the pose between two frames 27 | num_person_in: The number of people the feeder can observe in the input sequence 28 | num_person_out: The number of people the feeder in the output sequence 29 | debug: If true, only use the first 100 samples 30 | """ 31 | 32 | def __init__(self, 33 | data_path, 34 | label_path, 35 | ignore_empty_sample=True, 36 | random_choose=False, 37 | random_shift=False, 38 | random_move=False, 39 | window_size=-1, 40 | pose_matching=False, 41 | num_person_in=5, 42 | num_person_out=2, 43 | debug=False): 44 | self.debug = debug 45 | self.data_path = data_path 46 | self.label_path = label_path 47 | self.random_choose = random_choose 48 | self.random_shift = random_shift 49 | self.random_move = random_move 50 | self.window_size = window_size 51 | self.num_person_in = num_person_in 52 | self.num_person_out = num_person_out 53 | self.pose_matching = pose_matching 54 | self.ignore_empty_sample = ignore_empty_sample 55 | 56 | self.load_data() 57 | 58 | def load_data(self): 59 | # load file list 60 | self.sample_name = os.listdir(self.data_path) 61 | 62 | if self.debug: 63 | self.sample_name = self.sample_name[0:2] 64 | 65 | # load label 66 | label_path = self.label_path 67 | with open(label_path) as f: 68 | label_info = json.load(f) 69 | 70 | sample_id = [name.split('.')[0] for name in self.sample_name] 71 | self.label = np.array( 72 | [label_info[id]['label_index'] for id in sample_id]) 73 | has_skeleton = np.array( 74 | [label_info[id]['has_skeleton'] for id in sample_id]) 75 | 76 | # ignore the samples which does not has skeleton sequence 77 | if self.ignore_empty_sample: 78 | self.sample_name = [ 79 | s for h, s in zip(has_skeleton, self.sample_name) if h 80 | ] 81 | self.label = self.label[has_skeleton] 82 | 83 | # output data shape (N, C, T, V, M) 84 | self.N = len(self.sample_name) #sample 85 | self.C = 3 #channel 86 | self.T = 300 #frame 87 | self.V = 18 #joint 88 | self.M = self.num_person_out #person 89 | 90 | def __len__(self): 91 | return len(self.sample_name) 92 | 93 | def __iter__(self): 94 | return self 95 | 96 | def __getitem__(self, index): 97 | 98 | # output shape (C, T, V, M) 99 | # get data 100 | sample_name = self.sample_name[index] 101 | sample_path = os.path.join(self.data_path, sample_name) 102 | with open(sample_path, 'r') as f: 103 | video_info = json.load(f) 104 | 105 | # fill data_numpy 106 | data_numpy = np.zeros((self.C, self.T, self.V, self.num_person_in)) 107 | for frame_info in video_info['data']: 108 | frame_index = frame_info['frame_index'] 109 | for m, skeleton_info in enumerate(frame_info["skeleton"]): 110 | if m >= self.num_person_in: 111 | break 112 | pose = skeleton_info['pose'] 113 | score = skeleton_info['score'] 114 | data_numpy[0, frame_index, :, m] = pose[0::2] 115 | data_numpy[1, frame_index, :, m] = pose[1::2] 116 | data_numpy[2, frame_index, :, m] = score 117 | 118 | # centralization 119 | 
data_numpy[0:2] = data_numpy[0:2] - 0.5 120 | data_numpy[0][data_numpy[2] == 0] = 0 121 | data_numpy[1][data_numpy[2] == 0] = 0 122 | 123 | # get & check label index 124 | label = video_info['label_index'] 125 | assert (self.label[index] == label) 126 | 127 | # data augmentation 128 | if self.random_shift: 129 | data_numpy = tools.random_shift(data_numpy) 130 | if self.random_choose: 131 | data_numpy = tools.random_choose(data_numpy, self.window_size) 132 | elif self.window_size > 0: 133 | data_numpy = tools.auto_pading(data_numpy, self.window_size) 134 | if self.random_move: 135 | data_numpy = tools.random_move(data_numpy) 136 | 137 | # sort by score 138 | sort_index = (-data_numpy[2, :, :, :].sum(axis=1)).argsort(axis=1) 139 | for t, s in enumerate(sort_index): 140 | data_numpy[:, t, :, :] = data_numpy[:, t, :, s].transpose((1, 2, 141 | 0)) 142 | data_numpy = data_numpy[:, :, :, 0:self.num_person_out] 143 | 144 | # match poses between 2 frames 145 | if self.pose_matching: 146 | data_numpy = tools.openpose_match(data_numpy) 147 | 148 | return data_numpy, label 149 | 150 | def top_k(self, score, top_k): 151 | assert (all(self.label >= 0)) 152 | 153 | rank = score.argsort() 154 | hit_top_k = [l in rank[i, -top_k:] for i, l in enumerate(self.label)] 155 | return sum(hit_top_k) * 1.0 / len(hit_top_k) 156 | 157 | def top_k_by_category(self, score, top_k): 158 | assert (all(self.label >= 0)) 159 | return tools.top_k_by_category(self.label, score, top_k) 160 | 161 | def calculate_recall_precision(self, score): 162 | assert (all(self.label >= 0)) 163 | return tools.calculate_recall_precision(self.label, score) 164 | -------------------------------------------------------------------------------- /feeder/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | 5 | def downsample(data_numpy, step, random_sample=True): 6 | # input: C,T,V,M 7 | begin = np.random.randint(step) if random_sample else 0 8 | return data_numpy[:, begin::step, :, :] 9 | 10 | 11 | def temporal_slice(data_numpy, step): 12 | # input: C,T,V,M 13 | C, T, V, M = data_numpy.shape 14 | return data_numpy.reshape(C, T / step, step, V, M).transpose( 15 | (0, 1, 3, 2, 4)).reshape(C, T / step, V, step * M) 16 | 17 | 18 | def mean_subtractor(data_numpy, mean): 19 | # input: C,T,V,M 20 | # naive version 21 | if mean == 0: 22 | return 23 | C, T, V, M = data_numpy.shape 24 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0 25 | begin = valid_frame.argmax() 26 | end = len(valid_frame) - valid_frame[::-1].argmax() 27 | data_numpy[:, :end, :, :] = data_numpy[:, :end, :, :] - mean 28 | return data_numpy 29 | 30 | 31 | def auto_pading(data_numpy, size, random_pad=False): 32 | C, T, V, M = data_numpy.shape 33 | if T < size: 34 | begin = random.randint(0, size - T) if random_pad else 0 35 | data_numpy_paded = np.zeros((C, size, V, M)) 36 | data_numpy_paded[:, begin:begin + T, :, :] = data_numpy 37 | return data_numpy_paded 38 | else: 39 | return data_numpy 40 | 41 | 42 | def random_choose(data_numpy, size, auto_pad=True): 43 | # input: C,T,V,M 44 | C, T, V, M = data_numpy.shape 45 | if T == size: 46 | return data_numpy 47 | elif T < size: 48 | if auto_pad: 49 | return auto_pading(data_numpy, size, random_pad=True) 50 | else: 51 | return data_numpy 52 | else: 53 | begin = random.randint(0, T - size) 54 | return data_numpy[:, begin:begin + size, :, :] 55 | 56 | 57 | def random_move(data_numpy, 58 | angle_candidate=[-10., -5., 0., 5., 10.], 59 | 
scale_candidate=[0.9, 1.0, 1.1], 60 | transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2], 61 | move_time_candidate=[1]): 62 | # input: C,T,V,M 63 | C, T, V, M = data_numpy.shape 64 | move_time = random.choice(move_time_candidate) 65 | node = np.arange(0, T, T * 1.0 / move_time).round().astype(int) 66 | node = np.append(node, T) 67 | num_node = len(node) 68 | 69 | A = np.random.choice(angle_candidate, num_node) 70 | S = np.random.choice(scale_candidate, num_node) 71 | T_x = np.random.choice(transform_candidate, num_node) 72 | T_y = np.random.choice(transform_candidate, num_node) 73 | 74 | a = np.zeros(T) 75 | s = np.zeros(T) 76 | t_x = np.zeros(T) 77 | t_y = np.zeros(T) 78 | 79 | # linspace 80 | for i in range(num_node - 1): 81 | a[node[i]:node[i + 1]] = np.linspace( 82 | A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180 83 | s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1], 84 | node[i + 1] - node[i]) 85 | t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1], 86 | node[i + 1] - node[i]) 87 | t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1], 88 | node[i + 1] - node[i]) 89 | 90 | theta = np.array([[np.cos(a) * s, -np.sin(a) * s], 91 | [np.sin(a) * s, np.cos(a) * s]]) 92 | 93 | # perform transformation 94 | for i_frame in range(T): 95 | xy = data_numpy[0:2, i_frame, :, :] 96 | new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1)) 97 | new_xy[0] += t_x[i_frame] 98 | new_xy[1] += t_y[i_frame] 99 | data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M) 100 | 101 | return data_numpy 102 | 103 | 104 | def random_shift(data_numpy): 105 | # input: C,T,V,M 106 | C, T, V, M = data_numpy.shape 107 | data_shift = np.zeros(data_numpy.shape) 108 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0 109 | begin = valid_frame.argmax() 110 | end = len(valid_frame) - valid_frame[::-1].argmax() 111 | 112 | size = end - begin 113 | bias = random.randint(0, T - size) 114 | data_shift[:, bias:bias + size, :, :] = data_numpy[:, begin:end, :, :] 115 | 116 | return data_shift 117 | 118 | 119 | def openpose_match(data_numpy): 120 | C, T, V, M = data_numpy.shape 121 | assert (C == 3) 122 | score = data_numpy[2, :, :, :].sum(axis=1) 123 | # the rank of body confidence in each frame (shape: T-1, M) 124 | rank = (-score[0:T - 1]).argsort(axis=1).reshape(T - 1, M) 125 | 126 | # data of frame 1 127 | xy1 = data_numpy[0:2, 0:T - 1, :, :].reshape(2, T - 1, V, M, 1) 128 | # data of frame 2 129 | xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M) 130 | # square of distance between frame 1&2 (shape: T-1, M, M) 131 | distance = ((xy2 - xy1)**2).sum(axis=2).sum(axis=0) 132 | 133 | # match pose 134 | forward_map = np.zeros((T, M), dtype=int) - 1 135 | forward_map[0] = range(M) 136 | for m in range(M): 137 | choose = (rank == m) 138 | forward = distance[choose].argmin(axis=1) 139 | for t in range(T - 1): 140 | distance[t, :, forward[t]] = np.inf 141 | forward_map[1:][choose] = forward 142 | assert (np.all(forward_map >= 0)) 143 | 144 | # string data 145 | for t in range(T - 1): 146 | forward_map[t + 1] = forward_map[t + 1][forward_map[t]] 147 | 148 | # generate data 149 | new_data_numpy = np.zeros(data_numpy.shape) 150 | for t in range(T): 151 | new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[ 152 | t]].transpose(1, 2, 0) 153 | data_numpy = new_data_numpy 154 | 155 | # score sort 156 | trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0) 157 | rank = (-trace_score).argsort() 158 | data_numpy = data_numpy[:, :, :, rank] 159 | 160 | return data_numpy 161 | 162 | 
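# --- Editorial sketch (not part of the original tools.py) -------------------
# The helpers above all operate on arrays laid out as (C, T, V, M):
# channels (x, y, confidence), frames, joints, persons.  Assuming a random
# stand-in sample, Feeder.__getitem__ chains them roughly like this:
if __name__ == '__main__':
    example = np.random.randn(3, 300, 18, 2)   # hypothetical (C, T, V, M) skeleton clip
    example = random_choose(example, 150)      # crop (or pad) to a 150-frame window
    example = random_move(example)             # smooth random rotation / scale / shift
    print(example.shape)                       # -> (3, 150, 18, 2)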
163 | def top_k_by_category(label, score, top_k): 164 | instance_num, class_num = score.shape 165 | rank = score.argsort() 166 | hit_top_k = [[] for i in range(class_num)] 167 | for i in range(instance_num): 168 | l = label[i] 169 | hit_top_k[l].append(l in rank[i, -top_k:]) 170 | 171 | accuracy_list = [] 172 | for hit_per_category in hit_top_k: 173 | if hit_per_category: 174 | accuracy_list.append(sum(hit_per_category) * 1.0 / len(hit_per_category)) 175 | else: 176 | accuracy_list.append(0.0) 177 | return accuracy_list 178 | 179 | 180 | def calculate_recall_precision(label, score): 181 | instance_num, class_num = score.shape 182 | rank = score.argsort() 183 | confusion_matrix = np.zeros([class_num, class_num]) 184 | 185 | for i in range(instance_num): 186 | true_l = label[i] 187 | pred_l = rank[i, -1] 188 | confusion_matrix[true_l][pred_l] += 1 189 | 190 | precision = [] 191 | recall = [] 192 | 193 | for i in range(class_num): 194 | true_p = confusion_matrix[i][i] 195 | false_n = sum(confusion_matrix[i, :]) - true_p 196 | false_p = sum(confusion_matrix[:, i]) - true_p 197 | precision.append(true_p * 1.0 / (true_p + false_p)) 198 | recall.append(true_p * 1.0 / (true_p + false_n)) 199 | 200 | return precision, recall -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import sys 4 | 5 | # torchlight 6 | import torchlight 7 | from torchlight import import_class 8 | 9 | if __name__ == '__main__': 10 | 11 | parser = argparse.ArgumentParser(description='Processor collection') 12 | 13 | # region register processor yapf: disable 14 | processors = dict() 15 | processors['recognition'] = import_class('processor.recognition.REC_Processor') 16 | processors['demo_old'] = import_class('processor.demo_old.Demo') 17 | processors['demo'] = import_class('processor.demo_realtime.DemoRealtime') 18 | processors['demo_offline'] = import_class('processor.demo_offline.DemoOffline') 19 | #endregion yapf: enable 20 | 21 | # add sub-parser 22 | subparsers = parser.add_subparsers(dest='processor') 23 | for k, p in processors.items(): 24 | subparsers.add_parser(k, parents=[p.get_parser()]) 25 | 26 | # read arguments 27 | arg = parser.parse_args() 28 | 29 | # start 30 | Processor = processors[arg.processor] 31 | p = Processor(sys.argv[2:]) 32 | 33 | p.start() 34 | -------------------------------------------------------------------------------- /models/pose/coco/pose_deploy_linevec.prototxt: -------------------------------------------------------------------------------- 1 | input: "image" 2 | input_dim: 1 3 | input_dim: 3 4 | input_dim: 1 # This value will be defined at runtime 5 | input_dim: 1 # This value will be defined at runtime 6 | layer { 7 | name: "conv1_1" 8 | type: "Convolution" 9 | bottom: "image" 10 | top: "conv1_1" 11 | param { 12 | lr_mult: 1.0 13 | decay_mult: 1 14 | } 15 | param { 16 | lr_mult: 2.0 17 | decay_mult: 0 18 | } 19 | convolution_param { 20 | num_output: 64 21 | pad: 1 22 | kernel_size: 3 23 | weight_filler { 24 | type: "gaussian" 25 | std: 0.01 26 | } 27 | bias_filler { 28 | type: "constant" 29 | } 30 | } 31 | } 32 | layer { 33 | name: "relu1_1" 34 | type: "ReLU" 35 | bottom: "conv1_1" 36 | top: "conv1_1" 37 | } 38 | layer { 39 | name: "conv1_2" 40 | type: "Convolution" 41 | bottom: "conv1_1" 42 | top: "conv1_2" 43 | param { 44 | lr_mult: 1.0 45 | decay_mult: 1 46 | } 47 | param { 48 | lr_mult: 2.0 49 | decay_mult: 0 50 | } 
51 | convolution_param { 52 | num_output: 64 53 | pad: 1 54 | kernel_size: 3 55 | weight_filler { 56 | type: "gaussian" 57 | std: 0.01 58 | } 59 | bias_filler { 60 | type: "constant" 61 | } 62 | } 63 | } 64 | layer { 65 | name: "relu1_2" 66 | type: "ReLU" 67 | bottom: "conv1_2" 68 | top: "conv1_2" 69 | } 70 | layer { 71 | name: "pool1_stage1" 72 | type: "Pooling" 73 | bottom: "conv1_2" 74 | top: "pool1_stage1" 75 | pooling_param { 76 | pool: MAX 77 | kernel_size: 2 78 | stride: 2 79 | } 80 | } 81 | layer { 82 | name: "conv2_1" 83 | type: "Convolution" 84 | bottom: "pool1_stage1" 85 | top: "conv2_1" 86 | param { 87 | lr_mult: 1.0 88 | decay_mult: 1 89 | } 90 | param { 91 | lr_mult: 2.0 92 | decay_mult: 0 93 | } 94 | convolution_param { 95 | num_output: 128 96 | pad: 1 97 | kernel_size: 3 98 | weight_filler { 99 | type: "gaussian" 100 | std: 0.01 101 | } 102 | bias_filler { 103 | type: "constant" 104 | } 105 | } 106 | } 107 | layer { 108 | name: "relu2_1" 109 | type: "ReLU" 110 | bottom: "conv2_1" 111 | top: "conv2_1" 112 | } 113 | layer { 114 | name: "conv2_2" 115 | type: "Convolution" 116 | bottom: "conv2_1" 117 | top: "conv2_2" 118 | param { 119 | lr_mult: 1.0 120 | decay_mult: 1 121 | } 122 | param { 123 | lr_mult: 2.0 124 | decay_mult: 0 125 | } 126 | convolution_param { 127 | num_output: 128 128 | pad: 1 129 | kernel_size: 3 130 | weight_filler { 131 | type: "gaussian" 132 | std: 0.01 133 | } 134 | bias_filler { 135 | type: "constant" 136 | } 137 | } 138 | } 139 | layer { 140 | name: "relu2_2" 141 | type: "ReLU" 142 | bottom: "conv2_2" 143 | top: "conv2_2" 144 | } 145 | layer { 146 | name: "pool2_stage1" 147 | type: "Pooling" 148 | bottom: "conv2_2" 149 | top: "pool2_stage1" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 2 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: "conv3_1" 158 | type: "Convolution" 159 | bottom: "pool2_stage1" 160 | top: "conv3_1" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 0 168 | } 169 | convolution_param { 170 | num_output: 256 171 | pad: 1 172 | kernel_size: 3 173 | weight_filler { 174 | type: "gaussian" 175 | std: 0.01 176 | } 177 | bias_filler { 178 | type: "constant" 179 | } 180 | } 181 | } 182 | layer { 183 | name: "relu3_1" 184 | type: "ReLU" 185 | bottom: "conv3_1" 186 | top: "conv3_1" 187 | } 188 | layer { 189 | name: "conv3_2" 190 | type: "Convolution" 191 | bottom: "conv3_1" 192 | top: "conv3_2" 193 | param { 194 | lr_mult: 1.0 195 | decay_mult: 1 196 | } 197 | param { 198 | lr_mult: 2.0 199 | decay_mult: 0 200 | } 201 | convolution_param { 202 | num_output: 256 203 | pad: 1 204 | kernel_size: 3 205 | weight_filler { 206 | type: "gaussian" 207 | std: 0.01 208 | } 209 | bias_filler { 210 | type: "constant" 211 | } 212 | } 213 | } 214 | layer { 215 | name: "relu3_2" 216 | type: "ReLU" 217 | bottom: "conv3_2" 218 | top: "conv3_2" 219 | } 220 | layer { 221 | name: "conv3_3" 222 | type: "Convolution" 223 | bottom: "conv3_2" 224 | top: "conv3_3" 225 | param { 226 | lr_mult: 1.0 227 | decay_mult: 1 228 | } 229 | param { 230 | lr_mult: 2.0 231 | decay_mult: 0 232 | } 233 | convolution_param { 234 | num_output: 256 235 | pad: 1 236 | kernel_size: 3 237 | weight_filler { 238 | type: "gaussian" 239 | std: 0.01 240 | } 241 | bias_filler { 242 | type: "constant" 243 | } 244 | } 245 | } 246 | layer { 247 | name: "relu3_3" 248 | type: "ReLU" 249 | bottom: "conv3_3" 250 | top: "conv3_3" 251 | } 252 | layer { 253 | name: "conv3_4" 254 | type: "Convolution" 255 | bottom: "conv3_3" 
256 | top: "conv3_4" 257 | param { 258 | lr_mult: 1.0 259 | decay_mult: 1 260 | } 261 | param { 262 | lr_mult: 2.0 263 | decay_mult: 0 264 | } 265 | convolution_param { 266 | num_output: 256 267 | pad: 1 268 | kernel_size: 3 269 | weight_filler { 270 | type: "gaussian" 271 | std: 0.01 272 | } 273 | bias_filler { 274 | type: "constant" 275 | } 276 | } 277 | } 278 | layer { 279 | name: "relu3_4" 280 | type: "ReLU" 281 | bottom: "conv3_4" 282 | top: "conv3_4" 283 | } 284 | layer { 285 | name: "pool3_stage1" 286 | type: "Pooling" 287 | bottom: "conv3_4" 288 | top: "pool3_stage1" 289 | pooling_param { 290 | pool: MAX 291 | kernel_size: 2 292 | stride: 2 293 | } 294 | } 295 | layer { 296 | name: "conv4_1" 297 | type: "Convolution" 298 | bottom: "pool3_stage1" 299 | top: "conv4_1" 300 | param { 301 | lr_mult: 1.0 302 | decay_mult: 1 303 | } 304 | param { 305 | lr_mult: 2.0 306 | decay_mult: 0 307 | } 308 | convolution_param { 309 | num_output: 512 310 | pad: 1 311 | kernel_size: 3 312 | weight_filler { 313 | type: "gaussian" 314 | std: 0.01 315 | } 316 | bias_filler { 317 | type: "constant" 318 | } 319 | } 320 | } 321 | layer { 322 | name: "relu4_1" 323 | type: "ReLU" 324 | bottom: "conv4_1" 325 | top: "conv4_1" 326 | } 327 | layer { 328 | name: "conv4_2" 329 | type: "Convolution" 330 | bottom: "conv4_1" 331 | top: "conv4_2" 332 | param { 333 | lr_mult: 1.0 334 | decay_mult: 1 335 | } 336 | param { 337 | lr_mult: 2.0 338 | decay_mult: 0 339 | } 340 | convolution_param { 341 | num_output: 512 342 | pad: 1 343 | kernel_size: 3 344 | weight_filler { 345 | type: "gaussian" 346 | std: 0.01 347 | } 348 | bias_filler { 349 | type: "constant" 350 | } 351 | } 352 | } 353 | layer { 354 | name: "relu4_2" 355 | type: "ReLU" 356 | bottom: "conv4_2" 357 | top: "conv4_2" 358 | } 359 | layer { 360 | name: "conv4_3_CPM" 361 | type: "Convolution" 362 | bottom: "conv4_2" 363 | top: "conv4_3_CPM" 364 | param { 365 | lr_mult: 1.0 366 | decay_mult: 1 367 | } 368 | param { 369 | lr_mult: 2.0 370 | decay_mult: 0 371 | } 372 | convolution_param { 373 | num_output: 256 374 | pad: 1 375 | kernel_size: 3 376 | weight_filler { 377 | type: "gaussian" 378 | std: 0.01 379 | } 380 | bias_filler { 381 | type: "constant" 382 | } 383 | } 384 | } 385 | layer { 386 | name: "relu4_3_CPM" 387 | type: "ReLU" 388 | bottom: "conv4_3_CPM" 389 | top: "conv4_3_CPM" 390 | } 391 | layer { 392 | name: "conv4_4_CPM" 393 | type: "Convolution" 394 | bottom: "conv4_3_CPM" 395 | top: "conv4_4_CPM" 396 | param { 397 | lr_mult: 1.0 398 | decay_mult: 1 399 | } 400 | param { 401 | lr_mult: 2.0 402 | decay_mult: 0 403 | } 404 | convolution_param { 405 | num_output: 128 406 | pad: 1 407 | kernel_size: 3 408 | weight_filler { 409 | type: "gaussian" 410 | std: 0.01 411 | } 412 | bias_filler { 413 | type: "constant" 414 | } 415 | } 416 | } 417 | layer { 418 | name: "relu4_4_CPM" 419 | type: "ReLU" 420 | bottom: "conv4_4_CPM" 421 | top: "conv4_4_CPM" 422 | } 423 | layer { 424 | name: "conv5_1_CPM_L1" 425 | type: "Convolution" 426 | bottom: "conv4_4_CPM" 427 | top: "conv5_1_CPM_L1" 428 | param { 429 | lr_mult: 1.0 430 | decay_mult: 1 431 | } 432 | param { 433 | lr_mult: 2.0 434 | decay_mult: 0 435 | } 436 | convolution_param { 437 | num_output: 128 438 | pad: 1 439 | kernel_size: 3 440 | weight_filler { 441 | type: "gaussian" 442 | std: 0.01 443 | } 444 | bias_filler { 445 | type: "constant" 446 | } 447 | } 448 | } 449 | layer { 450 | name: "relu5_1_CPM_L1" 451 | type: "ReLU" 452 | bottom: "conv5_1_CPM_L1" 453 | top: "conv5_1_CPM_L1" 454 | } 455 | layer { 456 
| name: "conv5_1_CPM_L2" 457 | type: "Convolution" 458 | bottom: "conv4_4_CPM" 459 | top: "conv5_1_CPM_L2" 460 | param { 461 | lr_mult: 1.0 462 | decay_mult: 1 463 | } 464 | param { 465 | lr_mult: 2.0 466 | decay_mult: 0 467 | } 468 | convolution_param { 469 | num_output: 128 470 | pad: 1 471 | kernel_size: 3 472 | weight_filler { 473 | type: "gaussian" 474 | std: 0.01 475 | } 476 | bias_filler { 477 | type: "constant" 478 | } 479 | } 480 | } 481 | layer { 482 | name: "relu5_1_CPM_L2" 483 | type: "ReLU" 484 | bottom: "conv5_1_CPM_L2" 485 | top: "conv5_1_CPM_L2" 486 | } 487 | layer { 488 | name: "conv5_2_CPM_L1" 489 | type: "Convolution" 490 | bottom: "conv5_1_CPM_L1" 491 | top: "conv5_2_CPM_L1" 492 | param { 493 | lr_mult: 1.0 494 | decay_mult: 1 495 | } 496 | param { 497 | lr_mult: 2.0 498 | decay_mult: 0 499 | } 500 | convolution_param { 501 | num_output: 128 502 | pad: 1 503 | kernel_size: 3 504 | weight_filler { 505 | type: "gaussian" 506 | std: 0.01 507 | } 508 | bias_filler { 509 | type: "constant" 510 | } 511 | } 512 | } 513 | layer { 514 | name: "relu5_2_CPM_L1" 515 | type: "ReLU" 516 | bottom: "conv5_2_CPM_L1" 517 | top: "conv5_2_CPM_L1" 518 | } 519 | layer { 520 | name: "conv5_2_CPM_L2" 521 | type: "Convolution" 522 | bottom: "conv5_1_CPM_L2" 523 | top: "conv5_2_CPM_L2" 524 | param { 525 | lr_mult: 1.0 526 | decay_mult: 1 527 | } 528 | param { 529 | lr_mult: 2.0 530 | decay_mult: 0 531 | } 532 | convolution_param { 533 | num_output: 128 534 | pad: 1 535 | kernel_size: 3 536 | weight_filler { 537 | type: "gaussian" 538 | std: 0.01 539 | } 540 | bias_filler { 541 | type: "constant" 542 | } 543 | } 544 | } 545 | layer { 546 | name: "relu5_2_CPM_L2" 547 | type: "ReLU" 548 | bottom: "conv5_2_CPM_L2" 549 | top: "conv5_2_CPM_L2" 550 | } 551 | layer { 552 | name: "conv5_3_CPM_L1" 553 | type: "Convolution" 554 | bottom: "conv5_2_CPM_L1" 555 | top: "conv5_3_CPM_L1" 556 | param { 557 | lr_mult: 1.0 558 | decay_mult: 1 559 | } 560 | param { 561 | lr_mult: 2.0 562 | decay_mult: 0 563 | } 564 | convolution_param { 565 | num_output: 128 566 | pad: 1 567 | kernel_size: 3 568 | weight_filler { 569 | type: "gaussian" 570 | std: 0.01 571 | } 572 | bias_filler { 573 | type: "constant" 574 | } 575 | } 576 | } 577 | layer { 578 | name: "relu5_3_CPM_L1" 579 | type: "ReLU" 580 | bottom: "conv5_3_CPM_L1" 581 | top: "conv5_3_CPM_L1" 582 | } 583 | layer { 584 | name: "conv5_3_CPM_L2" 585 | type: "Convolution" 586 | bottom: "conv5_2_CPM_L2" 587 | top: "conv5_3_CPM_L2" 588 | param { 589 | lr_mult: 1.0 590 | decay_mult: 1 591 | } 592 | param { 593 | lr_mult: 2.0 594 | decay_mult: 0 595 | } 596 | convolution_param { 597 | num_output: 128 598 | pad: 1 599 | kernel_size: 3 600 | weight_filler { 601 | type: "gaussian" 602 | std: 0.01 603 | } 604 | bias_filler { 605 | type: "constant" 606 | } 607 | } 608 | } 609 | layer { 610 | name: "relu5_3_CPM_L2" 611 | type: "ReLU" 612 | bottom: "conv5_3_CPM_L2" 613 | top: "conv5_3_CPM_L2" 614 | } 615 | layer { 616 | name: "conv5_4_CPM_L1" 617 | type: "Convolution" 618 | bottom: "conv5_3_CPM_L1" 619 | top: "conv5_4_CPM_L1" 620 | param { 621 | lr_mult: 1.0 622 | decay_mult: 1 623 | } 624 | param { 625 | lr_mult: 2.0 626 | decay_mult: 0 627 | } 628 | convolution_param { 629 | num_output: 512 630 | pad: 0 631 | kernel_size: 1 632 | weight_filler { 633 | type: "gaussian" 634 | std: 0.01 635 | } 636 | bias_filler { 637 | type: "constant" 638 | } 639 | } 640 | } 641 | layer { 642 | name: "relu5_4_CPM_L1" 643 | type: "ReLU" 644 | bottom: "conv5_4_CPM_L1" 645 | top: "conv5_4_CPM_L1" 
646 | } 647 | layer { 648 | name: "conv5_4_CPM_L2" 649 | type: "Convolution" 650 | bottom: "conv5_3_CPM_L2" 651 | top: "conv5_4_CPM_L2" 652 | param { 653 | lr_mult: 1.0 654 | decay_mult: 1 655 | } 656 | param { 657 | lr_mult: 2.0 658 | decay_mult: 0 659 | } 660 | convolution_param { 661 | num_output: 512 662 | pad: 0 663 | kernel_size: 1 664 | weight_filler { 665 | type: "gaussian" 666 | std: 0.01 667 | } 668 | bias_filler { 669 | type: "constant" 670 | } 671 | } 672 | } 673 | layer { 674 | name: "relu5_4_CPM_L2" 675 | type: "ReLU" 676 | bottom: "conv5_4_CPM_L2" 677 | top: "conv5_4_CPM_L2" 678 | } 679 | layer { 680 | name: "conv5_5_CPM_L1" 681 | type: "Convolution" 682 | bottom: "conv5_4_CPM_L1" 683 | top: "conv5_5_CPM_L1" 684 | param { 685 | lr_mult: 1.0 686 | decay_mult: 1 687 | } 688 | param { 689 | lr_mult: 2.0 690 | decay_mult: 0 691 | } 692 | convolution_param { 693 | num_output: 38 694 | pad: 0 695 | kernel_size: 1 696 | weight_filler { 697 | type: "gaussian" 698 | std: 0.01 699 | } 700 | bias_filler { 701 | type: "constant" 702 | } 703 | } 704 | } 705 | layer { 706 | name: "conv5_5_CPM_L2" 707 | type: "Convolution" 708 | bottom: "conv5_4_CPM_L2" 709 | top: "conv5_5_CPM_L2" 710 | param { 711 | lr_mult: 1.0 712 | decay_mult: 1 713 | } 714 | param { 715 | lr_mult: 2.0 716 | decay_mult: 0 717 | } 718 | convolution_param { 719 | num_output: 19 720 | pad: 0 721 | kernel_size: 1 722 | weight_filler { 723 | type: "gaussian" 724 | std: 0.01 725 | } 726 | bias_filler { 727 | type: "constant" 728 | } 729 | } 730 | } 731 | layer { 732 | name: "concat_stage2" 733 | type: "Concat" 734 | bottom: "conv5_5_CPM_L1" 735 | bottom: "conv5_5_CPM_L2" 736 | bottom: "conv4_4_CPM" 737 | top: "concat_stage2" 738 | concat_param { 739 | axis: 1 740 | } 741 | } 742 | layer { 743 | name: "Mconv1_stage2_L1" 744 | type: "Convolution" 745 | bottom: "concat_stage2" 746 | top: "Mconv1_stage2_L1" 747 | param { 748 | lr_mult: 4.0 749 | decay_mult: 1 750 | } 751 | param { 752 | lr_mult: 8.0 753 | decay_mult: 0 754 | } 755 | convolution_param { 756 | num_output: 128 757 | pad: 3 758 | kernel_size: 7 759 | weight_filler { 760 | type: "gaussian" 761 | std: 0.01 762 | } 763 | bias_filler { 764 | type: "constant" 765 | } 766 | } 767 | } 768 | layer { 769 | name: "Mrelu1_stage2_L1" 770 | type: "ReLU" 771 | bottom: "Mconv1_stage2_L1" 772 | top: "Mconv1_stage2_L1" 773 | } 774 | layer { 775 | name: "Mconv1_stage2_L2" 776 | type: "Convolution" 777 | bottom: "concat_stage2" 778 | top: "Mconv1_stage2_L2" 779 | param { 780 | lr_mult: 4.0 781 | decay_mult: 1 782 | } 783 | param { 784 | lr_mult: 8.0 785 | decay_mult: 0 786 | } 787 | convolution_param { 788 | num_output: 128 789 | pad: 3 790 | kernel_size: 7 791 | weight_filler { 792 | type: "gaussian" 793 | std: 0.01 794 | } 795 | bias_filler { 796 | type: "constant" 797 | } 798 | } 799 | } 800 | layer { 801 | name: "Mrelu1_stage2_L2" 802 | type: "ReLU" 803 | bottom: "Mconv1_stage2_L2" 804 | top: "Mconv1_stage2_L2" 805 | } 806 | layer { 807 | name: "Mconv2_stage2_L1" 808 | type: "Convolution" 809 | bottom: "Mconv1_stage2_L1" 810 | top: "Mconv2_stage2_L1" 811 | param { 812 | lr_mult: 4.0 813 | decay_mult: 1 814 | } 815 | param { 816 | lr_mult: 8.0 817 | decay_mult: 0 818 | } 819 | convolution_param { 820 | num_output: 128 821 | pad: 3 822 | kernel_size: 7 823 | weight_filler { 824 | type: "gaussian" 825 | std: 0.01 826 | } 827 | bias_filler { 828 | type: "constant" 829 | } 830 | } 831 | } 832 | layer { 833 | name: "Mrelu2_stage2_L1" 834 | type: "ReLU" 835 | bottom: "Mconv2_stage2_L1" 
836 | top: "Mconv2_stage2_L1" 837 | } 838 | layer { 839 | name: "Mconv2_stage2_L2" 840 | type: "Convolution" 841 | bottom: "Mconv1_stage2_L2" 842 | top: "Mconv2_stage2_L2" 843 | param { 844 | lr_mult: 4.0 845 | decay_mult: 1 846 | } 847 | param { 848 | lr_mult: 8.0 849 | decay_mult: 0 850 | } 851 | convolution_param { 852 | num_output: 128 853 | pad: 3 854 | kernel_size: 7 855 | weight_filler { 856 | type: "gaussian" 857 | std: 0.01 858 | } 859 | bias_filler { 860 | type: "constant" 861 | } 862 | } 863 | } 864 | layer { 865 | name: "Mrelu2_stage2_L2" 866 | type: "ReLU" 867 | bottom: "Mconv2_stage2_L2" 868 | top: "Mconv2_stage2_L2" 869 | } 870 | layer { 871 | name: "Mconv3_stage2_L1" 872 | type: "Convolution" 873 | bottom: "Mconv2_stage2_L1" 874 | top: "Mconv3_stage2_L1" 875 | param { 876 | lr_mult: 4.0 877 | decay_mult: 1 878 | } 879 | param { 880 | lr_mult: 8.0 881 | decay_mult: 0 882 | } 883 | convolution_param { 884 | num_output: 128 885 | pad: 3 886 | kernel_size: 7 887 | weight_filler { 888 | type: "gaussian" 889 | std: 0.01 890 | } 891 | bias_filler { 892 | type: "constant" 893 | } 894 | } 895 | } 896 | layer { 897 | name: "Mrelu3_stage2_L1" 898 | type: "ReLU" 899 | bottom: "Mconv3_stage2_L1" 900 | top: "Mconv3_stage2_L1" 901 | } 902 | layer { 903 | name: "Mconv3_stage2_L2" 904 | type: "Convolution" 905 | bottom: "Mconv2_stage2_L2" 906 | top: "Mconv3_stage2_L2" 907 | param { 908 | lr_mult: 4.0 909 | decay_mult: 1 910 | } 911 | param { 912 | lr_mult: 8.0 913 | decay_mult: 0 914 | } 915 | convolution_param { 916 | num_output: 128 917 | pad: 3 918 | kernel_size: 7 919 | weight_filler { 920 | type: "gaussian" 921 | std: 0.01 922 | } 923 | bias_filler { 924 | type: "constant" 925 | } 926 | } 927 | } 928 | layer { 929 | name: "Mrelu3_stage2_L2" 930 | type: "ReLU" 931 | bottom: "Mconv3_stage2_L2" 932 | top: "Mconv3_stage2_L2" 933 | } 934 | layer { 935 | name: "Mconv4_stage2_L1" 936 | type: "Convolution" 937 | bottom: "Mconv3_stage2_L1" 938 | top: "Mconv4_stage2_L1" 939 | param { 940 | lr_mult: 4.0 941 | decay_mult: 1 942 | } 943 | param { 944 | lr_mult: 8.0 945 | decay_mult: 0 946 | } 947 | convolution_param { 948 | num_output: 128 949 | pad: 3 950 | kernel_size: 7 951 | weight_filler { 952 | type: "gaussian" 953 | std: 0.01 954 | } 955 | bias_filler { 956 | type: "constant" 957 | } 958 | } 959 | } 960 | layer { 961 | name: "Mrelu4_stage2_L1" 962 | type: "ReLU" 963 | bottom: "Mconv4_stage2_L1" 964 | top: "Mconv4_stage2_L1" 965 | } 966 | layer { 967 | name: "Mconv4_stage2_L2" 968 | type: "Convolution" 969 | bottom: "Mconv3_stage2_L2" 970 | top: "Mconv4_stage2_L2" 971 | param { 972 | lr_mult: 4.0 973 | decay_mult: 1 974 | } 975 | param { 976 | lr_mult: 8.0 977 | decay_mult: 0 978 | } 979 | convolution_param { 980 | num_output: 128 981 | pad: 3 982 | kernel_size: 7 983 | weight_filler { 984 | type: "gaussian" 985 | std: 0.01 986 | } 987 | bias_filler { 988 | type: "constant" 989 | } 990 | } 991 | } 992 | layer { 993 | name: "Mrelu4_stage2_L2" 994 | type: "ReLU" 995 | bottom: "Mconv4_stage2_L2" 996 | top: "Mconv4_stage2_L2" 997 | } 998 | layer { 999 | name: "Mconv5_stage2_L1" 1000 | type: "Convolution" 1001 | bottom: "Mconv4_stage2_L1" 1002 | top: "Mconv5_stage2_L1" 1003 | param { 1004 | lr_mult: 4.0 1005 | decay_mult: 1 1006 | } 1007 | param { 1008 | lr_mult: 8.0 1009 | decay_mult: 0 1010 | } 1011 | convolution_param { 1012 | num_output: 128 1013 | pad: 3 1014 | kernel_size: 7 1015 | weight_filler { 1016 | type: "gaussian" 1017 | std: 0.01 1018 | } 1019 | bias_filler { 1020 | type: "constant" 
1021 | } 1022 | } 1023 | } 1024 | layer { 1025 | name: "Mrelu5_stage2_L1" 1026 | type: "ReLU" 1027 | bottom: "Mconv5_stage2_L1" 1028 | top: "Mconv5_stage2_L1" 1029 | } 1030 | layer { 1031 | name: "Mconv5_stage2_L2" 1032 | type: "Convolution" 1033 | bottom: "Mconv4_stage2_L2" 1034 | top: "Mconv5_stage2_L2" 1035 | param { 1036 | lr_mult: 4.0 1037 | decay_mult: 1 1038 | } 1039 | param { 1040 | lr_mult: 8.0 1041 | decay_mult: 0 1042 | } 1043 | convolution_param { 1044 | num_output: 128 1045 | pad: 3 1046 | kernel_size: 7 1047 | weight_filler { 1048 | type: "gaussian" 1049 | std: 0.01 1050 | } 1051 | bias_filler { 1052 | type: "constant" 1053 | } 1054 | } 1055 | } 1056 | layer { 1057 | name: "Mrelu5_stage2_L2" 1058 | type: "ReLU" 1059 | bottom: "Mconv5_stage2_L2" 1060 | top: "Mconv5_stage2_L2" 1061 | } 1062 | layer { 1063 | name: "Mconv6_stage2_L1" 1064 | type: "Convolution" 1065 | bottom: "Mconv5_stage2_L1" 1066 | top: "Mconv6_stage2_L1" 1067 | param { 1068 | lr_mult: 4.0 1069 | decay_mult: 1 1070 | } 1071 | param { 1072 | lr_mult: 8.0 1073 | decay_mult: 0 1074 | } 1075 | convolution_param { 1076 | num_output: 128 1077 | pad: 0 1078 | kernel_size: 1 1079 | weight_filler { 1080 | type: "gaussian" 1081 | std: 0.01 1082 | } 1083 | bias_filler { 1084 | type: "constant" 1085 | } 1086 | } 1087 | } 1088 | layer { 1089 | name: "Mrelu6_stage2_L1" 1090 | type: "ReLU" 1091 | bottom: "Mconv6_stage2_L1" 1092 | top: "Mconv6_stage2_L1" 1093 | } 1094 | layer { 1095 | name: "Mconv6_stage2_L2" 1096 | type: "Convolution" 1097 | bottom: "Mconv5_stage2_L2" 1098 | top: "Mconv6_stage2_L2" 1099 | param { 1100 | lr_mult: 4.0 1101 | decay_mult: 1 1102 | } 1103 | param { 1104 | lr_mult: 8.0 1105 | decay_mult: 0 1106 | } 1107 | convolution_param { 1108 | num_output: 128 1109 | pad: 0 1110 | kernel_size: 1 1111 | weight_filler { 1112 | type: "gaussian" 1113 | std: 0.01 1114 | } 1115 | bias_filler { 1116 | type: "constant" 1117 | } 1118 | } 1119 | } 1120 | layer { 1121 | name: "Mrelu6_stage2_L2" 1122 | type: "ReLU" 1123 | bottom: "Mconv6_stage2_L2" 1124 | top: "Mconv6_stage2_L2" 1125 | } 1126 | layer { 1127 | name: "Mconv7_stage2_L1" 1128 | type: "Convolution" 1129 | bottom: "Mconv6_stage2_L1" 1130 | top: "Mconv7_stage2_L1" 1131 | param { 1132 | lr_mult: 4.0 1133 | decay_mult: 1 1134 | } 1135 | param { 1136 | lr_mult: 8.0 1137 | decay_mult: 0 1138 | } 1139 | convolution_param { 1140 | num_output: 38 1141 | pad: 0 1142 | kernel_size: 1 1143 | weight_filler { 1144 | type: "gaussian" 1145 | std: 0.01 1146 | } 1147 | bias_filler { 1148 | type: "constant" 1149 | } 1150 | } 1151 | } 1152 | layer { 1153 | name: "Mconv7_stage2_L2" 1154 | type: "Convolution" 1155 | bottom: "Mconv6_stage2_L2" 1156 | top: "Mconv7_stage2_L2" 1157 | param { 1158 | lr_mult: 4.0 1159 | decay_mult: 1 1160 | } 1161 | param { 1162 | lr_mult: 8.0 1163 | decay_mult: 0 1164 | } 1165 | convolution_param { 1166 | num_output: 19 1167 | pad: 0 1168 | kernel_size: 1 1169 | weight_filler { 1170 | type: "gaussian" 1171 | std: 0.01 1172 | } 1173 | bias_filler { 1174 | type: "constant" 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "concat_stage3" 1180 | type: "Concat" 1181 | bottom: "Mconv7_stage2_L1" 1182 | bottom: "Mconv7_stage2_L2" 1183 | bottom: "conv4_4_CPM" 1184 | top: "concat_stage3" 1185 | concat_param { 1186 | axis: 1 1187 | } 1188 | } 1189 | layer { 1190 | name: "Mconv1_stage3_L1" 1191 | type: "Convolution" 1192 | bottom: "concat_stage3" 1193 | top: "Mconv1_stage3_L1" 1194 | param { 1195 | lr_mult: 4.0 1196 | decay_mult: 1 1197 | } 1198 | param { 
1199 | lr_mult: 8.0 1200 | decay_mult: 0 1201 | } 1202 | convolution_param { 1203 | num_output: 128 1204 | pad: 3 1205 | kernel_size: 7 1206 | weight_filler { 1207 | type: "gaussian" 1208 | std: 0.01 1209 | } 1210 | bias_filler { 1211 | type: "constant" 1212 | } 1213 | } 1214 | } 1215 | layer { 1216 | name: "Mrelu1_stage3_L1" 1217 | type: "ReLU" 1218 | bottom: "Mconv1_stage3_L1" 1219 | top: "Mconv1_stage3_L1" 1220 | } 1221 | layer { 1222 | name: "Mconv1_stage3_L2" 1223 | type: "Convolution" 1224 | bottom: "concat_stage3" 1225 | top: "Mconv1_stage3_L2" 1226 | param { 1227 | lr_mult: 4.0 1228 | decay_mult: 1 1229 | } 1230 | param { 1231 | lr_mult: 8.0 1232 | decay_mult: 0 1233 | } 1234 | convolution_param { 1235 | num_output: 128 1236 | pad: 3 1237 | kernel_size: 7 1238 | weight_filler { 1239 | type: "gaussian" 1240 | std: 0.01 1241 | } 1242 | bias_filler { 1243 | type: "constant" 1244 | } 1245 | } 1246 | } 1247 | layer { 1248 | name: "Mrelu1_stage3_L2" 1249 | type: "ReLU" 1250 | bottom: "Mconv1_stage3_L2" 1251 | top: "Mconv1_stage3_L2" 1252 | } 1253 | layer { 1254 | name: "Mconv2_stage3_L1" 1255 | type: "Convolution" 1256 | bottom: "Mconv1_stage3_L1" 1257 | top: "Mconv2_stage3_L1" 1258 | param { 1259 | lr_mult: 4.0 1260 | decay_mult: 1 1261 | } 1262 | param { 1263 | lr_mult: 8.0 1264 | decay_mult: 0 1265 | } 1266 | convolution_param { 1267 | num_output: 128 1268 | pad: 3 1269 | kernel_size: 7 1270 | weight_filler { 1271 | type: "gaussian" 1272 | std: 0.01 1273 | } 1274 | bias_filler { 1275 | type: "constant" 1276 | } 1277 | } 1278 | } 1279 | layer { 1280 | name: "Mrelu2_stage3_L1" 1281 | type: "ReLU" 1282 | bottom: "Mconv2_stage3_L1" 1283 | top: "Mconv2_stage3_L1" 1284 | } 1285 | layer { 1286 | name: "Mconv2_stage3_L2" 1287 | type: "Convolution" 1288 | bottom: "Mconv1_stage3_L2" 1289 | top: "Mconv2_stage3_L2" 1290 | param { 1291 | lr_mult: 4.0 1292 | decay_mult: 1 1293 | } 1294 | param { 1295 | lr_mult: 8.0 1296 | decay_mult: 0 1297 | } 1298 | convolution_param { 1299 | num_output: 128 1300 | pad: 3 1301 | kernel_size: 7 1302 | weight_filler { 1303 | type: "gaussian" 1304 | std: 0.01 1305 | } 1306 | bias_filler { 1307 | type: "constant" 1308 | } 1309 | } 1310 | } 1311 | layer { 1312 | name: "Mrelu2_stage3_L2" 1313 | type: "ReLU" 1314 | bottom: "Mconv2_stage3_L2" 1315 | top: "Mconv2_stage3_L2" 1316 | } 1317 | layer { 1318 | name: "Mconv3_stage3_L1" 1319 | type: "Convolution" 1320 | bottom: "Mconv2_stage3_L1" 1321 | top: "Mconv3_stage3_L1" 1322 | param { 1323 | lr_mult: 4.0 1324 | decay_mult: 1 1325 | } 1326 | param { 1327 | lr_mult: 8.0 1328 | decay_mult: 0 1329 | } 1330 | convolution_param { 1331 | num_output: 128 1332 | pad: 3 1333 | kernel_size: 7 1334 | weight_filler { 1335 | type: "gaussian" 1336 | std: 0.01 1337 | } 1338 | bias_filler { 1339 | type: "constant" 1340 | } 1341 | } 1342 | } 1343 | layer { 1344 | name: "Mrelu3_stage3_L1" 1345 | type: "ReLU" 1346 | bottom: "Mconv3_stage3_L1" 1347 | top: "Mconv3_stage3_L1" 1348 | } 1349 | layer { 1350 | name: "Mconv3_stage3_L2" 1351 | type: "Convolution" 1352 | bottom: "Mconv2_stage3_L2" 1353 | top: "Mconv3_stage3_L2" 1354 | param { 1355 | lr_mult: 4.0 1356 | decay_mult: 1 1357 | } 1358 | param { 1359 | lr_mult: 8.0 1360 | decay_mult: 0 1361 | } 1362 | convolution_param { 1363 | num_output: 128 1364 | pad: 3 1365 | kernel_size: 7 1366 | weight_filler { 1367 | type: "gaussian" 1368 | std: 0.01 1369 | } 1370 | bias_filler { 1371 | type: "constant" 1372 | } 1373 | } 1374 | } 1375 | layer { 1376 | name: "Mrelu3_stage3_L2" 1377 | type: "ReLU" 1378 
| bottom: "Mconv3_stage3_L2" 1379 | top: "Mconv3_stage3_L2" 1380 | } 1381 | layer { 1382 | name: "Mconv4_stage3_L1" 1383 | type: "Convolution" 1384 | bottom: "Mconv3_stage3_L1" 1385 | top: "Mconv4_stage3_L1" 1386 | param { 1387 | lr_mult: 4.0 1388 | decay_mult: 1 1389 | } 1390 | param { 1391 | lr_mult: 8.0 1392 | decay_mult: 0 1393 | } 1394 | convolution_param { 1395 | num_output: 128 1396 | pad: 3 1397 | kernel_size: 7 1398 | weight_filler { 1399 | type: "gaussian" 1400 | std: 0.01 1401 | } 1402 | bias_filler { 1403 | type: "constant" 1404 | } 1405 | } 1406 | } 1407 | layer { 1408 | name: "Mrelu4_stage3_L1" 1409 | type: "ReLU" 1410 | bottom: "Mconv4_stage3_L1" 1411 | top: "Mconv4_stage3_L1" 1412 | } 1413 | layer { 1414 | name: "Mconv4_stage3_L2" 1415 | type: "Convolution" 1416 | bottom: "Mconv3_stage3_L2" 1417 | top: "Mconv4_stage3_L2" 1418 | param { 1419 | lr_mult: 4.0 1420 | decay_mult: 1 1421 | } 1422 | param { 1423 | lr_mult: 8.0 1424 | decay_mult: 0 1425 | } 1426 | convolution_param { 1427 | num_output: 128 1428 | pad: 3 1429 | kernel_size: 7 1430 | weight_filler { 1431 | type: "gaussian" 1432 | std: 0.01 1433 | } 1434 | bias_filler { 1435 | type: "constant" 1436 | } 1437 | } 1438 | } 1439 | layer { 1440 | name: "Mrelu4_stage3_L2" 1441 | type: "ReLU" 1442 | bottom: "Mconv4_stage3_L2" 1443 | top: "Mconv4_stage3_L2" 1444 | } 1445 | layer { 1446 | name: "Mconv5_stage3_L1" 1447 | type: "Convolution" 1448 | bottom: "Mconv4_stage3_L1" 1449 | top: "Mconv5_stage3_L1" 1450 | param { 1451 | lr_mult: 4.0 1452 | decay_mult: 1 1453 | } 1454 | param { 1455 | lr_mult: 8.0 1456 | decay_mult: 0 1457 | } 1458 | convolution_param { 1459 | num_output: 128 1460 | pad: 3 1461 | kernel_size: 7 1462 | weight_filler { 1463 | type: "gaussian" 1464 | std: 0.01 1465 | } 1466 | bias_filler { 1467 | type: "constant" 1468 | } 1469 | } 1470 | } 1471 | layer { 1472 | name: "Mrelu5_stage3_L1" 1473 | type: "ReLU" 1474 | bottom: "Mconv5_stage3_L1" 1475 | top: "Mconv5_stage3_L1" 1476 | } 1477 | layer { 1478 | name: "Mconv5_stage3_L2" 1479 | type: "Convolution" 1480 | bottom: "Mconv4_stage3_L2" 1481 | top: "Mconv5_stage3_L2" 1482 | param { 1483 | lr_mult: 4.0 1484 | decay_mult: 1 1485 | } 1486 | param { 1487 | lr_mult: 8.0 1488 | decay_mult: 0 1489 | } 1490 | convolution_param { 1491 | num_output: 128 1492 | pad: 3 1493 | kernel_size: 7 1494 | weight_filler { 1495 | type: "gaussian" 1496 | std: 0.01 1497 | } 1498 | bias_filler { 1499 | type: "constant" 1500 | } 1501 | } 1502 | } 1503 | layer { 1504 | name: "Mrelu5_stage3_L2" 1505 | type: "ReLU" 1506 | bottom: "Mconv5_stage3_L2" 1507 | top: "Mconv5_stage3_L2" 1508 | } 1509 | layer { 1510 | name: "Mconv6_stage3_L1" 1511 | type: "Convolution" 1512 | bottom: "Mconv5_stage3_L1" 1513 | top: "Mconv6_stage3_L1" 1514 | param { 1515 | lr_mult: 4.0 1516 | decay_mult: 1 1517 | } 1518 | param { 1519 | lr_mult: 8.0 1520 | decay_mult: 0 1521 | } 1522 | convolution_param { 1523 | num_output: 128 1524 | pad: 0 1525 | kernel_size: 1 1526 | weight_filler { 1527 | type: "gaussian" 1528 | std: 0.01 1529 | } 1530 | bias_filler { 1531 | type: "constant" 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "Mrelu6_stage3_L1" 1537 | type: "ReLU" 1538 | bottom: "Mconv6_stage3_L1" 1539 | top: "Mconv6_stage3_L1" 1540 | } 1541 | layer { 1542 | name: "Mconv6_stage3_L2" 1543 | type: "Convolution" 1544 | bottom: "Mconv5_stage3_L2" 1545 | top: "Mconv6_stage3_L2" 1546 | param { 1547 | lr_mult: 4.0 1548 | decay_mult: 1 1549 | } 1550 | param { 1551 | lr_mult: 8.0 1552 | decay_mult: 0 1553 | } 1554 | 
convolution_param { 1555 | num_output: 128 1556 | pad: 0 1557 | kernel_size: 1 1558 | weight_filler { 1559 | type: "gaussian" 1560 | std: 0.01 1561 | } 1562 | bias_filler { 1563 | type: "constant" 1564 | } 1565 | } 1566 | } 1567 | layer { 1568 | name: "Mrelu6_stage3_L2" 1569 | type: "ReLU" 1570 | bottom: "Mconv6_stage3_L2" 1571 | top: "Mconv6_stage3_L2" 1572 | } 1573 | layer { 1574 | name: "Mconv7_stage3_L1" 1575 | type: "Convolution" 1576 | bottom: "Mconv6_stage3_L1" 1577 | top: "Mconv7_stage3_L1" 1578 | param { 1579 | lr_mult: 4.0 1580 | decay_mult: 1 1581 | } 1582 | param { 1583 | lr_mult: 8.0 1584 | decay_mult: 0 1585 | } 1586 | convolution_param { 1587 | num_output: 38 1588 | pad: 0 1589 | kernel_size: 1 1590 | weight_filler { 1591 | type: "gaussian" 1592 | std: 0.01 1593 | } 1594 | bias_filler { 1595 | type: "constant" 1596 | } 1597 | } 1598 | } 1599 | layer { 1600 | name: "Mconv7_stage3_L2" 1601 | type: "Convolution" 1602 | bottom: "Mconv6_stage3_L2" 1603 | top: "Mconv7_stage3_L2" 1604 | param { 1605 | lr_mult: 4.0 1606 | decay_mult: 1 1607 | } 1608 | param { 1609 | lr_mult: 8.0 1610 | decay_mult: 0 1611 | } 1612 | convolution_param { 1613 | num_output: 19 1614 | pad: 0 1615 | kernel_size: 1 1616 | weight_filler { 1617 | type: "gaussian" 1618 | std: 0.01 1619 | } 1620 | bias_filler { 1621 | type: "constant" 1622 | } 1623 | } 1624 | } 1625 | layer { 1626 | name: "concat_stage4" 1627 | type: "Concat" 1628 | bottom: "Mconv7_stage3_L1" 1629 | bottom: "Mconv7_stage3_L2" 1630 | bottom: "conv4_4_CPM" 1631 | top: "concat_stage4" 1632 | concat_param { 1633 | axis: 1 1634 | } 1635 | } 1636 | layer { 1637 | name: "Mconv1_stage4_L1" 1638 | type: "Convolution" 1639 | bottom: "concat_stage4" 1640 | top: "Mconv1_stage4_L1" 1641 | param { 1642 | lr_mult: 4.0 1643 | decay_mult: 1 1644 | } 1645 | param { 1646 | lr_mult: 8.0 1647 | decay_mult: 0 1648 | } 1649 | convolution_param { 1650 | num_output: 128 1651 | pad: 3 1652 | kernel_size: 7 1653 | weight_filler { 1654 | type: "gaussian" 1655 | std: 0.01 1656 | } 1657 | bias_filler { 1658 | type: "constant" 1659 | } 1660 | } 1661 | } 1662 | layer { 1663 | name: "Mrelu1_stage4_L1" 1664 | type: "ReLU" 1665 | bottom: "Mconv1_stage4_L1" 1666 | top: "Mconv1_stage4_L1" 1667 | } 1668 | layer { 1669 | name: "Mconv1_stage4_L2" 1670 | type: "Convolution" 1671 | bottom: "concat_stage4" 1672 | top: "Mconv1_stage4_L2" 1673 | param { 1674 | lr_mult: 4.0 1675 | decay_mult: 1 1676 | } 1677 | param { 1678 | lr_mult: 8.0 1679 | decay_mult: 0 1680 | } 1681 | convolution_param { 1682 | num_output: 128 1683 | pad: 3 1684 | kernel_size: 7 1685 | weight_filler { 1686 | type: "gaussian" 1687 | std: 0.01 1688 | } 1689 | bias_filler { 1690 | type: "constant" 1691 | } 1692 | } 1693 | } 1694 | layer { 1695 | name: "Mrelu1_stage4_L2" 1696 | type: "ReLU" 1697 | bottom: "Mconv1_stage4_L2" 1698 | top: "Mconv1_stage4_L2" 1699 | } 1700 | layer { 1701 | name: "Mconv2_stage4_L1" 1702 | type: "Convolution" 1703 | bottom: "Mconv1_stage4_L1" 1704 | top: "Mconv2_stage4_L1" 1705 | param { 1706 | lr_mult: 4.0 1707 | decay_mult: 1 1708 | } 1709 | param { 1710 | lr_mult: 8.0 1711 | decay_mult: 0 1712 | } 1713 | convolution_param { 1714 | num_output: 128 1715 | pad: 3 1716 | kernel_size: 7 1717 | weight_filler { 1718 | type: "gaussian" 1719 | std: 0.01 1720 | } 1721 | bias_filler { 1722 | type: "constant" 1723 | } 1724 | } 1725 | } 1726 | layer { 1727 | name: "Mrelu2_stage4_L1" 1728 | type: "ReLU" 1729 | bottom: "Mconv2_stage4_L1" 1730 | top: "Mconv2_stage4_L1" 1731 | } 1732 | layer { 1733 | name: 
"Mconv2_stage4_L2" 1734 | type: "Convolution" 1735 | bottom: "Mconv1_stage4_L2" 1736 | top: "Mconv2_stage4_L2" 1737 | param { 1738 | lr_mult: 4.0 1739 | decay_mult: 1 1740 | } 1741 | param { 1742 | lr_mult: 8.0 1743 | decay_mult: 0 1744 | } 1745 | convolution_param { 1746 | num_output: 128 1747 | pad: 3 1748 | kernel_size: 7 1749 | weight_filler { 1750 | type: "gaussian" 1751 | std: 0.01 1752 | } 1753 | bias_filler { 1754 | type: "constant" 1755 | } 1756 | } 1757 | } 1758 | layer { 1759 | name: "Mrelu2_stage4_L2" 1760 | type: "ReLU" 1761 | bottom: "Mconv2_stage4_L2" 1762 | top: "Mconv2_stage4_L2" 1763 | } 1764 | layer { 1765 | name: "Mconv3_stage4_L1" 1766 | type: "Convolution" 1767 | bottom: "Mconv2_stage4_L1" 1768 | top: "Mconv3_stage4_L1" 1769 | param { 1770 | lr_mult: 4.0 1771 | decay_mult: 1 1772 | } 1773 | param { 1774 | lr_mult: 8.0 1775 | decay_mult: 0 1776 | } 1777 | convolution_param { 1778 | num_output: 128 1779 | pad: 3 1780 | kernel_size: 7 1781 | weight_filler { 1782 | type: "gaussian" 1783 | std: 0.01 1784 | } 1785 | bias_filler { 1786 | type: "constant" 1787 | } 1788 | } 1789 | } 1790 | layer { 1791 | name: "Mrelu3_stage4_L1" 1792 | type: "ReLU" 1793 | bottom: "Mconv3_stage4_L1" 1794 | top: "Mconv3_stage4_L1" 1795 | } 1796 | layer { 1797 | name: "Mconv3_stage4_L2" 1798 | type: "Convolution" 1799 | bottom: "Mconv2_stage4_L2" 1800 | top: "Mconv3_stage4_L2" 1801 | param { 1802 | lr_mult: 4.0 1803 | decay_mult: 1 1804 | } 1805 | param { 1806 | lr_mult: 8.0 1807 | decay_mult: 0 1808 | } 1809 | convolution_param { 1810 | num_output: 128 1811 | pad: 3 1812 | kernel_size: 7 1813 | weight_filler { 1814 | type: "gaussian" 1815 | std: 0.01 1816 | } 1817 | bias_filler { 1818 | type: "constant" 1819 | } 1820 | } 1821 | } 1822 | layer { 1823 | name: "Mrelu3_stage4_L2" 1824 | type: "ReLU" 1825 | bottom: "Mconv3_stage4_L2" 1826 | top: "Mconv3_stage4_L2" 1827 | } 1828 | layer { 1829 | name: "Mconv4_stage4_L1" 1830 | type: "Convolution" 1831 | bottom: "Mconv3_stage4_L1" 1832 | top: "Mconv4_stage4_L1" 1833 | param { 1834 | lr_mult: 4.0 1835 | decay_mult: 1 1836 | } 1837 | param { 1838 | lr_mult: 8.0 1839 | decay_mult: 0 1840 | } 1841 | convolution_param { 1842 | num_output: 128 1843 | pad: 3 1844 | kernel_size: 7 1845 | weight_filler { 1846 | type: "gaussian" 1847 | std: 0.01 1848 | } 1849 | bias_filler { 1850 | type: "constant" 1851 | } 1852 | } 1853 | } 1854 | layer { 1855 | name: "Mrelu4_stage4_L1" 1856 | type: "ReLU" 1857 | bottom: "Mconv4_stage4_L1" 1858 | top: "Mconv4_stage4_L1" 1859 | } 1860 | layer { 1861 | name: "Mconv4_stage4_L2" 1862 | type: "Convolution" 1863 | bottom: "Mconv3_stage4_L2" 1864 | top: "Mconv4_stage4_L2" 1865 | param { 1866 | lr_mult: 4.0 1867 | decay_mult: 1 1868 | } 1869 | param { 1870 | lr_mult: 8.0 1871 | decay_mult: 0 1872 | } 1873 | convolution_param { 1874 | num_output: 128 1875 | pad: 3 1876 | kernel_size: 7 1877 | weight_filler { 1878 | type: "gaussian" 1879 | std: 0.01 1880 | } 1881 | bias_filler { 1882 | type: "constant" 1883 | } 1884 | } 1885 | } 1886 | layer { 1887 | name: "Mrelu4_stage4_L2" 1888 | type: "ReLU" 1889 | bottom: "Mconv4_stage4_L2" 1890 | top: "Mconv4_stage4_L2" 1891 | } 1892 | layer { 1893 | name: "Mconv5_stage4_L1" 1894 | type: "Convolution" 1895 | bottom: "Mconv4_stage4_L1" 1896 | top: "Mconv5_stage4_L1" 1897 | param { 1898 | lr_mult: 4.0 1899 | decay_mult: 1 1900 | } 1901 | param { 1902 | lr_mult: 8.0 1903 | decay_mult: 0 1904 | } 1905 | convolution_param { 1906 | num_output: 128 1907 | pad: 3 1908 | kernel_size: 7 1909 | weight_filler { 
1910 | type: "gaussian" 1911 | std: 0.01 1912 | } 1913 | bias_filler { 1914 | type: "constant" 1915 | } 1916 | } 1917 | } 1918 | layer { 1919 | name: "Mrelu5_stage4_L1" 1920 | type: "ReLU" 1921 | bottom: "Mconv5_stage4_L1" 1922 | top: "Mconv5_stage4_L1" 1923 | } 1924 | layer { 1925 | name: "Mconv5_stage4_L2" 1926 | type: "Convolution" 1927 | bottom: "Mconv4_stage4_L2" 1928 | top: "Mconv5_stage4_L2" 1929 | param { 1930 | lr_mult: 4.0 1931 | decay_mult: 1 1932 | } 1933 | param { 1934 | lr_mult: 8.0 1935 | decay_mult: 0 1936 | } 1937 | convolution_param { 1938 | num_output: 128 1939 | pad: 3 1940 | kernel_size: 7 1941 | weight_filler { 1942 | type: "gaussian" 1943 | std: 0.01 1944 | } 1945 | bias_filler { 1946 | type: "constant" 1947 | } 1948 | } 1949 | } 1950 | layer { 1951 | name: "Mrelu5_stage4_L2" 1952 | type: "ReLU" 1953 | bottom: "Mconv5_stage4_L2" 1954 | top: "Mconv5_stage4_L2" 1955 | } 1956 | layer { 1957 | name: "Mconv6_stage4_L1" 1958 | type: "Convolution" 1959 | bottom: "Mconv5_stage4_L1" 1960 | top: "Mconv6_stage4_L1" 1961 | param { 1962 | lr_mult: 4.0 1963 | decay_mult: 1 1964 | } 1965 | param { 1966 | lr_mult: 8.0 1967 | decay_mult: 0 1968 | } 1969 | convolution_param { 1970 | num_output: 128 1971 | pad: 0 1972 | kernel_size: 1 1973 | weight_filler { 1974 | type: "gaussian" 1975 | std: 0.01 1976 | } 1977 | bias_filler { 1978 | type: "constant" 1979 | } 1980 | } 1981 | } 1982 | layer { 1983 | name: "Mrelu6_stage4_L1" 1984 | type: "ReLU" 1985 | bottom: "Mconv6_stage4_L1" 1986 | top: "Mconv6_stage4_L1" 1987 | } 1988 | layer { 1989 | name: "Mconv6_stage4_L2" 1990 | type: "Convolution" 1991 | bottom: "Mconv5_stage4_L2" 1992 | top: "Mconv6_stage4_L2" 1993 | param { 1994 | lr_mult: 4.0 1995 | decay_mult: 1 1996 | } 1997 | param { 1998 | lr_mult: 8.0 1999 | decay_mult: 0 2000 | } 2001 | convolution_param { 2002 | num_output: 128 2003 | pad: 0 2004 | kernel_size: 1 2005 | weight_filler { 2006 | type: "gaussian" 2007 | std: 0.01 2008 | } 2009 | bias_filler { 2010 | type: "constant" 2011 | } 2012 | } 2013 | } 2014 | layer { 2015 | name: "Mrelu6_stage4_L2" 2016 | type: "ReLU" 2017 | bottom: "Mconv6_stage4_L2" 2018 | top: "Mconv6_stage4_L2" 2019 | } 2020 | layer { 2021 | name: "Mconv7_stage4_L1" 2022 | type: "Convolution" 2023 | bottom: "Mconv6_stage4_L1" 2024 | top: "Mconv7_stage4_L1" 2025 | param { 2026 | lr_mult: 4.0 2027 | decay_mult: 1 2028 | } 2029 | param { 2030 | lr_mult: 8.0 2031 | decay_mult: 0 2032 | } 2033 | convolution_param { 2034 | num_output: 38 2035 | pad: 0 2036 | kernel_size: 1 2037 | weight_filler { 2038 | type: "gaussian" 2039 | std: 0.01 2040 | } 2041 | bias_filler { 2042 | type: "constant" 2043 | } 2044 | } 2045 | } 2046 | layer { 2047 | name: "Mconv7_stage4_L2" 2048 | type: "Convolution" 2049 | bottom: "Mconv6_stage4_L2" 2050 | top: "Mconv7_stage4_L2" 2051 | param { 2052 | lr_mult: 4.0 2053 | decay_mult: 1 2054 | } 2055 | param { 2056 | lr_mult: 8.0 2057 | decay_mult: 0 2058 | } 2059 | convolution_param { 2060 | num_output: 19 2061 | pad: 0 2062 | kernel_size: 1 2063 | weight_filler { 2064 | type: "gaussian" 2065 | std: 0.01 2066 | } 2067 | bias_filler { 2068 | type: "constant" 2069 | } 2070 | } 2071 | } 2072 | layer { 2073 | name: "concat_stage5" 2074 | type: "Concat" 2075 | bottom: "Mconv7_stage4_L1" 2076 | bottom: "Mconv7_stage4_L2" 2077 | bottom: "conv4_4_CPM" 2078 | top: "concat_stage5" 2079 | concat_param { 2080 | axis: 1 2081 | } 2082 | } 2083 | layer { 2084 | name: "Mconv1_stage5_L1" 2085 | type: "Convolution" 2086 | bottom: "concat_stage5" 2087 | top: 
"Mconv1_stage5_L1" 2088 | param { 2089 | lr_mult: 4.0 2090 | decay_mult: 1 2091 | } 2092 | param { 2093 | lr_mult: 8.0 2094 | decay_mult: 0 2095 | } 2096 | convolution_param { 2097 | num_output: 128 2098 | pad: 3 2099 | kernel_size: 7 2100 | weight_filler { 2101 | type: "gaussian" 2102 | std: 0.01 2103 | } 2104 | bias_filler { 2105 | type: "constant" 2106 | } 2107 | } 2108 | } 2109 | layer { 2110 | name: "Mrelu1_stage5_L1" 2111 | type: "ReLU" 2112 | bottom: "Mconv1_stage5_L1" 2113 | top: "Mconv1_stage5_L1" 2114 | } 2115 | layer { 2116 | name: "Mconv1_stage5_L2" 2117 | type: "Convolution" 2118 | bottom: "concat_stage5" 2119 | top: "Mconv1_stage5_L2" 2120 | param { 2121 | lr_mult: 4.0 2122 | decay_mult: 1 2123 | } 2124 | param { 2125 | lr_mult: 8.0 2126 | decay_mult: 0 2127 | } 2128 | convolution_param { 2129 | num_output: 128 2130 | pad: 3 2131 | kernel_size: 7 2132 | weight_filler { 2133 | type: "gaussian" 2134 | std: 0.01 2135 | } 2136 | bias_filler { 2137 | type: "constant" 2138 | } 2139 | } 2140 | } 2141 | layer { 2142 | name: "Mrelu1_stage5_L2" 2143 | type: "ReLU" 2144 | bottom: "Mconv1_stage5_L2" 2145 | top: "Mconv1_stage5_L2" 2146 | } 2147 | layer { 2148 | name: "Mconv2_stage5_L1" 2149 | type: "Convolution" 2150 | bottom: "Mconv1_stage5_L1" 2151 | top: "Mconv2_stage5_L1" 2152 | param { 2153 | lr_mult: 4.0 2154 | decay_mult: 1 2155 | } 2156 | param { 2157 | lr_mult: 8.0 2158 | decay_mult: 0 2159 | } 2160 | convolution_param { 2161 | num_output: 128 2162 | pad: 3 2163 | kernel_size: 7 2164 | weight_filler { 2165 | type: "gaussian" 2166 | std: 0.01 2167 | } 2168 | bias_filler { 2169 | type: "constant" 2170 | } 2171 | } 2172 | } 2173 | layer { 2174 | name: "Mrelu2_stage5_L1" 2175 | type: "ReLU" 2176 | bottom: "Mconv2_stage5_L1" 2177 | top: "Mconv2_stage5_L1" 2178 | } 2179 | layer { 2180 | name: "Mconv2_stage5_L2" 2181 | type: "Convolution" 2182 | bottom: "Mconv1_stage5_L2" 2183 | top: "Mconv2_stage5_L2" 2184 | param { 2185 | lr_mult: 4.0 2186 | decay_mult: 1 2187 | } 2188 | param { 2189 | lr_mult: 8.0 2190 | decay_mult: 0 2191 | } 2192 | convolution_param { 2193 | num_output: 128 2194 | pad: 3 2195 | kernel_size: 7 2196 | weight_filler { 2197 | type: "gaussian" 2198 | std: 0.01 2199 | } 2200 | bias_filler { 2201 | type: "constant" 2202 | } 2203 | } 2204 | } 2205 | layer { 2206 | name: "Mrelu2_stage5_L2" 2207 | type: "ReLU" 2208 | bottom: "Mconv2_stage5_L2" 2209 | top: "Mconv2_stage5_L2" 2210 | } 2211 | layer { 2212 | name: "Mconv3_stage5_L1" 2213 | type: "Convolution" 2214 | bottom: "Mconv2_stage5_L1" 2215 | top: "Mconv3_stage5_L1" 2216 | param { 2217 | lr_mult: 4.0 2218 | decay_mult: 1 2219 | } 2220 | param { 2221 | lr_mult: 8.0 2222 | decay_mult: 0 2223 | } 2224 | convolution_param { 2225 | num_output: 128 2226 | pad: 3 2227 | kernel_size: 7 2228 | weight_filler { 2229 | type: "gaussian" 2230 | std: 0.01 2231 | } 2232 | bias_filler { 2233 | type: "constant" 2234 | } 2235 | } 2236 | } 2237 | layer { 2238 | name: "Mrelu3_stage5_L1" 2239 | type: "ReLU" 2240 | bottom: "Mconv3_stage5_L1" 2241 | top: "Mconv3_stage5_L1" 2242 | } 2243 | layer { 2244 | name: "Mconv3_stage5_L2" 2245 | type: "Convolution" 2246 | bottom: "Mconv2_stage5_L2" 2247 | top: "Mconv3_stage5_L2" 2248 | param { 2249 | lr_mult: 4.0 2250 | decay_mult: 1 2251 | } 2252 | param { 2253 | lr_mult: 8.0 2254 | decay_mult: 0 2255 | } 2256 | convolution_param { 2257 | num_output: 128 2258 | pad: 3 2259 | kernel_size: 7 2260 | weight_filler { 2261 | type: "gaussian" 2262 | std: 0.01 2263 | } 2264 | bias_filler { 2265 | type: "constant" 
2266 | } 2267 | } 2268 | } 2269 | layer { 2270 | name: "Mrelu3_stage5_L2" 2271 | type: "ReLU" 2272 | bottom: "Mconv3_stage5_L2" 2273 | top: "Mconv3_stage5_L2" 2274 | } 2275 | layer { 2276 | name: "Mconv4_stage5_L1" 2277 | type: "Convolution" 2278 | bottom: "Mconv3_stage5_L1" 2279 | top: "Mconv4_stage5_L1" 2280 | param { 2281 | lr_mult: 4.0 2282 | decay_mult: 1 2283 | } 2284 | param { 2285 | lr_mult: 8.0 2286 | decay_mult: 0 2287 | } 2288 | convolution_param { 2289 | num_output: 128 2290 | pad: 3 2291 | kernel_size: 7 2292 | weight_filler { 2293 | type: "gaussian" 2294 | std: 0.01 2295 | } 2296 | bias_filler { 2297 | type: "constant" 2298 | } 2299 | } 2300 | } 2301 | layer { 2302 | name: "Mrelu4_stage5_L1" 2303 | type: "ReLU" 2304 | bottom: "Mconv4_stage5_L1" 2305 | top: "Mconv4_stage5_L1" 2306 | } 2307 | layer { 2308 | name: "Mconv4_stage5_L2" 2309 | type: "Convolution" 2310 | bottom: "Mconv3_stage5_L2" 2311 | top: "Mconv4_stage5_L2" 2312 | param { 2313 | lr_mult: 4.0 2314 | decay_mult: 1 2315 | } 2316 | param { 2317 | lr_mult: 8.0 2318 | decay_mult: 0 2319 | } 2320 | convolution_param { 2321 | num_output: 128 2322 | pad: 3 2323 | kernel_size: 7 2324 | weight_filler { 2325 | type: "gaussian" 2326 | std: 0.01 2327 | } 2328 | bias_filler { 2329 | type: "constant" 2330 | } 2331 | } 2332 | } 2333 | layer { 2334 | name: "Mrelu4_stage5_L2" 2335 | type: "ReLU" 2336 | bottom: "Mconv4_stage5_L2" 2337 | top: "Mconv4_stage5_L2" 2338 | } 2339 | layer { 2340 | name: "Mconv5_stage5_L1" 2341 | type: "Convolution" 2342 | bottom: "Mconv4_stage5_L1" 2343 | top: "Mconv5_stage5_L1" 2344 | param { 2345 | lr_mult: 4.0 2346 | decay_mult: 1 2347 | } 2348 | param { 2349 | lr_mult: 8.0 2350 | decay_mult: 0 2351 | } 2352 | convolution_param { 2353 | num_output: 128 2354 | pad: 3 2355 | kernel_size: 7 2356 | weight_filler { 2357 | type: "gaussian" 2358 | std: 0.01 2359 | } 2360 | bias_filler { 2361 | type: "constant" 2362 | } 2363 | } 2364 | } 2365 | layer { 2366 | name: "Mrelu5_stage5_L1" 2367 | type: "ReLU" 2368 | bottom: "Mconv5_stage5_L1" 2369 | top: "Mconv5_stage5_L1" 2370 | } 2371 | layer { 2372 | name: "Mconv5_stage5_L2" 2373 | type: "Convolution" 2374 | bottom: "Mconv4_stage5_L2" 2375 | top: "Mconv5_stage5_L2" 2376 | param { 2377 | lr_mult: 4.0 2378 | decay_mult: 1 2379 | } 2380 | param { 2381 | lr_mult: 8.0 2382 | decay_mult: 0 2383 | } 2384 | convolution_param { 2385 | num_output: 128 2386 | pad: 3 2387 | kernel_size: 7 2388 | weight_filler { 2389 | type: "gaussian" 2390 | std: 0.01 2391 | } 2392 | bias_filler { 2393 | type: "constant" 2394 | } 2395 | } 2396 | } 2397 | layer { 2398 | name: "Mrelu5_stage5_L2" 2399 | type: "ReLU" 2400 | bottom: "Mconv5_stage5_L2" 2401 | top: "Mconv5_stage5_L2" 2402 | } 2403 | layer { 2404 | name: "Mconv6_stage5_L1" 2405 | type: "Convolution" 2406 | bottom: "Mconv5_stage5_L1" 2407 | top: "Mconv6_stage5_L1" 2408 | param { 2409 | lr_mult: 4.0 2410 | decay_mult: 1 2411 | } 2412 | param { 2413 | lr_mult: 8.0 2414 | decay_mult: 0 2415 | } 2416 | convolution_param { 2417 | num_output: 128 2418 | pad: 0 2419 | kernel_size: 1 2420 | weight_filler { 2421 | type: "gaussian" 2422 | std: 0.01 2423 | } 2424 | bias_filler { 2425 | type: "constant" 2426 | } 2427 | } 2428 | } 2429 | layer { 2430 | name: "Mrelu6_stage5_L1" 2431 | type: "ReLU" 2432 | bottom: "Mconv6_stage5_L1" 2433 | top: "Mconv6_stage5_L1" 2434 | } 2435 | layer { 2436 | name: "Mconv6_stage5_L2" 2437 | type: "Convolution" 2438 | bottom: "Mconv5_stage5_L2" 2439 | top: "Mconv6_stage5_L2" 2440 | param { 2441 | lr_mult: 4.0 2442 | 
decay_mult: 1 2443 | } 2444 | param { 2445 | lr_mult: 8.0 2446 | decay_mult: 0 2447 | } 2448 | convolution_param { 2449 | num_output: 128 2450 | pad: 0 2451 | kernel_size: 1 2452 | weight_filler { 2453 | type: "gaussian" 2454 | std: 0.01 2455 | } 2456 | bias_filler { 2457 | type: "constant" 2458 | } 2459 | } 2460 | } 2461 | layer { 2462 | name: "Mrelu6_stage5_L2" 2463 | type: "ReLU" 2464 | bottom: "Mconv6_stage5_L2" 2465 | top: "Mconv6_stage5_L2" 2466 | } 2467 | layer { 2468 | name: "Mconv7_stage5_L1" 2469 | type: "Convolution" 2470 | bottom: "Mconv6_stage5_L1" 2471 | top: "Mconv7_stage5_L1" 2472 | param { 2473 | lr_mult: 4.0 2474 | decay_mult: 1 2475 | } 2476 | param { 2477 | lr_mult: 8.0 2478 | decay_mult: 0 2479 | } 2480 | convolution_param { 2481 | num_output: 38 2482 | pad: 0 2483 | kernel_size: 1 2484 | weight_filler { 2485 | type: "gaussian" 2486 | std: 0.01 2487 | } 2488 | bias_filler { 2489 | type: "constant" 2490 | } 2491 | } 2492 | } 2493 | layer { 2494 | name: "Mconv7_stage5_L2" 2495 | type: "Convolution" 2496 | bottom: "Mconv6_stage5_L2" 2497 | top: "Mconv7_stage5_L2" 2498 | param { 2499 | lr_mult: 4.0 2500 | decay_mult: 1 2501 | } 2502 | param { 2503 | lr_mult: 8.0 2504 | decay_mult: 0 2505 | } 2506 | convolution_param { 2507 | num_output: 19 2508 | pad: 0 2509 | kernel_size: 1 2510 | weight_filler { 2511 | type: "gaussian" 2512 | std: 0.01 2513 | } 2514 | bias_filler { 2515 | type: "constant" 2516 | } 2517 | } 2518 | } 2519 | layer { 2520 | name: "concat_stage6" 2521 | type: "Concat" 2522 | bottom: "Mconv7_stage5_L1" 2523 | bottom: "Mconv7_stage5_L2" 2524 | bottom: "conv4_4_CPM" 2525 | top: "concat_stage6" 2526 | concat_param { 2527 | axis: 1 2528 | } 2529 | } 2530 | layer { 2531 | name: "Mconv1_stage6_L1" 2532 | type: "Convolution" 2533 | bottom: "concat_stage6" 2534 | top: "Mconv1_stage6_L1" 2535 | param { 2536 | lr_mult: 4.0 2537 | decay_mult: 1 2538 | } 2539 | param { 2540 | lr_mult: 8.0 2541 | decay_mult: 0 2542 | } 2543 | convolution_param { 2544 | num_output: 128 2545 | pad: 3 2546 | kernel_size: 7 2547 | weight_filler { 2548 | type: "gaussian" 2549 | std: 0.01 2550 | } 2551 | bias_filler { 2552 | type: "constant" 2553 | } 2554 | } 2555 | } 2556 | layer { 2557 | name: "Mrelu1_stage6_L1" 2558 | type: "ReLU" 2559 | bottom: "Mconv1_stage6_L1" 2560 | top: "Mconv1_stage6_L1" 2561 | } 2562 | layer { 2563 | name: "Mconv1_stage6_L2" 2564 | type: "Convolution" 2565 | bottom: "concat_stage6" 2566 | top: "Mconv1_stage6_L2" 2567 | param { 2568 | lr_mult: 4.0 2569 | decay_mult: 1 2570 | } 2571 | param { 2572 | lr_mult: 8.0 2573 | decay_mult: 0 2574 | } 2575 | convolution_param { 2576 | num_output: 128 2577 | pad: 3 2578 | kernel_size: 7 2579 | weight_filler { 2580 | type: "gaussian" 2581 | std: 0.01 2582 | } 2583 | bias_filler { 2584 | type: "constant" 2585 | } 2586 | } 2587 | } 2588 | layer { 2589 | name: "Mrelu1_stage6_L2" 2590 | type: "ReLU" 2591 | bottom: "Mconv1_stage6_L2" 2592 | top: "Mconv1_stage6_L2" 2593 | } 2594 | layer { 2595 | name: "Mconv2_stage6_L1" 2596 | type: "Convolution" 2597 | bottom: "Mconv1_stage6_L1" 2598 | top: "Mconv2_stage6_L1" 2599 | param { 2600 | lr_mult: 4.0 2601 | decay_mult: 1 2602 | } 2603 | param { 2604 | lr_mult: 8.0 2605 | decay_mult: 0 2606 | } 2607 | convolution_param { 2608 | num_output: 128 2609 | pad: 3 2610 | kernel_size: 7 2611 | weight_filler { 2612 | type: "gaussian" 2613 | std: 0.01 2614 | } 2615 | bias_filler { 2616 | type: "constant" 2617 | } 2618 | } 2619 | } 2620 | layer { 2621 | name: "Mrelu2_stage6_L1" 2622 | type: "ReLU" 2623 | 
bottom: "Mconv2_stage6_L1" 2624 | top: "Mconv2_stage6_L1" 2625 | } 2626 | layer { 2627 | name: "Mconv2_stage6_L2" 2628 | type: "Convolution" 2629 | bottom: "Mconv1_stage6_L2" 2630 | top: "Mconv2_stage6_L2" 2631 | param { 2632 | lr_mult: 4.0 2633 | decay_mult: 1 2634 | } 2635 | param { 2636 | lr_mult: 8.0 2637 | decay_mult: 0 2638 | } 2639 | convolution_param { 2640 | num_output: 128 2641 | pad: 3 2642 | kernel_size: 7 2643 | weight_filler { 2644 | type: "gaussian" 2645 | std: 0.01 2646 | } 2647 | bias_filler { 2648 | type: "constant" 2649 | } 2650 | } 2651 | } 2652 | layer { 2653 | name: "Mrelu2_stage6_L2" 2654 | type: "ReLU" 2655 | bottom: "Mconv2_stage6_L2" 2656 | top: "Mconv2_stage6_L2" 2657 | } 2658 | layer { 2659 | name: "Mconv3_stage6_L1" 2660 | type: "Convolution" 2661 | bottom: "Mconv2_stage6_L1" 2662 | top: "Mconv3_stage6_L1" 2663 | param { 2664 | lr_mult: 4.0 2665 | decay_mult: 1 2666 | } 2667 | param { 2668 | lr_mult: 8.0 2669 | decay_mult: 0 2670 | } 2671 | convolution_param { 2672 | num_output: 128 2673 | pad: 3 2674 | kernel_size: 7 2675 | weight_filler { 2676 | type: "gaussian" 2677 | std: 0.01 2678 | } 2679 | bias_filler { 2680 | type: "constant" 2681 | } 2682 | } 2683 | } 2684 | layer { 2685 | name: "Mrelu3_stage6_L1" 2686 | type: "ReLU" 2687 | bottom: "Mconv3_stage6_L1" 2688 | top: "Mconv3_stage6_L1" 2689 | } 2690 | layer { 2691 | name: "Mconv3_stage6_L2" 2692 | type: "Convolution" 2693 | bottom: "Mconv2_stage6_L2" 2694 | top: "Mconv3_stage6_L2" 2695 | param { 2696 | lr_mult: 4.0 2697 | decay_mult: 1 2698 | } 2699 | param { 2700 | lr_mult: 8.0 2701 | decay_mult: 0 2702 | } 2703 | convolution_param { 2704 | num_output: 128 2705 | pad: 3 2706 | kernel_size: 7 2707 | weight_filler { 2708 | type: "gaussian" 2709 | std: 0.01 2710 | } 2711 | bias_filler { 2712 | type: "constant" 2713 | } 2714 | } 2715 | } 2716 | layer { 2717 | name: "Mrelu3_stage6_L2" 2718 | type: "ReLU" 2719 | bottom: "Mconv3_stage6_L2" 2720 | top: "Mconv3_stage6_L2" 2721 | } 2722 | layer { 2723 | name: "Mconv4_stage6_L1" 2724 | type: "Convolution" 2725 | bottom: "Mconv3_stage6_L1" 2726 | top: "Mconv4_stage6_L1" 2727 | param { 2728 | lr_mult: 4.0 2729 | decay_mult: 1 2730 | } 2731 | param { 2732 | lr_mult: 8.0 2733 | decay_mult: 0 2734 | } 2735 | convolution_param { 2736 | num_output: 128 2737 | pad: 3 2738 | kernel_size: 7 2739 | weight_filler { 2740 | type: "gaussian" 2741 | std: 0.01 2742 | } 2743 | bias_filler { 2744 | type: "constant" 2745 | } 2746 | } 2747 | } 2748 | layer { 2749 | name: "Mrelu4_stage6_L1" 2750 | type: "ReLU" 2751 | bottom: "Mconv4_stage6_L1" 2752 | top: "Mconv4_stage6_L1" 2753 | } 2754 | layer { 2755 | name: "Mconv4_stage6_L2" 2756 | type: "Convolution" 2757 | bottom: "Mconv3_stage6_L2" 2758 | top: "Mconv4_stage6_L2" 2759 | param { 2760 | lr_mult: 4.0 2761 | decay_mult: 1 2762 | } 2763 | param { 2764 | lr_mult: 8.0 2765 | decay_mult: 0 2766 | } 2767 | convolution_param { 2768 | num_output: 128 2769 | pad: 3 2770 | kernel_size: 7 2771 | weight_filler { 2772 | type: "gaussian" 2773 | std: 0.01 2774 | } 2775 | bias_filler { 2776 | type: "constant" 2777 | } 2778 | } 2779 | } 2780 | layer { 2781 | name: "Mrelu4_stage6_L2" 2782 | type: "ReLU" 2783 | bottom: "Mconv4_stage6_L2" 2784 | top: "Mconv4_stage6_L2" 2785 | } 2786 | layer { 2787 | name: "Mconv5_stage6_L1" 2788 | type: "Convolution" 2789 | bottom: "Mconv4_stage6_L1" 2790 | top: "Mconv5_stage6_L1" 2791 | param { 2792 | lr_mult: 4.0 2793 | decay_mult: 1 2794 | } 2795 | param { 2796 | lr_mult: 8.0 2797 | decay_mult: 0 2798 | } 2799 | 
convolution_param { 2800 | num_output: 128 2801 | pad: 3 2802 | kernel_size: 7 2803 | weight_filler { 2804 | type: "gaussian" 2805 | std: 0.01 2806 | } 2807 | bias_filler { 2808 | type: "constant" 2809 | } 2810 | } 2811 | } 2812 | layer { 2813 | name: "Mrelu5_stage6_L1" 2814 | type: "ReLU" 2815 | bottom: "Mconv5_stage6_L1" 2816 | top: "Mconv5_stage6_L1" 2817 | } 2818 | layer { 2819 | name: "Mconv5_stage6_L2" 2820 | type: "Convolution" 2821 | bottom: "Mconv4_stage6_L2" 2822 | top: "Mconv5_stage6_L2" 2823 | param { 2824 | lr_mult: 4.0 2825 | decay_mult: 1 2826 | } 2827 | param { 2828 | lr_mult: 8.0 2829 | decay_mult: 0 2830 | } 2831 | convolution_param { 2832 | num_output: 128 2833 | pad: 3 2834 | kernel_size: 7 2835 | weight_filler { 2836 | type: "gaussian" 2837 | std: 0.01 2838 | } 2839 | bias_filler { 2840 | type: "constant" 2841 | } 2842 | } 2843 | } 2844 | layer { 2845 | name: "Mrelu5_stage6_L2" 2846 | type: "ReLU" 2847 | bottom: "Mconv5_stage6_L2" 2848 | top: "Mconv5_stage6_L2" 2849 | } 2850 | layer { 2851 | name: "Mconv6_stage6_L1" 2852 | type: "Convolution" 2853 | bottom: "Mconv5_stage6_L1" 2854 | top: "Mconv6_stage6_L1" 2855 | param { 2856 | lr_mult: 4.0 2857 | decay_mult: 1 2858 | } 2859 | param { 2860 | lr_mult: 8.0 2861 | decay_mult: 0 2862 | } 2863 | convolution_param { 2864 | num_output: 128 2865 | pad: 0 2866 | kernel_size: 1 2867 | weight_filler { 2868 | type: "gaussian" 2869 | std: 0.01 2870 | } 2871 | bias_filler { 2872 | type: "constant" 2873 | } 2874 | } 2875 | } 2876 | layer { 2877 | name: "Mrelu6_stage6_L1" 2878 | type: "ReLU" 2879 | bottom: "Mconv6_stage6_L1" 2880 | top: "Mconv6_stage6_L1" 2881 | } 2882 | layer { 2883 | name: "Mconv6_stage6_L2" 2884 | type: "Convolution" 2885 | bottom: "Mconv5_stage6_L2" 2886 | top: "Mconv6_stage6_L2" 2887 | param { 2888 | lr_mult: 4.0 2889 | decay_mult: 1 2890 | } 2891 | param { 2892 | lr_mult: 8.0 2893 | decay_mult: 0 2894 | } 2895 | convolution_param { 2896 | num_output: 128 2897 | pad: 0 2898 | kernel_size: 1 2899 | weight_filler { 2900 | type: "gaussian" 2901 | std: 0.01 2902 | } 2903 | bias_filler { 2904 | type: "constant" 2905 | } 2906 | } 2907 | } 2908 | layer { 2909 | name: "Mrelu6_stage6_L2" 2910 | type: "ReLU" 2911 | bottom: "Mconv6_stage6_L2" 2912 | top: "Mconv6_stage6_L2" 2913 | } 2914 | layer { 2915 | name: "Mconv7_stage6_L1" 2916 | type: "Convolution" 2917 | bottom: "Mconv6_stage6_L1" 2918 | top: "Mconv7_stage6_L1" 2919 | param { 2920 | lr_mult: 4.0 2921 | decay_mult: 1 2922 | } 2923 | param { 2924 | lr_mult: 8.0 2925 | decay_mult: 0 2926 | } 2927 | convolution_param { 2928 | num_output: 38 2929 | pad: 0 2930 | kernel_size: 1 2931 | weight_filler { 2932 | type: "gaussian" 2933 | std: 0.01 2934 | } 2935 | bias_filler { 2936 | type: "constant" 2937 | } 2938 | } 2939 | } 2940 | layer { 2941 | name: "Mconv7_stage6_L2" 2942 | type: "Convolution" 2943 | bottom: "Mconv6_stage6_L2" 2944 | top: "Mconv7_stage6_L2" 2945 | param { 2946 | lr_mult: 4.0 2947 | decay_mult: 1 2948 | } 2949 | param { 2950 | lr_mult: 8.0 2951 | decay_mult: 0 2952 | } 2953 | convolution_param { 2954 | num_output: 19 2955 | pad: 0 2956 | kernel_size: 1 2957 | weight_filler { 2958 | type: "gaussian" 2959 | std: 0.01 2960 | } 2961 | bias_filler { 2962 | type: "constant" 2963 | } 2964 | } 2965 | } 2966 | layer { 2967 | name: "concat_stage7" 2968 | type: "Concat" 2969 | bottom: "Mconv7_stage6_L2" 2970 | bottom: "Mconv7_stage6_L1" 2971 | # top: "concat_stage7" 2972 | top: "net_output" 2973 | concat_param { 2974 | axis: 1 2975 | } 2976 | } 2977 | 
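A brief note on the blob this deploy file produces (an editorial sketch, not a file from the repository): the final `concat_stage7` layer above concatenates the stage-6 `Mconv7_stage6_L2` output (19 channels) with the `Mconv7_stage6_L1` output (38 channels) along the channel axis into a single 57-channel blob named `net_output`. Following the usual OpenPose COCO convention, the 19 L2 channels are read as part-confidence heatmaps (18 keypoints plus background) and the 38 L1 channels as part-affinity fields; that interpretation, the weight-file name, and the image path below are assumptions rather than facts stated in the prototxt.

```python
# Minimal sketch: run the deploy network with OpenCV's DNN module and split
# net_output into heatmaps and part-affinity fields. The channel counts
# (19 + 38, L2 concatenated first) come from the prototxt above; the
# .caffemodel name and the input image are placeholders.
import cv2

net = cv2.dnn.readNetFromCaffe(
    'models/pose/coco/pose_deploy_linevec.prototxt',
    'models/pose/coco/pose_iter_440000.caffemodel')   # assumed COCO weights
image = cv2.imread('frame.jpg')                        # placeholder input frame
blob = cv2.dnn.blobFromImage(image, 1.0 / 255, (368, 368),
                             (0, 0, 0), swapRB=False, crop=False)
net.setInput(blob)
out = net.forward()          # shape (1, 57, H_out, W_out)

heatmaps = out[0, :19]       # L2 branch: 18 keypoint maps + 1 background map
pafs = out[0, 19:]           # L1 branch: 38 part-affinity-field channels
print(heatmaps.shape, pafs.shape)
```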
-------------------------------------------------------------------------------- /net/__init__.py: -------------------------------------------------------------------------------- 1 | from . import utils -------------------------------------------------------------------------------- /net/st_gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | from net.utils.tgcn import ConvTemporalGraphical 7 | from net.utils.graph import Graph 8 | 9 | class Model(nn.Module): 10 | r"""Spatial temporal graph convolutional networks. 11 | 12 | Args: 13 | in_channels (int): Number of channels in the input data 14 | num_class (int): Number of classes for the classification task 15 | graph_args (dict): The arguments for building the graph 16 | edge_importance_weighting (bool): If ``True``, adds a learnable 17 | importance weighting to the edges of the graph 18 | **kwargs (optional): Other parameters for graph convolution units 19 | 20 | Shape: 21 | - Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})` 22 | - Output: :math:`(N, num_class)` where 23 | :math:`N` is a batch size, 24 | :math:`T_{in}` is a length of input sequence, 25 | :math:`V_{in}` is the number of graph nodes, 26 | :math:`M_{in}` is the number of instance in a frame. 27 | """ 28 | 29 | def __init__(self, in_channels, num_class, graph_args, 30 | edge_importance_weighting, **kwargs): 31 | super().__init__() 32 | 33 | # load graph 34 | self.graph = Graph(**graph_args) 35 | A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False) 36 | self.register_buffer('A', A) 37 | 38 | # build networks 39 | spatial_kernel_size = A.size(0) 40 | temporal_kernel_size = 9 41 | kernel_size = (temporal_kernel_size, spatial_kernel_size) 42 | self.data_bn = nn.BatchNorm1d(in_channels * A.size(1)) 43 | kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'} 44 | self.st_gcn_networks = nn.ModuleList(( 45 | st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0), 46 | st_gcn(64, 64, kernel_size, 1, **kwargs), 47 | st_gcn(64, 64, kernel_size, 1, **kwargs), 48 | st_gcn(64, 64, kernel_size, 1, **kwargs), 49 | st_gcn(64, 128, kernel_size, 2, **kwargs), 50 | st_gcn(128, 128, kernel_size, 1, **kwargs), 51 | st_gcn(128, 128, kernel_size, 1, **kwargs), 52 | st_gcn(128, 256, kernel_size, 2, **kwargs), 53 | st_gcn(256, 256, kernel_size, 1, **kwargs), 54 | st_gcn(256, 256, kernel_size, 1, **kwargs), 55 | )) 56 | 57 | # initialize parameters for edge importance weighting 58 | if edge_importance_weighting: 59 | self.edge_importance = nn.ParameterList([ 60 | nn.Parameter(torch.ones(self.A.size())) 61 | for i in self.st_gcn_networks 62 | ]) 63 | else: 64 | self.edge_importance = [1] * len(self.st_gcn_networks) 65 | 66 | # fcn for prediction 67 | self.fcn = nn.Conv2d(256, num_class, kernel_size=1) 68 | 69 | def forward(self, x): 70 | 71 | # data normalization 72 | N, C, T, V, M = x.size() 73 | x = x.permute(0, 4, 3, 1, 2).contiguous() 74 | x = x.view(N * M, V * C, T) 75 | x = self.data_bn(x) 76 | x = x.view(N, M, V, C, T) 77 | x = x.permute(0, 1, 3, 4, 2).contiguous() 78 | x = x.view(N * M, C, T, V) 79 | 80 | # forwad 81 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 82 | x, _ = gcn(x, self.A * importance) 83 | 84 | # global pooling 85 | x = F.avg_pool2d(x, x.size()[2:]) 86 | x = x.view(N, M, -1, 1, 1).mean(dim=1) 87 | 88 | # prediction 89 | x = self.fcn(x) 90 | x = 
x.view(x.size(0), -1) 91 | 92 | return x 93 | 94 | def extract_feature(self, x): 95 | 96 | # data normalization 97 | N, C, T, V, M = x.size() 98 | x = x.permute(0, 4, 3, 1, 2).contiguous() 99 | x = x.view(N * M, V * C, T) 100 | x = self.data_bn(x) 101 | x = x.view(N, M, V, C, T) 102 | x = x.permute(0, 1, 3, 4, 2).contiguous() 103 | x = x.view(N * M, C, T, V) 104 | 105 | # forward 106 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 107 | x, _ = gcn(x, self.A * importance) 108 | 109 | _, c, t, v = x.size() 110 | feature = x.view(N, M, c, t, v).permute(0, 2, 3, 4, 1) 111 | 112 | # prediction 113 | x = self.fcn(x) 114 | output = x.view(N, M, -1, t, v).permute(0, 2, 3, 4, 1) 115 | 116 | return output, feature 117 | 118 | class st_gcn(nn.Module): 119 | r"""Applies a spatial temporal graph convolution over an input graph sequence. 120 | 121 | Args: 122 | in_channels (int): Number of channels in the input sequence data 123 | out_channels (int): Number of channels produced by the convolution 124 | kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel 125 | stride (int, optional): Stride of the temporal convolution. Default: 1 126 | dropout (float, optional): Dropout rate of the final output. Default: 0 127 | residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True`` 128 | 129 | Shape: 130 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format 131 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format 132 | - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format 133 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format 134 | 135 | where 136 | :math:`N` is a batch size, 137 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 138 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 139 | :math:`V` is the number of graph nodes.
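    Example (an illustrative usage sketch added for clarity; the 9x3 kernel,
    the 18-node graph and the tensor sizes are assumed values, not ones fixed
    by this module)::

        >>> block = st_gcn(64, 128, kernel_size=(9, 3), stride=2)
        >>> x = torch.randn(4, 64, 150, 18)   # (N, in_channels, T_in, V)
        >>> A = torch.rand(3, 18, 18)          # (K, V, V) with K == kernel_size[1]
        >>> out, A_out = block(x, A)
        >>> out.shape                          # stride 2 halves the temporal length
        torch.Size([4, 128, 75, 18])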
140 | 141 | """ 142 | 143 | def __init__(self, 144 | in_channels, 145 | out_channels, 146 | kernel_size, 147 | stride=1, 148 | dropout=0, 149 | residual=True): 150 | super().__init__() 151 | 152 | assert len(kernel_size) == 2 153 | assert kernel_size[0] % 2 == 1 154 | padding = ((kernel_size[0] - 1) // 2, 0) 155 | 156 | self.gcn = ConvTemporalGraphical(in_channels, out_channels, 157 | kernel_size[1]) 158 | 159 | self.tcn = nn.Sequential( 160 | nn.BatchNorm2d(out_channels), 161 | nn.ReLU(inplace=True), 162 | nn.Conv2d( 163 | out_channels, 164 | out_channels, 165 | (kernel_size[0], 1), 166 | (stride, 1), 167 | padding, 168 | ), 169 | nn.BatchNorm2d(out_channels), 170 | nn.Dropout(dropout, inplace=True), 171 | ) 172 | 173 | if not residual: 174 | self.residual = lambda x: 0 175 | 176 | elif (in_channels == out_channels) and (stride == 1): 177 | self.residual = lambda x: x 178 | 179 | else: 180 | self.residual = nn.Sequential( 181 | nn.Conv2d( 182 | in_channels, 183 | out_channels, 184 | kernel_size=1, 185 | stride=(stride, 1)), 186 | nn.BatchNorm2d(out_channels), 187 | ) 188 | 189 | self.relu = nn.ReLU(inplace=True) 190 | 191 | def forward(self, x, A): 192 | 193 | res = self.residual(x) 194 | x, A = self.gcn(x, A) 195 | x = self.tcn(x) + res 196 | 197 | return self.relu(x), A -------------------------------------------------------------------------------- /net/st_gcn_twostream.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | from net.utils.tgcn import ConvTemporalGraphical 7 | from net.utils.graph import Graph 8 | 9 | from .st_gcn import Model as ST_GCN 10 | 11 | class Model(nn.Module): 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__() 15 | 16 | self.origin_stream = ST_GCN(*args, **kwargs) 17 | self.motion_stream = ST_GCN(*args, **kwargs) 18 | 19 | def forward(self, x): 20 | N, C, T, V, M = x.size() 21 | m = torch.cat((torch.cuda.FloatTensor(N, C, 1, V, M).zero_(), 22 | x[:, :, 1:-1] - 0.5 * x[:, :, 2:] - 0.5 * x[:, :, :-2], 23 | torch.cuda.FloatTensor(N, C, 1, V, M).zero_()), 2) 24 | 25 | res = self.origin_stream(x) + self.motion_stream(m) 26 | return res -------------------------------------------------------------------------------- /net/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/net/utils/__init__.py -------------------------------------------------------------------------------- /net/utils/graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Graph(): 4 | """ The Graph to model the skeletons extracted by the openpose 5 | 6 | Args: 7 | strategy (string): must be one of the follow candidates 8 | - uniform: Uniform Labeling 9 | - distance: Distance Partitioning 10 | - spatial: Spatial Configuration 11 | For more information, please refer to the section 'Partition Strategies' 12 | in our paper (https://arxiv.org/abs/1801.07455). 13 | 14 | layout (string): must be one of the follow candidates 15 | - openpose: Is consists of 18 joints. For more information, please 16 | refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose#output 17 | - ntu-rgb+d: Is consists of 25 joints. 
For more information, please 18 | refer to https://github.com/shahroudy/NTURGB-D 19 | 20 | max_hop (int): the maximal distance between two connected nodes 21 | dilation (int): controls the spacing between the kernel points 22 | 23 | """ 24 | 25 | def __init__(self, 26 | layout='openpose', 27 | strategy='uniform', 28 | max_hop=1, 29 | dilation=1): 30 | self.max_hop = max_hop 31 | self.dilation = dilation 32 | 33 | self.get_edge(layout) 34 | self.hop_dis = get_hop_distance( 35 | self.num_node, self.edge, max_hop=max_hop) 36 | self.get_adjacency(strategy) 37 | 38 | def __str__(self): 39 | return self.A 40 | 41 | def get_edge(self, layout): 42 | if layout == 'openpose': 43 | self.num_node = 18 44 | self_link = [(i, i) for i in range(self.num_node)] 45 | neighbor_link = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 46 | 11), 47 | (10, 9), (9, 8), (11, 5), (8, 2), (5, 1), (2, 1), 48 | (0, 1), (15, 0), (14, 0), (17, 15), (16, 14)] 49 | self.edge = self_link + neighbor_link 50 | self.center = 1 51 | elif layout == 'ntu-rgb+d': 52 | self.num_node = 25 53 | self_link = [(i, i) for i in range(self.num_node)] 54 | neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), 55 | (6, 5), (7, 6), (8, 7), (9, 21), (10, 9), 56 | (11, 10), (12, 11), (13, 1), (14, 13), (15, 14), 57 | (16, 15), (17, 1), (18, 17), (19, 18), (20, 19), 58 | (22, 23), (23, 8), (24, 25), (25, 12)] 59 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 60 | self.edge = self_link + neighbor_link 61 | self.center = 21 - 1 62 | elif layout == 'ntu_edge': 63 | self.num_node = 24 64 | self_link = [(i, i) for i in range(self.num_node)] 65 | neighbor_1base = [(1, 2), (3, 2), (4, 3), (5, 2), (6, 5), (7, 6), 66 | (8, 7), (9, 2), (10, 9), (11, 10), (12, 11), 67 | (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), 68 | (18, 17), (19, 18), (20, 19), (21, 22), (22, 8), 69 | (23, 24), (24, 12)] 70 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 71 | self.edge = self_link + neighbor_link 72 | self.center = 2 73 | # elif layout=='customer settings' 74 | # pass 75 | else: 76 | raise ValueError("Do Not Exist This Layout.") 77 | 78 | def get_adjacency(self, strategy): 79 | valid_hop = range(0, self.max_hop + 1, self.dilation) 80 | adjacency = np.zeros((self.num_node, self.num_node)) 81 | for hop in valid_hop: 82 | adjacency[self.hop_dis == hop] = 1 83 | normalize_adjacency = normalize_digraph(adjacency) 84 | 85 | if strategy == 'uniform': 86 | A = np.zeros((1, self.num_node, self.num_node)) 87 | A[0] = normalize_adjacency 88 | self.A = A 89 | elif strategy == 'distance': 90 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 91 | for i, hop in enumerate(valid_hop): 92 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == 93 | hop] 94 | self.A = A 95 | elif strategy == 'spatial': 96 | A = [] 97 | for hop in valid_hop: 98 | a_root = np.zeros((self.num_node, self.num_node)) 99 | a_close = np.zeros((self.num_node, self.num_node)) 100 | a_further = np.zeros((self.num_node, self.num_node)) 101 | for i in range(self.num_node): 102 | for j in range(self.num_node): 103 | if self.hop_dis[j, i] == hop: 104 | if self.hop_dis[j, self.center] == self.hop_dis[ 105 | i, self.center]: 106 | a_root[j, i] = normalize_adjacency[j, i] 107 | elif self.hop_dis[j, self. 108 | center] > self.hop_dis[i, self. 
109 | center]: 110 | a_close[j, i] = normalize_adjacency[j, i] 111 | else: 112 | a_further[j, i] = normalize_adjacency[j, i] 113 | if hop == 0: 114 | A.append(a_root) 115 | else: 116 | A.append(a_root + a_close) 117 | A.append(a_further) 118 | A = np.stack(A) 119 | self.A = A 120 | else: 121 | raise ValueError("This strategy does not exist.") 122 | 123 | 124 | def get_hop_distance(num_node, edge, max_hop=1): 125 | A = np.zeros((num_node, num_node)) 126 | for i, j in edge: 127 | A[j, i] = 1 128 | A[i, j] = 1 129 | 130 | # compute hop steps 131 | hop_dis = np.zeros((num_node, num_node)) + np.inf 132 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] 133 | arrive_mat = (np.stack(transfer_mat) > 0) 134 | for d in range(max_hop, -1, -1): 135 | hop_dis[arrive_mat[d]] = d 136 | return hop_dis 137 | 138 | 139 | def normalize_digraph(A): 140 | Dl = np.sum(A, 0) 141 | num_node = A.shape[0] 142 | Dn = np.zeros((num_node, num_node)) 143 | for i in range(num_node): 144 | if Dl[i] > 0: 145 | Dn[i, i] = Dl[i]**(-1) 146 | AD = np.dot(A, Dn) 147 | return AD 148 | 149 | 150 | def normalize_undigraph(A): 151 | Dl = np.sum(A, 0) 152 | num_node = A.shape[0] 153 | Dn = np.zeros((num_node, num_node)) 154 | for i in range(num_node): 155 | if Dl[i] > 0: 156 | Dn[i, i] = Dl[i]**(-0.5) 157 | DAD = np.dot(np.dot(Dn, A), Dn) 158 | return DAD -------------------------------------------------------------------------------- /net/utils/tgcn.py: -------------------------------------------------------------------------------- 1 | # The basic unit of graph convolutional networks. 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | class ConvTemporalGraphical(nn.Module): 7 | 8 | r"""The basic module for applying a graph convolution. 9 | 10 | Args: 11 | in_channels (int): Number of channels in the input sequence data 12 | out_channels (int): Number of channels produced by the convolution 13 | kernel_size (int): Size of the graph convolving kernel 14 | t_kernel_size (int): Size of the temporal convolving kernel 15 | t_stride (int, optional): Stride of the temporal convolution. Default: 1 16 | t_padding (int, optional): Temporal zero-padding added to both sides of 17 | the input. Default: 0 18 | t_dilation (int, optional): Spacing between temporal kernel elements. 19 | Default: 1 20 | bias (bool, optional): If ``True``, adds a learnable bias to the output. 21 | Default: ``True`` 22 | 23 | Shape: 24 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format 25 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format 26 | - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format 27 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format 28 | 29 | where 30 | :math:`N` is a batch size, 31 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size`, 32 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 33 | :math:`V` is the number of graph nodes.
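    Example (an illustrative usage sketch added for clarity; the 18-node graph,
    the spatial kernel size of 3 and the tensor sizes are assumed values)::

        >>> gcn = ConvTemporalGraphical(in_channels=3, out_channels=64, kernel_size=3)
        >>> x = torch.randn(8, 3, 50, 18)   # (N, in_channels, T, V)
        >>> A = torch.rand(3, 18, 18)        # (K, V, V) with K == kernel_size
        >>> y, A_out = gcn(x, A)
        >>> y.shape
        torch.Size([8, 64, 50, 18])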
34 | """ 35 | 36 | def __init__(self, 37 | in_channels, 38 | out_channels, 39 | kernel_size, 40 | t_kernel_size=1, 41 | t_stride=1, 42 | t_padding=0, 43 | t_dilation=1, 44 | bias=True): 45 | super().__init__() 46 | 47 | self.kernel_size = kernel_size 48 | self.conv = nn.Conv2d( 49 | in_channels, 50 | out_channels * kernel_size, 51 | kernel_size=(t_kernel_size, 1), 52 | padding=(t_padding, 0), 53 | stride=(t_stride, 1), 54 | dilation=(t_dilation, 1), 55 | bias=bias) 56 | 57 | def forward(self, x, A): 58 | assert A.size(0) == self.kernel_size 59 | 60 | x = self.conv(x) 61 | 62 | n, kc, t, v = x.size() 63 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 64 | x = torch.einsum('nkctv,kvw->nctw', (x, A)) 65 | 66 | return x.contiguous(), A 67 | -------------------------------------------------------------------------------- /processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/processor/__init__.py -------------------------------------------------------------------------------- /processor/demo_offline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | import argparse 5 | import json 6 | import shutil 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import skvideo.io 12 | 13 | from .io import IO 14 | import tools 15 | import tools.utils as utils 16 | 17 | import cv2 18 | 19 | class DemoOffline(IO): 20 | 21 | def start(self): 22 | 23 | # initiate 24 | label_name_path = './resource/kinetics_skeleton/label_name.txt' 25 | with open(label_name_path) as f: 26 | label_name = f.readlines() 27 | label_name = [line.rstrip() for line in label_name] 28 | self.label_name = label_name 29 | 30 | # pose estimation 31 | video, data_numpy = self.pose_estimation() 32 | 33 | # action recognition 34 | data = torch.from_numpy(data_numpy) 35 | data = data.unsqueeze(0) 36 | data = data.float().to(self.dev).detach() # (1, channel, frame, joint, person) 37 | 38 | # model predict 39 | voting_label_name, video_label_name, output, intensity = self.predict(data) 40 | 41 | # render the video 42 | images = self.render_video(data_numpy, voting_label_name, 43 | video_label_name, intensity, video) 44 | 45 | # visualize 46 | for image in images: 47 | image = image.astype(np.uint8) 48 | cv2.imshow("ST-GCN", image) 49 | if cv2.waitKey(1) & 0xFF == ord('q'): 50 | break 51 | 52 | def predict(self, data): 53 | # forward 54 | output, feature = self.model.extract_feature(data) 55 | output = output[0] 56 | feature = feature[0] 57 | intensity = (feature*feature).sum(dim=0)**0.5 58 | intensity = intensity.cpu().detach().numpy() 59 | 60 | # get result 61 | # classification result of the full sequence 62 | voting_label = output.sum(dim=3).sum( 63 | dim=2).sum(dim=1).argmax(dim=0) 64 | voting_label_name = self.label_name[voting_label] 65 | # classification result for each person of the latest frame 66 | num_person = data.size(4) 67 | latest_frame_label = [output[:, :, :, m].sum( 68 | dim=2)[:, -1].argmax(dim=0) for m in range(num_person)] 69 | latest_frame_label_name = [self.label_name[l] 70 | for l in latest_frame_label] 71 | 72 | num_person = output.size(3) 73 | num_frame = output.size(1) 74 | video_label_name = list() 75 | for t in range(num_frame): 76 | frame_label_name = list() 77 | for m in range(num_person): 78 | person_label = output[:, t, :, m].sum(dim=1).argmax(dim=0) 79 | 
person_label_name = self.label_name[person_label] 80 | frame_label_name.append(person_label_name) 81 | video_label_name.append(frame_label_name) 82 | return voting_label_name, video_label_name, output, intensity 83 | 84 | def render_video(self, data_numpy, voting_label_name, video_label_name, intensity, video): 85 | images = utils.visualization.stgcn_visualize( 86 | data_numpy, 87 | self.model.graph.edge, 88 | intensity, video, 89 | voting_label_name, 90 | video_label_name, 91 | self.arg.height) 92 | return images 93 | 94 | def pose_estimation(self): 95 | # load openpose python api 96 | if self.arg.openpose is not None: 97 | sys.path.append('{}/python'.format(self.arg.openpose)) 98 | sys.path.append('{}/build/python'.format(self.arg.openpose)) 99 | try: 100 | from openpose import pyopenpose as op 101 | except: 102 | print('Can not find Openpose Python API.') 103 | return 104 | 105 | 106 | video_name = self.arg.video.split('/')[-1].split('.')[0] 107 | 108 | # initiate 109 | opWrapper = op.WrapperPython() 110 | params = dict(model_folder='./models', model_pose='COCO') 111 | opWrapper.configure(params) 112 | opWrapper.start() 113 | self.model.eval() 114 | video_capture = cv2.VideoCapture(self.arg.video) 115 | video_length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) 116 | pose_tracker = naive_pose_tracker(data_frame=video_length) 117 | 118 | # pose estimation 119 | start_time = time.time() 120 | frame_index = 0 121 | video = list() 122 | while(True): 123 | 124 | # get image 125 | ret, orig_image = video_capture.read() 126 | if orig_image is None: 127 | break 128 | source_H, source_W, _ = orig_image.shape 129 | orig_image = cv2.resize( 130 | orig_image, (256 * source_W // source_H, 256)) 131 | H, W, _ = orig_image.shape 132 | video.append(orig_image) 133 | 134 | # pose estimation 135 | datum = op.Datum() 136 | datum.cvInputData = orig_image 137 | opWrapper.emplaceAndPop([datum]) 138 | multi_pose = datum.poseKeypoints # (num_person, num_joint, 3) 139 | if len(multi_pose.shape) != 3: 140 | continue 141 | 142 | # normalization 143 | multi_pose[:, :, 0] = multi_pose[:, :, 0]/W 144 | multi_pose[:, :, 1] = multi_pose[:, :, 1]/H 145 | multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5 146 | multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0 147 | multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0 148 | 149 | # pose tracking 150 | pose_tracker.update(multi_pose, frame_index) 151 | frame_index += 1 152 | 153 | print('Pose estimation ({}/{}).'.format(frame_index, video_length)) 154 | 155 | data_numpy = pose_tracker.get_skeleton_sequence() 156 | return video, data_numpy 157 | 158 | @staticmethod 159 | def get_parser(add_help=False): 160 | 161 | # parameter priority: command line > config > default 162 | parent_parser = IO.get_parser(add_help=False) 163 | parser = argparse.ArgumentParser( 164 | add_help=add_help, 165 | parents=[parent_parser], 166 | description='Demo for Spatial Temporal Graph Convolution Network') 167 | 168 | # region arguments yapf: disable 169 | parser.add_argument('--video', 170 | default='./resource/media/skateboarding.mp4', 171 | help='Path to video') 172 | parser.add_argument('--openpose', 173 | default=None, 174 | help='Path to openpose') 175 | parser.add_argument('--model_input_frame', 176 | default=128, 177 | type=int) 178 | parser.add_argument('--model_fps', 179 | default=30, 180 | type=int) 181 | parser.add_argument('--height', 182 | default=1080, 183 | type=int, 184 | help='height of frame in the output video.') 185 | parser.set_defaults( 186 | 
config='./config/st_gcn/kinetics-skeleton/demo_offline.yaml') 187 | parser.set_defaults(print_log=False) 188 | # endregion yapf: enable 189 | 190 | return parser 191 | 192 | class naive_pose_tracker(): 193 | """ A simple tracker for recording person poses and generating skeleton sequences. 194 | For actual occasion, I recommend you to implement a robuster tracker. 195 | Pull-requests are welcomed. 196 | """ 197 | 198 | def __init__(self, data_frame=128, num_joint=18, max_frame_dis=np.inf): 199 | self.data_frame = data_frame 200 | self.num_joint = num_joint 201 | self.max_frame_dis = max_frame_dis 202 | self.latest_frame = 0 203 | self.trace_info = list() 204 | 205 | def update(self, multi_pose, current_frame): 206 | # multi_pose.shape: (num_person, num_joint, 3) 207 | 208 | if current_frame <= self.latest_frame: 209 | return 210 | 211 | if len(multi_pose.shape) != 3: 212 | return 213 | 214 | score_order = (-multi_pose[:, :, 2].sum(axis=1)).argsort(axis=0) 215 | for p in multi_pose[score_order]: 216 | 217 | # match existing traces 218 | matching_trace = None 219 | matching_dis = None 220 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 221 | # trace.shape: (num_frame, num_joint, 3) 222 | if current_frame <= latest_frame: 223 | continue 224 | mean_dis, is_close = self.get_dis(trace, p) 225 | if is_close: 226 | if matching_trace is None: 227 | matching_trace = trace_index 228 | matching_dis = mean_dis 229 | elif matching_dis > mean_dis: 230 | matching_trace = trace_index 231 | matching_dis = mean_dis 232 | 233 | # update trace information 234 | if matching_trace is not None: 235 | trace, latest_frame = self.trace_info[matching_trace] 236 | 237 | # padding zero if the trace is fractured 238 | pad_mode = 'interp' if latest_frame == self.latest_frame else 'zero' 239 | pad = current_frame-latest_frame-1 240 | new_trace = self.cat_pose(trace, p, pad, pad_mode) 241 | self.trace_info[matching_trace] = (new_trace, current_frame) 242 | 243 | else: 244 | new_trace = np.array([p]) 245 | self.trace_info.append((new_trace, current_frame)) 246 | 247 | self.latest_frame = current_frame 248 | 249 | def get_skeleton_sequence(self): 250 | 251 | # remove old traces 252 | valid_trace_index = [] 253 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 254 | if self.latest_frame - latest_frame < self.data_frame: 255 | valid_trace_index.append(trace_index) 256 | self.trace_info = [self.trace_info[v] for v in valid_trace_index] 257 | 258 | num_trace = len(self.trace_info) 259 | if num_trace == 0: 260 | return None 261 | 262 | data = np.zeros((3, self.data_frame, self.num_joint, num_trace)) 263 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 264 | end = self.data_frame - (self.latest_frame - latest_frame) 265 | d = trace[-end:] 266 | beg = end - len(d) 267 | data[:, beg:end, :, trace_index] = d.transpose((2, 0, 1)) 268 | 269 | return data 270 | 271 | # concatenate pose to a trace 272 | def cat_pose(self, trace, pose, pad, pad_mode): 273 | # trace.shape: (num_frame, num_joint, 3) 274 | num_joint = pose.shape[0] 275 | num_channel = pose.shape[1] 276 | if pad != 0: 277 | if pad_mode == 'zero': 278 | trace = np.concatenate( 279 | (trace, np.zeros((pad, num_joint, 3))), 0) 280 | elif pad_mode == 'interp': 281 | last_pose = trace[-1] 282 | coeff = [(p+1)/(pad+1) for p in range(pad)] 283 | interp_pose = [(1-c)*last_pose + c*pose for c in coeff] 284 | trace = np.concatenate((trace, interp_pose), 0) 285 | new_trace = np.concatenate((trace, [pose]), 0) 286 | 
return new_trace 287 | 288 | # calculate the distance between a existing trace and the input pose 289 | 290 | def get_dis(self, trace, pose): 291 | last_pose_xy = trace[-1, :, 0:2] 292 | curr_pose_xy = pose[:, 0:2] 293 | 294 | mean_dis = ((((last_pose_xy - curr_pose_xy)**2).sum(1))**0.5).mean() 295 | wh = last_pose_xy.max(0) - last_pose_xy.min(0) 296 | scale = (wh[0] * wh[1]) ** 0.5 + 0.0001 297 | is_close = mean_dis < scale * self.max_frame_dis 298 | return mean_dis, is_close 299 | -------------------------------------------------------------------------------- /processor/demo_old.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import argparse 4 | import json 5 | import shutil 6 | 7 | import numpy as np 8 | import torch 9 | import skvideo.io 10 | 11 | from .io import IO 12 | import tools 13 | import tools.utils as utils 14 | 15 | class Demo(IO): 16 | """ 17 | Demo for Skeleton-based Action Recgnition 18 | """ 19 | def start(self): 20 | 21 | openpose = '{}/examples/openpose/openpose.bin'.format(self.arg.openpose) 22 | video_name = self.arg.video.split('/')[-1].split('.')[0] 23 | output_snippets_dir = './data/openpose_estimation/snippets/{}'.format(video_name) 24 | output_sequence_dir = './data/openpose_estimation/data' 25 | output_sequence_path = '{}/{}.json'.format(output_sequence_dir, video_name) 26 | output_result_dir = self.arg.output_dir 27 | output_result_path = '{}/{}.mp4'.format(output_result_dir, video_name) 28 | label_name_path = './resource/kinetics_skeleton/label_name.txt' 29 | with open(label_name_path) as f: 30 | label_name = f.readlines() 31 | label_name = [line.rstrip() for line in label_name] 32 | 33 | # pose estimation 34 | openpose_args = dict( 35 | video=self.arg.video, 36 | write_json=output_snippets_dir, 37 | display=0, 38 | render_pose=0, 39 | model_pose='COCO') 40 | command_line = openpose + ' ' 41 | command_line += ' '.join(['--{} {}'.format(k, v) for k, v in openpose_args.items()]) 42 | shutil.rmtree(output_snippets_dir, ignore_errors=True) 43 | os.makedirs(output_snippets_dir) 44 | os.system(command_line) 45 | 46 | # pack openpose ouputs 47 | video = utils.video.get_video_frames(self.arg.video) 48 | height, width, _ = video[0].shape 49 | video_info = utils.openpose.json_pack( 50 | output_snippets_dir, video_name, width, height) 51 | if not os.path.exists(output_sequence_dir): 52 | os.makedirs(output_sequence_dir) 53 | with open(output_sequence_path, 'w') as outfile: 54 | json.dump(video_info, outfile) 55 | if len(video_info['data']) == 0: 56 | print('Can not find pose estimation results.') 57 | return 58 | else: 59 | print('Pose estimation complete.') 60 | 61 | # parse skeleton data 62 | pose, _ = utils.video.video_info_parsing(video_info) 63 | data = torch.from_numpy(pose) 64 | data = data.unsqueeze(0) 65 | data = data.float().to(self.dev).detach() 66 | 67 | # extract feature 68 | print('\nNetwork forwad...') 69 | self.model.eval() 70 | output, feature = self.model.extract_feature(data) 71 | output = output[0] 72 | feature = feature[0] 73 | intensity = (feature*feature).sum(dim=0)**0.5 74 | intensity = intensity.cpu().detach().numpy() 75 | label = output.sum(dim=3).sum(dim=2).sum(dim=1).argmax(dim=0) 76 | print('Prediction result: {}'.format(label_name[label])) 77 | print('Done.') 78 | 79 | # visualization 80 | print('\nVisualization...') 81 | label_sequence = output.sum(dim=2).argmax(dim=0) 82 | label_name_sequence = [[label_name[p] for p in l ]for l in label_sequence] 83 | edge = 
self.model.graph.edge 84 | images = utils.visualization.stgcn_visualize( 85 | pose, edge, intensity, video,label_name[label] , label_name_sequence, self.arg.height) 86 | print('Done.') 87 | 88 | # save video 89 | print('\nSaving...') 90 | if not os.path.exists(output_result_dir): 91 | os.makedirs(output_result_dir) 92 | writer = skvideo.io.FFmpegWriter(output_result_path, 93 | outputdict={'-b': '300000000'}) 94 | for img in images: 95 | writer.writeFrame(img) 96 | writer.close() 97 | print('The Demo result has been saved in {}.'.format(output_result_path)) 98 | 99 | @staticmethod 100 | def get_parser(add_help=False): 101 | 102 | # parameter priority: command line > config > default 103 | parent_parser = IO.get_parser(add_help=False) 104 | parser = argparse.ArgumentParser( 105 | add_help=add_help, 106 | parents=[parent_parser], 107 | description='Demo for Spatial Temporal Graph Convolution Network') 108 | 109 | # region arguments yapf: disable 110 | parser.add_argument('--video', 111 | default='./resource/media/skateboarding.mp4', 112 | help='Path to video') 113 | parser.add_argument('--openpose', 114 | default='3dparty/openpose/build', 115 | help='Path to openpose') 116 | parser.add_argument('--output_dir', 117 | default='./data/demo_result', 118 | help='Path to save results') 119 | parser.add_argument('--height', 120 | default=1080, 121 | type=int) 122 | parser.set_defaults(config='./config/st_gcn/kinetics-skeleton/demo_old.yaml') 123 | parser.set_defaults(print_log=False) 124 | # endregion yapf: enable 125 | 126 | return parser 127 | -------------------------------------------------------------------------------- /processor/demo_realtime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | import argparse 5 | import json 6 | import shutil 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import skvideo.io 12 | 13 | from .io import IO 14 | import tools 15 | import tools.utils as utils 16 | 17 | import cv2 18 | 19 | class DemoRealtime(IO): 20 | """ A demo for utilizing st-gcn in the realtime action recognition. 21 | The Openpose python-api is required for this demo. 22 | 23 | Since the pre-trained model is trained on videos with 30fps, 24 | and Openpose is hard to achieve this high speed in the single GPU, 25 | if you want to predict actions by **camera** in realtime, 26 | either data interpolation or new pre-trained model 27 | is required. 28 | 29 | Pull requests are always welcome. 
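    The per-frame loop in ``start`` below performs: Openpose keypoint extraction,
    normalization of the (x, y) coordinates by image width/height followed by
    centering at zero (joints with zero confidence are reset to 0), accumulation
    into ``naive_pose_tracker``, and a forward pass whose voting result and
    per-joint feature intensity are rendered onto the latest frame. When
    ``--video camera_source`` is passed, frames are indexed by elapsed wall-clock
    time so that the skeleton sequence approximates the frame rate the model was
    trained on.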
30 | """ 31 | 32 | def start(self): 33 | # load openpose python api 34 | if self.arg.openpose is not None: 35 | sys.path.append('{}/python'.format(self.arg.openpose)) 36 | sys.path.append('{}/build/python'.format(self.arg.openpose)) 37 | try: 38 | from openpose import pyopenpose as op 39 | except: 40 | print('Can not find Openpose Python API.') 41 | return 42 | 43 | video_name = self.arg.video.split('/')[-1].split('.')[0] 44 | label_name_path = './resource/kinetics_skeleton/label_name.txt' 45 | with open(label_name_path) as f: 46 | label_name = f.readlines() 47 | label_name = [line.rstrip() for line in label_name] 48 | self.label_name = label_name 49 | 50 | # initiate 51 | opWrapper = op.WrapperPython() 52 | params = dict(model_folder='./models', model_pose='COCO') 53 | opWrapper.configure(params) 54 | opWrapper.start() 55 | self.model.eval() 56 | pose_tracker = naive_pose_tracker() 57 | 58 | if self.arg.video == 'camera_source': 59 | video_capture = cv2.VideoCapture(0) 60 | else: 61 | video_capture = cv2.VideoCapture(self.arg.video) 62 | 63 | # start recognition 64 | start_time = time.time() 65 | frame_index = 0 66 | while(True): 67 | 68 | tic = time.time() 69 | 70 | # get image 71 | ret, orig_image = video_capture.read() 72 | if orig_image is None: 73 | break 74 | source_H, source_W, _ = orig_image.shape 75 | orig_image = cv2.resize( 76 | orig_image, (256 * source_W // source_H, 256)) 77 | H, W, _ = orig_image.shape 78 | 79 | # pose estimation 80 | datum = op.Datum() 81 | datum.cvInputData = orig_image 82 | opWrapper.emplaceAndPop([datum]) 83 | multi_pose = datum.poseKeypoints # (num_person, num_joint, 3) 84 | if len(multi_pose.shape) != 3: 85 | continue 86 | 87 | # normalization 88 | multi_pose[:, :, 0] = multi_pose[:, :, 0]/W 89 | multi_pose[:, :, 1] = multi_pose[:, :, 1]/H 90 | multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5 91 | multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0 92 | multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0 93 | 94 | # pose tracking 95 | if self.arg.video == 'camera_source': 96 | frame_index = int((time.time() - start_time)*self.arg.fps) 97 | else: 98 | frame_index += 1 99 | pose_tracker.update(multi_pose, frame_index) 100 | data_numpy = pose_tracker.get_skeleton_sequence() 101 | data = torch.from_numpy(data_numpy) 102 | data = data.unsqueeze(0) 103 | data = data.float().to(self.dev).detach() # (1, channel, frame, joint, person) 104 | 105 | # model predict 106 | voting_label_name, video_label_name, output, intensity = self.predict( 107 | data) 108 | 109 | # visualization 110 | app_fps = 1 / (time.time() - tic) 111 | image = self.render(data_numpy, voting_label_name, 112 | video_label_name, intensity, orig_image, app_fps) 113 | cv2.imshow("ST-GCN", image) 114 | if cv2.waitKey(1) & 0xFF == ord('q'): 115 | break 116 | 117 | def predict(self, data): 118 | # forward 119 | output, feature = self.model.extract_feature(data) 120 | output = output[0] 121 | feature = feature[0] 122 | intensity = (feature*feature).sum(dim=0)**0.5 123 | intensity = intensity.cpu().detach().numpy() 124 | 125 | # get result 126 | # classification result of the full sequence 127 | voting_label = output.sum(dim=3).sum( 128 | dim=2).sum(dim=1).argmax(dim=0) 129 | voting_label_name = self.label_name[voting_label] 130 | # classification result for each person of the latest frame 131 | num_person = data.size(4) 132 | latest_frame_label = [output[:, :, :, m].sum( 133 | dim=2)[:, -1].argmax(dim=0) for m in range(num_person)] 134 | latest_frame_label_name = [self.label_name[l] 135 | for l in 
latest_frame_label] 136 | 137 | num_person = output.size(3) 138 | num_frame = output.size(1) 139 | video_label_name = list() 140 | for t in range(num_frame): 141 | frame_label_name = list() 142 | for m in range(num_person): 143 | person_label = output[:, t, :, m].sum(dim=1).argmax(dim=0) 144 | person_label_name = self.label_name[person_label] 145 | frame_label_name.append(person_label_name) 146 | video_label_name.append(frame_label_name) 147 | return voting_label_name, video_label_name, output, intensity 148 | 149 | def render(self, data_numpy, voting_label_name, video_label_name, intensity, orig_image, fps=0): 150 | images = utils.visualization.stgcn_visualize( 151 | data_numpy[:, [-1]], 152 | self.model.graph.edge, 153 | intensity[[-1]], [orig_image], 154 | voting_label_name, 155 | [video_label_name[-1]], 156 | self.arg.height, 157 | fps=fps) 158 | image = next(images) 159 | image = image.astype(np.uint8) 160 | return image 161 | 162 | @staticmethod 163 | def get_parser(add_help=False): 164 | 165 | # parameter priority: command line > config > default 166 | parent_parser = IO.get_parser(add_help=False) 167 | parser = argparse.ArgumentParser( 168 | add_help=add_help, 169 | parents=[parent_parser], 170 | description='Demo for Spatial Temporal Graph Convolution Network') 171 | 172 | # region arguments yapf: disable 173 | parser.add_argument('--video', 174 | default='./resource/media/skateboarding.mp4', 175 | help='Path to video') 176 | parser.add_argument('--openpose', 177 | default=None, 178 | help='Path to openpose') 179 | parser.add_argument('--model_input_frame', 180 | default=128, 181 | type=int) 182 | parser.add_argument('--model_fps', 183 | default=30, 184 | type=int) 185 | parser.add_argument('--height', 186 | default=1080, 187 | type=int, 188 | help='height of frame in the output video.') 189 | parser.set_defaults( 190 | config='./config/st_gcn/kinetics-skeleton/demo_realtime.yaml') 191 | parser.set_defaults(print_log=False) 192 | # endregion yapf: enable 193 | 194 | return parser 195 | 196 | class naive_pose_tracker(): 197 | """ A simple tracker for recording person poses and generating skeleton sequences. 198 | For actual occasion, I recommend you to implement a robuster tracker. 199 | Pull-requests are welcomed. 
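    A minimal usage sketch (``pose_stream`` is a hypothetical iterable of
    per-frame Openpose outputs, named here only for illustration)::

        tracker = naive_pose_tracker(data_frame=128, num_joint=18)
        for frame_index, multi_pose in enumerate(pose_stream, 1):   # each item: (num_person, 18, 3)
            tracker.update(multi_pose, frame_index)                 # frame indices must strictly increase
        data = tracker.get_skeleton_sequence()                      # (3, 128, 18, num_person) array, or None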
200 | """ 201 | 202 | def __init__(self, data_frame=128, num_joint=18, max_frame_dis=np.inf): 203 | self.data_frame = data_frame 204 | self.num_joint = num_joint 205 | self.max_frame_dis = max_frame_dis 206 | self.latest_frame = 0 207 | self.trace_info = list() 208 | 209 | def update(self, multi_pose, current_frame): 210 | # multi_pose.shape: (num_person, num_joint, 3) 211 | 212 | if current_frame <= self.latest_frame: 213 | return 214 | 215 | if len(multi_pose.shape) != 3: 216 | return 217 | 218 | score_order = (-multi_pose[:, :, 2].sum(axis=1)).argsort(axis=0) 219 | for p in multi_pose[score_order]: 220 | 221 | # match existing traces 222 | matching_trace = None 223 | matching_dis = None 224 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 225 | # trace.shape: (num_frame, num_joint, 3) 226 | if current_frame <= latest_frame: 227 | continue 228 | mean_dis, is_close = self.get_dis(trace, p) 229 | if is_close: 230 | if matching_trace is None: 231 | matching_trace = trace_index 232 | matching_dis = mean_dis 233 | elif matching_dis > mean_dis: 234 | matching_trace = trace_index 235 | matching_dis = mean_dis 236 | 237 | # update trace information 238 | if matching_trace is not None: 239 | trace, latest_frame = self.trace_info[matching_trace] 240 | 241 | # padding zero if the trace is fractured 242 | pad_mode = 'interp' if latest_frame == self.latest_frame else 'zero' 243 | pad = current_frame-latest_frame-1 244 | new_trace = self.cat_pose(trace, p, pad, pad_mode) 245 | self.trace_info[matching_trace] = (new_trace, current_frame) 246 | 247 | else: 248 | new_trace = np.array([p]) 249 | self.trace_info.append((new_trace, current_frame)) 250 | 251 | self.latest_frame = current_frame 252 | 253 | def get_skeleton_sequence(self): 254 | 255 | # remove old traces 256 | valid_trace_index = [] 257 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 258 | if self.latest_frame - latest_frame < self.data_frame: 259 | valid_trace_index.append(trace_index) 260 | self.trace_info = [self.trace_info[v] for v in valid_trace_index] 261 | 262 | num_trace = len(self.trace_info) 263 | if num_trace == 0: 264 | return None 265 | 266 | data = np.zeros((3, self.data_frame, self.num_joint, num_trace)) 267 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 268 | end = self.data_frame - (self.latest_frame - latest_frame) 269 | d = trace[-end:] 270 | beg = end - len(d) 271 | data[:, beg:end, :, trace_index] = d.transpose((2, 0, 1)) 272 | 273 | return data 274 | 275 | # concatenate pose to a trace 276 | def cat_pose(self, trace, pose, pad, pad_mode): 277 | # trace.shape: (num_frame, num_joint, 3) 278 | num_joint = pose.shape[0] 279 | num_channel = pose.shape[1] 280 | if pad != 0: 281 | if pad_mode == 'zero': 282 | trace = np.concatenate( 283 | (trace, np.zeros((pad, num_joint, 3))), 0) 284 | elif pad_mode == 'interp': 285 | last_pose = trace[-1] 286 | coeff = [(p+1)/(pad+1) for p in range(pad)] 287 | interp_pose = [(1-c)*last_pose + c*pose for c in coeff] 288 | trace = np.concatenate((trace, interp_pose), 0) 289 | new_trace = np.concatenate((trace, [pose]), 0) 290 | return new_trace 291 | 292 | # calculate the distance between a existing trace and the input pose 293 | 294 | def get_dis(self, trace, pose): 295 | last_pose_xy = trace[-1, :, 0:2] 296 | curr_pose_xy = pose[:, 0:2] 297 | 298 | mean_dis = ((((last_pose_xy - curr_pose_xy)**2).sum(1))**0.5).mean() 299 | wh = last_pose_xy.max(0) - last_pose_xy.min(0) 300 | scale = (wh[0] * wh[1]) ** 0.5 + 0.0001 301 | 
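        # a candidate pose matches a trace when its mean joint displacement is small
        # relative to the person's bounding-box scale; with the default
        # max_frame_dis=np.inf every candidate counts as close, so matching reduces
        # to picking the trace with the smallest mean distance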
is_close = mean_dis < scale * self.max_frame_dis 302 | return mean_dis, is_close 303 | -------------------------------------------------------------------------------- /processor/io.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # pylint: disable=W0201 3 | import sys 4 | import argparse 5 | import yaml 6 | import numpy as np 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | 12 | # torchlight 13 | import torchlight 14 | from torchlight import str2bool 15 | from torchlight import DictAction 16 | from torchlight import import_class 17 | 18 | class IO(): 19 | """ 20 | IO Processor 21 | """ 22 | 23 | def __init__(self, argv=None): 24 | 25 | self.load_arg(argv) 26 | self.init_environment() 27 | self.load_model() 28 | self.load_weights() 29 | self.gpu() 30 | 31 | def load_arg(self, argv=None): 32 | parser = self.get_parser() 33 | 34 | # load arg form config file 35 | p = parser.parse_args(argv) 36 | if p.config is not None: 37 | # load config file 38 | with open(p.config, 'r') as f: 39 | default_arg = yaml.load(f, Loader=yaml.FullLoader) 40 | 41 | # update parser from config file 42 | key = vars(p).keys() 43 | for k in default_arg.keys(): 44 | if k not in key: 45 | print('Unknown Arguments: {}'.format(k)) 46 | assert k in key 47 | 48 | parser.set_defaults(**default_arg) 49 | 50 | self.arg = parser.parse_args(argv) 51 | 52 | def init_environment(self): 53 | self.io = torchlight.IO( 54 | self.arg.work_dir, 55 | save_log=self.arg.save_log, 56 | print_log=self.arg.print_log) 57 | self.io.save_arg(self.arg) 58 | 59 | # gpu 60 | if self.arg.use_gpu: 61 | gpus = torchlight.visible_gpu(self.arg.device) 62 | torchlight.occupy_gpu(gpus) 63 | self.gpus = gpus 64 | self.dev = "cuda:0" 65 | else: 66 | self.dev = "cpu" 67 | 68 | def load_model(self): 69 | self.model = self.io.load_model(self.arg.model, 70 | **(self.arg.model_args)) 71 | 72 | def load_weights(self): 73 | if self.arg.weights: 74 | self.model = self.io.load_weights(self.model, self.arg.weights, 75 | self.arg.ignore_weights) 76 | 77 | def gpu(self): 78 | # move modules to gpu 79 | self.model = self.model.to(self.dev) 80 | for name, value in vars(self).items(): 81 | cls_name = str(value.__class__) 82 | if cls_name.find('torch.nn.modules') != -1: 83 | setattr(self, name, value.to(self.dev)) 84 | 85 | # model parallel 86 | if self.arg.use_gpu and len(self.gpus) > 1: 87 | self.model = nn.DataParallel(self.model, device_ids=self.gpus) 88 | 89 | def start(self): 90 | self.io.print_log('Parameters:\n{}\n'.format(str(vars(self.arg)))) 91 | 92 | @staticmethod 93 | def get_parser(add_help=False): 94 | 95 | #region arguments yapf: disable 96 | # parameter priority: command line > config > default 97 | parser = argparse.ArgumentParser( add_help=add_help, description='IO Processor') 98 | 99 | parser.add_argument('-w', '--work_dir', default='./work_dir/tmp', help='the work folder for storing results') 100 | parser.add_argument('-c', '--config', default=None, help='path to the configuration file') 101 | 102 | # processor 103 | parser.add_argument('--use_gpu', type=str2bool, default=True, help='use GPUs or not') 104 | parser.add_argument('--device', type=int, default=0, nargs='+', help='the indexes of GPUs for training or testing') 105 | 106 | # visulize and debug 107 | parser.add_argument('--print_log', type=str2bool, default=True, help='print logging or not') 108 | parser.add_argument('--save_log', type=str2bool, default=True, help='save logging or not') 109 | 110 | # model 111 | 
parser.add_argument('--model', default=None, help='the model will be used') 112 | parser.add_argument('--model_args', action=DictAction, default=dict(), help='the arguments of model') 113 | parser.add_argument('--weights', default=None, help='the weights for network initialization') 114 | parser.add_argument('--ignore_weights', type=str, default=[], nargs='+', help='the name of weights which will be ignored in the initialization') 115 | #endregion yapf: enable 116 | 117 | return parser 118 | -------------------------------------------------------------------------------- /processor/processor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # pylint: disable=W0201 3 | import sys 4 | import argparse 5 | import yaml 6 | import numpy as np 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | 13 | # torchlight 14 | import torchlight 15 | from torchlight import str2bool 16 | from torchlight import DictAction 17 | from torchlight import import_class 18 | 19 | from .io import IO 20 | 21 | class Processor(IO): 22 | """ 23 | Base Processor 24 | """ 25 | 26 | def __init__(self, argv=None): 27 | 28 | self.load_arg(argv) 29 | self.init_environment() 30 | self.load_model() 31 | self.load_weights() 32 | self.gpu() 33 | self.load_data() 34 | self.load_optimizer() 35 | 36 | def init_environment(self): 37 | 38 | super().init_environment() 39 | self.result = dict() 40 | self.iter_info = dict() 41 | self.epoch_info = dict() 42 | self.meta_info = dict(epoch=0, iter=0) 43 | 44 | def load_optimizer(self): 45 | pass 46 | 47 | def load_data(self): 48 | Feeder = import_class(self.arg.feeder) 49 | if 'debug' not in self.arg.train_feeder_args: 50 | self.arg.train_feeder_args['debug'] = self.arg.debug 51 | self.data_loader = dict() 52 | if self.arg.phase == 'train': 53 | self.data_loader['train'] = torch.utils.data.DataLoader( 54 | dataset=Feeder(**self.arg.train_feeder_args), 55 | batch_size=self.arg.batch_size, 56 | shuffle=True, 57 | num_workers=self.arg.num_worker * torchlight.ngpu( 58 | self.arg.device), 59 | drop_last=True) 60 | if self.arg.test_feeder_args: 61 | self.data_loader['test'] = torch.utils.data.DataLoader( 62 | dataset=Feeder(**self.arg.test_feeder_args), 63 | batch_size=self.arg.test_batch_size, 64 | shuffle=False, 65 | num_workers=self.arg.num_worker * torchlight.ngpu( 66 | self.arg.device)) 67 | 68 | def show_epoch_info(self): 69 | for k, v in self.epoch_info.items(): 70 | self.io.print_log('\t{}: {}'.format(k, v)) 71 | if self.arg.pavi_log: 72 | self.io.log('train', self.meta_info['iter'], self.epoch_info) 73 | 74 | def show_iter_info(self): 75 | if self.meta_info['iter'] % self.arg.log_interval == 0: 76 | info ='\tIter {} Done.'.format(self.meta_info['iter']) 77 | for k, v in self.iter_info.items(): 78 | if isinstance(v, float): 79 | info = info + ' | {}: {:.4f}'.format(k, v) 80 | else: 81 | info = info + ' | {}: {}'.format(k, v) 82 | 83 | self.io.print_log(info) 84 | 85 | if self.arg.pavi_log: 86 | self.io.log('train', self.meta_info['iter'], self.iter_info) 87 | 88 | def train(self): 89 | for _ in range(100): 90 | self.iter_info['loss'] = 0 91 | self.show_iter_info() 92 | self.meta_info['iter'] += 1 93 | self.epoch_info['mean loss'] = 0 94 | self.show_epoch_info() 95 | 96 | def test(self): 97 | for _ in range(100): 98 | self.iter_info['loss'] = 1 99 | self.show_iter_info() 100 | self.epoch_info['mean loss'] = 1 101 | self.show_epoch_info() 102 | 103 | def start(self): 104 | 
self.io.print_log('Parameters:\n{}\n'.format(str(vars(self.arg)))) 105 | 106 | # training phase 107 | if self.arg.phase == 'train': 108 | for epoch in range(self.arg.start_epoch, self.arg.num_epoch): 109 | self.meta_info['epoch'] = epoch 110 | 111 | # training 112 | self.io.print_log('Training epoch: {}'.format(epoch)) 113 | self.train() 114 | self.io.print_log('Done.') 115 | 116 | # save model 117 | if ((epoch + 1) % self.arg.save_interval == 0) or ( 118 | epoch + 1 == self.arg.num_epoch): 119 | filename = 'epoch{}_model.pt'.format(epoch + 1) 120 | self.io.save_model(self.model, filename) 121 | 122 | # evaluation 123 | if ((epoch + 1) % self.arg.eval_interval == 0) or ( 124 | epoch + 1 == self.arg.num_epoch): 125 | self.io.print_log('Eval epoch: {}'.format(epoch)) 126 | self.test() 127 | self.io.print_log('Done.') 128 | # test phase 129 | elif self.arg.phase == 'test': 130 | 131 | # the path of weights must be appointed 132 | if self.arg.weights is None: 133 | raise ValueError('Please appoint --weights.') 134 | self.io.print_log('Model: {}.'.format(self.arg.model)) 135 | self.io.print_log('Weights: {}.'.format(self.arg.weights)) 136 | 137 | # evaluation 138 | self.io.print_log('Evaluation Start:') 139 | self.test() 140 | self.io.print_log('Done.\n') 141 | 142 | # save the output of model 143 | if self.arg.save_result: 144 | result_dict = dict( 145 | zip(self.data_loader['test'].dataset.sample_name, 146 | self.result)) 147 | self.io.save_pkl(result_dict, 'test_result.pkl') 148 | 149 | @staticmethod 150 | def get_parser(add_help=False): 151 | 152 | #region arguments yapf: disable 153 | # parameter priority: command line > config > default 154 | parser = argparse.ArgumentParser( add_help=add_help, description='Base Processor') 155 | 156 | parser.add_argument('-w', '--work_dir', default='./work_dir/tmp', help='the work folder for storing results') 157 | parser.add_argument('-c', '--config', default=None, help='path to the configuration file') 158 | 159 | # processor 160 | parser.add_argument('--phase', default='train', help='must be train or test') 161 | parser.add_argument('--save_result', type=str2bool, default=False, help='if ture, the output of the model will be stored') 162 | parser.add_argument('--start_epoch', type=int, default=0, help='start training from which epoch') 163 | parser.add_argument('--num_epoch', type=int, default=80, help='stop training in which epoch') 164 | parser.add_argument('--use_gpu', type=str2bool, default=True, help='use GPUs or not') 165 | parser.add_argument('--device', type=int, default=0, nargs='+', help='the indexes of GPUs for training or testing') 166 | 167 | # visulize and debug 168 | parser.add_argument('--log_interval', type=int, default=100, help='the interval for printing messages (#iteration)') 169 | parser.add_argument('--save_interval', type=int, default=10, help='the interval for storing models (#iteration)') 170 | parser.add_argument('--eval_interval', type=int, default=5, help='the interval for evaluating models (#iteration)') 171 | parser.add_argument('--save_log', type=str2bool, default=True, help='save logging or not') 172 | parser.add_argument('--print_log', type=str2bool, default=True, help='print logging or not') 173 | parser.add_argument('--pavi_log', type=str2bool, default=False, help='logging on pavi or not') 174 | 175 | # feeder 176 | parser.add_argument('--feeder', default='feeder.feeder', help='data loader will be used') 177 | parser.add_argument('--num_worker', type=int, default=4, help='the number of worker per gpu for data 
loader') 178 | parser.add_argument('--train_feeder_args', action=DictAction, default=dict(), help='the arguments of data loader for training') 179 | parser.add_argument('--test_feeder_args', action=DictAction, default=dict(), help='the arguments of data loader for test') 180 | parser.add_argument('--batch_size', type=int, default=256, help='training batch size') 181 | parser.add_argument('--test_batch_size', type=int, default=256, help='test batch size') 182 | parser.add_argument('--debug', action="store_true", help='less data, faster loading') 183 | 184 | # model 185 | parser.add_argument('--model', default=None, help='the model will be used') 186 | parser.add_argument('--model_args', action=DictAction, default=dict(), help='the arguments of model') 187 | parser.add_argument('--weights', default=None, help='the weights for network initialization') 188 | parser.add_argument('--ignore_weights', type=str, default=[], nargs='+', help='the name of weights which will be ignored in the initialization') 189 | #endregion yapf: enable 190 | 191 | return parser 192 | -------------------------------------------------------------------------------- /processor/recognition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # pylint: disable=W0201 3 | import sys 4 | import argparse 5 | import yaml 6 | import numpy as np 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | 13 | # torchlight 14 | import torchlight 15 | from torchlight import str2bool 16 | from torchlight import DictAction 17 | from torchlight import import_class 18 | 19 | from .processor import Processor 20 | 21 | def weights_init(m): 22 | classname = m.__class__.__name__ 23 | if classname.find('Conv1d') != -1: 24 | m.weight.data.normal_(0.0, 0.02) 25 | if m.bias is not None: 26 | m.bias.data.fill_(0) 27 | elif classname.find('Conv2d') != -1: 28 | m.weight.data.normal_(0.0, 0.02) 29 | if m.bias is not None: 30 | m.bias.data.fill_(0) 31 | elif classname.find('BatchNorm') != -1: 32 | m.weight.data.normal_(1.0, 0.02) 33 | m.bias.data.fill_(0) 34 | 35 | class REC_Processor(Processor): 36 | """ 37 | Processor for Skeleton-based Action Recgnition 38 | """ 39 | 40 | def load_model(self): 41 | self.model = self.io.load_model(self.arg.model, 42 | **(self.arg.model_args)) 43 | self.model.apply(weights_init) 44 | self.loss = nn.CrossEntropyLoss() 45 | 46 | def load_optimizer(self): 47 | if self.arg.optimizer == 'SGD': 48 | self.optimizer = optim.SGD( 49 | self.model.parameters(), 50 | lr=self.arg.base_lr, 51 | momentum=0.9, 52 | nesterov=self.arg.nesterov, 53 | weight_decay=self.arg.weight_decay) 54 | elif self.arg.optimizer == 'Adam': 55 | self.optimizer = optim.Adam( 56 | self.model.parameters(), 57 | lr=self.arg.base_lr, 58 | weight_decay=self.arg.weight_decay) 59 | else: 60 | raise ValueError() 61 | 62 | def adjust_lr(self): 63 | if self.arg.optimizer == 'SGD' and self.arg.step: 64 | lr = self.arg.base_lr * ( 65 | 0.1**np.sum(self.meta_info['epoch']>= np.array(self.arg.step))) 66 | for param_group in self.optimizer.param_groups: 67 | param_group['lr'] = lr 68 | self.lr = lr 69 | else: 70 | self.lr = self.arg.base_lr 71 | 72 | def show_topk(self, k): 73 | rank = self.result.argsort() 74 | hit_top_k = [l in rank[i, -k:] for i, l in enumerate(self.label)] 75 | accuracy = sum(hit_top_k) * 1.0 / len(hit_top_k) 76 | self.io.print_log('\tTop{}: {:.2f}%'.format(k, 100 * accuracy)) 77 | 78 | def train(self): 79 | self.model.train() 80 | 
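        # adjust_lr implements the step schedule: for SGD, the learning rate is
        # base_lr scaled by 0.1 for every milestone in --step that the current
        # epoch has reached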
self.adjust_lr() 81 | loader = self.data_loader['train'] 82 | loss_value = [] 83 | 84 | for data, label in loader: 85 | 86 | # get data 87 | data = data.float().to(self.dev) 88 | label = label.long().to(self.dev) 89 | 90 | # forward 91 | output = self.model(data) 92 | loss = self.loss(output, label) 93 | 94 | # backward 95 | self.optimizer.zero_grad() 96 | loss.backward() 97 | self.optimizer.step() 98 | 99 | # statistics 100 | self.iter_info['loss'] = loss.data.item() 101 | self.iter_info['lr'] = '{:.6f}'.format(self.lr) 102 | loss_value.append(self.iter_info['loss']) 103 | self.show_iter_info() 104 | self.meta_info['iter'] += 1 105 | 106 | self.epoch_info['mean_loss']= np.mean(loss_value) 107 | self.show_epoch_info() 108 | self.io.print_timer() 109 | 110 | def test(self, evaluation=True): 111 | 112 | self.model.eval() 113 | loader = self.data_loader['test'] 114 | loss_value = [] 115 | result_frag = [] 116 | label_frag = [] 117 | 118 | for data, label in loader: 119 | 120 | # get data 121 | data = data.float().to(self.dev) 122 | label = label.long().to(self.dev) 123 | 124 | # inference 125 | with torch.no_grad(): 126 | output = self.model(data) 127 | result_frag.append(output.data.cpu().numpy()) 128 | 129 | # get loss 130 | if evaluation: 131 | loss = self.loss(output, label) 132 | loss_value.append(loss.item()) 133 | label_frag.append(label.data.cpu().numpy()) 134 | 135 | self.result = np.concatenate(result_frag) 136 | if evaluation: 137 | self.label = np.concatenate(label_frag) 138 | self.epoch_info['mean_loss']= np.mean(loss_value) 139 | self.show_epoch_info() 140 | 141 | # show top-k accuracy 142 | for k in self.arg.show_topk: 143 | self.show_topk(k) 144 | 145 | @staticmethod 146 | def get_parser(add_help=False): 147 | 148 | # parameter priority: command line > config > default 149 | parent_parser = Processor.get_parser(add_help=False) 150 | parser = argparse.ArgumentParser( 151 | add_help=add_help, 152 | parents=[parent_parser], 153 | description='Spatial Temporal Graph Convolution Network') 154 | 155 | # region arguments yapf: disable 156 | # evaluation 157 | parser.add_argument('--show_topk', type=int, default=[1, 5], nargs='+', help='which Top K accuracy will be shown') 158 | # optim 159 | parser.add_argument('--base_lr', type=float, default=0.01, help='initial learning rate') 160 | parser.add_argument('--step', type=int, default=[], nargs='+', help='the epoch where optimizer reduce the learning rate') 161 | parser.add_argument('--optimizer', default='SGD', help='type of optimizer') 162 | parser.add_argument('--nesterov', type=str2bool, default=True, help='use nesterov or not') 163 | parser.add_argument('--weight_decay', type=float, default=0.0001, help='weight decay for optimizer') 164 | # endregion yapf: enable 165 | 166 | return parser 167 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | argparse 3 | numpy 4 | h5py 5 | opencv-python 6 | imageio 7 | scikit-video 8 | torch 9 | torchvision -------------------------------------------------------------------------------- /resource/NTU-RGB-D/samples_with_missing_skeletons.txt: -------------------------------------------------------------------------------- 1 | S001C002P005R002A008 2 | S001C002P006R001A008 3 | S001C003P002R001A055 4 | S001C003P002R002A012 5 | S001C003P005R002A004 6 | S001C003P005R002A005 7 | S001C003P005R002A006 8 | S001C003P006R002A008 9 | S002C002P011R002A030 10 | 
S002C003P008R001A020 11 | S002C003P010R002A010 12 | S002C003P011R002A007 13 | S002C003P011R002A011 14 | S002C003P014R002A007 15 | S003C001P019R001A055 16 | S003C002P002R002A055 17 | S003C002P018R002A055 18 | S003C003P002R001A055 19 | S003C003P016R001A055 20 | S003C003P018R002A024 21 | S004C002P003R001A013 22 | S004C002P008R001A009 23 | S004C002P020R001A003 24 | S004C002P020R001A004 25 | S004C002P020R001A012 26 | S004C002P020R001A020 27 | S004C002P020R001A021 28 | S004C002P020R001A036 29 | S005C002P004R001A001 30 | S005C002P004R001A003 31 | S005C002P010R001A016 32 | S005C002P010R001A017 33 | S005C002P010R001A048 34 | S005C002P010R001A049 35 | S005C002P016R001A009 36 | S005C002P016R001A010 37 | S005C002P018R001A003 38 | S005C002P018R001A028 39 | S005C002P018R001A029 40 | S005C003P016R002A009 41 | S005C003P018R002A013 42 | S005C003P021R002A057 43 | S006C001P001R002A055 44 | S006C002P007R001A005 45 | S006C002P007R001A006 46 | S006C002P016R001A043 47 | S006C002P016R001A051 48 | S006C002P016R001A052 49 | S006C002P022R001A012 50 | S006C002P023R001A020 51 | S006C002P023R001A021 52 | S006C002P023R001A022 53 | S006C002P023R001A023 54 | S006C002P024R001A018 55 | S006C002P024R001A019 56 | S006C003P001R002A013 57 | S006C003P007R002A009 58 | S006C003P007R002A010 59 | S006C003P007R002A025 60 | S006C003P016R001A060 61 | S006C003P017R001A055 62 | S006C003P017R002A013 63 | S006C003P017R002A014 64 | S006C003P017R002A015 65 | S006C003P022R002A013 66 | S007C001P018R002A050 67 | S007C001P025R002A051 68 | S007C001P028R001A050 69 | S007C001P028R001A051 70 | S007C001P028R001A052 71 | S007C002P008R002A008 72 | S007C002P015R002A055 73 | S007C002P026R001A008 74 | S007C002P026R001A009 75 | S007C002P026R001A010 76 | S007C002P026R001A011 77 | S007C002P026R001A012 78 | S007C002P026R001A050 79 | S007C002P027R001A011 80 | S007C002P027R001A013 81 | S007C002P028R002A055 82 | S007C003P007R001A002 83 | S007C003P007R001A004 84 | S007C003P019R001A060 85 | S007C003P027R002A001 86 | S007C003P027R002A002 87 | S007C003P027R002A003 88 | S007C003P027R002A004 89 | S007C003P027R002A005 90 | S007C003P027R002A006 91 | S007C003P027R002A007 92 | S007C003P027R002A008 93 | S007C003P027R002A009 94 | S007C003P027R002A010 95 | S007C003P027R002A011 96 | S007C003P027R002A012 97 | S007C003P027R002A013 98 | S008C002P001R001A009 99 | S008C002P001R001A010 100 | S008C002P001R001A014 101 | S008C002P001R001A015 102 | S008C002P001R001A016 103 | S008C002P001R001A018 104 | S008C002P001R001A019 105 | S008C002P008R002A059 106 | S008C002P025R001A060 107 | S008C002P029R001A004 108 | S008C002P031R001A005 109 | S008C002P031R001A006 110 | S008C002P032R001A018 111 | S008C002P034R001A018 112 | S008C002P034R001A019 113 | S008C002P035R001A059 114 | S008C002P035R002A002 115 | S008C002P035R002A005 116 | S008C003P007R001A009 117 | S008C003P007R001A016 118 | S008C003P007R001A017 119 | S008C003P007R001A018 120 | S008C003P007R001A019 121 | S008C003P007R001A020 122 | S008C003P007R001A021 123 | S008C003P007R001A022 124 | S008C003P007R001A023 125 | S008C003P007R001A025 126 | S008C003P007R001A026 127 | S008C003P007R001A028 128 | S008C003P007R001A029 129 | S008C003P007R002A003 130 | S008C003P008R002A050 131 | S008C003P025R002A002 132 | S008C003P025R002A011 133 | S008C003P025R002A012 134 | S008C003P025R002A016 135 | S008C003P025R002A020 136 | S008C003P025R002A022 137 | S008C003P025R002A023 138 | S008C003P025R002A030 139 | S008C003P025R002A031 140 | S008C003P025R002A032 141 | S008C003P025R002A033 142 | S008C003P025R002A049 143 | S008C003P025R002A060 144 | S008C003P031R001A001 145 
| S008C003P031R002A004 146 | S008C003P031R002A014 147 | S008C003P031R002A015 148 | S008C003P031R002A016 149 | S008C003P031R002A017 150 | S008C003P032R002A013 151 | S008C003P033R002A001 152 | S008C003P033R002A011 153 | S008C003P033R002A012 154 | S008C003P034R002A001 155 | S008C003P034R002A012 156 | S008C003P034R002A022 157 | S008C003P034R002A023 158 | S008C003P034R002A024 159 | S008C003P034R002A044 160 | S008C003P034R002A045 161 | S008C003P035R002A016 162 | S008C003P035R002A017 163 | S008C003P035R002A018 164 | S008C003P035R002A019 165 | S008C003P035R002A020 166 | S008C003P035R002A021 167 | S009C002P007R001A001 168 | S009C002P007R001A003 169 | S009C002P007R001A014 170 | S009C002P008R001A014 171 | S009C002P015R002A050 172 | S009C002P016R001A002 173 | S009C002P017R001A028 174 | S009C002P017R001A029 175 | S009C003P017R002A030 176 | S009C003P025R002A054 177 | S010C001P007R002A020 178 | S010C002P016R002A055 179 | S010C002P017R001A005 180 | S010C002P017R001A018 181 | S010C002P017R001A019 182 | S010C002P019R001A001 183 | S010C002P025R001A012 184 | S010C003P007R002A043 185 | S010C003P008R002A003 186 | S010C003P016R001A055 187 | S010C003P017R002A055 188 | S011C001P002R001A008 189 | S011C001P018R002A050 190 | S011C002P008R002A059 191 | S011C002P016R002A055 192 | S011C002P017R001A020 193 | S011C002P017R001A021 194 | S011C002P018R002A055 195 | S011C002P027R001A009 196 | S011C002P027R001A010 197 | S011C002P027R001A037 198 | S011C003P001R001A055 199 | S011C003P002R001A055 200 | S011C003P008R002A012 201 | S011C003P015R001A055 202 | S011C003P016R001A055 203 | S011C003P019R001A055 204 | S011C003P025R001A055 205 | S011C003P028R002A055 206 | S012C001P019R001A060 207 | S012C001P019R002A060 208 | S012C002P015R001A055 209 | S012C002P017R002A012 210 | S012C002P025R001A060 211 | S012C003P008R001A057 212 | S012C003P015R001A055 213 | S012C003P015R002A055 214 | S012C003P016R001A055 215 | S012C003P017R002A055 216 | S012C003P018R001A055 217 | S012C003P018R001A057 218 | S012C003P019R002A011 219 | S012C003P019R002A012 220 | S012C003P025R001A055 221 | S012C003P027R001A055 222 | S012C003P027R002A009 223 | S012C003P028R001A035 224 | S012C003P028R002A055 225 | S013C001P015R001A054 226 | S013C001P017R002A054 227 | S013C001P018R001A016 228 | S013C001P028R001A040 229 | S013C002P015R001A054 230 | S013C002P017R002A054 231 | S013C002P028R001A040 232 | S013C003P008R002A059 233 | S013C003P015R001A054 234 | S013C003P017R002A054 235 | S013C003P025R002A022 236 | S013C003P027R001A055 237 | S013C003P028R001A040 238 | S014C001P027R002A040 239 | S014C002P015R001A003 240 | S014C002P019R001A029 241 | S014C002P025R002A059 242 | S014C002P027R002A040 243 | S014C002P039R001A050 244 | S014C003P007R002A059 245 | S014C003P015R002A055 246 | S014C003P019R002A055 247 | S014C003P025R001A048 248 | S014C003P027R002A040 249 | S015C001P008R002A040 250 | S015C001P016R001A055 251 | S015C001P017R001A055 252 | S015C001P017R002A055 253 | S015C002P007R001A059 254 | S015C002P008R001A003 255 | S015C002P008R001A004 256 | S015C002P008R002A040 257 | S015C002P015R001A002 258 | S015C002P016R001A001 259 | S015C002P016R002A055 260 | S015C003P008R002A007 261 | S015C003P008R002A011 262 | S015C003P008R002A012 263 | S015C003P008R002A028 264 | S015C003P008R002A040 265 | S015C003P025R002A012 266 | S015C003P025R002A017 267 | S015C003P025R002A020 268 | S015C003P025R002A021 269 | S015C003P025R002A030 270 | S015C003P025R002A033 271 | S015C003P025R002A034 272 | S015C003P025R002A036 273 | S015C003P025R002A037 274 | S015C003P025R002A044 275 | S016C001P019R002A040 276 | 
S016C001P025R001A011 277 | S016C001P025R001A012 278 | S016C001P025R001A060 279 | S016C001P040R001A055 280 | S016C001P040R002A055 281 | S016C002P008R001A011 282 | S016C002P019R002A040 283 | S016C002P025R002A012 284 | S016C003P008R001A011 285 | S016C003P008R002A002 286 | S016C003P008R002A003 287 | S016C003P008R002A004 288 | S016C003P008R002A006 289 | S016C003P008R002A009 290 | S016C003P019R002A040 291 | S016C003P039R002A016 292 | S017C001P016R002A031 293 | S017C002P007R001A013 294 | S017C002P008R001A009 295 | S017C002P015R001A042 296 | S017C002P016R002A031 297 | S017C002P016R002A055 298 | S017C003P007R002A013 299 | S017C003P008R001A059 300 | S017C003P016R002A031 301 | S017C003P017R001A055 302 | S017C003P020R001A059 303 | -------------------------------------------------------------------------------- /resource/demo_asset/attention+prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/attention+prediction.png -------------------------------------------------------------------------------- /resource/demo_asset/attention+rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/attention+rgb.png -------------------------------------------------------------------------------- /resource/demo_asset/original_video.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/original_video.png -------------------------------------------------------------------------------- /resource/demo_asset/pose_estimation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/pose_estimation.png -------------------------------------------------------------------------------- /resource/info/S001C001P001R001A044_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S001C001P001R001A044_w.gif -------------------------------------------------------------------------------- /resource/info/S001C001P001R001A051_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S001C001P001R001A051_w.gif -------------------------------------------------------------------------------- /resource/info/S002C001P010R001A017_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S002C001P010R001A017_w.gif -------------------------------------------------------------------------------- /resource/info/S003C001P008R001A002_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S003C001P008R001A002_w.gif -------------------------------------------------------------------------------- /resource/info/S003C001P008R001A008_w.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S003C001P008R001A008_w.gif -------------------------------------------------------------------------------- /resource/info/clean_and_jerk_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/clean_and_jerk_w.gif -------------------------------------------------------------------------------- /resource/info/demo_video.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/demo_video.gif -------------------------------------------------------------------------------- /resource/info/hammer_throw_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/hammer_throw_w.gif -------------------------------------------------------------------------------- /resource/info/juggling_balls_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/juggling_balls_w.gif -------------------------------------------------------------------------------- /resource/info/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/pipeline.png -------------------------------------------------------------------------------- /resource/info/pull_ups_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/pull_ups_w.gif -------------------------------------------------------------------------------- /resource/info/tai_chi_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/tai_chi_w.gif -------------------------------------------------------------------------------- /resource/kinetics-motion.txt: -------------------------------------------------------------------------------- 1 | belly dancing 2 | punching bag 3 | capoeira 4 | squat 5 | windsurfing 6 | skipping rope 7 | swimming backstroke 8 | hammer throw 9 | throwing discus 10 | tobogganing 11 | hopscotch 12 | hitting baseball 13 | roller skating 14 | arm wrestling 15 | snatch weight lifting 16 | tai chi 17 | riding mechanical bull 18 | salsa dancing 19 | hurling (sport) 20 | lunge 21 | skateboarding 22 | country line dancing 23 | juggling balls 24 | surfing crowd 25 | deadlifting 26 | clean and jerk 27 | crawling baby 28 | push up 29 | front raises 30 | pull ups -------------------------------------------------------------------------------- /resource/kinetics_skeleton/label_name.txt: -------------------------------------------------------------------------------- 1 | abseiling 2 | air drumming 3 | answering questions 4 | applauding 5 | applying cream 6 | archery 7 | arm wrestling 8 | arranging flowers 9 | assembling computer 10 | auctioning 11 
| baby waking up 12 | baking cookies 13 | balloon blowing 14 | bandaging 15 | barbequing 16 | bartending 17 | beatboxing 18 | bee keeping 19 | belly dancing 20 | bench pressing 21 | bending back 22 | bending metal 23 | biking through snow 24 | blasting sand 25 | blowing glass 26 | blowing leaves 27 | blowing nose 28 | blowing out candles 29 | bobsledding 30 | bookbinding 31 | bouncing on trampoline 32 | bowling 33 | braiding hair 34 | breading or breadcrumbing 35 | breakdancing 36 | brush painting 37 | brushing hair 38 | brushing teeth 39 | building cabinet 40 | building shed 41 | bungee jumping 42 | busking 43 | canoeing or kayaking 44 | capoeira 45 | carrying baby 46 | cartwheeling 47 | carving pumpkin 48 | catching fish 49 | catching or throwing baseball 50 | catching or throwing frisbee 51 | catching or throwing softball 52 | celebrating 53 | changing oil 54 | changing wheel 55 | checking tires 56 | cheerleading 57 | chopping wood 58 | clapping 59 | clay pottery making 60 | clean and jerk 61 | cleaning floor 62 | cleaning gutters 63 | cleaning pool 64 | cleaning shoes 65 | cleaning toilet 66 | cleaning windows 67 | climbing a rope 68 | climbing ladder 69 | climbing tree 70 | contact juggling 71 | cooking chicken 72 | cooking egg 73 | cooking on campfire 74 | cooking sausages 75 | counting money 76 | country line dancing 77 | cracking neck 78 | crawling baby 79 | crossing river 80 | crying 81 | curling hair 82 | cutting nails 83 | cutting pineapple 84 | cutting watermelon 85 | dancing ballet 86 | dancing charleston 87 | dancing gangnam style 88 | dancing macarena 89 | deadlifting 90 | decorating the christmas tree 91 | digging 92 | dining 93 | disc golfing 94 | diving cliff 95 | dodgeball 96 | doing aerobics 97 | doing laundry 98 | doing nails 99 | drawing 100 | dribbling basketball 101 | drinking 102 | drinking beer 103 | drinking shots 104 | driving car 105 | driving tractor 106 | drop kicking 107 | drumming fingers 108 | dunking basketball 109 | dying hair 110 | eating burger 111 | eating cake 112 | eating carrots 113 | eating chips 114 | eating doughnuts 115 | eating hotdog 116 | eating ice cream 117 | eating spaghetti 118 | eating watermelon 119 | egg hunting 120 | exercising arm 121 | exercising with an exercise ball 122 | extinguishing fire 123 | faceplanting 124 | feeding birds 125 | feeding fish 126 | feeding goats 127 | filling eyebrows 128 | finger snapping 129 | fixing hair 130 | flipping pancake 131 | flying kite 132 | folding clothes 133 | folding napkins 134 | folding paper 135 | front raises 136 | frying vegetables 137 | garbage collecting 138 | gargling 139 | getting a haircut 140 | getting a tattoo 141 | giving or receiving award 142 | golf chipping 143 | golf driving 144 | golf putting 145 | grinding meat 146 | grooming dog 147 | grooming horse 148 | gymnastics tumbling 149 | hammer throw 150 | headbanging 151 | headbutting 152 | high jump 153 | high kick 154 | hitting baseball 155 | hockey stop 156 | holding snake 157 | hopscotch 158 | hoverboarding 159 | hugging 160 | hula hooping 161 | hurdling 162 | hurling (sport) 163 | ice climbing 164 | ice fishing 165 | ice skating 166 | ironing 167 | javelin throw 168 | jetskiing 169 | jogging 170 | juggling balls 171 | juggling fire 172 | juggling soccer ball 173 | jumping into pool 174 | jumpstyle dancing 175 | kicking field goal 176 | kicking soccer ball 177 | kissing 178 | kitesurfing 179 | knitting 180 | krumping 181 | laughing 182 | laying bricks 183 | long jump 184 | lunge 185 | making a cake 186 | making a sandwich 
187 | making bed 188 | making jewelry 189 | making pizza 190 | making snowman 191 | making sushi 192 | making tea 193 | marching 194 | massaging back 195 | massaging feet 196 | massaging legs 197 | massaging person's head 198 | milking cow 199 | mopping floor 200 | motorcycling 201 | moving furniture 202 | mowing lawn 203 | news anchoring 204 | opening bottle 205 | opening present 206 | paragliding 207 | parasailing 208 | parkour 209 | passing American football (in game) 210 | passing American football (not in game) 211 | peeling apples 212 | peeling potatoes 213 | petting animal (not cat) 214 | petting cat 215 | picking fruit 216 | planting trees 217 | plastering 218 | playing accordion 219 | playing badminton 220 | playing bagpipes 221 | playing basketball 222 | playing bass guitar 223 | playing cards 224 | playing cello 225 | playing chess 226 | playing clarinet 227 | playing controller 228 | playing cricket 229 | playing cymbals 230 | playing didgeridoo 231 | playing drums 232 | playing flute 233 | playing guitar 234 | playing harmonica 235 | playing harp 236 | playing ice hockey 237 | playing keyboard 238 | playing kickball 239 | playing monopoly 240 | playing organ 241 | playing paintball 242 | playing piano 243 | playing poker 244 | playing recorder 245 | playing saxophone 246 | playing squash or racquetball 247 | playing tennis 248 | playing trombone 249 | playing trumpet 250 | playing ukulele 251 | playing violin 252 | playing volleyball 253 | playing xylophone 254 | pole vault 255 | presenting weather forecast 256 | pull ups 257 | pumping fist 258 | pumping gas 259 | punching bag 260 | punching person (boxing) 261 | push up 262 | pushing car 263 | pushing cart 264 | pushing wheelchair 265 | reading book 266 | reading newspaper 267 | recording music 268 | riding a bike 269 | riding camel 270 | riding elephant 271 | riding mechanical bull 272 | riding mountain bike 273 | riding mule 274 | riding or walking with horse 275 | riding scooter 276 | riding unicycle 277 | ripping paper 278 | robot dancing 279 | rock climbing 280 | rock scissors paper 281 | roller skating 282 | running on treadmill 283 | sailing 284 | salsa dancing 285 | sanding floor 286 | scrambling eggs 287 | scuba diving 288 | setting table 289 | shaking hands 290 | shaking head 291 | sharpening knives 292 | sharpening pencil 293 | shaving head 294 | shaving legs 295 | shearing sheep 296 | shining shoes 297 | shooting basketball 298 | shooting goal (soccer) 299 | shot put 300 | shoveling snow 301 | shredding paper 302 | shuffling cards 303 | side kick 304 | sign language interpreting 305 | singing 306 | situp 307 | skateboarding 308 | ski jumping 309 | skiing (not slalom or crosscountry) 310 | skiing crosscountry 311 | skiing slalom 312 | skipping rope 313 | skydiving 314 | slacklining 315 | slapping 316 | sled dog racing 317 | smoking 318 | smoking hookah 319 | snatch weight lifting 320 | sneezing 321 | sniffing 322 | snorkeling 323 | snowboarding 324 | snowkiting 325 | snowmobiling 326 | somersaulting 327 | spinning poi 328 | spray painting 329 | spraying 330 | springboard diving 331 | squat 332 | sticking tongue out 333 | stomping grapes 334 | stretching arm 335 | stretching leg 336 | strumming guitar 337 | surfing crowd 338 | surfing water 339 | sweeping floor 340 | swimming backstroke 341 | swimming breast stroke 342 | swimming butterfly stroke 343 | swing dancing 344 | swinging legs 345 | swinging on something 346 | sword fighting 347 | tai chi 348 | taking a shower 349 | tango dancing 350 | tap dancing 351 | 
tapping guitar 352 | tapping pen 353 | tasting beer 354 | tasting food 355 | testifying 356 | texting 357 | throwing axe 358 | throwing ball 359 | throwing discus 360 | tickling 361 | tobogganing 362 | tossing coin 363 | tossing salad 364 | training dog 365 | trapezing 366 | trimming or shaving beard 367 | trimming trees 368 | triple jump 369 | tying bow tie 370 | tying knot (not on a tie) 371 | tying tie 372 | unboxing 373 | unloading truck 374 | using computer 375 | using remote controller (not gaming) 376 | using segway 377 | vault 378 | waiting in line 379 | walking the dog 380 | washing dishes 381 | washing feet 382 | washing hair 383 | washing hands 384 | water skiing 385 | water sliding 386 | watering plants 387 | waxing back 388 | waxing chest 389 | waxing eyebrows 390 | waxing legs 391 | weaving basket 392 | welding 393 | whistling 394 | windsurfing 395 | wrapping present 396 | wrestling 397 | writing 398 | yawning 399 | yoga 400 | zumba 401 | -------------------------------------------------------------------------------- /resource/media/clean_and_jerk.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/clean_and_jerk.mp4 -------------------------------------------------------------------------------- /resource/media/skateboarding.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/skateboarding.mp4 -------------------------------------------------------------------------------- /resource/media/ta_chi.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/ta_chi.mp4 -------------------------------------------------------------------------------- /resource/reference_model.txt: -------------------------------------------------------------------------------- 1 | st_gcn.kinetics.pt 2 | st_gcn.ntu-xview.pt 3 | st_gcn.ntu-xsub.pt -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import utils -------------------------------------------------------------------------------- /tools/get_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | out_path="models/" 4 | link="https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmskeleton/models/st-gcn/" 5 | reference_model="resource/reference_model.txt" 6 | 7 | mkdir -p $out_path 8 | while IFS='' read -r line || [[ -n "$line" ]]; do 9 | wget -c $link$line -O $out_path$line 10 | done < "$reference_model" 11 | 12 | 13 | # Downloading models for pose estimation 14 | OPENPOSE_URL="http://posefs1.perception.cs.cmu.edu/OpenPose/models/" 15 | POSE_FOLDER="pose/" 16 | 17 | # Body (COCO) 18 | COCO_FOLDER=${POSE_FOLDER}"coco/" 19 | OUT_FOLDER="models/${COCO_FOLDER}" 20 | COCO_MODEL=${COCO_FOLDER}"pose_iter_440000.caffemodel" 21 | wget -c ${OPENPOSE_URL}${COCO_MODEL} -P ${OUT_FOLDER} -------------------------------------------------------------------------------- /tools/kinetics_gendata.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import pickle 5 | import argparse 6 | 7 | import numpy as np 8 | from numpy.lib.format import open_memmap 9 | 10 | sys.path.append( 11 | os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) 12 | from feeder.feeder_kinetics import Feeder_kinetics 13 | 14 | toolbar_width = 30 15 | 16 | def print_toolbar(rate, annotation=''): 17 | # setup toolbar 18 | sys.stdout.write("{}[".format(annotation)) 19 | for i in range(toolbar_width): 20 | if i * 1.0 / toolbar_width > rate: 21 | sys.stdout.write(' ') 22 | else: 23 | sys.stdout.write('-') 24 | sys.stdout.flush() 25 | sys.stdout.write(']\r') 26 | 27 | 28 | def end_toolbar(): 29 | sys.stdout.write("\n") 30 | 31 | 32 | def gendata( 33 | data_path, 34 | label_path, 35 | data_out_path, 36 | label_out_path, 37 | num_person_in=5, #observe the first 5 persons 38 | num_person_out=2, #then choose 2 persons with the highest score 39 | max_frame=300): 40 | 41 | feeder = Feeder_kinetics( 42 | data_path=data_path, 43 | label_path=label_path, 44 | num_person_in=num_person_in, 45 | num_person_out=num_person_out, 46 | window_size=max_frame) 47 | 48 | sample_name = feeder.sample_name 49 | sample_label = [] 50 | 51 | fp = open_memmap( 52 | data_out_path, 53 | dtype='float32', 54 | mode='w+', 55 | shape=(len(sample_name), 3, max_frame, 18, num_person_out)) 56 | 57 | for i, s in enumerate(sample_name): 58 | data, label = feeder[i] 59 | print_toolbar(i * 1.0 / len(sample_name), 60 | '({:>5}/{:<5}) Processing data: '.format( 61 | i + 1, len(sample_name))) 62 | fp[i, :, 0:data.shape[1], :, :] = data 63 | sample_label.append(label) 64 | 65 | with open(label_out_path, 'wb') as f: 66 | pickle.dump((sample_name, list(sample_label)), f) 67 | 68 | 69 | if __name__ == '__main__': 70 | parser = argparse.ArgumentParser( 71 | description='Kinetics-skeleton Data Converter.') 72 | parser.add_argument( 73 | '--data_path', default='data/Kinetics/kinetics-skeleton') 74 | parser.add_argument( 75 | '--out_folder', default='data/Kinetics/kinetics-skeleton') 76 | arg = parser.parse_args() 77 | 78 | part = ['train', 'val'] 79 | for p in part: 80 | data_path = '{}/kinetics_{}'.format(arg.data_path, p) 81 | label_path = '{}/kinetics_{}_label.json'.format(arg.data_path, p) 82 | data_out_path = '{}/{}_data.npy'.format(arg.out_folder, p) 83 | label_out_path = '{}/{}_label.pkl'.format(arg.out_folder, p) 84 | 85 | if not os.path.exists(arg.out_folder): 86 | 
os.makedirs(arg.out_folder) 87 | gendata(data_path, label_path, data_out_path, label_out_path) -------------------------------------------------------------------------------- /tools/ntu_gendata.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pickle 4 | 5 | import argparse 6 | import numpy as np 7 | from numpy.lib.format import open_memmap 8 | 9 | from utils.ntu_read_skeleton import read_xyz 10 | 11 | training_subjects = [ 12 | 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38 13 | ] 14 | training_cameras = [2, 3] 15 | max_body = 2 16 | num_joint = 25 17 | max_frame = 300 18 | toolbar_width = 30 19 | 20 | def print_toolbar(rate, annotation=''): 21 | # setup toolbar 22 | sys.stdout.write("{}[".format(annotation)) 23 | for i in range(toolbar_width): 24 | if i * 1.0 / toolbar_width > rate: 25 | sys.stdout.write(' ') 26 | else: 27 | sys.stdout.write('-') 28 | sys.stdout.flush() 29 | sys.stdout.write(']\r') 30 | 31 | 32 | def end_toolbar(): 33 | sys.stdout.write("\n") 34 | 35 | 36 | def gendata(data_path, 37 | out_path, 38 | ignored_sample_path=None, 39 | benchmark='xview', 40 | part='eval'): 41 | if ignored_sample_path != None: 42 | with open(ignored_sample_path, 'r') as f: 43 | ignored_samples = [ 44 | line.strip() + '.skeleton' for line in f.readlines() 45 | ] 46 | else: 47 | ignored_samples = [] 48 | sample_name = [] 49 | sample_label = [] 50 | for filename in os.listdir(data_path): 51 | if filename in ignored_samples: 52 | continue 53 | action_class = int( 54 | filename[filename.find('A') + 1:filename.find('A') + 4]) 55 | subject_id = int( 56 | filename[filename.find('P') + 1:filename.find('P') + 4]) 57 | camera_id = int( 58 | filename[filename.find('C') + 1:filename.find('C') + 4]) 59 | 60 | if benchmark == 'xview': 61 | istraining = (camera_id in training_cameras) 62 | elif benchmark == 'xsub': 63 | istraining = (subject_id in training_subjects) 64 | else: 65 | raise ValueError() 66 | 67 | if part == 'train': 68 | issample = istraining 69 | elif part == 'val': 70 | issample = not (istraining) 71 | else: 72 | raise ValueError() 73 | 74 | if issample: 75 | sample_name.append(filename) 76 | sample_label.append(action_class - 1) 77 | 78 | with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f: 79 | pickle.dump((sample_name, list(sample_label)), f) 80 | # np.save('{}/{}_label.npy'.format(out_path, part), sample_label) 81 | 82 | fp = open_memmap( 83 | '{}/{}_data.npy'.format(out_path, part), 84 | dtype='float32', 85 | mode='w+', 86 | shape=(len(sample_label), 3, max_frame, num_joint, max_body)) 87 | 88 | for i, s in enumerate(sample_name): 89 | print_toolbar(i * 1.0 / len(sample_label), 90 | '({:>5}/{:<5}) Processing {:>5}-{:<5} data: '.format( 91 | i + 1, len(sample_name), benchmark, part)) 92 | data = read_xyz( 93 | os.path.join(data_path, s), max_body=max_body, num_joint=num_joint) 94 | fp[i, :, 0:data.shape[1], :, :] = data 95 | end_toolbar() 96 | 97 | 98 | if __name__ == '__main__': 99 | 100 | parser = argparse.ArgumentParser(description='NTU-RGB-D Data Converter.') 101 | parser.add_argument( 102 | '--data_path', default='data/NTU-RGB-D/nturgb+d_skeletons') 103 | parser.add_argument( 104 | '--ignored_sample_path', 105 | default='resource/NTU-RGB-D/samples_with_missing_skeletons.txt') 106 | parser.add_argument('--out_folder', default='data/NTU-RGB-D') 107 | 108 | benchmark = ['xsub', 'xview'] 109 | part = ['train', 'val'] 110 | arg = parser.parse_args() 111 | 112 | for b in 
benchmark: 113 | for p in part: 114 | out_path = os.path.join(arg.out_folder, b) 115 | if not os.path.exists(out_path): 116 | os.makedirs(out_path) 117 | gendata( 118 | arg.data_path, 119 | out_path, 120 | arg.ignored_sample_path, 121 | benchmark=b, 122 | part=p) 123 | -------------------------------------------------------------------------------- /tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import video 2 | from . import openpose 3 | from . import visualization -------------------------------------------------------------------------------- /tools/utils/ntu_read_skeleton.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | 5 | def read_skeleton(file): 6 | with open(file, 'r') as f: 7 | skeleton_sequence = {} 8 | skeleton_sequence['numFrame'] = int(f.readline()) 9 | skeleton_sequence['frameInfo'] = [] 10 | for t in range(skeleton_sequence['numFrame']): 11 | frame_info = {} 12 | frame_info['numBody'] = int(f.readline()) 13 | frame_info['bodyInfo'] = [] 14 | for m in range(frame_info['numBody']): 15 | body_info = {} 16 | body_info_key = [ 17 | 'bodyID', 'clipedEdges', 'handLeftConfidence', 18 | 'handLeftState', 'handRightConfidence', 'handRightState', 19 | 'isResticted', 'leanX', 'leanY', 'trackingState' 20 | ] 21 | body_info = { 22 | k: float(v) 23 | for k, v in zip(body_info_key, f.readline().split()) 24 | } 25 | body_info['numJoint'] = int(f.readline()) 26 | body_info['jointInfo'] = [] 27 | for v in range(body_info['numJoint']): 28 | joint_info_key = [ 29 | 'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY', 30 | 'orientationW', 'orientationX', 'orientationY', 31 | 'orientationZ', 'trackingState' 32 | ] 33 | joint_info = { 34 | k: float(v) 35 | for k, v in zip(joint_info_key, f.readline().split()) 36 | } 37 | body_info['jointInfo'].append(joint_info) 38 | frame_info['bodyInfo'].append(body_info) 39 | skeleton_sequence['frameInfo'].append(frame_info) 40 | return skeleton_sequence 41 | 42 | 43 | def read_xyz(file, max_body=2, num_joint=25): 44 | seq_info = read_skeleton(file) 45 | data = np.zeros((3, seq_info['numFrame'], num_joint, max_body)) 46 | for n, f in enumerate(seq_info['frameInfo']): 47 | for m, b in enumerate(f['bodyInfo']): 48 | for j, v in enumerate(b['jointInfo']): 49 | if m < max_body and j < num_joint: 50 | data[:, n, j, m] = [v['x'], v['y'], v['z']] 51 | else: 52 | pass 53 | return data -------------------------------------------------------------------------------- /tools/utils/openpose.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import json 3 | 4 | def json_pack(snippets_dir, video_name, frame_width, frame_height, label='unknown', label_index=-1): 5 | sequence_info = [] 6 | p = Path(snippets_dir) 7 | for path in p.glob(video_name+'*.json'): 8 | json_path = str(path) 9 | print(path) 10 | frame_id = int(path.stem.split('_')[-2]) 11 | frame_data = {'frame_index': frame_id} 12 | data = json.load(open(json_path)) 13 | skeletons = [] 14 | for person in data['people']: 15 | score, coordinates = [], [] 16 | skeleton = {} 17 | keypoints = person['pose_keypoints_2d'] 18 | for i in range(0, len(keypoints), 3): 19 | coordinates += [keypoints[i]/frame_width, keypoints[i + 1]/frame_height] 20 | score += [keypoints[i + 2]] 21 | skeleton['pose'] = coordinates 22 | skeleton['score'] = score 23 | skeletons += [skeleton] 24 | frame_data['skeleton'] = skeletons 25 | 
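# each frame contributes {'frame_index': i, 'skeleton': [{'pose': [x1, y1, x2, y2, ...], 'score': [s1, s2, ...]}, ...]}
# (coordinates are already normalized by frame_width / frame_height above); this is the same
# per-frame layout that video_info_parsing in tools/utils/video.py reads back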
sequence_info += [frame_data] 26 | 27 | video_info = dict() 28 | video_info['data'] = sequence_info 29 | video_info['label'] = label 30 | video_info['label_index'] = label_index 31 | 32 | return video_info -------------------------------------------------------------------------------- /tools/utils/video.py: -------------------------------------------------------------------------------- 1 | import skvideo.io 2 | import numpy as np 3 | import cv2 4 | 5 | def video_info_parsing(video_info, num_person_in=5, num_person_out=2): 6 | data_numpy = np.zeros((3, len(video_info['data']), 18, num_person_in)) 7 | for frame_info in video_info['data']: 8 | frame_index = frame_info['frame_index'] 9 | for m, skeleton_info in enumerate(frame_info["skeleton"]): 10 | if m >= num_person_in: 11 | break 12 | pose = skeleton_info['pose'] 13 | score = skeleton_info['score'] 14 | data_numpy[0, frame_index, :, m] = pose[0::2] 15 | data_numpy[1, frame_index, :, m] = pose[1::2] 16 | data_numpy[2, frame_index, :, m] = score 17 | 18 | # centralization 19 | data_numpy[0:2] = data_numpy[0:2] - 0.5 20 | data_numpy[0][data_numpy[2] == 0] = 0 21 | data_numpy[1][data_numpy[2] == 0] = 0 22 | 23 | sort_index = (-data_numpy[2, :, :, :].sum(axis=1)).argsort(axis=1) 24 | for t, s in enumerate(sort_index): 25 | data_numpy[:, t, :, :] = data_numpy[:, t, :, s].transpose((1, 2, 26 | 0)) 27 | data_numpy = data_numpy[:, :, :, :num_person_out] 28 | 29 | label = video_info['label_index'] 30 | return data_numpy, label 31 | 32 | def get_video_frames(video_path): 33 | vread = skvideo.io.vread(video_path) 34 | video = [] 35 | for frame in vread: 36 | video.append(frame) 37 | return video 38 | 39 | def video_play(video_path, fps=30): 40 | cap = cv2.VideoCapture(video_path) 41 | 42 | while(cap.isOpened()): 43 | ret, frame = cap.read() 44 | if not ret: break # stop at the end of the stream 45 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 46 | 47 | cv2.imshow('frame',gray) 48 | if cv2.waitKey(int(1000 / fps)) & 0xFF == ord('q'): 49 | break 50 | 51 | cap.release() 52 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /tools/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def stgcn_visualize(pose, 6 | edge, 7 | feature, 8 | video, 9 | label=None, 10 | label_sequence=None, 11 | height=1080, 12 | fps=None): 13 | 14 | _, T, V, M = pose.shape 15 | T = len(video) 16 | pos_track = [None] * M 17 | for t in range(T): 18 | frame = video[t] 19 | 20 | # image resize 21 | H, W, c = frame.shape 22 | frame = cv2.resize(frame, (height * W // H // 2, height//2)) 23 | H, W, c = frame.shape 24 | scale_factor = 2 * height / 1080 25 | 26 | # draw skeleton 27 | skeleton = frame * 0 28 | text = frame * 0 29 | for m in range(M): 30 | 31 | score = pose[2, t, :, m].max() 32 | if score < 0.3: 33 | continue 34 | 35 | for i, j in edge: 36 | xi = pose[0, t, i, m] 37 | yi = pose[1, t, i, m] 38 | xj = pose[0, t, j, m] 39 | yj = pose[1, t, j, m] 40 | if xi + yi == 0 or xj + yj == 0: 41 | continue 42 | else: 43 | xi = int((xi + 0.5) * W) 44 | yi = int((yi + 0.5) * H) 45 | xj = int((xj + 0.5) * W) 46 | yj = int((yj + 0.5) * H) 47 | cv2.line(skeleton, (xi, yi), (xj, yj), (255, 255, 255), 48 | int(np.ceil(2 * scale_factor))) 49 | 50 | if label_sequence is not None: 51 | body_label = label_sequence[t // 4][m] 52 | else: 53 | body_label = '' 54 | x_nose = int((pose[0, t, 0, m] + 0.5) * W) 55 | y_nose = int((pose[1, t, 0, m] + 0.5) * H) 56 | x_neck = int((pose[0, t, 1, m] + 0.5) 
* W) 57 | y_neck = int((pose[1, t, 1, m] + 0.5) * H) 58 | 59 | half_head = int(((x_neck - x_nose)**2 + (y_neck - y_nose)**2)**0.5) 60 | pos = (x_nose + half_head, y_nose - half_head) 61 | if pos_track[m] is None: 62 | pos_track[m] = pos 63 | else: 64 | new_x = int(pos_track[m][0] + (pos[0] - pos_track[m][0]) * 0.2) 65 | new_y = int(pos_track[m][1] + (pos[1] - pos_track[m][1]) * 0.2) 66 | pos_track[m] = (new_x, new_y) 67 | cv2.putText(text, body_label, pos_track[m], 68 | cv2.FONT_HERSHEY_TRIPLEX, 0.5 * scale_factor, 69 | (255, 255, 255)) 70 | 71 | # generate mask 72 | mask = frame * 0 73 | feature = np.abs(feature) 74 | feature = feature / feature.mean() 75 | for m in range(M): 76 | score = pose[2, t, :, m].max() 77 | if score < 0.3: 78 | continue 79 | 80 | f = feature[t // 4, :, m]**5 81 | if f.mean() != 0: 82 | f = f / f.mean() 83 | for v in range(V): 84 | x = pose[0, t, v, m] 85 | y = pose[1, t, v, m] 86 | if x + y == 0: 87 | continue 88 | else: 89 | x = int((x + 0.5) * W) 90 | y = int((y + 0.5) * H) 91 | cv2.circle(mask, (x, y), 0, (255, 255, 255), 92 | int(np.ceil(f[v]**0.5 * 8 * scale_factor))) 93 | blurred_mask = cv2.blur(mask, (12, 12)) 94 | 95 | skeleton_result = blurred_mask.astype(float) * 0.75 96 | skeleton_result += skeleton.astype(float) * 0.25 97 | skeleton_result += text.astype(float) 98 | skeleton_result[skeleton_result > 255] = 255 99 | skeleton_result = skeleton_result.astype(np.uint8) 100 | 101 | rgb_result = blurred_mask.astype(float) * 0.75 102 | rgb_result += frame.astype(float) * 0.5 103 | rgb_result += skeleton.astype(float) * 0.25 104 | rgb_result[rgb_result > 255] = 255 105 | rgb_result = rgb_result.astype(np.uint8) 106 | 107 | put_text(skeleton, 'inputs of st-gcn', (0.15, 0.5)) 108 | 109 | text_1 = cv2.imread( 110 | './resource/demo_asset/original_video.png', cv2.IMREAD_UNCHANGED) 111 | text_2 = cv2.imread( 112 | './resource/demo_asset/pose_estimation.png', cv2.IMREAD_UNCHANGED) 113 | text_3 = cv2.imread( 114 | './resource/demo_asset/attention+prediction.png', cv2.IMREAD_UNCHANGED) 115 | text_4 = cv2.imread( 116 | './resource/demo_asset/attention+rgb.png', cv2.IMREAD_UNCHANGED) 117 | 118 | try: 119 | blend(frame, text_1) 120 | blend(skeleton, text_2) 121 | blend(skeleton_result, text_3) 122 | blend(rgb_result, text_4) 123 | except: # the demo_asset overlays are optional; skip them if they cannot be loaded or blended 124 | pass 125 | 126 | if label is not None: 127 | label_name = 'voting result: ' + label 128 | put_text(skeleton_result, label_name, (0.1, 0.5)) 129 | 130 | if fps is not None: 131 | put_text(skeleton, 'fps:{:.2f}'.format(fps), (0.9, 0.5)) 132 | 133 | img0 = np.concatenate((frame, skeleton), axis=1) 134 | img1 = np.concatenate((skeleton_result, rgb_result), axis=1) 135 | img = np.concatenate((img0, img1), axis=0) 136 | 137 | yield img 138 | 139 | 140 | def put_text(img, text, position, scale_factor=1): 141 | t_w, t_h = cv2.getTextSize( 142 | text, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, thickness=1)[0] 143 | H, W, _ = img.shape 144 | position = (int(W * position[1] - t_w * 0.5), 145 | int(H * position[0] - t_h * 0.5)) 146 | params = (position, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, 147 | (255, 255, 255)) 148 | cv2.putText(img, text, *params) 149 | 150 | 151 | def blend(background, foreground, dx=20, dy=10, fy=0.7): 152 | 153 | foreground = cv2.resize(foreground, (0, 0), fx=fy, fy=fy) 154 | h, w = foreground.shape[:2] 155 | b, g, r, a = cv2.split(foreground) 156 | mask = np.dstack((a, a, a)) 157 | rgb = np.dstack((b, g, r)) 158 | 159 | canvas = background[-h-dy:-dy, dx:w+dx] 160 | imask = mask > 0 161 | canvas[imask] = rgb[imask] 162 | 
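# ----------------------------------------------------------------------
# Minimal usage sketch (hypothetical caller, not taken from the demo
# processors). It assumes `pose` is a (3, T, V, M) array of normalized
# joint coordinates and confidence scores, `edge` is a list of joint-index
# pairs such as the skeleton edges built in net/utils/graph.py, `feature`
# is a per-joint intensity map indexed as feature[t // 4, :, m], and
# `video` is a list of BGR frames, e.g. from get_video_frames in
# tools/utils/video.py:
#
#   for img in stgcn_visualize(pose, edge, feature, video, label='tai chi'):
#       cv2.imshow('ST-GCN demo', img)
#       if cv2.waitKey(1) & 0xFF == ord('q'):
#           break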
-------------------------------------------------------------------------------- /torchlight/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='torchlight', 5 | version='1.0', 6 | description='A mini framework for pytorch', 7 | packages=find_packages(), 8 | install_requires=[]) 9 | -------------------------------------------------------------------------------- /torchlight/torchlight/__init__.py: -------------------------------------------------------------------------------- 1 | from .io import IO 2 | from .io import str2bool 3 | from .io import str2dict 4 | from .io import DictAction 5 | from .io import import_class 6 | from .gpu import visible_gpu 7 | from .gpu import occupy_gpu 8 | from .gpu import ngpu 9 | -------------------------------------------------------------------------------- /torchlight/torchlight/gpu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | 5 | def visible_gpu(gpus): 6 | """ 7 | set visible gpu. 8 | 9 | can be a single id, or a list 10 | 11 | return a list of new gpus ids 12 | """ 13 | gpus = [gpus] if isinstance(gpus, int) else list(gpus) 14 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(list(map(str, gpus))) 15 | return list(range(len(gpus))) 16 | 17 | 18 | def ngpu(gpus): 19 | """ 20 | count how many gpus used. 21 | """ 22 | gpus = [gpus] if isinstance(gpus, int) else list(gpus) 23 | return len(gpus) 24 | 25 | 26 | def occupy_gpu(gpus=None): 27 | """ 28 | make program appear on nvidia-smi. 29 | """ 30 | if gpus is None: 31 | torch.zeros(1).cuda() 32 | else: 33 | gpus = [gpus] if isinstance(gpus, int) else list(gpus) 34 | for g in gpus: 35 | torch.zeros(1).cuda(g) 36 | -------------------------------------------------------------------------------- /torchlight/torchlight/io.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | import sys 5 | import traceback 6 | import time 7 | import warnings 8 | import pickle 9 | from collections import OrderedDict 10 | import yaml 11 | import numpy as np 12 | # torch 13 | import torch 14 | import torch.nn as nn 15 | import torch.optim as optim 16 | from torch.autograd import Variable 17 | 18 | with warnings.catch_warnings(): 19 | warnings.filterwarnings("ignore",category=FutureWarning) 20 | import h5py 21 | 22 | class IO(): 23 | def __init__(self, work_dir, save_log=True, print_log=True): 24 | self.work_dir = work_dir 25 | self.save_log = save_log 26 | self.print_to_screen = print_log 27 | self.cur_time = time.time() 28 | self.split_timer = {} 29 | self.pavi_logger = None 30 | self.session_file = None 31 | self.model_text = '' 32 | 33 | # PaviLogger is removed in this version 34 | def log(self, *args, **kwargs): 35 | pass 36 | # try: 37 | # if self.pavi_logger is None: 38 | # from torchpack.runner.hooks import PaviLogger 39 | # url = 'http://pavi.parrotsdnn.org/log' 40 | # with open(self.session_file, 'r') as f: 41 | # info = dict( 42 | # session_file=self.session_file, 43 | # session_text=f.read(), 44 | # model_text=self.model_text) 45 | # self.pavi_logger = PaviLogger(url) 46 | # self.pavi_logger.connect(self.work_dir, info=info) 47 | # self.pavi_logger.log(*args, **kwargs) 48 | # except: #pylint: disable=W0702 49 | # pass 50 | 51 | def load_model(self, model, **model_args): 52 | Model = import_class(model) 53 | model = Model(**model_args) 54 | 
self.model_text += '\n\n' + str(model) 55 | return model 56 | 57 | def load_weights(self, model, weights_path, ignore_weights=None): 58 | if ignore_weights is None: 59 | ignore_weights = [] 60 | if isinstance(ignore_weights, str): 61 | ignore_weights = [ignore_weights] 62 | 63 | self.print_log('Load weights from {}.'.format(weights_path)) 64 | weights = torch.load(weights_path) 65 | weights = OrderedDict([[k.split('module.')[-1], 66 | v.cpu()] for k, v in weights.items()]) 67 | 68 | # filter weights 69 | for i in ignore_weights: 70 | ignore_name = list() 71 | for w in weights: 72 | if w.find(i) == 0: 73 | ignore_name.append(w) 74 | for n in ignore_name: 75 | weights.pop(n) 76 | self.print_log('Filter [{}] remove weights [{}].'.format(i,n)) 77 | 78 | for w in weights: 79 | self.print_log('Load weights [{}].'.format(w)) 80 | 81 | try: 82 | model.load_state_dict(weights) 83 | except (KeyError, RuntimeError): 84 | state = model.state_dict() 85 | diff = list(set(state.keys()).difference(set(weights.keys()))) 86 | for d in diff: 87 | self.print_log('Can not find weights [{}].'.format(d)) 88 | state.update(weights) 89 | model.load_state_dict(state) 90 | return model 91 | 92 | def save_pkl(self, result, filename): 93 | with open('{}/{}'.format(self.work_dir, filename), 'wb') as f: 94 | pickle.dump(result, f) 95 | 96 | def save_h5(self, result, filename): 97 | with h5py.File('{}/{}'.format(self.work_dir, filename), 'w') as f: 98 | for k in result.keys(): 99 | f[k] = result[k] 100 | 101 | def save_model(self, model, name): 102 | model_path = '{}/{}'.format(self.work_dir, name) 103 | state_dict = model.state_dict() 104 | weights = OrderedDict([[''.join(k.split('module.')), 105 | v.cpu()] for k, v in state_dict.items()]) 106 | torch.save(weights, model_path) 107 | self.print_log('The model has been saved as {}.'.format(model_path)) 108 | 109 | def save_arg(self, arg): 110 | 111 | self.session_file = '{}/config.yaml'.format(self.work_dir) 112 | 113 | # save arg 114 | arg_dict = vars(arg) 115 | if not os.path.exists(self.work_dir): 116 | os.makedirs(self.work_dir) 117 | with open(self.session_file, 'w') as f: 118 | f.write('# command line: {}\n\n'.format(' '.join(sys.argv))) 119 | yaml.dump(arg_dict, f, default_flow_style=False, indent=4) 120 | 121 | def print_log(self, str, print_time=True): 122 | if print_time: 123 | # localtime = time.asctime(time.localtime(time.time())) 124 | str = time.strftime("[%m.%d.%y|%X] ", time.localtime()) + str 125 | 126 | if self.print_to_screen: 127 | print(str) 128 | if self.save_log: 129 | with open('{}/log.txt'.format(self.work_dir), 'a') as f: 130 | print(str, file=f) 131 | 132 | def init_timer(self, *name): 133 | self.record_time() 134 | self.split_timer = {k: 0.0000001 for k in name} 135 | 136 | def check_time(self, name): 137 | self.split_timer[name] += self.split_time() 138 | 139 | def record_time(self): 140 | self.cur_time = time.time() 141 | return self.cur_time 142 | 143 | def split_time(self): 144 | split_time = time.time() - self.cur_time 145 | self.record_time() 146 | return split_time 147 | 148 | def print_timer(self): 149 | proportion = { 150 | k: '{:02d}%'.format(int(round(v * 100 / sum(self.split_timer.values())))) 151 | for k, v in self.split_timer.items() 152 | } 153 | self.print_log('Time consumption:') 154 | for k in proportion: 155 | self.print_log( 156 | '\t[{}][{}]: {:.4f}'.format(k, proportion[k],self.split_timer[k]) 157 | ) 158 | 159 | 160 | def str2bool(v): 161 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 162 | return True 163 | elif 
v.lower() in ('no', 'false', 'f', 'n', '0'): 164 | return False 165 | else: 166 | raise argparse.ArgumentTypeError('Boolean value expected.') 167 | 168 | 169 | def str2dict(v): 170 | return eval('dict({})'.format(v)) #pylint: disable=W0123 171 | 172 | 173 | def _import_class_0(name): 174 | components = name.split('.') 175 | mod = __import__(components[0]) 176 | for comp in components[1:]: 177 | mod = getattr(mod, comp) 178 | return mod 179 | 180 | 181 | def import_class(import_str): 182 | mod_str, _sep, class_str = import_str.rpartition('.') 183 | __import__(mod_str) 184 | try: 185 | return getattr(sys.modules[mod_str], class_str) 186 | except AttributeError: 187 | raise ImportError('Class %s cannot be found (%s)' % 188 | (class_str, 189 | traceback.format_exception(*sys.exc_info()))) 190 | 191 | 192 | class DictAction(argparse.Action): 193 | def __init__(self, option_strings, dest, nargs=None, **kwargs): 194 | if nargs is not None: 195 | raise ValueError("nargs not allowed") 196 | super(DictAction, self).__init__(option_strings, dest, **kwargs) 197 | 198 | def __call__(self, parser, namespace, values, option_string=None): 199 | input_dict = eval('dict({})'.format(values)) #pylint: disable=W0123 200 | output_dict = getattr(namespace, self.dest) 201 | for k in input_dict: 202 | output_dict[k] = input_dict[k] 203 | setattr(namespace, self.dest, output_dict) 204 | --------------------------------------------------------------------------------