├── .gitignore ├── ISSUE_TEMPLATE.md ├── LICENSE ├── OLD_README.md ├── README.md ├── config ├── st_gcn.twostream │ ├── ntu-xsub │ │ └── train.yaml │ └── ntu-xview │ │ └── train.yaml └── st_gcn │ ├── kinetics-skeleton │ ├── demo_offline.yaml │ ├── demo_old.yaml │ ├── demo_realtime.yaml │ ├── test.yaml │ └── train.yaml │ ├── ntu-xsub │ ├── test.yaml │ └── train.yaml │ └── ntu-xview │ ├── test.yaml │ └── train.yaml ├── feeder ├── __init__.py ├── feeder.py ├── feeder_kinetics.py └── tools.py ├── main.py ├── models └── pose │ └── coco │ └── pose_deploy_linevec.prototxt ├── net ├── __init__.py ├── st_gcn.py ├── st_gcn_twostream.py └── utils │ ├── __init__.py │ ├── graph.py │ └── tgcn.py ├── processor ├── __init__.py ├── demo_offline.py ├── demo_old.py ├── demo_realtime.py ├── io.py ├── processor.py └── recognition.py ├── requirements.txt ├── resource ├── NTU-RGB-D │ └── samples_with_missing_skeletons.txt ├── demo_asset │ ├── attention+prediction.png │ ├── attention+rgb.png │ ├── original_video.png │ └── pose_estimation.png ├── info │ ├── S001C001P001R001A044_w.gif │ ├── S001C001P001R001A051_w.gif │ ├── S002C001P010R001A017_w.gif │ ├── S003C001P008R001A002_w.gif │ ├── S003C001P008R001A008_w.gif │ ├── clean_and_jerk_w.gif │ ├── demo_video.gif │ ├── hammer_throw_w.gif │ ├── juggling_balls_w.gif │ ├── pipeline.png │ ├── pull_ups_w.gif │ └── tai_chi_w.gif ├── kinetics-motion.txt ├── kinetics_skeleton │ └── label_name.txt ├── media │ ├── clean_and_jerk.mp4 │ ├── skateboarding.mp4 │ └── ta_chi.mp4 └── reference_model.txt ├── tools ├── __init__.py ├── get_models.sh ├── kinetics_gendata.py ├── ntu_gendata.py └── utils │ ├── __init__.py │ ├── ntu_read_skeleton.py │ ├── openpose.py │ ├── video.py │ └── visualization.py └── torchlight ├── setup.py └── torchlight ├── __init__.py ├── gpu.py └── io.py /.gitignore: -------------------------------------------------------------------------------- 1 | #custom 2 | tmp* 3 | work_dir/* 4 | data 5 | config_v0/* 6 | backup/* 7 | .vscode 8 | model/* 9 | *.pt 10 | *.caffemodel 11 | cache/ 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | *.py[cod] 16 | *$py.class 17 | 18 | # C extensions 19 | *.so 20 | 21 | # Distribution / packaging 22 | .Python 23 | build/ 24 | develop-eggs/ 25 | dist/ 26 | downloads/ 27 | eggs/ 28 | .eggs/ 29 | lib/ 30 | lib64/ 31 | parts/ 32 | sdist/ 33 | var/ 34 | wheels/ 35 | *.egg-info/ 36 | .installed.cfg 37 | *.egg 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .coverage 53 | .coverage.* 54 | .cache 55 | nosetests.xml 56 | coverage.xml 57 | *.cover 58 | .hypothesis/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | 68 | # Flask stuff: 69 | instance/ 70 | .webassets-cache 71 | 72 | # Scrapy stuff: 73 | .scrapy 74 | 75 | # Sphinx documentation 76 | docs/_build/ 77 | 78 | # PyBuilder 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # celery beat schedule file 88 | celerybeat-schedule 89 | 90 | # SageMath parsed files 91 | *.sage.py 92 | 93 | # Environments 94 | .env 95 | .venv 96 | env/ 97 | venv/ 98 | ENV/ 99 | 100 | # Spyder project settings 101 | .spyderproject 102 | .spyproject 103 | 104 | # Rope project settings 105 | .ropeproject 106 | 107 | # mkdocs documentation 108 | /site 109 | 110 | # mypy 111 | .mypy_cache/ 112 | -------------------------------------------------------------------------------- /ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | ### Code version (Git Hash) and PyTorch version 2 | 3 | ### Dataset used 4 | 5 | ### Expected behavior 6 | 7 | ### Actual behavior 8 | 9 | ### Steps to reproduce the behavior 10 | 11 | ### Other comments 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Multimedia Laboratary, The Chinese University of Hong Kong 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 15 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 18 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 21 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 22 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | -------------------------------------------------------------------------------- /OLD_README.md: -------------------------------------------------------------------------------- 1 | # ST-GCN 2 | 3 | ## Introduction 4 | This repository holds the codebase, dataset and models for the paper: 5 | 6 | **Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition** Sijie Yan, Yuanjun Xiong and Dahua Lin, AAAI 2018. 
[[Arxiv Preprint]](https://arxiv.org/abs/1801.07455)

<div align="center">
    <img src="resource/info/pipeline.png">
</div>

## Visualization of ST-GCN in Action
Our demo for skeleton-based action recognition:

<div align="center">
    <img src="resource/info/demo_video.gif">
</div>

ST-GCN is able to exploit local patterns and correlations from human skeletons.
The figures below show the neural response magnitude of each node in the last layer of our ST-GCN
(the corresponding animations are the `*_w.gif` files under `resource/info/`):

| Touch head | Sitting down | Take off a shoe | Eat meal/snack | Kick other person |
| :---: | :---: | :---: | :---: | :---: |
| Hammer throw | Clean and jerk | Pull ups | Tai chi | Juggling ball |
The first row of the above results is from the **NTU-RGB+D** dataset, and the second row is from **Kinetics-skeleton**.

## Prerequisites
- Python3 (>3.5)
- [PyTorch](http://pytorch.org/)
- [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose) **with** its [Python API](https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/doc/installation.md#python-api) (optional: for the demo only)
- Other Python libraries can be installed by `pip install -r requirements.txt`

### Installation
```shell
git clone https://github.com/yysijie/st-gcn.git; cd st-gcn
cd torchlight; python setup.py install; cd ..
```

### Get pretrained models
We provide the pretrained model weights of our **ST-GCN**. The weights can be downloaded by running the script
```
bash tools/get_models.sh
```

You can also obtain the models from [GoogleDrive](https://drive.google.com/drive/folders/1IYKoSrjeI3yYJ9bO0_z_eDo92i7ob_aF) or [BaiduYun](https://pan.baidu.com/s/1dwKG2TLvG-R1qeIiE4MjeA#list/path=%2FShare%2FAAAI18%2Fst-gcn%2Fmodels&parentPath=%2FShare), and manually put them into ```./models```.

## Demo

You can use the following commands to run the demo.

```shell
# with offline pose estimation
python main.py demo_offline [--video ${PATH_TO_VIDEO}] [--openpose ${PATH_TO_OPENPOSE}]

# with realtime pose estimation
python main.py demo [--video ${PATH_TO_VIDEO}] [--openpose ${PATH_TO_OPENPOSE}]
```

Optional arguments:

- `PATH_TO_OPENPOSE`: Required if the Openpose Python API is not in `PYTHONPATH`.
- `PATH_TO_VIDEO`: Filename of the input video.

## Data Preparation

We experimented on two skeleton-based action recognition datasets: **Kinetics-skeleton** and **NTU RGB+D**.
Before training and testing, for fast data loading, the datasets should be converted to the proper file structure.
You can download the pre-processed data from
[GoogleDrive](https://drive.google.com/open?id=103NOL9YYZSW1hLoWmYnv5Fs8mK-Ij7qb)
and extract the files with
```
cd st-gcn
unzip <path to the downloaded data>
```
Otherwise, to process the raw data yourself, please refer to the guidance below.

#### Kinetics-skeleton
[Kinetics](https://deepmind.com/research/open-source/open-source-datasets/kinetics/) is a video-based dataset for action recognition that only provides raw video clips without skeleton data. To obtain the joint locations, we first resized all videos to a resolution of 340x256 and converted the frame rate to 30 fps. Then, we extracted skeletons from each frame with [Openpose](https://github.com/CMU-Perceptual-Computing-Lab/openpose). The extracted skeleton data, which we call **Kinetics-skeleton** (7.5GB), can be directly downloaded from [GoogleDrive](https://drive.google.com/open?id=1SPQ6FmFsjGg3f59uCWfdUWI-5HJM_YhZ) or [BaiduYun](https://pan.baidu.com/s/1dwKG2TLvG-R1qeIiE4MjeA#list/path=%2FShare%2FAAAI18%2Fkinetics-skeleton&parentPath=%2FShare).

After uncompressing, rebuild the database with this command:
```
python tools/kinetics_gendata.py --data_path <path to kinetics-skeleton>
```
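Before training, it is worth sanity-checking the regenerated database. Below is a minimal sketch, assuming the default data paths used in the provided configs; the expected `(N, C, T, V, M)` layout (samples, channels, frames, joints, persons) is the one documented in `feeder/feeder.py` and `feeder/feeder_kinetics.py`:

```python
# quick sanity check of the generated Kinetics-skeleton database
# (paths are the defaults from config/st_gcn/kinetics-skeleton/*.yaml)
import pickle
import numpy as np

data = np.load('./data/Kinetics/kinetics-skeleton/val_data.npy', mmap_mode='r')
with open('./data/Kinetics/kinetics-skeleton/val_label.pkl', 'rb') as f:
    sample_names, labels = pickle.load(f)

# expected: (N, 3, 300, 18, 2) -> N samples, 3 channels (x, y, score),
# 300 frames, 18 OpenPose joints, 2 persons
print(data.shape)
print(len(sample_names), len(labels))
```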
#### NTU RGB+D
NTU RGB+D can be downloaded from [their website](http://rose1.ntu.edu.sg/datasets/actionrecognition.asp).
Only the **3D skeletons** (5.8GB) modality is required for our experiments. After downloading, build the database for training or evaluation with:
```
python tools/ntu_gendata.py --data_path <path to the 3D skeletons data>
```
where `<path to the 3D skeletons data>` points to the 3D skeletons modality of the NTU RGB+D dataset you downloaded.


## Testing Pretrained Models

To evaluate the ST-GCN model pretrained on **Kinetics-skeleton**, run
```
python main.py recognition -c config/st_gcn/kinetics-skeleton/test.yaml
```
For **cross-view** evaluation on **NTU RGB+D**, run
```
python main.py recognition -c config/st_gcn/ntu-xview/test.yaml
```
For **cross-subject** evaluation on **NTU RGB+D**, run
```
python main.py recognition -c config/st_gcn/ntu-xsub/test.yaml
```

To speed up evaluation with multi-GPU inference, or to reduce memory cost by changing the batch size, set ```--test_batch_size``` and ```--device``` like:
```
python main.py recognition -c <config file> --test_batch_size <batch size> --device <gpu0> <gpu1> ...
```

### Results
The expected **Top-1 accuracy** of the provided models is shown here:

| Model | Kinetics-skeleton (%) | NTU RGB+D Cross View (%) | NTU RGB+D Cross Subject (%) |
| :------ | :------: | :------: | :------: |
| Baseline[1] | 20.3 | 83.1 | 74.3 |
| **ST-GCN** (Ours) | **31.6** | **88.8** | **81.6** |

[1] Kim, T. S., and Reiter, A. 2017. Interpretable 3d human action analysis with temporal convolutional networks. In BNMW CVPRW.

## Training
To train a new ST-GCN model, run

```
python main.py recognition -c config/st_gcn/<dataset>/train.yaml [--work_dir <work folder>]
```
where ```<dataset>``` must be ```ntu-xsub```, ```ntu-xview``` or ```kinetics-skeleton```, depending on the dataset you want to use.
The training results, including **model weights**, configurations and logging files, will be saved under ```./work_dir``` by default, or under ```<work folder>``` if you appoint it.

You can modify training parameters such as ```work_dir```, ```batch_size```, ```step```, ```base_lr``` and ```device``` in the command line or in the configuration files. The order of priority is: command line > config file > default parameter. For more information, use ```main.py -h```.

Finally, a custom model can be evaluated with the same command as mentioned above:
```
python main.py recognition -c config/st_gcn/<dataset>/test.yaml --weights <path to model weights>
```

## Citation
Please cite the following paper if you use this repository in your research.
```
@inproceedings{stgcn2018aaai,
  title     = {Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition},
  author    = {Sijie Yan and Yuanjun Xiong and Dahua Lin},
  booktitle = {AAAI},
  year      = {2018},
}
```

## Contact
For any question, feel free to contact
```
Sijie Yan     : ys016@ie.cuhk.edu.hk
Yuanjun Xiong : bitxiong@gmail.com
```
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
## Reminder

ST-GCN has moved to [MMSkeleton](https://github.com/open-mmlab/mmskeleton),
and keeps developing as a flexible open-source toolbox for skeleton-based human understanding.
You are welcome to migrate to the new MMSkeleton.
Custom networks, data loaders and checkpoints of the old st-gcn are compatible with MMSkeleton.
If you want to use the old ST-GCN, please refer to [OLD_README.md](./OLD_README.md).

This codebase will soon cease to be maintained; it exists as a historical artifact to supplement our AAAI paper:

> **Spatial Temporal Graph Convolutional Networks for Skeleton-Based Action Recognition**, Sijie Yan, Yuanjun Xiong and Dahua Lin, AAAI 2018. [[Arxiv Preprint]](https://arxiv.org/abs/1801.07455)

For more recent works please check out MMSkeleton.
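The YAML files that follow are the configurations referenced by the commands above. They are parsed by the processors (the actual loading code lives in `processor/io.py`, which is not reproduced in this dump). The sketch below is only an illustration, assuming PyYAML and argparse, of the "command line > config file > default parameter" priority described in the Training section; the helper name `load_arg` and the particular options are made up for the example:

```python
import argparse
import yaml

def load_arg(argv=None):
    # illustrative sketch: defaults < values from the YAML config < command line
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config', default=None)
    parser.add_argument('--work_dir', default='./work_dir')
    parser.add_argument('--base_lr', type=float, default=0.01)
    parser.add_argument('--batch_size', type=int, default=256)
    parser.add_argument('--device', type=int, nargs='+', default=[0])

    arg = parser.parse_args(argv)              # pass 1: defaults + command line
    if arg.config is not None:
        with open(arg.config, 'r') as f:
            config = yaml.safe_load(f)         # e.g. config/st_gcn/ntu-xsub/train.yaml
        known = vars(arg)
        parser.set_defaults(**{k: v for k, v in config.items() if k in known})
        arg = parser.parse_args(argv)          # pass 2: config becomes the new default,
                                               # explicit command-line flags still win
    return arg
```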
14 | 15 | -------------------------------------------------------------------------------- /config/st_gcn.twostream/ntu-xsub/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xsub/ST_GCN_TWO_STREAM 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xsub/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xsub/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn_twostream.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 32 31 | test_batch_size: 32 32 | num_epoch: 80 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /config/st_gcn.twostream/ntu-xview/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xview/ST_GCN_TWO_STREAM 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xview/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xview/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xview/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn_twostream.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 32 31 | test_batch_size: 32 32 | num_epoch: 80 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/demo_offline.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | model_fps: 30 3 | 4 | # model 5 | model: net.st_gcn.Model 6 | model_args: 7 | in_channels: 3 8 | num_class: 400 9 | edge_importance_weighting: True 10 | graph_args: 11 | layout: 'openpose' 12 | strategy: 'spatial' 13 | 14 | # training 15 | device: [0] -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/demo_old.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | 3 | # model 4 | model: net.st_gcn.Model 5 | model_args: 6 | in_channels: 3 7 | num_class: 400 8 | edge_importance_weighting: True 9 | graph_args: 10 | layout: 'openpose' 11 | strategy: 'spatial' 12 | 13 | # training 14 | device: [0] -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/demo_realtime.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | model_fps: 30 3 | 4 | # model 5 | model: net.st_gcn.Model 6 | model_args: 7 | in_channels: 3 8 | num_class: 400 9 | edge_importance_weighting: True 10 | graph_args: 11 | layout: 'openpose' 12 | strategy: 'spatial' 13 | 14 | # training 15 | 
device: [0] -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/test.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.kinetics.pt 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | test_feeder_args: 6 | data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy 7 | label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl 8 | 9 | # model 10 | model: net.st_gcn.Model 11 | model_args: 12 | in_channels: 3 13 | num_class: 400 14 | edge_importance_weighting: True 15 | graph_args: 16 | layout: 'openpose' 17 | strategy: 'spatial' 18 | 19 | # test 20 | phase: test 21 | device: 0 22 | test_batch_size: 64 23 | -------------------------------------------------------------------------------- /config/st_gcn/kinetics-skeleton/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/kinetics_skeleton/ST_GCN 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | random_choose: True 7 | random_move: True 8 | window_size: 150 9 | data_path: ./data/Kinetics/kinetics-skeleton/train_data.npy 10 | label_path: ./data/Kinetics/kinetics-skeleton/train_label.pkl 11 | test_feeder_args: 12 | data_path: ./data/Kinetics/kinetics-skeleton/val_data.npy 13 | label_path: ./data/Kinetics/kinetics-skeleton/val_label.pkl 14 | 15 | # model 16 | model: net.st_gcn.Model 17 | model_args: 18 | in_channels: 3 19 | num_class: 400 20 | edge_importance_weighting: True 21 | graph_args: 22 | layout: 'openpose' 23 | strategy: 'spatial' 24 | 25 | # training 26 | device: [0,1,2,3] 27 | batch_size: 256 28 | test_batch_size: 256 29 | 30 | #optim 31 | base_lr: 0.1 32 | step: [20, 30, 40, 50] 33 | num_epoch: 50 34 | 35 | 36 | 37 | -------------------------------------------------------------------------------- /config/st_gcn/ntu-xsub/test.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.ntu-xsub.pt 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | test_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy 7 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl 8 | 9 | # model 10 | model: net.st_gcn.Model 11 | model_args: 12 | in_channels: 3 13 | num_class: 60 14 | dropout: 0.5 15 | edge_importance_weighting: True 16 | graph_args: 17 | layout: 'ntu-rgb+d' 18 | strategy: 'spatial' 19 | 20 | # test 21 | phase: test 22 | device: 0 23 | test_batch_size: 64 24 | 25 | -------------------------------------------------------------------------------- /config/st_gcn/ntu-xsub/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xsub/ST_GCN 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xsub/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xsub/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xsub/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xsub/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 64 31 | test_batch_size: 64 32 | num_epoch: 80 33 | 34 | 35 | 36 | 
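In the `train.yaml` files above, `base_lr: 0.1` together with `step: [10, 50]` describes a step-wise learning-rate schedule. The decay itself is applied in `processor/recognition.py` (not included in this dump); the snippet below is a sketch of the usual interpretation, dropping the rate by 10x once each listed epoch has been passed, and should be read as an illustration rather than the repository's exact code:

```python
import numpy as np

def step_lr(epoch, base_lr=0.1, steps=(10, 50), gamma=0.1):
    """Learning rate at a given epoch: base_lr decayed by `gamma`
    once for every boundary in `steps` that has already been reached."""
    return base_lr * gamma ** np.sum(epoch >= np.array(steps))

for epoch in (0, 9, 10, 49, 50, 79):
    print(epoch, step_lr(epoch))   # 0.1 before epoch 10, 0.01 before 50, 0.001 afterwards
```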
-------------------------------------------------------------------------------- /config/st_gcn/ntu-xview/test.yaml: -------------------------------------------------------------------------------- 1 | weights: ./models/st_gcn.ntu-xview.pt 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | test_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xview/val_data.npy 7 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl 8 | 9 | # model 10 | model: net.st_gcn.Model 11 | model_args: 12 | in_channels: 3 13 | num_class: 60 14 | dropout: 0.5 15 | edge_importance_weighting: True 16 | graph_args: 17 | layout: 'ntu-rgb+d' 18 | strategy: 'spatial' 19 | 20 | # test 21 | phase: test 22 | device: 0 23 | test_batch_size: 64 24 | 25 | -------------------------------------------------------------------------------- /config/st_gcn/ntu-xview/train.yaml: -------------------------------------------------------------------------------- 1 | work_dir: ./work_dir/recognition/ntu-xview/ST_GCN 2 | 3 | # feeder 4 | feeder: feeder.feeder.Feeder 5 | train_feeder_args: 6 | data_path: ./data/NTU-RGB-D/xview/train_data.npy 7 | label_path: ./data/NTU-RGB-D/xview/train_label.pkl 8 | test_feeder_args: 9 | data_path: ./data/NTU-RGB-D/xview/val_data.npy 10 | label_path: ./data/NTU-RGB-D/xview/val_label.pkl 11 | 12 | # model 13 | model: net.st_gcn.Model 14 | model_args: 15 | in_channels: 3 16 | num_class: 60 17 | dropout: 0.5 18 | edge_importance_weighting: True 19 | graph_args: 20 | layout: 'ntu-rgb+d' 21 | strategy: 'spatial' 22 | 23 | #optim 24 | weight_decay: 0.0001 25 | base_lr: 0.1 26 | step: [10, 50] 27 | 28 | # training 29 | device: [0,1,2,3] 30 | batch_size: 64 31 | test_batch_size: 64 32 | num_epoch: 80 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /feeder/__init__.py: -------------------------------------------------------------------------------- 1 | from . import tools -------------------------------------------------------------------------------- /feeder/feeder.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import os 3 | import sys 4 | import numpy as np 5 | import random 6 | import pickle 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torch.nn.functional as F 13 | from torchvision import datasets, transforms 14 | 15 | # visualization 16 | import time 17 | 18 | # operation 19 | from . 
import tools 20 | 21 | class Feeder(torch.utils.data.Dataset): 22 | """ Feeder for skeleton-based action recognition 23 | Arguments: 24 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 25 | label_path: the path to label 26 | random_choose: If true, randomly choose a portion of the input sequence 27 | random_shift: If true, randomly pad zeros at the begining or end of sequence 28 | window_size: The length of the output sequence 29 | normalization: If true, normalize input sequence 30 | debug: If true, only use the first 100 samples 31 | """ 32 | 33 | def __init__(self, 34 | data_path, 35 | label_path, 36 | random_choose=False, 37 | random_move=False, 38 | window_size=-1, 39 | debug=False, 40 | mmap=True): 41 | self.debug = debug 42 | self.data_path = data_path 43 | self.label_path = label_path 44 | self.random_choose = random_choose 45 | self.random_move = random_move 46 | self.window_size = window_size 47 | 48 | self.load_data(mmap) 49 | 50 | def load_data(self, mmap): 51 | # data: N C V T M 52 | 53 | # load label 54 | with open(self.label_path, 'rb') as f: 55 | self.sample_name, self.label = pickle.load(f) 56 | 57 | # load data 58 | if mmap: 59 | self.data = np.load(self.data_path, mmap_mode='r') 60 | else: 61 | self.data = np.load(self.data_path) 62 | 63 | if self.debug: 64 | self.label = self.label[0:100] 65 | self.data = self.data[0:100] 66 | self.sample_name = self.sample_name[0:100] 67 | 68 | self.N, self.C, self.T, self.V, self.M = self.data.shape 69 | 70 | def __len__(self): 71 | return len(self.label) 72 | 73 | def __getitem__(self, index): 74 | # get data 75 | data_numpy = np.array(self.data[index]) 76 | label = self.label[index] 77 | 78 | # processing 79 | if self.random_choose: 80 | data_numpy = tools.random_choose(data_numpy, self.window_size) 81 | elif self.window_size > 0: 82 | data_numpy = tools.auto_pading(data_numpy, self.window_size) 83 | if self.random_move: 84 | data_numpy = tools.random_move(data_numpy) 85 | 86 | return data_numpy, label -------------------------------------------------------------------------------- /feeder/feeder_kinetics.py: -------------------------------------------------------------------------------- 1 | # sys 2 | import os 3 | import sys 4 | import numpy as np 5 | import random 6 | import pickle 7 | import json 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | from torchvision import datasets, transforms 12 | 13 | # operation 14 | from . 
import tools 15 | 16 | 17 | class Feeder_kinetics(torch.utils.data.Dataset): 18 | """ Feeder for skeleton-based action recognition in kinetics-skeleton dataset 19 | Arguments: 20 | data_path: the path to '.npy' data, the shape of data should be (N, C, T, V, M) 21 | label_path: the path to label 22 | random_choose: If true, randomly choose a portion of the input sequence 23 | random_shift: If true, randomly pad zeros at the begining or end of sequence 24 | random_move: If true, perform randomly but continuously changed transformation to input sequence 25 | window_size: The length of the output sequence 26 | pose_matching: If ture, match the pose between two frames 27 | num_person_in: The number of people the feeder can observe in the input sequence 28 | num_person_out: The number of people the feeder in the output sequence 29 | debug: If true, only use the first 100 samples 30 | """ 31 | 32 | def __init__(self, 33 | data_path, 34 | label_path, 35 | ignore_empty_sample=True, 36 | random_choose=False, 37 | random_shift=False, 38 | random_move=False, 39 | window_size=-1, 40 | pose_matching=False, 41 | num_person_in=5, 42 | num_person_out=2, 43 | debug=False): 44 | self.debug = debug 45 | self.data_path = data_path 46 | self.label_path = label_path 47 | self.random_choose = random_choose 48 | self.random_shift = random_shift 49 | self.random_move = random_move 50 | self.window_size = window_size 51 | self.num_person_in = num_person_in 52 | self.num_person_out = num_person_out 53 | self.pose_matching = pose_matching 54 | self.ignore_empty_sample = ignore_empty_sample 55 | 56 | self.load_data() 57 | 58 | def load_data(self): 59 | # load file list 60 | self.sample_name = os.listdir(self.data_path) 61 | 62 | if self.debug: 63 | self.sample_name = self.sample_name[0:2] 64 | 65 | # load label 66 | label_path = self.label_path 67 | with open(label_path) as f: 68 | label_info = json.load(f) 69 | 70 | sample_id = [name.split('.')[0] for name in self.sample_name] 71 | self.label = np.array( 72 | [label_info[id]['label_index'] for id in sample_id]) 73 | has_skeleton = np.array( 74 | [label_info[id]['has_skeleton'] for id in sample_id]) 75 | 76 | # ignore the samples which does not has skeleton sequence 77 | if self.ignore_empty_sample: 78 | self.sample_name = [ 79 | s for h, s in zip(has_skeleton, self.sample_name) if h 80 | ] 81 | self.label = self.label[has_skeleton] 82 | 83 | # output data shape (N, C, T, V, M) 84 | self.N = len(self.sample_name) #sample 85 | self.C = 3 #channel 86 | self.T = 300 #frame 87 | self.V = 18 #joint 88 | self.M = self.num_person_out #person 89 | 90 | def __len__(self): 91 | return len(self.sample_name) 92 | 93 | def __iter__(self): 94 | return self 95 | 96 | def __getitem__(self, index): 97 | 98 | # output shape (C, T, V, M) 99 | # get data 100 | sample_name = self.sample_name[index] 101 | sample_path = os.path.join(self.data_path, sample_name) 102 | with open(sample_path, 'r') as f: 103 | video_info = json.load(f) 104 | 105 | # fill data_numpy 106 | data_numpy = np.zeros((self.C, self.T, self.V, self.num_person_in)) 107 | for frame_info in video_info['data']: 108 | frame_index = frame_info['frame_index'] 109 | for m, skeleton_info in enumerate(frame_info["skeleton"]): 110 | if m >= self.num_person_in: 111 | break 112 | pose = skeleton_info['pose'] 113 | score = skeleton_info['score'] 114 | data_numpy[0, frame_index, :, m] = pose[0::2] 115 | data_numpy[1, frame_index, :, m] = pose[1::2] 116 | data_numpy[2, frame_index, :, m] = score 117 | 118 | # centralization 119 | 
data_numpy[0:2] = data_numpy[0:2] - 0.5 120 | data_numpy[0][data_numpy[2] == 0] = 0 121 | data_numpy[1][data_numpy[2] == 0] = 0 122 | 123 | # get & check label index 124 | label = video_info['label_index'] 125 | assert (self.label[index] == label) 126 | 127 | # data augmentation 128 | if self.random_shift: 129 | data_numpy = tools.random_shift(data_numpy) 130 | if self.random_choose: 131 | data_numpy = tools.random_choose(data_numpy, self.window_size) 132 | elif self.window_size > 0: 133 | data_numpy = tools.auto_pading(data_numpy, self.window_size) 134 | if self.random_move: 135 | data_numpy = tools.random_move(data_numpy) 136 | 137 | # sort by score 138 | sort_index = (-data_numpy[2, :, :, :].sum(axis=1)).argsort(axis=1) 139 | for t, s in enumerate(sort_index): 140 | data_numpy[:, t, :, :] = data_numpy[:, t, :, s].transpose((1, 2, 141 | 0)) 142 | data_numpy = data_numpy[:, :, :, 0:self.num_person_out] 143 | 144 | # match poses between 2 frames 145 | if self.pose_matching: 146 | data_numpy = tools.openpose_match(data_numpy) 147 | 148 | return data_numpy, label 149 | 150 | def top_k(self, score, top_k): 151 | assert (all(self.label >= 0)) 152 | 153 | rank = score.argsort() 154 | hit_top_k = [l in rank[i, -top_k:] for i, l in enumerate(self.label)] 155 | return sum(hit_top_k) * 1.0 / len(hit_top_k) 156 | 157 | def top_k_by_category(self, score, top_k): 158 | assert (all(self.label >= 0)) 159 | return tools.top_k_by_category(self.label, score, top_k) 160 | 161 | def calculate_recall_precision(self, score): 162 | assert (all(self.label >= 0)) 163 | return tools.calculate_recall_precision(self.label, score) 164 | -------------------------------------------------------------------------------- /feeder/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | 5 | def downsample(data_numpy, step, random_sample=True): 6 | # input: C,T,V,M 7 | begin = np.random.randint(step) if random_sample else 0 8 | return data_numpy[:, begin::step, :, :] 9 | 10 | 11 | def temporal_slice(data_numpy, step): 12 | # input: C,T,V,M 13 | C, T, V, M = data_numpy.shape 14 | return data_numpy.reshape(C, T / step, step, V, M).transpose( 15 | (0, 1, 3, 2, 4)).reshape(C, T / step, V, step * M) 16 | 17 | 18 | def mean_subtractor(data_numpy, mean): 19 | # input: C,T,V,M 20 | # naive version 21 | if mean == 0: 22 | return 23 | C, T, V, M = data_numpy.shape 24 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0 25 | begin = valid_frame.argmax() 26 | end = len(valid_frame) - valid_frame[::-1].argmax() 27 | data_numpy[:, :end, :, :] = data_numpy[:, :end, :, :] - mean 28 | return data_numpy 29 | 30 | 31 | def auto_pading(data_numpy, size, random_pad=False): 32 | C, T, V, M = data_numpy.shape 33 | if T < size: 34 | begin = random.randint(0, size - T) if random_pad else 0 35 | data_numpy_paded = np.zeros((C, size, V, M)) 36 | data_numpy_paded[:, begin:begin + T, :, :] = data_numpy 37 | return data_numpy_paded 38 | else: 39 | return data_numpy 40 | 41 | 42 | def random_choose(data_numpy, size, auto_pad=True): 43 | # input: C,T,V,M 44 | C, T, V, M = data_numpy.shape 45 | if T == size: 46 | return data_numpy 47 | elif T < size: 48 | if auto_pad: 49 | return auto_pading(data_numpy, size, random_pad=True) 50 | else: 51 | return data_numpy 52 | else: 53 | begin = random.randint(0, T - size) 54 | return data_numpy[:, begin:begin + size, :, :] 55 | 56 | 57 | def random_move(data_numpy, 58 | angle_candidate=[-10., -5., 0., 5., 10.], 59 | 
scale_candidate=[0.9, 1.0, 1.1], 60 | transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2], 61 | move_time_candidate=[1]): 62 | # input: C,T,V,M 63 | C, T, V, M = data_numpy.shape 64 | move_time = random.choice(move_time_candidate) 65 | node = np.arange(0, T, T * 1.0 / move_time).round().astype(int) 66 | node = np.append(node, T) 67 | num_node = len(node) 68 | 69 | A = np.random.choice(angle_candidate, num_node) 70 | S = np.random.choice(scale_candidate, num_node) 71 | T_x = np.random.choice(transform_candidate, num_node) 72 | T_y = np.random.choice(transform_candidate, num_node) 73 | 74 | a = np.zeros(T) 75 | s = np.zeros(T) 76 | t_x = np.zeros(T) 77 | t_y = np.zeros(T) 78 | 79 | # linspace 80 | for i in range(num_node - 1): 81 | a[node[i]:node[i + 1]] = np.linspace( 82 | A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180 83 | s[node[i]:node[i + 1]] = np.linspace(S[i], S[i + 1], 84 | node[i + 1] - node[i]) 85 | t_x[node[i]:node[i + 1]] = np.linspace(T_x[i], T_x[i + 1], 86 | node[i + 1] - node[i]) 87 | t_y[node[i]:node[i + 1]] = np.linspace(T_y[i], T_y[i + 1], 88 | node[i + 1] - node[i]) 89 | 90 | theta = np.array([[np.cos(a) * s, -np.sin(a) * s], 91 | [np.sin(a) * s, np.cos(a) * s]]) 92 | 93 | # perform transformation 94 | for i_frame in range(T): 95 | xy = data_numpy[0:2, i_frame, :, :] 96 | new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1)) 97 | new_xy[0] += t_x[i_frame] 98 | new_xy[1] += t_y[i_frame] 99 | data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M) 100 | 101 | return data_numpy 102 | 103 | 104 | def random_shift(data_numpy): 105 | # input: C,T,V,M 106 | C, T, V, M = data_numpy.shape 107 | data_shift = np.zeros(data_numpy.shape) 108 | valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0 109 | begin = valid_frame.argmax() 110 | end = len(valid_frame) - valid_frame[::-1].argmax() 111 | 112 | size = end - begin 113 | bias = random.randint(0, T - size) 114 | data_shift[:, bias:bias + size, :, :] = data_numpy[:, begin:end, :, :] 115 | 116 | return data_shift 117 | 118 | 119 | def openpose_match(data_numpy): 120 | C, T, V, M = data_numpy.shape 121 | assert (C == 3) 122 | score = data_numpy[2, :, :, :].sum(axis=1) 123 | # the rank of body confidence in each frame (shape: T-1, M) 124 | rank = (-score[0:T - 1]).argsort(axis=1).reshape(T - 1, M) 125 | 126 | # data of frame 1 127 | xy1 = data_numpy[0:2, 0:T - 1, :, :].reshape(2, T - 1, V, M, 1) 128 | # data of frame 2 129 | xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M) 130 | # square of distance between frame 1&2 (shape: T-1, M, M) 131 | distance = ((xy2 - xy1)**2).sum(axis=2).sum(axis=0) 132 | 133 | # match pose 134 | forward_map = np.zeros((T, M), dtype=int) - 1 135 | forward_map[0] = range(M) 136 | for m in range(M): 137 | choose = (rank == m) 138 | forward = distance[choose].argmin(axis=1) 139 | for t in range(T - 1): 140 | distance[t, :, forward[t]] = np.inf 141 | forward_map[1:][choose] = forward 142 | assert (np.all(forward_map >= 0)) 143 | 144 | # string data 145 | for t in range(T - 1): 146 | forward_map[t + 1] = forward_map[t + 1][forward_map[t]] 147 | 148 | # generate data 149 | new_data_numpy = np.zeros(data_numpy.shape) 150 | for t in range(T): 151 | new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[ 152 | t]].transpose(1, 2, 0) 153 | data_numpy = new_data_numpy 154 | 155 | # score sort 156 | trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0) 157 | rank = (-trace_score).argsort() 158 | data_numpy = data_numpy[:, :, :, rank] 159 | 160 | return data_numpy 161 | 162 | 
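# --- Editorial sketch (not part of the original tools.py) -------------------
# The helpers above all operate on arrays laid out as (C, T, V, M):
# channels (x, y, confidence), frames, joints, persons.  Assuming a random
# stand-in sample, Feeder.__getitem__ chains them roughly like this:
if __name__ == '__main__':
    example = np.random.randn(3, 300, 18, 2)   # hypothetical (C, T, V, M) skeleton clip
    example = random_choose(example, 150)      # crop (or pad) to a 150-frame window
    example = random_move(example)             # smooth random rotation / scale / shift
    print(example.shape)                       # -> (3, 150, 18, 2)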
163 | def top_k_by_category(label, score, top_k): 164 | instance_num, class_num = score.shape 165 | rank = score.argsort() 166 | hit_top_k = [[] for i in range(class_num)] 167 | for i in range(instance_num): 168 | l = label[i] 169 | hit_top_k[l].append(l in rank[i, -top_k:]) 170 | 171 | accuracy_list = [] 172 | for hit_per_category in hit_top_k: 173 | if hit_per_category: 174 | accuracy_list.append(sum(hit_per_category) * 1.0 / len(hit_per_category)) 175 | else: 176 | accuracy_list.append(0.0) 177 | return accuracy_list 178 | 179 | 180 | def calculate_recall_precision(label, score): 181 | instance_num, class_num = score.shape 182 | rank = score.argsort() 183 | confusion_matrix = np.zeros([class_num, class_num]) 184 | 185 | for i in range(instance_num): 186 | true_l = label[i] 187 | pred_l = rank[i, -1] 188 | confusion_matrix[true_l][pred_l] += 1 189 | 190 | precision = [] 191 | recall = [] 192 | 193 | for i in range(class_num): 194 | true_p = confusion_matrix[i][i] 195 | false_n = sum(confusion_matrix[i, :]) - true_p 196 | false_p = sum(confusion_matrix[:, i]) - true_p 197 | precision.append(true_p * 1.0 / (true_p + false_p)) 198 | recall.append(true_p * 1.0 / (true_p + false_n)) 199 | 200 | return precision, recall -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import sys 4 | 5 | # torchlight 6 | import torchlight 7 | from torchlight import import_class 8 | 9 | if __name__ == '__main__': 10 | 11 | parser = argparse.ArgumentParser(description='Processor collection') 12 | 13 | # region register processor yapf: disable 14 | processors = dict() 15 | processors['recognition'] = import_class('processor.recognition.REC_Processor') 16 | processors['demo_old'] = import_class('processor.demo_old.Demo') 17 | processors['demo'] = import_class('processor.demo_realtime.DemoRealtime') 18 | processors['demo_offline'] = import_class('processor.demo_offline.DemoOffline') 19 | #endregion yapf: enable 20 | 21 | # add sub-parser 22 | subparsers = parser.add_subparsers(dest='processor') 23 | for k, p in processors.items(): 24 | subparsers.add_parser(k, parents=[p.get_parser()]) 25 | 26 | # read arguments 27 | arg = parser.parse_args() 28 | 29 | # start 30 | Processor = processors[arg.processor] 31 | p = Processor(sys.argv[2:]) 32 | 33 | p.start() 34 | -------------------------------------------------------------------------------- /models/pose/coco/pose_deploy_linevec.prototxt: -------------------------------------------------------------------------------- 1 | input: "image" 2 | input_dim: 1 3 | input_dim: 3 4 | input_dim: 1 # This value will be defined at runtime 5 | input_dim: 1 # This value will be defined at runtime 6 | layer { 7 | name: "conv1_1" 8 | type: "Convolution" 9 | bottom: "image" 10 | top: "conv1_1" 11 | param { 12 | lr_mult: 1.0 13 | decay_mult: 1 14 | } 15 | param { 16 | lr_mult: 2.0 17 | decay_mult: 0 18 | } 19 | convolution_param { 20 | num_output: 64 21 | pad: 1 22 | kernel_size: 3 23 | weight_filler { 24 | type: "gaussian" 25 | std: 0.01 26 | } 27 | bias_filler { 28 | type: "constant" 29 | } 30 | } 31 | } 32 | layer { 33 | name: "relu1_1" 34 | type: "ReLU" 35 | bottom: "conv1_1" 36 | top: "conv1_1" 37 | } 38 | layer { 39 | name: "conv1_2" 40 | type: "Convolution" 41 | bottom: "conv1_1" 42 | top: "conv1_2" 43 | param { 44 | lr_mult: 1.0 45 | decay_mult: 1 46 | } 47 | param { 48 | lr_mult: 2.0 49 | decay_mult: 0 50 | } 
51 | convolution_param { 52 | num_output: 64 53 | pad: 1 54 | kernel_size: 3 55 | weight_filler { 56 | type: "gaussian" 57 | std: 0.01 58 | } 59 | bias_filler { 60 | type: "constant" 61 | } 62 | } 63 | } 64 | layer { 65 | name: "relu1_2" 66 | type: "ReLU" 67 | bottom: "conv1_2" 68 | top: "conv1_2" 69 | } 70 | layer { 71 | name: "pool1_stage1" 72 | type: "Pooling" 73 | bottom: "conv1_2" 74 | top: "pool1_stage1" 75 | pooling_param { 76 | pool: MAX 77 | kernel_size: 2 78 | stride: 2 79 | } 80 | } 81 | layer { 82 | name: "conv2_1" 83 | type: "Convolution" 84 | bottom: "pool1_stage1" 85 | top: "conv2_1" 86 | param { 87 | lr_mult: 1.0 88 | decay_mult: 1 89 | } 90 | param { 91 | lr_mult: 2.0 92 | decay_mult: 0 93 | } 94 | convolution_param { 95 | num_output: 128 96 | pad: 1 97 | kernel_size: 3 98 | weight_filler { 99 | type: "gaussian" 100 | std: 0.01 101 | } 102 | bias_filler { 103 | type: "constant" 104 | } 105 | } 106 | } 107 | layer { 108 | name: "relu2_1" 109 | type: "ReLU" 110 | bottom: "conv2_1" 111 | top: "conv2_1" 112 | } 113 | layer { 114 | name: "conv2_2" 115 | type: "Convolution" 116 | bottom: "conv2_1" 117 | top: "conv2_2" 118 | param { 119 | lr_mult: 1.0 120 | decay_mult: 1 121 | } 122 | param { 123 | lr_mult: 2.0 124 | decay_mult: 0 125 | } 126 | convolution_param { 127 | num_output: 128 128 | pad: 1 129 | kernel_size: 3 130 | weight_filler { 131 | type: "gaussian" 132 | std: 0.01 133 | } 134 | bias_filler { 135 | type: "constant" 136 | } 137 | } 138 | } 139 | layer { 140 | name: "relu2_2" 141 | type: "ReLU" 142 | bottom: "conv2_2" 143 | top: "conv2_2" 144 | } 145 | layer { 146 | name: "pool2_stage1" 147 | type: "Pooling" 148 | bottom: "conv2_2" 149 | top: "pool2_stage1" 150 | pooling_param { 151 | pool: MAX 152 | kernel_size: 2 153 | stride: 2 154 | } 155 | } 156 | layer { 157 | name: "conv3_1" 158 | type: "Convolution" 159 | bottom: "pool2_stage1" 160 | top: "conv3_1" 161 | param { 162 | lr_mult: 1.0 163 | decay_mult: 1 164 | } 165 | param { 166 | lr_mult: 2.0 167 | decay_mult: 0 168 | } 169 | convolution_param { 170 | num_output: 256 171 | pad: 1 172 | kernel_size: 3 173 | weight_filler { 174 | type: "gaussian" 175 | std: 0.01 176 | } 177 | bias_filler { 178 | type: "constant" 179 | } 180 | } 181 | } 182 | layer { 183 | name: "relu3_1" 184 | type: "ReLU" 185 | bottom: "conv3_1" 186 | top: "conv3_1" 187 | } 188 | layer { 189 | name: "conv3_2" 190 | type: "Convolution" 191 | bottom: "conv3_1" 192 | top: "conv3_2" 193 | param { 194 | lr_mult: 1.0 195 | decay_mult: 1 196 | } 197 | param { 198 | lr_mult: 2.0 199 | decay_mult: 0 200 | } 201 | convolution_param { 202 | num_output: 256 203 | pad: 1 204 | kernel_size: 3 205 | weight_filler { 206 | type: "gaussian" 207 | std: 0.01 208 | } 209 | bias_filler { 210 | type: "constant" 211 | } 212 | } 213 | } 214 | layer { 215 | name: "relu3_2" 216 | type: "ReLU" 217 | bottom: "conv3_2" 218 | top: "conv3_2" 219 | } 220 | layer { 221 | name: "conv3_3" 222 | type: "Convolution" 223 | bottom: "conv3_2" 224 | top: "conv3_3" 225 | param { 226 | lr_mult: 1.0 227 | decay_mult: 1 228 | } 229 | param { 230 | lr_mult: 2.0 231 | decay_mult: 0 232 | } 233 | convolution_param { 234 | num_output: 256 235 | pad: 1 236 | kernel_size: 3 237 | weight_filler { 238 | type: "gaussian" 239 | std: 0.01 240 | } 241 | bias_filler { 242 | type: "constant" 243 | } 244 | } 245 | } 246 | layer { 247 | name: "relu3_3" 248 | type: "ReLU" 249 | bottom: "conv3_3" 250 | top: "conv3_3" 251 | } 252 | layer { 253 | name: "conv3_4" 254 | type: "Convolution" 255 | bottom: "conv3_3" 
256 | top: "conv3_4" 257 | param { 258 | lr_mult: 1.0 259 | decay_mult: 1 260 | } 261 | param { 262 | lr_mult: 2.0 263 | decay_mult: 0 264 | } 265 | convolution_param { 266 | num_output: 256 267 | pad: 1 268 | kernel_size: 3 269 | weight_filler { 270 | type: "gaussian" 271 | std: 0.01 272 | } 273 | bias_filler { 274 | type: "constant" 275 | } 276 | } 277 | } 278 | layer { 279 | name: "relu3_4" 280 | type: "ReLU" 281 | bottom: "conv3_4" 282 | top: "conv3_4" 283 | } 284 | layer { 285 | name: "pool3_stage1" 286 | type: "Pooling" 287 | bottom: "conv3_4" 288 | top: "pool3_stage1" 289 | pooling_param { 290 | pool: MAX 291 | kernel_size: 2 292 | stride: 2 293 | } 294 | } 295 | layer { 296 | name: "conv4_1" 297 | type: "Convolution" 298 | bottom: "pool3_stage1" 299 | top: "conv4_1" 300 | param { 301 | lr_mult: 1.0 302 | decay_mult: 1 303 | } 304 | param { 305 | lr_mult: 2.0 306 | decay_mult: 0 307 | } 308 | convolution_param { 309 | num_output: 512 310 | pad: 1 311 | kernel_size: 3 312 | weight_filler { 313 | type: "gaussian" 314 | std: 0.01 315 | } 316 | bias_filler { 317 | type: "constant" 318 | } 319 | } 320 | } 321 | layer { 322 | name: "relu4_1" 323 | type: "ReLU" 324 | bottom: "conv4_1" 325 | top: "conv4_1" 326 | } 327 | layer { 328 | name: "conv4_2" 329 | type: "Convolution" 330 | bottom: "conv4_1" 331 | top: "conv4_2" 332 | param { 333 | lr_mult: 1.0 334 | decay_mult: 1 335 | } 336 | param { 337 | lr_mult: 2.0 338 | decay_mult: 0 339 | } 340 | convolution_param { 341 | num_output: 512 342 | pad: 1 343 | kernel_size: 3 344 | weight_filler { 345 | type: "gaussian" 346 | std: 0.01 347 | } 348 | bias_filler { 349 | type: "constant" 350 | } 351 | } 352 | } 353 | layer { 354 | name: "relu4_2" 355 | type: "ReLU" 356 | bottom: "conv4_2" 357 | top: "conv4_2" 358 | } 359 | layer { 360 | name: "conv4_3_CPM" 361 | type: "Convolution" 362 | bottom: "conv4_2" 363 | top: "conv4_3_CPM" 364 | param { 365 | lr_mult: 1.0 366 | decay_mult: 1 367 | } 368 | param { 369 | lr_mult: 2.0 370 | decay_mult: 0 371 | } 372 | convolution_param { 373 | num_output: 256 374 | pad: 1 375 | kernel_size: 3 376 | weight_filler { 377 | type: "gaussian" 378 | std: 0.01 379 | } 380 | bias_filler { 381 | type: "constant" 382 | } 383 | } 384 | } 385 | layer { 386 | name: "relu4_3_CPM" 387 | type: "ReLU" 388 | bottom: "conv4_3_CPM" 389 | top: "conv4_3_CPM" 390 | } 391 | layer { 392 | name: "conv4_4_CPM" 393 | type: "Convolution" 394 | bottom: "conv4_3_CPM" 395 | top: "conv4_4_CPM" 396 | param { 397 | lr_mult: 1.0 398 | decay_mult: 1 399 | } 400 | param { 401 | lr_mult: 2.0 402 | decay_mult: 0 403 | } 404 | convolution_param { 405 | num_output: 128 406 | pad: 1 407 | kernel_size: 3 408 | weight_filler { 409 | type: "gaussian" 410 | std: 0.01 411 | } 412 | bias_filler { 413 | type: "constant" 414 | } 415 | } 416 | } 417 | layer { 418 | name: "relu4_4_CPM" 419 | type: "ReLU" 420 | bottom: "conv4_4_CPM" 421 | top: "conv4_4_CPM" 422 | } 423 | layer { 424 | name: "conv5_1_CPM_L1" 425 | type: "Convolution" 426 | bottom: "conv4_4_CPM" 427 | top: "conv5_1_CPM_L1" 428 | param { 429 | lr_mult: 1.0 430 | decay_mult: 1 431 | } 432 | param { 433 | lr_mult: 2.0 434 | decay_mult: 0 435 | } 436 | convolution_param { 437 | num_output: 128 438 | pad: 1 439 | kernel_size: 3 440 | weight_filler { 441 | type: "gaussian" 442 | std: 0.01 443 | } 444 | bias_filler { 445 | type: "constant" 446 | } 447 | } 448 | } 449 | layer { 450 | name: "relu5_1_CPM_L1" 451 | type: "ReLU" 452 | bottom: "conv5_1_CPM_L1" 453 | top: "conv5_1_CPM_L1" 454 | } 455 | layer { 456 
| name: "conv5_1_CPM_L2" 457 | type: "Convolution" 458 | bottom: "conv4_4_CPM" 459 | top: "conv5_1_CPM_L2" 460 | param { 461 | lr_mult: 1.0 462 | decay_mult: 1 463 | } 464 | param { 465 | lr_mult: 2.0 466 | decay_mult: 0 467 | } 468 | convolution_param { 469 | num_output: 128 470 | pad: 1 471 | kernel_size: 3 472 | weight_filler { 473 | type: "gaussian" 474 | std: 0.01 475 | } 476 | bias_filler { 477 | type: "constant" 478 | } 479 | } 480 | } 481 | layer { 482 | name: "relu5_1_CPM_L2" 483 | type: "ReLU" 484 | bottom: "conv5_1_CPM_L2" 485 | top: "conv5_1_CPM_L2" 486 | } 487 | layer { 488 | name: "conv5_2_CPM_L1" 489 | type: "Convolution" 490 | bottom: "conv5_1_CPM_L1" 491 | top: "conv5_2_CPM_L1" 492 | param { 493 | lr_mult: 1.0 494 | decay_mult: 1 495 | } 496 | param { 497 | lr_mult: 2.0 498 | decay_mult: 0 499 | } 500 | convolution_param { 501 | num_output: 128 502 | pad: 1 503 | kernel_size: 3 504 | weight_filler { 505 | type: "gaussian" 506 | std: 0.01 507 | } 508 | bias_filler { 509 | type: "constant" 510 | } 511 | } 512 | } 513 | layer { 514 | name: "relu5_2_CPM_L1" 515 | type: "ReLU" 516 | bottom: "conv5_2_CPM_L1" 517 | top: "conv5_2_CPM_L1" 518 | } 519 | layer { 520 | name: "conv5_2_CPM_L2" 521 | type: "Convolution" 522 | bottom: "conv5_1_CPM_L2" 523 | top: "conv5_2_CPM_L2" 524 | param { 525 | lr_mult: 1.0 526 | decay_mult: 1 527 | } 528 | param { 529 | lr_mult: 2.0 530 | decay_mult: 0 531 | } 532 | convolution_param { 533 | num_output: 128 534 | pad: 1 535 | kernel_size: 3 536 | weight_filler { 537 | type: "gaussian" 538 | std: 0.01 539 | } 540 | bias_filler { 541 | type: "constant" 542 | } 543 | } 544 | } 545 | layer { 546 | name: "relu5_2_CPM_L2" 547 | type: "ReLU" 548 | bottom: "conv5_2_CPM_L2" 549 | top: "conv5_2_CPM_L2" 550 | } 551 | layer { 552 | name: "conv5_3_CPM_L1" 553 | type: "Convolution" 554 | bottom: "conv5_2_CPM_L1" 555 | top: "conv5_3_CPM_L1" 556 | param { 557 | lr_mult: 1.0 558 | decay_mult: 1 559 | } 560 | param { 561 | lr_mult: 2.0 562 | decay_mult: 0 563 | } 564 | convolution_param { 565 | num_output: 128 566 | pad: 1 567 | kernel_size: 3 568 | weight_filler { 569 | type: "gaussian" 570 | std: 0.01 571 | } 572 | bias_filler { 573 | type: "constant" 574 | } 575 | } 576 | } 577 | layer { 578 | name: "relu5_3_CPM_L1" 579 | type: "ReLU" 580 | bottom: "conv5_3_CPM_L1" 581 | top: "conv5_3_CPM_L1" 582 | } 583 | layer { 584 | name: "conv5_3_CPM_L2" 585 | type: "Convolution" 586 | bottom: "conv5_2_CPM_L2" 587 | top: "conv5_3_CPM_L2" 588 | param { 589 | lr_mult: 1.0 590 | decay_mult: 1 591 | } 592 | param { 593 | lr_mult: 2.0 594 | decay_mult: 0 595 | } 596 | convolution_param { 597 | num_output: 128 598 | pad: 1 599 | kernel_size: 3 600 | weight_filler { 601 | type: "gaussian" 602 | std: 0.01 603 | } 604 | bias_filler { 605 | type: "constant" 606 | } 607 | } 608 | } 609 | layer { 610 | name: "relu5_3_CPM_L2" 611 | type: "ReLU" 612 | bottom: "conv5_3_CPM_L2" 613 | top: "conv5_3_CPM_L2" 614 | } 615 | layer { 616 | name: "conv5_4_CPM_L1" 617 | type: "Convolution" 618 | bottom: "conv5_3_CPM_L1" 619 | top: "conv5_4_CPM_L1" 620 | param { 621 | lr_mult: 1.0 622 | decay_mult: 1 623 | } 624 | param { 625 | lr_mult: 2.0 626 | decay_mult: 0 627 | } 628 | convolution_param { 629 | num_output: 512 630 | pad: 0 631 | kernel_size: 1 632 | weight_filler { 633 | type: "gaussian" 634 | std: 0.01 635 | } 636 | bias_filler { 637 | type: "constant" 638 | } 639 | } 640 | } 641 | layer { 642 | name: "relu5_4_CPM_L1" 643 | type: "ReLU" 644 | bottom: "conv5_4_CPM_L1" 645 | top: "conv5_4_CPM_L1" 
646 | } 647 | layer { 648 | name: "conv5_4_CPM_L2" 649 | type: "Convolution" 650 | bottom: "conv5_3_CPM_L2" 651 | top: "conv5_4_CPM_L2" 652 | param { 653 | lr_mult: 1.0 654 | decay_mult: 1 655 | } 656 | param { 657 | lr_mult: 2.0 658 | decay_mult: 0 659 | } 660 | convolution_param { 661 | num_output: 512 662 | pad: 0 663 | kernel_size: 1 664 | weight_filler { 665 | type: "gaussian" 666 | std: 0.01 667 | } 668 | bias_filler { 669 | type: "constant" 670 | } 671 | } 672 | } 673 | layer { 674 | name: "relu5_4_CPM_L2" 675 | type: "ReLU" 676 | bottom: "conv5_4_CPM_L2" 677 | top: "conv5_4_CPM_L2" 678 | } 679 | layer { 680 | name: "conv5_5_CPM_L1" 681 | type: "Convolution" 682 | bottom: "conv5_4_CPM_L1" 683 | top: "conv5_5_CPM_L1" 684 | param { 685 | lr_mult: 1.0 686 | decay_mult: 1 687 | } 688 | param { 689 | lr_mult: 2.0 690 | decay_mult: 0 691 | } 692 | convolution_param { 693 | num_output: 38 694 | pad: 0 695 | kernel_size: 1 696 | weight_filler { 697 | type: "gaussian" 698 | std: 0.01 699 | } 700 | bias_filler { 701 | type: "constant" 702 | } 703 | } 704 | } 705 | layer { 706 | name: "conv5_5_CPM_L2" 707 | type: "Convolution" 708 | bottom: "conv5_4_CPM_L2" 709 | top: "conv5_5_CPM_L2" 710 | param { 711 | lr_mult: 1.0 712 | decay_mult: 1 713 | } 714 | param { 715 | lr_mult: 2.0 716 | decay_mult: 0 717 | } 718 | convolution_param { 719 | num_output: 19 720 | pad: 0 721 | kernel_size: 1 722 | weight_filler { 723 | type: "gaussian" 724 | std: 0.01 725 | } 726 | bias_filler { 727 | type: "constant" 728 | } 729 | } 730 | } 731 | layer { 732 | name: "concat_stage2" 733 | type: "Concat" 734 | bottom: "conv5_5_CPM_L1" 735 | bottom: "conv5_5_CPM_L2" 736 | bottom: "conv4_4_CPM" 737 | top: "concat_stage2" 738 | concat_param { 739 | axis: 1 740 | } 741 | } 742 | layer { 743 | name: "Mconv1_stage2_L1" 744 | type: "Convolution" 745 | bottom: "concat_stage2" 746 | top: "Mconv1_stage2_L1" 747 | param { 748 | lr_mult: 4.0 749 | decay_mult: 1 750 | } 751 | param { 752 | lr_mult: 8.0 753 | decay_mult: 0 754 | } 755 | convolution_param { 756 | num_output: 128 757 | pad: 3 758 | kernel_size: 7 759 | weight_filler { 760 | type: "gaussian" 761 | std: 0.01 762 | } 763 | bias_filler { 764 | type: "constant" 765 | } 766 | } 767 | } 768 | layer { 769 | name: "Mrelu1_stage2_L1" 770 | type: "ReLU" 771 | bottom: "Mconv1_stage2_L1" 772 | top: "Mconv1_stage2_L1" 773 | } 774 | layer { 775 | name: "Mconv1_stage2_L2" 776 | type: "Convolution" 777 | bottom: "concat_stage2" 778 | top: "Mconv1_stage2_L2" 779 | param { 780 | lr_mult: 4.0 781 | decay_mult: 1 782 | } 783 | param { 784 | lr_mult: 8.0 785 | decay_mult: 0 786 | } 787 | convolution_param { 788 | num_output: 128 789 | pad: 3 790 | kernel_size: 7 791 | weight_filler { 792 | type: "gaussian" 793 | std: 0.01 794 | } 795 | bias_filler { 796 | type: "constant" 797 | } 798 | } 799 | } 800 | layer { 801 | name: "Mrelu1_stage2_L2" 802 | type: "ReLU" 803 | bottom: "Mconv1_stage2_L2" 804 | top: "Mconv1_stage2_L2" 805 | } 806 | layer { 807 | name: "Mconv2_stage2_L1" 808 | type: "Convolution" 809 | bottom: "Mconv1_stage2_L1" 810 | top: "Mconv2_stage2_L1" 811 | param { 812 | lr_mult: 4.0 813 | decay_mult: 1 814 | } 815 | param { 816 | lr_mult: 8.0 817 | decay_mult: 0 818 | } 819 | convolution_param { 820 | num_output: 128 821 | pad: 3 822 | kernel_size: 7 823 | weight_filler { 824 | type: "gaussian" 825 | std: 0.01 826 | } 827 | bias_filler { 828 | type: "constant" 829 | } 830 | } 831 | } 832 | layer { 833 | name: "Mrelu2_stage2_L1" 834 | type: "ReLU" 835 | bottom: "Mconv2_stage2_L1" 
836 | top: "Mconv2_stage2_L1" 837 | } 838 | layer { 839 | name: "Mconv2_stage2_L2" 840 | type: "Convolution" 841 | bottom: "Mconv1_stage2_L2" 842 | top: "Mconv2_stage2_L2" 843 | param { 844 | lr_mult: 4.0 845 | decay_mult: 1 846 | } 847 | param { 848 | lr_mult: 8.0 849 | decay_mult: 0 850 | } 851 | convolution_param { 852 | num_output: 128 853 | pad: 3 854 | kernel_size: 7 855 | weight_filler { 856 | type: "gaussian" 857 | std: 0.01 858 | } 859 | bias_filler { 860 | type: "constant" 861 | } 862 | } 863 | } 864 | layer { 865 | name: "Mrelu2_stage2_L2" 866 | type: "ReLU" 867 | bottom: "Mconv2_stage2_L2" 868 | top: "Mconv2_stage2_L2" 869 | } 870 | layer { 871 | name: "Mconv3_stage2_L1" 872 | type: "Convolution" 873 | bottom: "Mconv2_stage2_L1" 874 | top: "Mconv3_stage2_L1" 875 | param { 876 | lr_mult: 4.0 877 | decay_mult: 1 878 | } 879 | param { 880 | lr_mult: 8.0 881 | decay_mult: 0 882 | } 883 | convolution_param { 884 | num_output: 128 885 | pad: 3 886 | kernel_size: 7 887 | weight_filler { 888 | type: "gaussian" 889 | std: 0.01 890 | } 891 | bias_filler { 892 | type: "constant" 893 | } 894 | } 895 | } 896 | layer { 897 | name: "Mrelu3_stage2_L1" 898 | type: "ReLU" 899 | bottom: "Mconv3_stage2_L1" 900 | top: "Mconv3_stage2_L1" 901 | } 902 | layer { 903 | name: "Mconv3_stage2_L2" 904 | type: "Convolution" 905 | bottom: "Mconv2_stage2_L2" 906 | top: "Mconv3_stage2_L2" 907 | param { 908 | lr_mult: 4.0 909 | decay_mult: 1 910 | } 911 | param { 912 | lr_mult: 8.0 913 | decay_mult: 0 914 | } 915 | convolution_param { 916 | num_output: 128 917 | pad: 3 918 | kernel_size: 7 919 | weight_filler { 920 | type: "gaussian" 921 | std: 0.01 922 | } 923 | bias_filler { 924 | type: "constant" 925 | } 926 | } 927 | } 928 | layer { 929 | name: "Mrelu3_stage2_L2" 930 | type: "ReLU" 931 | bottom: "Mconv3_stage2_L2" 932 | top: "Mconv3_stage2_L2" 933 | } 934 | layer { 935 | name: "Mconv4_stage2_L1" 936 | type: "Convolution" 937 | bottom: "Mconv3_stage2_L1" 938 | top: "Mconv4_stage2_L1" 939 | param { 940 | lr_mult: 4.0 941 | decay_mult: 1 942 | } 943 | param { 944 | lr_mult: 8.0 945 | decay_mult: 0 946 | } 947 | convolution_param { 948 | num_output: 128 949 | pad: 3 950 | kernel_size: 7 951 | weight_filler { 952 | type: "gaussian" 953 | std: 0.01 954 | } 955 | bias_filler { 956 | type: "constant" 957 | } 958 | } 959 | } 960 | layer { 961 | name: "Mrelu4_stage2_L1" 962 | type: "ReLU" 963 | bottom: "Mconv4_stage2_L1" 964 | top: "Mconv4_stage2_L1" 965 | } 966 | layer { 967 | name: "Mconv4_stage2_L2" 968 | type: "Convolution" 969 | bottom: "Mconv3_stage2_L2" 970 | top: "Mconv4_stage2_L2" 971 | param { 972 | lr_mult: 4.0 973 | decay_mult: 1 974 | } 975 | param { 976 | lr_mult: 8.0 977 | decay_mult: 0 978 | } 979 | convolution_param { 980 | num_output: 128 981 | pad: 3 982 | kernel_size: 7 983 | weight_filler { 984 | type: "gaussian" 985 | std: 0.01 986 | } 987 | bias_filler { 988 | type: "constant" 989 | } 990 | } 991 | } 992 | layer { 993 | name: "Mrelu4_stage2_L2" 994 | type: "ReLU" 995 | bottom: "Mconv4_stage2_L2" 996 | top: "Mconv4_stage2_L2" 997 | } 998 | layer { 999 | name: "Mconv5_stage2_L1" 1000 | type: "Convolution" 1001 | bottom: "Mconv4_stage2_L1" 1002 | top: "Mconv5_stage2_L1" 1003 | param { 1004 | lr_mult: 4.0 1005 | decay_mult: 1 1006 | } 1007 | param { 1008 | lr_mult: 8.0 1009 | decay_mult: 0 1010 | } 1011 | convolution_param { 1012 | num_output: 128 1013 | pad: 3 1014 | kernel_size: 7 1015 | weight_filler { 1016 | type: "gaussian" 1017 | std: 0.01 1018 | } 1019 | bias_filler { 1020 | type: "constant" 
1021 | } 1022 | } 1023 | } 1024 | layer { 1025 | name: "Mrelu5_stage2_L1" 1026 | type: "ReLU" 1027 | bottom: "Mconv5_stage2_L1" 1028 | top: "Mconv5_stage2_L1" 1029 | } 1030 | layer { 1031 | name: "Mconv5_stage2_L2" 1032 | type: "Convolution" 1033 | bottom: "Mconv4_stage2_L2" 1034 | top: "Mconv5_stage2_L2" 1035 | param { 1036 | lr_mult: 4.0 1037 | decay_mult: 1 1038 | } 1039 | param { 1040 | lr_mult: 8.0 1041 | decay_mult: 0 1042 | } 1043 | convolution_param { 1044 | num_output: 128 1045 | pad: 3 1046 | kernel_size: 7 1047 | weight_filler { 1048 | type: "gaussian" 1049 | std: 0.01 1050 | } 1051 | bias_filler { 1052 | type: "constant" 1053 | } 1054 | } 1055 | } 1056 | layer { 1057 | name: "Mrelu5_stage2_L2" 1058 | type: "ReLU" 1059 | bottom: "Mconv5_stage2_L2" 1060 | top: "Mconv5_stage2_L2" 1061 | } 1062 | layer { 1063 | name: "Mconv6_stage2_L1" 1064 | type: "Convolution" 1065 | bottom: "Mconv5_stage2_L1" 1066 | top: "Mconv6_stage2_L1" 1067 | param { 1068 | lr_mult: 4.0 1069 | decay_mult: 1 1070 | } 1071 | param { 1072 | lr_mult: 8.0 1073 | decay_mult: 0 1074 | } 1075 | convolution_param { 1076 | num_output: 128 1077 | pad: 0 1078 | kernel_size: 1 1079 | weight_filler { 1080 | type: "gaussian" 1081 | std: 0.01 1082 | } 1083 | bias_filler { 1084 | type: "constant" 1085 | } 1086 | } 1087 | } 1088 | layer { 1089 | name: "Mrelu6_stage2_L1" 1090 | type: "ReLU" 1091 | bottom: "Mconv6_stage2_L1" 1092 | top: "Mconv6_stage2_L1" 1093 | } 1094 | layer { 1095 | name: "Mconv6_stage2_L2" 1096 | type: "Convolution" 1097 | bottom: "Mconv5_stage2_L2" 1098 | top: "Mconv6_stage2_L2" 1099 | param { 1100 | lr_mult: 4.0 1101 | decay_mult: 1 1102 | } 1103 | param { 1104 | lr_mult: 8.0 1105 | decay_mult: 0 1106 | } 1107 | convolution_param { 1108 | num_output: 128 1109 | pad: 0 1110 | kernel_size: 1 1111 | weight_filler { 1112 | type: "gaussian" 1113 | std: 0.01 1114 | } 1115 | bias_filler { 1116 | type: "constant" 1117 | } 1118 | } 1119 | } 1120 | layer { 1121 | name: "Mrelu6_stage2_L2" 1122 | type: "ReLU" 1123 | bottom: "Mconv6_stage2_L2" 1124 | top: "Mconv6_stage2_L2" 1125 | } 1126 | layer { 1127 | name: "Mconv7_stage2_L1" 1128 | type: "Convolution" 1129 | bottom: "Mconv6_stage2_L1" 1130 | top: "Mconv7_stage2_L1" 1131 | param { 1132 | lr_mult: 4.0 1133 | decay_mult: 1 1134 | } 1135 | param { 1136 | lr_mult: 8.0 1137 | decay_mult: 0 1138 | } 1139 | convolution_param { 1140 | num_output: 38 1141 | pad: 0 1142 | kernel_size: 1 1143 | weight_filler { 1144 | type: "gaussian" 1145 | std: 0.01 1146 | } 1147 | bias_filler { 1148 | type: "constant" 1149 | } 1150 | } 1151 | } 1152 | layer { 1153 | name: "Mconv7_stage2_L2" 1154 | type: "Convolution" 1155 | bottom: "Mconv6_stage2_L2" 1156 | top: "Mconv7_stage2_L2" 1157 | param { 1158 | lr_mult: 4.0 1159 | decay_mult: 1 1160 | } 1161 | param { 1162 | lr_mult: 8.0 1163 | decay_mult: 0 1164 | } 1165 | convolution_param { 1166 | num_output: 19 1167 | pad: 0 1168 | kernel_size: 1 1169 | weight_filler { 1170 | type: "gaussian" 1171 | std: 0.01 1172 | } 1173 | bias_filler { 1174 | type: "constant" 1175 | } 1176 | } 1177 | } 1178 | layer { 1179 | name: "concat_stage3" 1180 | type: "Concat" 1181 | bottom: "Mconv7_stage2_L1" 1182 | bottom: "Mconv7_stage2_L2" 1183 | bottom: "conv4_4_CPM" 1184 | top: "concat_stage3" 1185 | concat_param { 1186 | axis: 1 1187 | } 1188 | } 1189 | layer { 1190 | name: "Mconv1_stage3_L1" 1191 | type: "Convolution" 1192 | bottom: "concat_stage3" 1193 | top: "Mconv1_stage3_L1" 1194 | param { 1195 | lr_mult: 4.0 1196 | decay_mult: 1 1197 | } 1198 | param { 
1199 | lr_mult: 8.0 1200 | decay_mult: 0 1201 | } 1202 | convolution_param { 1203 | num_output: 128 1204 | pad: 3 1205 | kernel_size: 7 1206 | weight_filler { 1207 | type: "gaussian" 1208 | std: 0.01 1209 | } 1210 | bias_filler { 1211 | type: "constant" 1212 | } 1213 | } 1214 | } 1215 | layer { 1216 | name: "Mrelu1_stage3_L1" 1217 | type: "ReLU" 1218 | bottom: "Mconv1_stage3_L1" 1219 | top: "Mconv1_stage3_L1" 1220 | } 1221 | layer { 1222 | name: "Mconv1_stage3_L2" 1223 | type: "Convolution" 1224 | bottom: "concat_stage3" 1225 | top: "Mconv1_stage3_L2" 1226 | param { 1227 | lr_mult: 4.0 1228 | decay_mult: 1 1229 | } 1230 | param { 1231 | lr_mult: 8.0 1232 | decay_mult: 0 1233 | } 1234 | convolution_param { 1235 | num_output: 128 1236 | pad: 3 1237 | kernel_size: 7 1238 | weight_filler { 1239 | type: "gaussian" 1240 | std: 0.01 1241 | } 1242 | bias_filler { 1243 | type: "constant" 1244 | } 1245 | } 1246 | } 1247 | layer { 1248 | name: "Mrelu1_stage3_L2" 1249 | type: "ReLU" 1250 | bottom: "Mconv1_stage3_L2" 1251 | top: "Mconv1_stage3_L2" 1252 | } 1253 | layer { 1254 | name: "Mconv2_stage3_L1" 1255 | type: "Convolution" 1256 | bottom: "Mconv1_stage3_L1" 1257 | top: "Mconv2_stage3_L1" 1258 | param { 1259 | lr_mult: 4.0 1260 | decay_mult: 1 1261 | } 1262 | param { 1263 | lr_mult: 8.0 1264 | decay_mult: 0 1265 | } 1266 | convolution_param { 1267 | num_output: 128 1268 | pad: 3 1269 | kernel_size: 7 1270 | weight_filler { 1271 | type: "gaussian" 1272 | std: 0.01 1273 | } 1274 | bias_filler { 1275 | type: "constant" 1276 | } 1277 | } 1278 | } 1279 | layer { 1280 | name: "Mrelu2_stage3_L1" 1281 | type: "ReLU" 1282 | bottom: "Mconv2_stage3_L1" 1283 | top: "Mconv2_stage3_L1" 1284 | } 1285 | layer { 1286 | name: "Mconv2_stage3_L2" 1287 | type: "Convolution" 1288 | bottom: "Mconv1_stage3_L2" 1289 | top: "Mconv2_stage3_L2" 1290 | param { 1291 | lr_mult: 4.0 1292 | decay_mult: 1 1293 | } 1294 | param { 1295 | lr_mult: 8.0 1296 | decay_mult: 0 1297 | } 1298 | convolution_param { 1299 | num_output: 128 1300 | pad: 3 1301 | kernel_size: 7 1302 | weight_filler { 1303 | type: "gaussian" 1304 | std: 0.01 1305 | } 1306 | bias_filler { 1307 | type: "constant" 1308 | } 1309 | } 1310 | } 1311 | layer { 1312 | name: "Mrelu2_stage3_L2" 1313 | type: "ReLU" 1314 | bottom: "Mconv2_stage3_L2" 1315 | top: "Mconv2_stage3_L2" 1316 | } 1317 | layer { 1318 | name: "Mconv3_stage3_L1" 1319 | type: "Convolution" 1320 | bottom: "Mconv2_stage3_L1" 1321 | top: "Mconv3_stage3_L1" 1322 | param { 1323 | lr_mult: 4.0 1324 | decay_mult: 1 1325 | } 1326 | param { 1327 | lr_mult: 8.0 1328 | decay_mult: 0 1329 | } 1330 | convolution_param { 1331 | num_output: 128 1332 | pad: 3 1333 | kernel_size: 7 1334 | weight_filler { 1335 | type: "gaussian" 1336 | std: 0.01 1337 | } 1338 | bias_filler { 1339 | type: "constant" 1340 | } 1341 | } 1342 | } 1343 | layer { 1344 | name: "Mrelu3_stage3_L1" 1345 | type: "ReLU" 1346 | bottom: "Mconv3_stage3_L1" 1347 | top: "Mconv3_stage3_L1" 1348 | } 1349 | layer { 1350 | name: "Mconv3_stage3_L2" 1351 | type: "Convolution" 1352 | bottom: "Mconv2_stage3_L2" 1353 | top: "Mconv3_stage3_L2" 1354 | param { 1355 | lr_mult: 4.0 1356 | decay_mult: 1 1357 | } 1358 | param { 1359 | lr_mult: 8.0 1360 | decay_mult: 0 1361 | } 1362 | convolution_param { 1363 | num_output: 128 1364 | pad: 3 1365 | kernel_size: 7 1366 | weight_filler { 1367 | type: "gaussian" 1368 | std: 0.01 1369 | } 1370 | bias_filler { 1371 | type: "constant" 1372 | } 1373 | } 1374 | } 1375 | layer { 1376 | name: "Mrelu3_stage3_L2" 1377 | type: "ReLU" 1378 
| bottom: "Mconv3_stage3_L2" 1379 | top: "Mconv3_stage3_L2" 1380 | } 1381 | layer { 1382 | name: "Mconv4_stage3_L1" 1383 | type: "Convolution" 1384 | bottom: "Mconv3_stage3_L1" 1385 | top: "Mconv4_stage3_L1" 1386 | param { 1387 | lr_mult: 4.0 1388 | decay_mult: 1 1389 | } 1390 | param { 1391 | lr_mult: 8.0 1392 | decay_mult: 0 1393 | } 1394 | convolution_param { 1395 | num_output: 128 1396 | pad: 3 1397 | kernel_size: 7 1398 | weight_filler { 1399 | type: "gaussian" 1400 | std: 0.01 1401 | } 1402 | bias_filler { 1403 | type: "constant" 1404 | } 1405 | } 1406 | } 1407 | layer { 1408 | name: "Mrelu4_stage3_L1" 1409 | type: "ReLU" 1410 | bottom: "Mconv4_stage3_L1" 1411 | top: "Mconv4_stage3_L1" 1412 | } 1413 | layer { 1414 | name: "Mconv4_stage3_L2" 1415 | type: "Convolution" 1416 | bottom: "Mconv3_stage3_L2" 1417 | top: "Mconv4_stage3_L2" 1418 | param { 1419 | lr_mult: 4.0 1420 | decay_mult: 1 1421 | } 1422 | param { 1423 | lr_mult: 8.0 1424 | decay_mult: 0 1425 | } 1426 | convolution_param { 1427 | num_output: 128 1428 | pad: 3 1429 | kernel_size: 7 1430 | weight_filler { 1431 | type: "gaussian" 1432 | std: 0.01 1433 | } 1434 | bias_filler { 1435 | type: "constant" 1436 | } 1437 | } 1438 | } 1439 | layer { 1440 | name: "Mrelu4_stage3_L2" 1441 | type: "ReLU" 1442 | bottom: "Mconv4_stage3_L2" 1443 | top: "Mconv4_stage3_L2" 1444 | } 1445 | layer { 1446 | name: "Mconv5_stage3_L1" 1447 | type: "Convolution" 1448 | bottom: "Mconv4_stage3_L1" 1449 | top: "Mconv5_stage3_L1" 1450 | param { 1451 | lr_mult: 4.0 1452 | decay_mult: 1 1453 | } 1454 | param { 1455 | lr_mult: 8.0 1456 | decay_mult: 0 1457 | } 1458 | convolution_param { 1459 | num_output: 128 1460 | pad: 3 1461 | kernel_size: 7 1462 | weight_filler { 1463 | type: "gaussian" 1464 | std: 0.01 1465 | } 1466 | bias_filler { 1467 | type: "constant" 1468 | } 1469 | } 1470 | } 1471 | layer { 1472 | name: "Mrelu5_stage3_L1" 1473 | type: "ReLU" 1474 | bottom: "Mconv5_stage3_L1" 1475 | top: "Mconv5_stage3_L1" 1476 | } 1477 | layer { 1478 | name: "Mconv5_stage3_L2" 1479 | type: "Convolution" 1480 | bottom: "Mconv4_stage3_L2" 1481 | top: "Mconv5_stage3_L2" 1482 | param { 1483 | lr_mult: 4.0 1484 | decay_mult: 1 1485 | } 1486 | param { 1487 | lr_mult: 8.0 1488 | decay_mult: 0 1489 | } 1490 | convolution_param { 1491 | num_output: 128 1492 | pad: 3 1493 | kernel_size: 7 1494 | weight_filler { 1495 | type: "gaussian" 1496 | std: 0.01 1497 | } 1498 | bias_filler { 1499 | type: "constant" 1500 | } 1501 | } 1502 | } 1503 | layer { 1504 | name: "Mrelu5_stage3_L2" 1505 | type: "ReLU" 1506 | bottom: "Mconv5_stage3_L2" 1507 | top: "Mconv5_stage3_L2" 1508 | } 1509 | layer { 1510 | name: "Mconv6_stage3_L1" 1511 | type: "Convolution" 1512 | bottom: "Mconv5_stage3_L1" 1513 | top: "Mconv6_stage3_L1" 1514 | param { 1515 | lr_mult: 4.0 1516 | decay_mult: 1 1517 | } 1518 | param { 1519 | lr_mult: 8.0 1520 | decay_mult: 0 1521 | } 1522 | convolution_param { 1523 | num_output: 128 1524 | pad: 0 1525 | kernel_size: 1 1526 | weight_filler { 1527 | type: "gaussian" 1528 | std: 0.01 1529 | } 1530 | bias_filler { 1531 | type: "constant" 1532 | } 1533 | } 1534 | } 1535 | layer { 1536 | name: "Mrelu6_stage3_L1" 1537 | type: "ReLU" 1538 | bottom: "Mconv6_stage3_L1" 1539 | top: "Mconv6_stage3_L1" 1540 | } 1541 | layer { 1542 | name: "Mconv6_stage3_L2" 1543 | type: "Convolution" 1544 | bottom: "Mconv5_stage3_L2" 1545 | top: "Mconv6_stage3_L2" 1546 | param { 1547 | lr_mult: 4.0 1548 | decay_mult: 1 1549 | } 1550 | param { 1551 | lr_mult: 8.0 1552 | decay_mult: 0 1553 | } 1554 | 
convolution_param { 1555 | num_output: 128 1556 | pad: 0 1557 | kernel_size: 1 1558 | weight_filler { 1559 | type: "gaussian" 1560 | std: 0.01 1561 | } 1562 | bias_filler { 1563 | type: "constant" 1564 | } 1565 | } 1566 | } 1567 | layer { 1568 | name: "Mrelu6_stage3_L2" 1569 | type: "ReLU" 1570 | bottom: "Mconv6_stage3_L2" 1571 | top: "Mconv6_stage3_L2" 1572 | } 1573 | layer { 1574 | name: "Mconv7_stage3_L1" 1575 | type: "Convolution" 1576 | bottom: "Mconv6_stage3_L1" 1577 | top: "Mconv7_stage3_L1" 1578 | param { 1579 | lr_mult: 4.0 1580 | decay_mult: 1 1581 | } 1582 | param { 1583 | lr_mult: 8.0 1584 | decay_mult: 0 1585 | } 1586 | convolution_param { 1587 | num_output: 38 1588 | pad: 0 1589 | kernel_size: 1 1590 | weight_filler { 1591 | type: "gaussian" 1592 | std: 0.01 1593 | } 1594 | bias_filler { 1595 | type: "constant" 1596 | } 1597 | } 1598 | } 1599 | layer { 1600 | name: "Mconv7_stage3_L2" 1601 | type: "Convolution" 1602 | bottom: "Mconv6_stage3_L2" 1603 | top: "Mconv7_stage3_L2" 1604 | param { 1605 | lr_mult: 4.0 1606 | decay_mult: 1 1607 | } 1608 | param { 1609 | lr_mult: 8.0 1610 | decay_mult: 0 1611 | } 1612 | convolution_param { 1613 | num_output: 19 1614 | pad: 0 1615 | kernel_size: 1 1616 | weight_filler { 1617 | type: "gaussian" 1618 | std: 0.01 1619 | } 1620 | bias_filler { 1621 | type: "constant" 1622 | } 1623 | } 1624 | } 1625 | layer { 1626 | name: "concat_stage4" 1627 | type: "Concat" 1628 | bottom: "Mconv7_stage3_L1" 1629 | bottom: "Mconv7_stage3_L2" 1630 | bottom: "conv4_4_CPM" 1631 | top: "concat_stage4" 1632 | concat_param { 1633 | axis: 1 1634 | } 1635 | } 1636 | layer { 1637 | name: "Mconv1_stage4_L1" 1638 | type: "Convolution" 1639 | bottom: "concat_stage4" 1640 | top: "Mconv1_stage4_L1" 1641 | param { 1642 | lr_mult: 4.0 1643 | decay_mult: 1 1644 | } 1645 | param { 1646 | lr_mult: 8.0 1647 | decay_mult: 0 1648 | } 1649 | convolution_param { 1650 | num_output: 128 1651 | pad: 3 1652 | kernel_size: 7 1653 | weight_filler { 1654 | type: "gaussian" 1655 | std: 0.01 1656 | } 1657 | bias_filler { 1658 | type: "constant" 1659 | } 1660 | } 1661 | } 1662 | layer { 1663 | name: "Mrelu1_stage4_L1" 1664 | type: "ReLU" 1665 | bottom: "Mconv1_stage4_L1" 1666 | top: "Mconv1_stage4_L1" 1667 | } 1668 | layer { 1669 | name: "Mconv1_stage4_L2" 1670 | type: "Convolution" 1671 | bottom: "concat_stage4" 1672 | top: "Mconv1_stage4_L2" 1673 | param { 1674 | lr_mult: 4.0 1675 | decay_mult: 1 1676 | } 1677 | param { 1678 | lr_mult: 8.0 1679 | decay_mult: 0 1680 | } 1681 | convolution_param { 1682 | num_output: 128 1683 | pad: 3 1684 | kernel_size: 7 1685 | weight_filler { 1686 | type: "gaussian" 1687 | std: 0.01 1688 | } 1689 | bias_filler { 1690 | type: "constant" 1691 | } 1692 | } 1693 | } 1694 | layer { 1695 | name: "Mrelu1_stage4_L2" 1696 | type: "ReLU" 1697 | bottom: "Mconv1_stage4_L2" 1698 | top: "Mconv1_stage4_L2" 1699 | } 1700 | layer { 1701 | name: "Mconv2_stage4_L1" 1702 | type: "Convolution" 1703 | bottom: "Mconv1_stage4_L1" 1704 | top: "Mconv2_stage4_L1" 1705 | param { 1706 | lr_mult: 4.0 1707 | decay_mult: 1 1708 | } 1709 | param { 1710 | lr_mult: 8.0 1711 | decay_mult: 0 1712 | } 1713 | convolution_param { 1714 | num_output: 128 1715 | pad: 3 1716 | kernel_size: 7 1717 | weight_filler { 1718 | type: "gaussian" 1719 | std: 0.01 1720 | } 1721 | bias_filler { 1722 | type: "constant" 1723 | } 1724 | } 1725 | } 1726 | layer { 1727 | name: "Mrelu2_stage4_L1" 1728 | type: "ReLU" 1729 | bottom: "Mconv2_stage4_L1" 1730 | top: "Mconv2_stage4_L1" 1731 | } 1732 | layer { 1733 | name: 
"Mconv2_stage4_L2" 1734 | type: "Convolution" 1735 | bottom: "Mconv1_stage4_L2" 1736 | top: "Mconv2_stage4_L2" 1737 | param { 1738 | lr_mult: 4.0 1739 | decay_mult: 1 1740 | } 1741 | param { 1742 | lr_mult: 8.0 1743 | decay_mult: 0 1744 | } 1745 | convolution_param { 1746 | num_output: 128 1747 | pad: 3 1748 | kernel_size: 7 1749 | weight_filler { 1750 | type: "gaussian" 1751 | std: 0.01 1752 | } 1753 | bias_filler { 1754 | type: "constant" 1755 | } 1756 | } 1757 | } 1758 | layer { 1759 | name: "Mrelu2_stage4_L2" 1760 | type: "ReLU" 1761 | bottom: "Mconv2_stage4_L2" 1762 | top: "Mconv2_stage4_L2" 1763 | } 1764 | layer { 1765 | name: "Mconv3_stage4_L1" 1766 | type: "Convolution" 1767 | bottom: "Mconv2_stage4_L1" 1768 | top: "Mconv3_stage4_L1" 1769 | param { 1770 | lr_mult: 4.0 1771 | decay_mult: 1 1772 | } 1773 | param { 1774 | lr_mult: 8.0 1775 | decay_mult: 0 1776 | } 1777 | convolution_param { 1778 | num_output: 128 1779 | pad: 3 1780 | kernel_size: 7 1781 | weight_filler { 1782 | type: "gaussian" 1783 | std: 0.01 1784 | } 1785 | bias_filler { 1786 | type: "constant" 1787 | } 1788 | } 1789 | } 1790 | layer { 1791 | name: "Mrelu3_stage4_L1" 1792 | type: "ReLU" 1793 | bottom: "Mconv3_stage4_L1" 1794 | top: "Mconv3_stage4_L1" 1795 | } 1796 | layer { 1797 | name: "Mconv3_stage4_L2" 1798 | type: "Convolution" 1799 | bottom: "Mconv2_stage4_L2" 1800 | top: "Mconv3_stage4_L2" 1801 | param { 1802 | lr_mult: 4.0 1803 | decay_mult: 1 1804 | } 1805 | param { 1806 | lr_mult: 8.0 1807 | decay_mult: 0 1808 | } 1809 | convolution_param { 1810 | num_output: 128 1811 | pad: 3 1812 | kernel_size: 7 1813 | weight_filler { 1814 | type: "gaussian" 1815 | std: 0.01 1816 | } 1817 | bias_filler { 1818 | type: "constant" 1819 | } 1820 | } 1821 | } 1822 | layer { 1823 | name: "Mrelu3_stage4_L2" 1824 | type: "ReLU" 1825 | bottom: "Mconv3_stage4_L2" 1826 | top: "Mconv3_stage4_L2" 1827 | } 1828 | layer { 1829 | name: "Mconv4_stage4_L1" 1830 | type: "Convolution" 1831 | bottom: "Mconv3_stage4_L1" 1832 | top: "Mconv4_stage4_L1" 1833 | param { 1834 | lr_mult: 4.0 1835 | decay_mult: 1 1836 | } 1837 | param { 1838 | lr_mult: 8.0 1839 | decay_mult: 0 1840 | } 1841 | convolution_param { 1842 | num_output: 128 1843 | pad: 3 1844 | kernel_size: 7 1845 | weight_filler { 1846 | type: "gaussian" 1847 | std: 0.01 1848 | } 1849 | bias_filler { 1850 | type: "constant" 1851 | } 1852 | } 1853 | } 1854 | layer { 1855 | name: "Mrelu4_stage4_L1" 1856 | type: "ReLU" 1857 | bottom: "Mconv4_stage4_L1" 1858 | top: "Mconv4_stage4_L1" 1859 | } 1860 | layer { 1861 | name: "Mconv4_stage4_L2" 1862 | type: "Convolution" 1863 | bottom: "Mconv3_stage4_L2" 1864 | top: "Mconv4_stage4_L2" 1865 | param { 1866 | lr_mult: 4.0 1867 | decay_mult: 1 1868 | } 1869 | param { 1870 | lr_mult: 8.0 1871 | decay_mult: 0 1872 | } 1873 | convolution_param { 1874 | num_output: 128 1875 | pad: 3 1876 | kernel_size: 7 1877 | weight_filler { 1878 | type: "gaussian" 1879 | std: 0.01 1880 | } 1881 | bias_filler { 1882 | type: "constant" 1883 | } 1884 | } 1885 | } 1886 | layer { 1887 | name: "Mrelu4_stage4_L2" 1888 | type: "ReLU" 1889 | bottom: "Mconv4_stage4_L2" 1890 | top: "Mconv4_stage4_L2" 1891 | } 1892 | layer { 1893 | name: "Mconv5_stage4_L1" 1894 | type: "Convolution" 1895 | bottom: "Mconv4_stage4_L1" 1896 | top: "Mconv5_stage4_L1" 1897 | param { 1898 | lr_mult: 4.0 1899 | decay_mult: 1 1900 | } 1901 | param { 1902 | lr_mult: 8.0 1903 | decay_mult: 0 1904 | } 1905 | convolution_param { 1906 | num_output: 128 1907 | pad: 3 1908 | kernel_size: 7 1909 | weight_filler { 
1910 | type: "gaussian" 1911 | std: 0.01 1912 | } 1913 | bias_filler { 1914 | type: "constant" 1915 | } 1916 | } 1917 | } 1918 | layer { 1919 | name: "Mrelu5_stage4_L1" 1920 | type: "ReLU" 1921 | bottom: "Mconv5_stage4_L1" 1922 | top: "Mconv5_stage4_L1" 1923 | } 1924 | layer { 1925 | name: "Mconv5_stage4_L2" 1926 | type: "Convolution" 1927 | bottom: "Mconv4_stage4_L2" 1928 | top: "Mconv5_stage4_L2" 1929 | param { 1930 | lr_mult: 4.0 1931 | decay_mult: 1 1932 | } 1933 | param { 1934 | lr_mult: 8.0 1935 | decay_mult: 0 1936 | } 1937 | convolution_param { 1938 | num_output: 128 1939 | pad: 3 1940 | kernel_size: 7 1941 | weight_filler { 1942 | type: "gaussian" 1943 | std: 0.01 1944 | } 1945 | bias_filler { 1946 | type: "constant" 1947 | } 1948 | } 1949 | } 1950 | layer { 1951 | name: "Mrelu5_stage4_L2" 1952 | type: "ReLU" 1953 | bottom: "Mconv5_stage4_L2" 1954 | top: "Mconv5_stage4_L2" 1955 | } 1956 | layer { 1957 | name: "Mconv6_stage4_L1" 1958 | type: "Convolution" 1959 | bottom: "Mconv5_stage4_L1" 1960 | top: "Mconv6_stage4_L1" 1961 | param { 1962 | lr_mult: 4.0 1963 | decay_mult: 1 1964 | } 1965 | param { 1966 | lr_mult: 8.0 1967 | decay_mult: 0 1968 | } 1969 | convolution_param { 1970 | num_output: 128 1971 | pad: 0 1972 | kernel_size: 1 1973 | weight_filler { 1974 | type: "gaussian" 1975 | std: 0.01 1976 | } 1977 | bias_filler { 1978 | type: "constant" 1979 | } 1980 | } 1981 | } 1982 | layer { 1983 | name: "Mrelu6_stage4_L1" 1984 | type: "ReLU" 1985 | bottom: "Mconv6_stage4_L1" 1986 | top: "Mconv6_stage4_L1" 1987 | } 1988 | layer { 1989 | name: "Mconv6_stage4_L2" 1990 | type: "Convolution" 1991 | bottom: "Mconv5_stage4_L2" 1992 | top: "Mconv6_stage4_L2" 1993 | param { 1994 | lr_mult: 4.0 1995 | decay_mult: 1 1996 | } 1997 | param { 1998 | lr_mult: 8.0 1999 | decay_mult: 0 2000 | } 2001 | convolution_param { 2002 | num_output: 128 2003 | pad: 0 2004 | kernel_size: 1 2005 | weight_filler { 2006 | type: "gaussian" 2007 | std: 0.01 2008 | } 2009 | bias_filler { 2010 | type: "constant" 2011 | } 2012 | } 2013 | } 2014 | layer { 2015 | name: "Mrelu6_stage4_L2" 2016 | type: "ReLU" 2017 | bottom: "Mconv6_stage4_L2" 2018 | top: "Mconv6_stage4_L2" 2019 | } 2020 | layer { 2021 | name: "Mconv7_stage4_L1" 2022 | type: "Convolution" 2023 | bottom: "Mconv6_stage4_L1" 2024 | top: "Mconv7_stage4_L1" 2025 | param { 2026 | lr_mult: 4.0 2027 | decay_mult: 1 2028 | } 2029 | param { 2030 | lr_mult: 8.0 2031 | decay_mult: 0 2032 | } 2033 | convolution_param { 2034 | num_output: 38 2035 | pad: 0 2036 | kernel_size: 1 2037 | weight_filler { 2038 | type: "gaussian" 2039 | std: 0.01 2040 | } 2041 | bias_filler { 2042 | type: "constant" 2043 | } 2044 | } 2045 | } 2046 | layer { 2047 | name: "Mconv7_stage4_L2" 2048 | type: "Convolution" 2049 | bottom: "Mconv6_stage4_L2" 2050 | top: "Mconv7_stage4_L2" 2051 | param { 2052 | lr_mult: 4.0 2053 | decay_mult: 1 2054 | } 2055 | param { 2056 | lr_mult: 8.0 2057 | decay_mult: 0 2058 | } 2059 | convolution_param { 2060 | num_output: 19 2061 | pad: 0 2062 | kernel_size: 1 2063 | weight_filler { 2064 | type: "gaussian" 2065 | std: 0.01 2066 | } 2067 | bias_filler { 2068 | type: "constant" 2069 | } 2070 | } 2071 | } 2072 | layer { 2073 | name: "concat_stage5" 2074 | type: "Concat" 2075 | bottom: "Mconv7_stage4_L1" 2076 | bottom: "Mconv7_stage4_L2" 2077 | bottom: "conv4_4_CPM" 2078 | top: "concat_stage5" 2079 | concat_param { 2080 | axis: 1 2081 | } 2082 | } 2083 | layer { 2084 | name: "Mconv1_stage5_L1" 2085 | type: "Convolution" 2086 | bottom: "concat_stage5" 2087 | top: 
"Mconv1_stage5_L1" 2088 | param { 2089 | lr_mult: 4.0 2090 | decay_mult: 1 2091 | } 2092 | param { 2093 | lr_mult: 8.0 2094 | decay_mult: 0 2095 | } 2096 | convolution_param { 2097 | num_output: 128 2098 | pad: 3 2099 | kernel_size: 7 2100 | weight_filler { 2101 | type: "gaussian" 2102 | std: 0.01 2103 | } 2104 | bias_filler { 2105 | type: "constant" 2106 | } 2107 | } 2108 | } 2109 | layer { 2110 | name: "Mrelu1_stage5_L1" 2111 | type: "ReLU" 2112 | bottom: "Mconv1_stage5_L1" 2113 | top: "Mconv1_stage5_L1" 2114 | } 2115 | layer { 2116 | name: "Mconv1_stage5_L2" 2117 | type: "Convolution" 2118 | bottom: "concat_stage5" 2119 | top: "Mconv1_stage5_L2" 2120 | param { 2121 | lr_mult: 4.0 2122 | decay_mult: 1 2123 | } 2124 | param { 2125 | lr_mult: 8.0 2126 | decay_mult: 0 2127 | } 2128 | convolution_param { 2129 | num_output: 128 2130 | pad: 3 2131 | kernel_size: 7 2132 | weight_filler { 2133 | type: "gaussian" 2134 | std: 0.01 2135 | } 2136 | bias_filler { 2137 | type: "constant" 2138 | } 2139 | } 2140 | } 2141 | layer { 2142 | name: "Mrelu1_stage5_L2" 2143 | type: "ReLU" 2144 | bottom: "Mconv1_stage5_L2" 2145 | top: "Mconv1_stage5_L2" 2146 | } 2147 | layer { 2148 | name: "Mconv2_stage5_L1" 2149 | type: "Convolution" 2150 | bottom: "Mconv1_stage5_L1" 2151 | top: "Mconv2_stage5_L1" 2152 | param { 2153 | lr_mult: 4.0 2154 | decay_mult: 1 2155 | } 2156 | param { 2157 | lr_mult: 8.0 2158 | decay_mult: 0 2159 | } 2160 | convolution_param { 2161 | num_output: 128 2162 | pad: 3 2163 | kernel_size: 7 2164 | weight_filler { 2165 | type: "gaussian" 2166 | std: 0.01 2167 | } 2168 | bias_filler { 2169 | type: "constant" 2170 | } 2171 | } 2172 | } 2173 | layer { 2174 | name: "Mrelu2_stage5_L1" 2175 | type: "ReLU" 2176 | bottom: "Mconv2_stage5_L1" 2177 | top: "Mconv2_stage5_L1" 2178 | } 2179 | layer { 2180 | name: "Mconv2_stage5_L2" 2181 | type: "Convolution" 2182 | bottom: "Mconv1_stage5_L2" 2183 | top: "Mconv2_stage5_L2" 2184 | param { 2185 | lr_mult: 4.0 2186 | decay_mult: 1 2187 | } 2188 | param { 2189 | lr_mult: 8.0 2190 | decay_mult: 0 2191 | } 2192 | convolution_param { 2193 | num_output: 128 2194 | pad: 3 2195 | kernel_size: 7 2196 | weight_filler { 2197 | type: "gaussian" 2198 | std: 0.01 2199 | } 2200 | bias_filler { 2201 | type: "constant" 2202 | } 2203 | } 2204 | } 2205 | layer { 2206 | name: "Mrelu2_stage5_L2" 2207 | type: "ReLU" 2208 | bottom: "Mconv2_stage5_L2" 2209 | top: "Mconv2_stage5_L2" 2210 | } 2211 | layer { 2212 | name: "Mconv3_stage5_L1" 2213 | type: "Convolution" 2214 | bottom: "Mconv2_stage5_L1" 2215 | top: "Mconv3_stage5_L1" 2216 | param { 2217 | lr_mult: 4.0 2218 | decay_mult: 1 2219 | } 2220 | param { 2221 | lr_mult: 8.0 2222 | decay_mult: 0 2223 | } 2224 | convolution_param { 2225 | num_output: 128 2226 | pad: 3 2227 | kernel_size: 7 2228 | weight_filler { 2229 | type: "gaussian" 2230 | std: 0.01 2231 | } 2232 | bias_filler { 2233 | type: "constant" 2234 | } 2235 | } 2236 | } 2237 | layer { 2238 | name: "Mrelu3_stage5_L1" 2239 | type: "ReLU" 2240 | bottom: "Mconv3_stage5_L1" 2241 | top: "Mconv3_stage5_L1" 2242 | } 2243 | layer { 2244 | name: "Mconv3_stage5_L2" 2245 | type: "Convolution" 2246 | bottom: "Mconv2_stage5_L2" 2247 | top: "Mconv3_stage5_L2" 2248 | param { 2249 | lr_mult: 4.0 2250 | decay_mult: 1 2251 | } 2252 | param { 2253 | lr_mult: 8.0 2254 | decay_mult: 0 2255 | } 2256 | convolution_param { 2257 | num_output: 128 2258 | pad: 3 2259 | kernel_size: 7 2260 | weight_filler { 2261 | type: "gaussian" 2262 | std: 0.01 2263 | } 2264 | bias_filler { 2265 | type: "constant" 
2266 | } 2267 | } 2268 | } 2269 | layer { 2270 | name: "Mrelu3_stage5_L2" 2271 | type: "ReLU" 2272 | bottom: "Mconv3_stage5_L2" 2273 | top: "Mconv3_stage5_L2" 2274 | } 2275 | layer { 2276 | name: "Mconv4_stage5_L1" 2277 | type: "Convolution" 2278 | bottom: "Mconv3_stage5_L1" 2279 | top: "Mconv4_stage5_L1" 2280 | param { 2281 | lr_mult: 4.0 2282 | decay_mult: 1 2283 | } 2284 | param { 2285 | lr_mult: 8.0 2286 | decay_mult: 0 2287 | } 2288 | convolution_param { 2289 | num_output: 128 2290 | pad: 3 2291 | kernel_size: 7 2292 | weight_filler { 2293 | type: "gaussian" 2294 | std: 0.01 2295 | } 2296 | bias_filler { 2297 | type: "constant" 2298 | } 2299 | } 2300 | } 2301 | layer { 2302 | name: "Mrelu4_stage5_L1" 2303 | type: "ReLU" 2304 | bottom: "Mconv4_stage5_L1" 2305 | top: "Mconv4_stage5_L1" 2306 | } 2307 | layer { 2308 | name: "Mconv4_stage5_L2" 2309 | type: "Convolution" 2310 | bottom: "Mconv3_stage5_L2" 2311 | top: "Mconv4_stage5_L2" 2312 | param { 2313 | lr_mult: 4.0 2314 | decay_mult: 1 2315 | } 2316 | param { 2317 | lr_mult: 8.0 2318 | decay_mult: 0 2319 | } 2320 | convolution_param { 2321 | num_output: 128 2322 | pad: 3 2323 | kernel_size: 7 2324 | weight_filler { 2325 | type: "gaussian" 2326 | std: 0.01 2327 | } 2328 | bias_filler { 2329 | type: "constant" 2330 | } 2331 | } 2332 | } 2333 | layer { 2334 | name: "Mrelu4_stage5_L2" 2335 | type: "ReLU" 2336 | bottom: "Mconv4_stage5_L2" 2337 | top: "Mconv4_stage5_L2" 2338 | } 2339 | layer { 2340 | name: "Mconv5_stage5_L1" 2341 | type: "Convolution" 2342 | bottom: "Mconv4_stage5_L1" 2343 | top: "Mconv5_stage5_L1" 2344 | param { 2345 | lr_mult: 4.0 2346 | decay_mult: 1 2347 | } 2348 | param { 2349 | lr_mult: 8.0 2350 | decay_mult: 0 2351 | } 2352 | convolution_param { 2353 | num_output: 128 2354 | pad: 3 2355 | kernel_size: 7 2356 | weight_filler { 2357 | type: "gaussian" 2358 | std: 0.01 2359 | } 2360 | bias_filler { 2361 | type: "constant" 2362 | } 2363 | } 2364 | } 2365 | layer { 2366 | name: "Mrelu5_stage5_L1" 2367 | type: "ReLU" 2368 | bottom: "Mconv5_stage5_L1" 2369 | top: "Mconv5_stage5_L1" 2370 | } 2371 | layer { 2372 | name: "Mconv5_stage5_L2" 2373 | type: "Convolution" 2374 | bottom: "Mconv4_stage5_L2" 2375 | top: "Mconv5_stage5_L2" 2376 | param { 2377 | lr_mult: 4.0 2378 | decay_mult: 1 2379 | } 2380 | param { 2381 | lr_mult: 8.0 2382 | decay_mult: 0 2383 | } 2384 | convolution_param { 2385 | num_output: 128 2386 | pad: 3 2387 | kernel_size: 7 2388 | weight_filler { 2389 | type: "gaussian" 2390 | std: 0.01 2391 | } 2392 | bias_filler { 2393 | type: "constant" 2394 | } 2395 | } 2396 | } 2397 | layer { 2398 | name: "Mrelu5_stage5_L2" 2399 | type: "ReLU" 2400 | bottom: "Mconv5_stage5_L2" 2401 | top: "Mconv5_stage5_L2" 2402 | } 2403 | layer { 2404 | name: "Mconv6_stage5_L1" 2405 | type: "Convolution" 2406 | bottom: "Mconv5_stage5_L1" 2407 | top: "Mconv6_stage5_L1" 2408 | param { 2409 | lr_mult: 4.0 2410 | decay_mult: 1 2411 | } 2412 | param { 2413 | lr_mult: 8.0 2414 | decay_mult: 0 2415 | } 2416 | convolution_param { 2417 | num_output: 128 2418 | pad: 0 2419 | kernel_size: 1 2420 | weight_filler { 2421 | type: "gaussian" 2422 | std: 0.01 2423 | } 2424 | bias_filler { 2425 | type: "constant" 2426 | } 2427 | } 2428 | } 2429 | layer { 2430 | name: "Mrelu6_stage5_L1" 2431 | type: "ReLU" 2432 | bottom: "Mconv6_stage5_L1" 2433 | top: "Mconv6_stage5_L1" 2434 | } 2435 | layer { 2436 | name: "Mconv6_stage5_L2" 2437 | type: "Convolution" 2438 | bottom: "Mconv5_stage5_L2" 2439 | top: "Mconv6_stage5_L2" 2440 | param { 2441 | lr_mult: 4.0 2442 | 
decay_mult: 1 2443 | } 2444 | param { 2445 | lr_mult: 8.0 2446 | decay_mult: 0 2447 | } 2448 | convolution_param { 2449 | num_output: 128 2450 | pad: 0 2451 | kernel_size: 1 2452 | weight_filler { 2453 | type: "gaussian" 2454 | std: 0.01 2455 | } 2456 | bias_filler { 2457 | type: "constant" 2458 | } 2459 | } 2460 | } 2461 | layer { 2462 | name: "Mrelu6_stage5_L2" 2463 | type: "ReLU" 2464 | bottom: "Mconv6_stage5_L2" 2465 | top: "Mconv6_stage5_L2" 2466 | } 2467 | layer { 2468 | name: "Mconv7_stage5_L1" 2469 | type: "Convolution" 2470 | bottom: "Mconv6_stage5_L1" 2471 | top: "Mconv7_stage5_L1" 2472 | param { 2473 | lr_mult: 4.0 2474 | decay_mult: 1 2475 | } 2476 | param { 2477 | lr_mult: 8.0 2478 | decay_mult: 0 2479 | } 2480 | convolution_param { 2481 | num_output: 38 2482 | pad: 0 2483 | kernel_size: 1 2484 | weight_filler { 2485 | type: "gaussian" 2486 | std: 0.01 2487 | } 2488 | bias_filler { 2489 | type: "constant" 2490 | } 2491 | } 2492 | } 2493 | layer { 2494 | name: "Mconv7_stage5_L2" 2495 | type: "Convolution" 2496 | bottom: "Mconv6_stage5_L2" 2497 | top: "Mconv7_stage5_L2" 2498 | param { 2499 | lr_mult: 4.0 2500 | decay_mult: 1 2501 | } 2502 | param { 2503 | lr_mult: 8.0 2504 | decay_mult: 0 2505 | } 2506 | convolution_param { 2507 | num_output: 19 2508 | pad: 0 2509 | kernel_size: 1 2510 | weight_filler { 2511 | type: "gaussian" 2512 | std: 0.01 2513 | } 2514 | bias_filler { 2515 | type: "constant" 2516 | } 2517 | } 2518 | } 2519 | layer { 2520 | name: "concat_stage6" 2521 | type: "Concat" 2522 | bottom: "Mconv7_stage5_L1" 2523 | bottom: "Mconv7_stage5_L2" 2524 | bottom: "conv4_4_CPM" 2525 | top: "concat_stage6" 2526 | concat_param { 2527 | axis: 1 2528 | } 2529 | } 2530 | layer { 2531 | name: "Mconv1_stage6_L1" 2532 | type: "Convolution" 2533 | bottom: "concat_stage6" 2534 | top: "Mconv1_stage6_L1" 2535 | param { 2536 | lr_mult: 4.0 2537 | decay_mult: 1 2538 | } 2539 | param { 2540 | lr_mult: 8.0 2541 | decay_mult: 0 2542 | } 2543 | convolution_param { 2544 | num_output: 128 2545 | pad: 3 2546 | kernel_size: 7 2547 | weight_filler { 2548 | type: "gaussian" 2549 | std: 0.01 2550 | } 2551 | bias_filler { 2552 | type: "constant" 2553 | } 2554 | } 2555 | } 2556 | layer { 2557 | name: "Mrelu1_stage6_L1" 2558 | type: "ReLU" 2559 | bottom: "Mconv1_stage6_L1" 2560 | top: "Mconv1_stage6_L1" 2561 | } 2562 | layer { 2563 | name: "Mconv1_stage6_L2" 2564 | type: "Convolution" 2565 | bottom: "concat_stage6" 2566 | top: "Mconv1_stage6_L2" 2567 | param { 2568 | lr_mult: 4.0 2569 | decay_mult: 1 2570 | } 2571 | param { 2572 | lr_mult: 8.0 2573 | decay_mult: 0 2574 | } 2575 | convolution_param { 2576 | num_output: 128 2577 | pad: 3 2578 | kernel_size: 7 2579 | weight_filler { 2580 | type: "gaussian" 2581 | std: 0.01 2582 | } 2583 | bias_filler { 2584 | type: "constant" 2585 | } 2586 | } 2587 | } 2588 | layer { 2589 | name: "Mrelu1_stage6_L2" 2590 | type: "ReLU" 2591 | bottom: "Mconv1_stage6_L2" 2592 | top: "Mconv1_stage6_L2" 2593 | } 2594 | layer { 2595 | name: "Mconv2_stage6_L1" 2596 | type: "Convolution" 2597 | bottom: "Mconv1_stage6_L1" 2598 | top: "Mconv2_stage6_L1" 2599 | param { 2600 | lr_mult: 4.0 2601 | decay_mult: 1 2602 | } 2603 | param { 2604 | lr_mult: 8.0 2605 | decay_mult: 0 2606 | } 2607 | convolution_param { 2608 | num_output: 128 2609 | pad: 3 2610 | kernel_size: 7 2611 | weight_filler { 2612 | type: "gaussian" 2613 | std: 0.01 2614 | } 2615 | bias_filler { 2616 | type: "constant" 2617 | } 2618 | } 2619 | } 2620 | layer { 2621 | name: "Mrelu2_stage6_L1" 2622 | type: "ReLU" 2623 | 
bottom: "Mconv2_stage6_L1" 2624 | top: "Mconv2_stage6_L1" 2625 | } 2626 | layer { 2627 | name: "Mconv2_stage6_L2" 2628 | type: "Convolution" 2629 | bottom: "Mconv1_stage6_L2" 2630 | top: "Mconv2_stage6_L2" 2631 | param { 2632 | lr_mult: 4.0 2633 | decay_mult: 1 2634 | } 2635 | param { 2636 | lr_mult: 8.0 2637 | decay_mult: 0 2638 | } 2639 | convolution_param { 2640 | num_output: 128 2641 | pad: 3 2642 | kernel_size: 7 2643 | weight_filler { 2644 | type: "gaussian" 2645 | std: 0.01 2646 | } 2647 | bias_filler { 2648 | type: "constant" 2649 | } 2650 | } 2651 | } 2652 | layer { 2653 | name: "Mrelu2_stage6_L2" 2654 | type: "ReLU" 2655 | bottom: "Mconv2_stage6_L2" 2656 | top: "Mconv2_stage6_L2" 2657 | } 2658 | layer { 2659 | name: "Mconv3_stage6_L1" 2660 | type: "Convolution" 2661 | bottom: "Mconv2_stage6_L1" 2662 | top: "Mconv3_stage6_L1" 2663 | param { 2664 | lr_mult: 4.0 2665 | decay_mult: 1 2666 | } 2667 | param { 2668 | lr_mult: 8.0 2669 | decay_mult: 0 2670 | } 2671 | convolution_param { 2672 | num_output: 128 2673 | pad: 3 2674 | kernel_size: 7 2675 | weight_filler { 2676 | type: "gaussian" 2677 | std: 0.01 2678 | } 2679 | bias_filler { 2680 | type: "constant" 2681 | } 2682 | } 2683 | } 2684 | layer { 2685 | name: "Mrelu3_stage6_L1" 2686 | type: "ReLU" 2687 | bottom: "Mconv3_stage6_L1" 2688 | top: "Mconv3_stage6_L1" 2689 | } 2690 | layer { 2691 | name: "Mconv3_stage6_L2" 2692 | type: "Convolution" 2693 | bottom: "Mconv2_stage6_L2" 2694 | top: "Mconv3_stage6_L2" 2695 | param { 2696 | lr_mult: 4.0 2697 | decay_mult: 1 2698 | } 2699 | param { 2700 | lr_mult: 8.0 2701 | decay_mult: 0 2702 | } 2703 | convolution_param { 2704 | num_output: 128 2705 | pad: 3 2706 | kernel_size: 7 2707 | weight_filler { 2708 | type: "gaussian" 2709 | std: 0.01 2710 | } 2711 | bias_filler { 2712 | type: "constant" 2713 | } 2714 | } 2715 | } 2716 | layer { 2717 | name: "Mrelu3_stage6_L2" 2718 | type: "ReLU" 2719 | bottom: "Mconv3_stage6_L2" 2720 | top: "Mconv3_stage6_L2" 2721 | } 2722 | layer { 2723 | name: "Mconv4_stage6_L1" 2724 | type: "Convolution" 2725 | bottom: "Mconv3_stage6_L1" 2726 | top: "Mconv4_stage6_L1" 2727 | param { 2728 | lr_mult: 4.0 2729 | decay_mult: 1 2730 | } 2731 | param { 2732 | lr_mult: 8.0 2733 | decay_mult: 0 2734 | } 2735 | convolution_param { 2736 | num_output: 128 2737 | pad: 3 2738 | kernel_size: 7 2739 | weight_filler { 2740 | type: "gaussian" 2741 | std: 0.01 2742 | } 2743 | bias_filler { 2744 | type: "constant" 2745 | } 2746 | } 2747 | } 2748 | layer { 2749 | name: "Mrelu4_stage6_L1" 2750 | type: "ReLU" 2751 | bottom: "Mconv4_stage6_L1" 2752 | top: "Mconv4_stage6_L1" 2753 | } 2754 | layer { 2755 | name: "Mconv4_stage6_L2" 2756 | type: "Convolution" 2757 | bottom: "Mconv3_stage6_L2" 2758 | top: "Mconv4_stage6_L2" 2759 | param { 2760 | lr_mult: 4.0 2761 | decay_mult: 1 2762 | } 2763 | param { 2764 | lr_mult: 8.0 2765 | decay_mult: 0 2766 | } 2767 | convolution_param { 2768 | num_output: 128 2769 | pad: 3 2770 | kernel_size: 7 2771 | weight_filler { 2772 | type: "gaussian" 2773 | std: 0.01 2774 | } 2775 | bias_filler { 2776 | type: "constant" 2777 | } 2778 | } 2779 | } 2780 | layer { 2781 | name: "Mrelu4_stage6_L2" 2782 | type: "ReLU" 2783 | bottom: "Mconv4_stage6_L2" 2784 | top: "Mconv4_stage6_L2" 2785 | } 2786 | layer { 2787 | name: "Mconv5_stage6_L1" 2788 | type: "Convolution" 2789 | bottom: "Mconv4_stage6_L1" 2790 | top: "Mconv5_stage6_L1" 2791 | param { 2792 | lr_mult: 4.0 2793 | decay_mult: 1 2794 | } 2795 | param { 2796 | lr_mult: 8.0 2797 | decay_mult: 0 2798 | } 2799 | 
convolution_param { 2800 | num_output: 128 2801 | pad: 3 2802 | kernel_size: 7 2803 | weight_filler { 2804 | type: "gaussian" 2805 | std: 0.01 2806 | } 2807 | bias_filler { 2808 | type: "constant" 2809 | } 2810 | } 2811 | } 2812 | layer { 2813 | name: "Mrelu5_stage6_L1" 2814 | type: "ReLU" 2815 | bottom: "Mconv5_stage6_L1" 2816 | top: "Mconv5_stage6_L1" 2817 | } 2818 | layer { 2819 | name: "Mconv5_stage6_L2" 2820 | type: "Convolution" 2821 | bottom: "Mconv4_stage6_L2" 2822 | top: "Mconv5_stage6_L2" 2823 | param { 2824 | lr_mult: 4.0 2825 | decay_mult: 1 2826 | } 2827 | param { 2828 | lr_mult: 8.0 2829 | decay_mult: 0 2830 | } 2831 | convolution_param { 2832 | num_output: 128 2833 | pad: 3 2834 | kernel_size: 7 2835 | weight_filler { 2836 | type: "gaussian" 2837 | std: 0.01 2838 | } 2839 | bias_filler { 2840 | type: "constant" 2841 | } 2842 | } 2843 | } 2844 | layer { 2845 | name: "Mrelu5_stage6_L2" 2846 | type: "ReLU" 2847 | bottom: "Mconv5_stage6_L2" 2848 | top: "Mconv5_stage6_L2" 2849 | } 2850 | layer { 2851 | name: "Mconv6_stage6_L1" 2852 | type: "Convolution" 2853 | bottom: "Mconv5_stage6_L1" 2854 | top: "Mconv6_stage6_L1" 2855 | param { 2856 | lr_mult: 4.0 2857 | decay_mult: 1 2858 | } 2859 | param { 2860 | lr_mult: 8.0 2861 | decay_mult: 0 2862 | } 2863 | convolution_param { 2864 | num_output: 128 2865 | pad: 0 2866 | kernel_size: 1 2867 | weight_filler { 2868 | type: "gaussian" 2869 | std: 0.01 2870 | } 2871 | bias_filler { 2872 | type: "constant" 2873 | } 2874 | } 2875 | } 2876 | layer { 2877 | name: "Mrelu6_stage6_L1" 2878 | type: "ReLU" 2879 | bottom: "Mconv6_stage6_L1" 2880 | top: "Mconv6_stage6_L1" 2881 | } 2882 | layer { 2883 | name: "Mconv6_stage6_L2" 2884 | type: "Convolution" 2885 | bottom: "Mconv5_stage6_L2" 2886 | top: "Mconv6_stage6_L2" 2887 | param { 2888 | lr_mult: 4.0 2889 | decay_mult: 1 2890 | } 2891 | param { 2892 | lr_mult: 8.0 2893 | decay_mult: 0 2894 | } 2895 | convolution_param { 2896 | num_output: 128 2897 | pad: 0 2898 | kernel_size: 1 2899 | weight_filler { 2900 | type: "gaussian" 2901 | std: 0.01 2902 | } 2903 | bias_filler { 2904 | type: "constant" 2905 | } 2906 | } 2907 | } 2908 | layer { 2909 | name: "Mrelu6_stage6_L2" 2910 | type: "ReLU" 2911 | bottom: "Mconv6_stage6_L2" 2912 | top: "Mconv6_stage6_L2" 2913 | } 2914 | layer { 2915 | name: "Mconv7_stage6_L1" 2916 | type: "Convolution" 2917 | bottom: "Mconv6_stage6_L1" 2918 | top: "Mconv7_stage6_L1" 2919 | param { 2920 | lr_mult: 4.0 2921 | decay_mult: 1 2922 | } 2923 | param { 2924 | lr_mult: 8.0 2925 | decay_mult: 0 2926 | } 2927 | convolution_param { 2928 | num_output: 38 2929 | pad: 0 2930 | kernel_size: 1 2931 | weight_filler { 2932 | type: "gaussian" 2933 | std: 0.01 2934 | } 2935 | bias_filler { 2936 | type: "constant" 2937 | } 2938 | } 2939 | } 2940 | layer { 2941 | name: "Mconv7_stage6_L2" 2942 | type: "Convolution" 2943 | bottom: "Mconv6_stage6_L2" 2944 | top: "Mconv7_stage6_L2" 2945 | param { 2946 | lr_mult: 4.0 2947 | decay_mult: 1 2948 | } 2949 | param { 2950 | lr_mult: 8.0 2951 | decay_mult: 0 2952 | } 2953 | convolution_param { 2954 | num_output: 19 2955 | pad: 0 2956 | kernel_size: 1 2957 | weight_filler { 2958 | type: "gaussian" 2959 | std: 0.01 2960 | } 2961 | bias_filler { 2962 | type: "constant" 2963 | } 2964 | } 2965 | } 2966 | layer { 2967 | name: "concat_stage7" 2968 | type: "Concat" 2969 | bottom: "Mconv7_stage6_L2" 2970 | bottom: "Mconv7_stage6_L1" 2971 | # top: "concat_stage7" 2972 | top: "net_output" 2973 | concat_param { 2974 | axis: 1 2975 | } 2976 | } 2977 | 
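A brief note on the blob this deploy file produces (an editorial sketch, not a file from the repository): the final `concat_stage7` layer above concatenates the stage-6 `Mconv7_stage6_L2` output (19 channels) with the `Mconv7_stage6_L1` output (38 channels) along the channel axis into a single 57-channel blob named `net_output`. Following the usual OpenPose COCO convention, the 19 L2 channels are read as part-confidence heatmaps (18 keypoints plus background) and the 38 L1 channels as part-affinity fields; that interpretation, the weight-file name, and the image path below are assumptions rather than facts stated in the prototxt.

```python
# Minimal sketch: run the deploy network with OpenCV's DNN module and split
# net_output into heatmaps and part-affinity fields. The channel counts
# (19 + 38, L2 concatenated first) come from the prototxt above; the
# .caffemodel name and the input image are placeholders.
import cv2

net = cv2.dnn.readNetFromCaffe(
    'models/pose/coco/pose_deploy_linevec.prototxt',
    'models/pose/coco/pose_iter_440000.caffemodel')   # assumed COCO weights
image = cv2.imread('frame.jpg')                        # placeholder input frame
blob = cv2.dnn.blobFromImage(image, 1.0 / 255, (368, 368),
                             (0, 0, 0), swapRB=False, crop=False)
net.setInput(blob)
out = net.forward()          # shape (1, 57, H_out, W_out)

heatmaps = out[0, :19]       # L2 branch: 18 keypoint maps + 1 background map
pafs = out[0, 19:]           # L1 branch: 38 part-affinity-field channels
print(heatmaps.shape, pafs.shape)
```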
-------------------------------------------------------------------------------- /net/__init__.py: -------------------------------------------------------------------------------- 1 | from . import utils -------------------------------------------------------------------------------- /net/st_gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | from net.utils.tgcn import ConvTemporalGraphical 7 | from net.utils.graph import Graph 8 | 9 | class Model(nn.Module): 10 | r"""Spatial temporal graph convolutional networks. 11 | 12 | Args: 13 | in_channels (int): Number of channels in the input data 14 | num_class (int): Number of classes for the classification task 15 | graph_args (dict): The arguments for building the graph 16 | edge_importance_weighting (bool): If ``True``, adds a learnable 17 | importance weighting to the edges of the graph 18 | **kwargs (optional): Other parameters for graph convolution units 19 | 20 | Shape: 21 | - Input: :math:`(N, in_channels, T_{in}, V_{in}, M_{in})` 22 | - Output: :math:`(N, num_class)` where 23 | :math:`N` is a batch size, 24 | :math:`T_{in}` is a length of input sequence, 25 | :math:`V_{in}` is the number of graph nodes, 26 | :math:`M_{in}` is the number of instance in a frame. 27 | """ 28 | 29 | def __init__(self, in_channels, num_class, graph_args, 30 | edge_importance_weighting, **kwargs): 31 | super().__init__() 32 | 33 | # load graph 34 | self.graph = Graph(**graph_args) 35 | A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False) 36 | self.register_buffer('A', A) 37 | 38 | # build networks 39 | spatial_kernel_size = A.size(0) 40 | temporal_kernel_size = 9 41 | kernel_size = (temporal_kernel_size, spatial_kernel_size) 42 | self.data_bn = nn.BatchNorm1d(in_channels * A.size(1)) 43 | kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'} 44 | self.st_gcn_networks = nn.ModuleList(( 45 | st_gcn(in_channels, 64, kernel_size, 1, residual=False, **kwargs0), 46 | st_gcn(64, 64, kernel_size, 1, **kwargs), 47 | st_gcn(64, 64, kernel_size, 1, **kwargs), 48 | st_gcn(64, 64, kernel_size, 1, **kwargs), 49 | st_gcn(64, 128, kernel_size, 2, **kwargs), 50 | st_gcn(128, 128, kernel_size, 1, **kwargs), 51 | st_gcn(128, 128, kernel_size, 1, **kwargs), 52 | st_gcn(128, 256, kernel_size, 2, **kwargs), 53 | st_gcn(256, 256, kernel_size, 1, **kwargs), 54 | st_gcn(256, 256, kernel_size, 1, **kwargs), 55 | )) 56 | 57 | # initialize parameters for edge importance weighting 58 | if edge_importance_weighting: 59 | self.edge_importance = nn.ParameterList([ 60 | nn.Parameter(torch.ones(self.A.size())) 61 | for i in self.st_gcn_networks 62 | ]) 63 | else: 64 | self.edge_importance = [1] * len(self.st_gcn_networks) 65 | 66 | # fcn for prediction 67 | self.fcn = nn.Conv2d(256, num_class, kernel_size=1) 68 | 69 | def forward(self, x): 70 | 71 | # data normalization 72 | N, C, T, V, M = x.size() 73 | x = x.permute(0, 4, 3, 1, 2).contiguous() 74 | x = x.view(N * M, V * C, T) 75 | x = self.data_bn(x) 76 | x = x.view(N, M, V, C, T) 77 | x = x.permute(0, 1, 3, 4, 2).contiguous() 78 | x = x.view(N * M, C, T, V) 79 | 80 | # forwad 81 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 82 | x, _ = gcn(x, self.A * importance) 83 | 84 | # global pooling 85 | x = F.avg_pool2d(x, x.size()[2:]) 86 | x = x.view(N, M, -1, 1, 1).mean(dim=1) 87 | 88 | # prediction 89 | x = self.fcn(x) 90 | x = 
x.view(x.size(0), -1) 91 | 92 | return x 93 | 94 | def extract_feature(self, x): 95 | 96 | # data normalization 97 | N, C, T, V, M = x.size() 98 | x = x.permute(0, 4, 3, 1, 2).contiguous() 99 | x = x.view(N * M, V * C, T) 100 | x = self.data_bn(x) 101 | x = x.view(N, M, V, C, T) 102 | x = x.permute(0, 1, 3, 4, 2).contiguous() 103 | x = x.view(N * M, C, T, V) 104 | 105 | # forward 106 | for gcn, importance in zip(self.st_gcn_networks, self.edge_importance): 107 | x, _ = gcn(x, self.A * importance) 108 | 109 | _, c, t, v = x.size() 110 | feature = x.view(N, M, c, t, v).permute(0, 2, 3, 4, 1) 111 | 112 | # prediction 113 | x = self.fcn(x) 114 | output = x.view(N, M, -1, t, v).permute(0, 2, 3, 4, 1) 115 | 116 | return output, feature 117 | 118 | class st_gcn(nn.Module): 119 | r"""Applies a spatial temporal graph convolution over an input graph sequence. 120 | 121 | Args: 122 | in_channels (int): Number of channels in the input sequence data 123 | out_channels (int): Number of channels produced by the convolution 124 | kernel_size (tuple): Size of the temporal convolving kernel and graph convolving kernel 125 | stride (int, optional): Stride of the temporal convolution. Default: 1 126 | dropout (float, optional): Dropout rate of the final output. Default: 0 127 | residual (bool, optional): If ``True``, applies a residual mechanism. Default: ``True`` 128 | 129 | Shape: 130 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format 131 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format 132 | - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format 133 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format 134 | 135 | where 136 | :math:`N` is a batch size, 137 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`, 138 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 139 | :math:`V` is the number of graph nodes.
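    Example (an illustrative usage sketch added for clarity; the 9x3 kernel,
    the 18-node graph and the tensor sizes are assumed values, not ones fixed
    by this module)::

        >>> block = st_gcn(64, 128, kernel_size=(9, 3), stride=2)
        >>> x = torch.randn(4, 64, 150, 18)   # (N, in_channels, T_in, V)
        >>> A = torch.rand(3, 18, 18)          # (K, V, V) with K == kernel_size[1]
        >>> out, A_out = block(x, A)
        >>> out.shape                          # stride 2 halves the temporal length
        torch.Size([4, 128, 75, 18])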
140 | 141 | """ 142 | 143 | def __init__(self, 144 | in_channels, 145 | out_channels, 146 | kernel_size, 147 | stride=1, 148 | dropout=0, 149 | residual=True): 150 | super().__init__() 151 | 152 | assert len(kernel_size) == 2 153 | assert kernel_size[0] % 2 == 1 154 | padding = ((kernel_size[0] - 1) // 2, 0) 155 | 156 | self.gcn = ConvTemporalGraphical(in_channels, out_channels, 157 | kernel_size[1]) 158 | 159 | self.tcn = nn.Sequential( 160 | nn.BatchNorm2d(out_channels), 161 | nn.ReLU(inplace=True), 162 | nn.Conv2d( 163 | out_channels, 164 | out_channels, 165 | (kernel_size[0], 1), 166 | (stride, 1), 167 | padding, 168 | ), 169 | nn.BatchNorm2d(out_channels), 170 | nn.Dropout(dropout, inplace=True), 171 | ) 172 | 173 | if not residual: 174 | self.residual = lambda x: 0 175 | 176 | elif (in_channels == out_channels) and (stride == 1): 177 | self.residual = lambda x: x 178 | 179 | else: 180 | self.residual = nn.Sequential( 181 | nn.Conv2d( 182 | in_channels, 183 | out_channels, 184 | kernel_size=1, 185 | stride=(stride, 1)), 186 | nn.BatchNorm2d(out_channels), 187 | ) 188 | 189 | self.relu = nn.ReLU(inplace=True) 190 | 191 | def forward(self, x, A): 192 | 193 | res = self.residual(x) 194 | x, A = self.gcn(x, A) 195 | x = self.tcn(x) + res 196 | 197 | return self.relu(x), A -------------------------------------------------------------------------------- /net/st_gcn_twostream.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.autograd import Variable 5 | 6 | from net.utils.tgcn import ConvTemporalGraphical 7 | from net.utils.graph import Graph 8 | 9 | from .st_gcn import Model as ST_GCN 10 | 11 | class Model(nn.Module): 12 | 13 | def __init__(self, *args, **kwargs): 14 | super().__init__() 15 | 16 | self.origin_stream = ST_GCN(*args, **kwargs) 17 | self.motion_stream = ST_GCN(*args, **kwargs) 18 | 19 | def forward(self, x): 20 | N, C, T, V, M = x.size() 21 | m = torch.cat((torch.cuda.FloatTensor(N, C, 1, V, M).zero_(), 22 | x[:, :, 1:-1] - 0.5 * x[:, :, 2:] - 0.5 * x[:, :, :-2], 23 | torch.cuda.FloatTensor(N, C, 1, V, M).zero_()), 2) 24 | 25 | res = self.origin_stream(x) + self.motion_stream(m) 26 | return res -------------------------------------------------------------------------------- /net/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/net/utils/__init__.py -------------------------------------------------------------------------------- /net/utils/graph.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class Graph(): 4 | """ The Graph to model the skeletons extracted by the openpose 5 | 6 | Args: 7 | strategy (string): must be one of the follow candidates 8 | - uniform: Uniform Labeling 9 | - distance: Distance Partitioning 10 | - spatial: Spatial Configuration 11 | For more information, please refer to the section 'Partition Strategies' 12 | in our paper (https://arxiv.org/abs/1801.07455). 13 | 14 | layout (string): must be one of the follow candidates 15 | - openpose: Is consists of 18 joints. For more information, please 16 | refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose#output 17 | - ntu-rgb+d: Is consists of 25 joints. 
For more information, please 18 | refer to https://github.com/shahroudy/NTURGB-D 19 | 20 | max_hop (int): the maximal distance between two connected nodes 21 | dilation (int): controls the spacing between the kernel points 22 | 23 | """ 24 | 25 | def __init__(self, 26 | layout='openpose', 27 | strategy='uniform', 28 | max_hop=1, 29 | dilation=1): 30 | self.max_hop = max_hop 31 | self.dilation = dilation 32 | 33 | self.get_edge(layout) 34 | self.hop_dis = get_hop_distance( 35 | self.num_node, self.edge, max_hop=max_hop) 36 | self.get_adjacency(strategy) 37 | 38 | def __str__(self): 39 | return self.A 40 | 41 | def get_edge(self, layout): 42 | if layout == 'openpose': 43 | self.num_node = 18 44 | self_link = [(i, i) for i in range(self.num_node)] 45 | neighbor_link = [(4, 3), (3, 2), (7, 6), (6, 5), (13, 12), (12, 46 | 11), 47 | (10, 9), (9, 8), (11, 5), (8, 2), (5, 1), (2, 1), 48 | (0, 1), (15, 0), (14, 0), (17, 15), (16, 14)] 49 | self.edge = self_link + neighbor_link 50 | self.center = 1 51 | elif layout == 'ntu-rgb+d': 52 | self.num_node = 25 53 | self_link = [(i, i) for i in range(self.num_node)] 54 | neighbor_1base = [(1, 2), (2, 21), (3, 21), (4, 3), (5, 21), 55 | (6, 5), (7, 6), (8, 7), (9, 21), (10, 9), 56 | (11, 10), (12, 11), (13, 1), (14, 13), (15, 14), 57 | (16, 15), (17, 1), (18, 17), (19, 18), (20, 19), 58 | (22, 23), (23, 8), (24, 25), (25, 12)] 59 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 60 | self.edge = self_link + neighbor_link 61 | self.center = 21 - 1 62 | elif layout == 'ntu_edge': 63 | self.num_node = 24 64 | self_link = [(i, i) for i in range(self.num_node)] 65 | neighbor_1base = [(1, 2), (3, 2), (4, 3), (5, 2), (6, 5), (7, 6), 66 | (8, 7), (9, 2), (10, 9), (11, 10), (12, 11), 67 | (13, 1), (14, 13), (15, 14), (16, 15), (17, 1), 68 | (18, 17), (19, 18), (20, 19), (21, 22), (22, 8), 69 | (23, 24), (24, 12)] 70 | neighbor_link = [(i - 1, j - 1) for (i, j) in neighbor_1base] 71 | self.edge = self_link + neighbor_link 72 | self.center = 2 73 | # elif layout=='customer settings' 74 | # pass 75 | else: 76 | raise ValueError("Do Not Exist This Layout.") 77 | 78 | def get_adjacency(self, strategy): 79 | valid_hop = range(0, self.max_hop + 1, self.dilation) 80 | adjacency = np.zeros((self.num_node, self.num_node)) 81 | for hop in valid_hop: 82 | adjacency[self.hop_dis == hop] = 1 83 | normalize_adjacency = normalize_digraph(adjacency) 84 | 85 | if strategy == 'uniform': 86 | A = np.zeros((1, self.num_node, self.num_node)) 87 | A[0] = normalize_adjacency 88 | self.A = A 89 | elif strategy == 'distance': 90 | A = np.zeros((len(valid_hop), self.num_node, self.num_node)) 91 | for i, hop in enumerate(valid_hop): 92 | A[i][self.hop_dis == hop] = normalize_adjacency[self.hop_dis == 93 | hop] 94 | self.A = A 95 | elif strategy == 'spatial': 96 | A = [] 97 | for hop in valid_hop: 98 | a_root = np.zeros((self.num_node, self.num_node)) 99 | a_close = np.zeros((self.num_node, self.num_node)) 100 | a_further = np.zeros((self.num_node, self.num_node)) 101 | for i in range(self.num_node): 102 | for j in range(self.num_node): 103 | if self.hop_dis[j, i] == hop: 104 | if self.hop_dis[j, self.center] == self.hop_dis[ 105 | i, self.center]: 106 | a_root[j, i] = normalize_adjacency[j, i] 107 | elif self.hop_dis[j, self. 108 | center] > self.hop_dis[i, self. 
109 | center]: 110 | a_close[j, i] = normalize_adjacency[j, i] 111 | else: 112 | a_further[j, i] = normalize_adjacency[j, i] 113 | if hop == 0: 114 | A.append(a_root) 115 | else: 116 | A.append(a_root + a_close) 117 | A.append(a_further) 118 | A = np.stack(A) 119 | self.A = A 120 | else: 121 | raise ValueError("This strategy does not exist.") 122 | 123 | 124 | def get_hop_distance(num_node, edge, max_hop=1): 125 | A = np.zeros((num_node, num_node)) 126 | for i, j in edge: 127 | A[j, i] = 1 128 | A[i, j] = 1 129 | 130 | # compute hop steps 131 | hop_dis = np.zeros((num_node, num_node)) + np.inf 132 | transfer_mat = [np.linalg.matrix_power(A, d) for d in range(max_hop + 1)] 133 | arrive_mat = (np.stack(transfer_mat) > 0) 134 | for d in range(max_hop, -1, -1): 135 | hop_dis[arrive_mat[d]] = d 136 | return hop_dis 137 | 138 | 139 | def normalize_digraph(A): 140 | Dl = np.sum(A, 0) 141 | num_node = A.shape[0] 142 | Dn = np.zeros((num_node, num_node)) 143 | for i in range(num_node): 144 | if Dl[i] > 0: 145 | Dn[i, i] = Dl[i]**(-1) 146 | AD = np.dot(A, Dn) 147 | return AD 148 | 149 | 150 | def normalize_undigraph(A): 151 | Dl = np.sum(A, 0) 152 | num_node = A.shape[0] 153 | Dn = np.zeros((num_node, num_node)) 154 | for i in range(num_node): 155 | if Dl[i] > 0: 156 | Dn[i, i] = Dl[i]**(-0.5) 157 | DAD = np.dot(np.dot(Dn, A), Dn) 158 | return DAD -------------------------------------------------------------------------------- /net/utils/tgcn.py: -------------------------------------------------------------------------------- 1 | # The basic unit of graph convolutional networks. 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | class ConvTemporalGraphical(nn.Module): 7 | 8 | r"""The basic module for applying a graph convolution. 9 | 10 | Args: 11 | in_channels (int): Number of channels in the input sequence data 12 | out_channels (int): Number of channels produced by the convolution 13 | kernel_size (int): Size of the graph convolving kernel 14 | t_kernel_size (int): Size of the temporal convolving kernel 15 | t_stride (int, optional): Stride of the temporal convolution. Default: 1 16 | t_padding (int, optional): Temporal zero-padding added to both sides of 17 | the input. Default: 0 18 | t_dilation (int, optional): Spacing between temporal kernel elements. 19 | Default: 1 20 | bias (bool, optional): If ``True``, adds a learnable bias to the output. 21 | Default: ``True`` 22 | 23 | Shape: 24 | - Input[0]: Input graph sequence in :math:`(N, in_channels, T_{in}, V)` format 25 | - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format 26 | - Output[0]: Output graph sequence in :math:`(N, out_channels, T_{out}, V)` format 27 | - Output[1]: Graph adjacency matrix for output data in :math:`(K, V, V)` format 28 | 29 | where 30 | :math:`N` is a batch size, 31 | :math:`K` is the spatial kernel size, as :math:`K == kernel_size`, 32 | :math:`T_{in}/T_{out}` is a length of input/output sequence, 33 | :math:`V` is the number of graph nodes.
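    Example (an illustrative usage sketch added for clarity; the 18-node graph,
    the spatial kernel size of 3 and the tensor sizes are assumed values)::

        >>> gcn = ConvTemporalGraphical(in_channels=3, out_channels=64, kernel_size=3)
        >>> x = torch.randn(8, 3, 50, 18)   # (N, in_channels, T, V)
        >>> A = torch.rand(3, 18, 18)        # (K, V, V) with K == kernel_size
        >>> y, A_out = gcn(x, A)
        >>> y.shape
        torch.Size([8, 64, 50, 18])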
34 | """ 35 | 36 | def __init__(self, 37 | in_channels, 38 | out_channels, 39 | kernel_size, 40 | t_kernel_size=1, 41 | t_stride=1, 42 | t_padding=0, 43 | t_dilation=1, 44 | bias=True): 45 | super().__init__() 46 | 47 | self.kernel_size = kernel_size 48 | self.conv = nn.Conv2d( 49 | in_channels, 50 | out_channels * kernel_size, 51 | kernel_size=(t_kernel_size, 1), 52 | padding=(t_padding, 0), 53 | stride=(t_stride, 1), 54 | dilation=(t_dilation, 1), 55 | bias=bias) 56 | 57 | def forward(self, x, A): 58 | assert A.size(0) == self.kernel_size 59 | 60 | x = self.conv(x) 61 | 62 | n, kc, t, v = x.size() 63 | x = x.view(n, self.kernel_size, kc//self.kernel_size, t, v) 64 | x = torch.einsum('nkctv,kvw->nctw', (x, A)) 65 | 66 | return x.contiguous(), A 67 | -------------------------------------------------------------------------------- /processor/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/processor/__init__.py -------------------------------------------------------------------------------- /processor/demo_offline.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | import argparse 5 | import json 6 | import shutil 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import skvideo.io 12 | 13 | from .io import IO 14 | import tools 15 | import tools.utils as utils 16 | 17 | import cv2 18 | 19 | class DemoOffline(IO): 20 | 21 | def start(self): 22 | 23 | # initiate 24 | label_name_path = './resource/kinetics_skeleton/label_name.txt' 25 | with open(label_name_path) as f: 26 | label_name = f.readlines() 27 | label_name = [line.rstrip() for line in label_name] 28 | self.label_name = label_name 29 | 30 | # pose estimation 31 | video, data_numpy = self.pose_estimation() 32 | 33 | # action recognition 34 | data = torch.from_numpy(data_numpy) 35 | data = data.unsqueeze(0) 36 | data = data.float().to(self.dev).detach() # (1, channel, frame, joint, person) 37 | 38 | # model predict 39 | voting_label_name, video_label_name, output, intensity = self.predict(data) 40 | 41 | # render the video 42 | images = self.render_video(data_numpy, voting_label_name, 43 | video_label_name, intensity, video) 44 | 45 | # visualize 46 | for image in images: 47 | image = image.astype(np.uint8) 48 | cv2.imshow("ST-GCN", image) 49 | if cv2.waitKey(1) & 0xFF == ord('q'): 50 | break 51 | 52 | def predict(self, data): 53 | # forward 54 | output, feature = self.model.extract_feature(data) 55 | output = output[0] 56 | feature = feature[0] 57 | intensity = (feature*feature).sum(dim=0)**0.5 58 | intensity = intensity.cpu().detach().numpy() 59 | 60 | # get result 61 | # classification result of the full sequence 62 | voting_label = output.sum(dim=3).sum( 63 | dim=2).sum(dim=1).argmax(dim=0) 64 | voting_label_name = self.label_name[voting_label] 65 | # classification result for each person of the latest frame 66 | num_person = data.size(4) 67 | latest_frame_label = [output[:, :, :, m].sum( 68 | dim=2)[:, -1].argmax(dim=0) for m in range(num_person)] 69 | latest_frame_label_name = [self.label_name[l] 70 | for l in latest_frame_label] 71 | 72 | num_person = output.size(3) 73 | num_frame = output.size(1) 74 | video_label_name = list() 75 | for t in range(num_frame): 76 | frame_label_name = list() 77 | for m in range(num_person): 78 | person_label = output[:, t, :, m].sum(dim=1).argmax(dim=0) 79 | 
person_label_name = self.label_name[person_label] 80 | frame_label_name.append(person_label_name) 81 | video_label_name.append(frame_label_name) 82 | return voting_label_name, video_label_name, output, intensity 83 | 84 | def render_video(self, data_numpy, voting_label_name, video_label_name, intensity, video): 85 | images = utils.visualization.stgcn_visualize( 86 | data_numpy, 87 | self.model.graph.edge, 88 | intensity, video, 89 | voting_label_name, 90 | video_label_name, 91 | self.arg.height) 92 | return images 93 | 94 | def pose_estimation(self): 95 | # load openpose python api 96 | if self.arg.openpose is not None: 97 | sys.path.append('{}/python'.format(self.arg.openpose)) 98 | sys.path.append('{}/build/python'.format(self.arg.openpose)) 99 | try: 100 | from openpose import pyopenpose as op 101 | except: 102 | print('Can not find Openpose Python API.') 103 | return 104 | 105 | 106 | video_name = self.arg.video.split('/')[-1].split('.')[0] 107 | 108 | # initiate 109 | opWrapper = op.WrapperPython() 110 | params = dict(model_folder='./models', model_pose='COCO') 111 | opWrapper.configure(params) 112 | opWrapper.start() 113 | self.model.eval() 114 | video_capture = cv2.VideoCapture(self.arg.video) 115 | video_length = int(video_capture.get(cv2.CAP_PROP_FRAME_COUNT)) 116 | pose_tracker = naive_pose_tracker(data_frame=video_length) 117 | 118 | # pose estimation 119 | start_time = time.time() 120 | frame_index = 0 121 | video = list() 122 | while(True): 123 | 124 | # get image 125 | ret, orig_image = video_capture.read() 126 | if orig_image is None: 127 | break 128 | source_H, source_W, _ = orig_image.shape 129 | orig_image = cv2.resize( 130 | orig_image, (256 * source_W // source_H, 256)) 131 | H, W, _ = orig_image.shape 132 | video.append(orig_image) 133 | 134 | # pose estimation 135 | datum = op.Datum() 136 | datum.cvInputData = orig_image 137 | opWrapper.emplaceAndPop([datum]) 138 | multi_pose = datum.poseKeypoints # (num_person, num_joint, 3) 139 | if len(multi_pose.shape) != 3: 140 | continue 141 | 142 | # normalization 143 | multi_pose[:, :, 0] = multi_pose[:, :, 0]/W 144 | multi_pose[:, :, 1] = multi_pose[:, :, 1]/H 145 | multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5 146 | multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0 147 | multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0 148 | 149 | # pose tracking 150 | pose_tracker.update(multi_pose, frame_index) 151 | frame_index += 1 152 | 153 | print('Pose estimation ({}/{}).'.format(frame_index, video_length)) 154 | 155 | data_numpy = pose_tracker.get_skeleton_sequence() 156 | return video, data_numpy 157 | 158 | @staticmethod 159 | def get_parser(add_help=False): 160 | 161 | # parameter priority: command line > config > default 162 | parent_parser = IO.get_parser(add_help=False) 163 | parser = argparse.ArgumentParser( 164 | add_help=add_help, 165 | parents=[parent_parser], 166 | description='Demo for Spatial Temporal Graph Convolution Network') 167 | 168 | # region arguments yapf: disable 169 | parser.add_argument('--video', 170 | default='./resource/media/skateboarding.mp4', 171 | help='Path to video') 172 | parser.add_argument('--openpose', 173 | default=None, 174 | help='Path to openpose') 175 | parser.add_argument('--model_input_frame', 176 | default=128, 177 | type=int) 178 | parser.add_argument('--model_fps', 179 | default=30, 180 | type=int) 181 | parser.add_argument('--height', 182 | default=1080, 183 | type=int, 184 | help='height of frame in the output video.') 185 | parser.set_defaults( 186 | 
config='./config/st_gcn/kinetics-skeleton/demo_offline.yaml') 187 | parser.set_defaults(print_log=False) 188 | # endregion yapf: enable 189 | 190 | return parser 191 | 192 | class naive_pose_tracker(): 193 | """ A simple tracker for recording person poses and generating skeleton sequences. 194 | For actual occasion, I recommend you to implement a robuster tracker. 195 | Pull-requests are welcomed. 196 | """ 197 | 198 | def __init__(self, data_frame=128, num_joint=18, max_frame_dis=np.inf): 199 | self.data_frame = data_frame 200 | self.num_joint = num_joint 201 | self.max_frame_dis = max_frame_dis 202 | self.latest_frame = 0 203 | self.trace_info = list() 204 | 205 | def update(self, multi_pose, current_frame): 206 | # multi_pose.shape: (num_person, num_joint, 3) 207 | 208 | if current_frame <= self.latest_frame: 209 | return 210 | 211 | if len(multi_pose.shape) != 3: 212 | return 213 | 214 | score_order = (-multi_pose[:, :, 2].sum(axis=1)).argsort(axis=0) 215 | for p in multi_pose[score_order]: 216 | 217 | # match existing traces 218 | matching_trace = None 219 | matching_dis = None 220 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 221 | # trace.shape: (num_frame, num_joint, 3) 222 | if current_frame <= latest_frame: 223 | continue 224 | mean_dis, is_close = self.get_dis(trace, p) 225 | if is_close: 226 | if matching_trace is None: 227 | matching_trace = trace_index 228 | matching_dis = mean_dis 229 | elif matching_dis > mean_dis: 230 | matching_trace = trace_index 231 | matching_dis = mean_dis 232 | 233 | # update trace information 234 | if matching_trace is not None: 235 | trace, latest_frame = self.trace_info[matching_trace] 236 | 237 | # padding zero if the trace is fractured 238 | pad_mode = 'interp' if latest_frame == self.latest_frame else 'zero' 239 | pad = current_frame-latest_frame-1 240 | new_trace = self.cat_pose(trace, p, pad, pad_mode) 241 | self.trace_info[matching_trace] = (new_trace, current_frame) 242 | 243 | else: 244 | new_trace = np.array([p]) 245 | self.trace_info.append((new_trace, current_frame)) 246 | 247 | self.latest_frame = current_frame 248 | 249 | def get_skeleton_sequence(self): 250 | 251 | # remove old traces 252 | valid_trace_index = [] 253 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 254 | if self.latest_frame - latest_frame < self.data_frame: 255 | valid_trace_index.append(trace_index) 256 | self.trace_info = [self.trace_info[v] for v in valid_trace_index] 257 | 258 | num_trace = len(self.trace_info) 259 | if num_trace == 0: 260 | return None 261 | 262 | data = np.zeros((3, self.data_frame, self.num_joint, num_trace)) 263 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 264 | end = self.data_frame - (self.latest_frame - latest_frame) 265 | d = trace[-end:] 266 | beg = end - len(d) 267 | data[:, beg:end, :, trace_index] = d.transpose((2, 0, 1)) 268 | 269 | return data 270 | 271 | # concatenate pose to a trace 272 | def cat_pose(self, trace, pose, pad, pad_mode): 273 | # trace.shape: (num_frame, num_joint, 3) 274 | num_joint = pose.shape[0] 275 | num_channel = pose.shape[1] 276 | if pad != 0: 277 | if pad_mode == 'zero': 278 | trace = np.concatenate( 279 | (trace, np.zeros((pad, num_joint, 3))), 0) 280 | elif pad_mode == 'interp': 281 | last_pose = trace[-1] 282 | coeff = [(p+1)/(pad+1) for p in range(pad)] 283 | interp_pose = [(1-c)*last_pose + c*pose for c in coeff] 284 | trace = np.concatenate((trace, interp_pose), 0) 285 | new_trace = np.concatenate((trace, [pose]), 0) 286 | 
return new_trace 287 | 288 | # calculate the distance between a existing trace and the input pose 289 | 290 | def get_dis(self, trace, pose): 291 | last_pose_xy = trace[-1, :, 0:2] 292 | curr_pose_xy = pose[:, 0:2] 293 | 294 | mean_dis = ((((last_pose_xy - curr_pose_xy)**2).sum(1))**0.5).mean() 295 | wh = last_pose_xy.max(0) - last_pose_xy.min(0) 296 | scale = (wh[0] * wh[1]) ** 0.5 + 0.0001 297 | is_close = mean_dis < scale * self.max_frame_dis 298 | return mean_dis, is_close 299 | -------------------------------------------------------------------------------- /processor/demo_old.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import argparse 4 | import json 5 | import shutil 6 | 7 | import numpy as np 8 | import torch 9 | import skvideo.io 10 | 11 | from .io import IO 12 | import tools 13 | import tools.utils as utils 14 | 15 | class Demo(IO): 16 | """ 17 | Demo for Skeleton-based Action Recgnition 18 | """ 19 | def start(self): 20 | 21 | openpose = '{}/examples/openpose/openpose.bin'.format(self.arg.openpose) 22 | video_name = self.arg.video.split('/')[-1].split('.')[0] 23 | output_snippets_dir = './data/openpose_estimation/snippets/{}'.format(video_name) 24 | output_sequence_dir = './data/openpose_estimation/data' 25 | output_sequence_path = '{}/{}.json'.format(output_sequence_dir, video_name) 26 | output_result_dir = self.arg.output_dir 27 | output_result_path = '{}/{}.mp4'.format(output_result_dir, video_name) 28 | label_name_path = './resource/kinetics_skeleton/label_name.txt' 29 | with open(label_name_path) as f: 30 | label_name = f.readlines() 31 | label_name = [line.rstrip() for line in label_name] 32 | 33 | # pose estimation 34 | openpose_args = dict( 35 | video=self.arg.video, 36 | write_json=output_snippets_dir, 37 | display=0, 38 | render_pose=0, 39 | model_pose='COCO') 40 | command_line = openpose + ' ' 41 | command_line += ' '.join(['--{} {}'.format(k, v) for k, v in openpose_args.items()]) 42 | shutil.rmtree(output_snippets_dir, ignore_errors=True) 43 | os.makedirs(output_snippets_dir) 44 | os.system(command_line) 45 | 46 | # pack openpose ouputs 47 | video = utils.video.get_video_frames(self.arg.video) 48 | height, width, _ = video[0].shape 49 | video_info = utils.openpose.json_pack( 50 | output_snippets_dir, video_name, width, height) 51 | if not os.path.exists(output_sequence_dir): 52 | os.makedirs(output_sequence_dir) 53 | with open(output_sequence_path, 'w') as outfile: 54 | json.dump(video_info, outfile) 55 | if len(video_info['data']) == 0: 56 | print('Can not find pose estimation results.') 57 | return 58 | else: 59 | print('Pose estimation complete.') 60 | 61 | # parse skeleton data 62 | pose, _ = utils.video.video_info_parsing(video_info) 63 | data = torch.from_numpy(pose) 64 | data = data.unsqueeze(0) 65 | data = data.float().to(self.dev).detach() 66 | 67 | # extract feature 68 | print('\nNetwork forwad...') 69 | self.model.eval() 70 | output, feature = self.model.extract_feature(data) 71 | output = output[0] 72 | feature = feature[0] 73 | intensity = (feature*feature).sum(dim=0)**0.5 74 | intensity = intensity.cpu().detach().numpy() 75 | label = output.sum(dim=3).sum(dim=2).sum(dim=1).argmax(dim=0) 76 | print('Prediction result: {}'.format(label_name[label])) 77 | print('Done.') 78 | 79 | # visualization 80 | print('\nVisualization...') 81 | label_sequence = output.sum(dim=2).argmax(dim=0) 82 | label_name_sequence = [[label_name[p] for p in l ]for l in label_sequence] 83 | edge = 
self.model.graph.edge 84 | images = utils.visualization.stgcn_visualize( 85 | pose, edge, intensity, video,label_name[label] , label_name_sequence, self.arg.height) 86 | print('Done.') 87 | 88 | # save video 89 | print('\nSaving...') 90 | if not os.path.exists(output_result_dir): 91 | os.makedirs(output_result_dir) 92 | writer = skvideo.io.FFmpegWriter(output_result_path, 93 | outputdict={'-b': '300000000'}) 94 | for img in images: 95 | writer.writeFrame(img) 96 | writer.close() 97 | print('The Demo result has been saved in {}.'.format(output_result_path)) 98 | 99 | @staticmethod 100 | def get_parser(add_help=False): 101 | 102 | # parameter priority: command line > config > default 103 | parent_parser = IO.get_parser(add_help=False) 104 | parser = argparse.ArgumentParser( 105 | add_help=add_help, 106 | parents=[parent_parser], 107 | description='Demo for Spatial Temporal Graph Convolution Network') 108 | 109 | # region arguments yapf: disable 110 | parser.add_argument('--video', 111 | default='./resource/media/skateboarding.mp4', 112 | help='Path to video') 113 | parser.add_argument('--openpose', 114 | default='3dparty/openpose/build', 115 | help='Path to openpose') 116 | parser.add_argument('--output_dir', 117 | default='./data/demo_result', 118 | help='Path to save results') 119 | parser.add_argument('--height', 120 | default=1080, 121 | type=int) 122 | parser.set_defaults(config='./config/st_gcn/kinetics-skeleton/demo_old.yaml') 123 | parser.set_defaults(print_log=False) 124 | # endregion yapf: enable 125 | 126 | return parser 127 | -------------------------------------------------------------------------------- /processor/demo_realtime.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | import sys 4 | import argparse 5 | import json 6 | import shutil 7 | import time 8 | 9 | import numpy as np 10 | import torch 11 | import skvideo.io 12 | 13 | from .io import IO 14 | import tools 15 | import tools.utils as utils 16 | 17 | import cv2 18 | 19 | class DemoRealtime(IO): 20 | """ A demo for utilizing st-gcn in the realtime action recognition. 21 | The Openpose python-api is required for this demo. 22 | 23 | Since the pre-trained model is trained on videos with 30fps, 24 | and Openpose is hard to achieve this high speed in the single GPU, 25 | if you want to predict actions by **camera** in realtime, 26 | either data interpolation or new pre-trained model 27 | is required. 28 | 29 | Pull requests are always welcome. 
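    The per-frame loop in ``start`` below performs: Openpose keypoint extraction,
    normalization of the (x, y) coordinates by image width/height followed by
    centering at zero (joints with zero confidence are reset to 0), accumulation
    into ``naive_pose_tracker``, and a forward pass whose voting result and
    per-joint feature intensity are rendered onto the latest frame. When
    ``--video camera_source`` is passed, frames are indexed by elapsed wall-clock
    time so that the skeleton sequence approximates the frame rate the model was
    trained on.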
30 | """ 31 | 32 | def start(self): 33 | # load openpose python api 34 | if self.arg.openpose is not None: 35 | sys.path.append('{}/python'.format(self.arg.openpose)) 36 | sys.path.append('{}/build/python'.format(self.arg.openpose)) 37 | try: 38 | from openpose import pyopenpose as op 39 | except: 40 | print('Can not find Openpose Python API.') 41 | return 42 | 43 | video_name = self.arg.video.split('/')[-1].split('.')[0] 44 | label_name_path = './resource/kinetics_skeleton/label_name.txt' 45 | with open(label_name_path) as f: 46 | label_name = f.readlines() 47 | label_name = [line.rstrip() for line in label_name] 48 | self.label_name = label_name 49 | 50 | # initiate 51 | opWrapper = op.WrapperPython() 52 | params = dict(model_folder='./models', model_pose='COCO') 53 | opWrapper.configure(params) 54 | opWrapper.start() 55 | self.model.eval() 56 | pose_tracker = naive_pose_tracker() 57 | 58 | if self.arg.video == 'camera_source': 59 | video_capture = cv2.VideoCapture(0) 60 | else: 61 | video_capture = cv2.VideoCapture(self.arg.video) 62 | 63 | # start recognition 64 | start_time = time.time() 65 | frame_index = 0 66 | while(True): 67 | 68 | tic = time.time() 69 | 70 | # get image 71 | ret, orig_image = video_capture.read() 72 | if orig_image is None: 73 | break 74 | source_H, source_W, _ = orig_image.shape 75 | orig_image = cv2.resize( 76 | orig_image, (256 * source_W // source_H, 256)) 77 | H, W, _ = orig_image.shape 78 | 79 | # pose estimation 80 | datum = op.Datum() 81 | datum.cvInputData = orig_image 82 | opWrapper.emplaceAndPop([datum]) 83 | multi_pose = datum.poseKeypoints # (num_person, num_joint, 3) 84 | if len(multi_pose.shape) != 3: 85 | continue 86 | 87 | # normalization 88 | multi_pose[:, :, 0] = multi_pose[:, :, 0]/W 89 | multi_pose[:, :, 1] = multi_pose[:, :, 1]/H 90 | multi_pose[:, :, 0:2] = multi_pose[:, :, 0:2] - 0.5 91 | multi_pose[:, :, 0][multi_pose[:, :, 2] == 0] = 0 92 | multi_pose[:, :, 1][multi_pose[:, :, 2] == 0] = 0 93 | 94 | # pose tracking 95 | if self.arg.video == 'camera_source': 96 | frame_index = int((time.time() - start_time)*self.arg.fps) 97 | else: 98 | frame_index += 1 99 | pose_tracker.update(multi_pose, frame_index) 100 | data_numpy = pose_tracker.get_skeleton_sequence() 101 | data = torch.from_numpy(data_numpy) 102 | data = data.unsqueeze(0) 103 | data = data.float().to(self.dev).detach() # (1, channel, frame, joint, person) 104 | 105 | # model predict 106 | voting_label_name, video_label_name, output, intensity = self.predict( 107 | data) 108 | 109 | # visualization 110 | app_fps = 1 / (time.time() - tic) 111 | image = self.render(data_numpy, voting_label_name, 112 | video_label_name, intensity, orig_image, app_fps) 113 | cv2.imshow("ST-GCN", image) 114 | if cv2.waitKey(1) & 0xFF == ord('q'): 115 | break 116 | 117 | def predict(self, data): 118 | # forward 119 | output, feature = self.model.extract_feature(data) 120 | output = output[0] 121 | feature = feature[0] 122 | intensity = (feature*feature).sum(dim=0)**0.5 123 | intensity = intensity.cpu().detach().numpy() 124 | 125 | # get result 126 | # classification result of the full sequence 127 | voting_label = output.sum(dim=3).sum( 128 | dim=2).sum(dim=1).argmax(dim=0) 129 | voting_label_name = self.label_name[voting_label] 130 | # classification result for each person of the latest frame 131 | num_person = data.size(4) 132 | latest_frame_label = [output[:, :, :, m].sum( 133 | dim=2)[:, -1].argmax(dim=0) for m in range(num_person)] 134 | latest_frame_label_name = [self.label_name[l] 135 | for l in 
latest_frame_label] 136 | 137 | num_person = output.size(3) 138 | num_frame = output.size(1) 139 | video_label_name = list() 140 | for t in range(num_frame): 141 | frame_label_name = list() 142 | for m in range(num_person): 143 | person_label = output[:, t, :, m].sum(dim=1).argmax(dim=0) 144 | person_label_name = self.label_name[person_label] 145 | frame_label_name.append(person_label_name) 146 | video_label_name.append(frame_label_name) 147 | return voting_label_name, video_label_name, output, intensity 148 | 149 | def render(self, data_numpy, voting_label_name, video_label_name, intensity, orig_image, fps=0): 150 | images = utils.visualization.stgcn_visualize( 151 | data_numpy[:, [-1]], 152 | self.model.graph.edge, 153 | intensity[[-1]], [orig_image], 154 | voting_label_name, 155 | [video_label_name[-1]], 156 | self.arg.height, 157 | fps=fps) 158 | image = next(images) 159 | image = image.astype(np.uint8) 160 | return image 161 | 162 | @staticmethod 163 | def get_parser(add_help=False): 164 | 165 | # parameter priority: command line > config > default 166 | parent_parser = IO.get_parser(add_help=False) 167 | parser = argparse.ArgumentParser( 168 | add_help=add_help, 169 | parents=[parent_parser], 170 | description='Demo for Spatial Temporal Graph Convolution Network') 171 | 172 | # region arguments yapf: disable 173 | parser.add_argument('--video', 174 | default='./resource/media/skateboarding.mp4', 175 | help='Path to video') 176 | parser.add_argument('--openpose', 177 | default=None, 178 | help='Path to openpose') 179 | parser.add_argument('--model_input_frame', 180 | default=128, 181 | type=int) 182 | parser.add_argument('--model_fps', 183 | default=30, 184 | type=int) 185 | parser.add_argument('--height', 186 | default=1080, 187 | type=int, 188 | help='height of frame in the output video.') 189 | parser.set_defaults( 190 | config='./config/st_gcn/kinetics-skeleton/demo_realtime.yaml') 191 | parser.set_defaults(print_log=False) 192 | # endregion yapf: enable 193 | 194 | return parser 195 | 196 | class naive_pose_tracker(): 197 | """ A simple tracker for recording person poses and generating skeleton sequences. 198 | For actual occasion, I recommend you to implement a robuster tracker. 199 | Pull-requests are welcomed. 
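    A minimal usage sketch (``pose_stream`` is a hypothetical iterable of
    per-frame Openpose outputs, named here only for illustration)::

        tracker = naive_pose_tracker(data_frame=128, num_joint=18)
        for frame_index, multi_pose in enumerate(pose_stream, 1):   # each item: (num_person, 18, 3)
            tracker.update(multi_pose, frame_index)                 # frame indices must strictly increase
        data = tracker.get_skeleton_sequence()                      # (3, 128, 18, num_person) array, or None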
200 | """ 201 | 202 | def __init__(self, data_frame=128, num_joint=18, max_frame_dis=np.inf): 203 | self.data_frame = data_frame 204 | self.num_joint = num_joint 205 | self.max_frame_dis = max_frame_dis 206 | self.latest_frame = 0 207 | self.trace_info = list() 208 | 209 | def update(self, multi_pose, current_frame): 210 | # multi_pose.shape: (num_person, num_joint, 3) 211 | 212 | if current_frame <= self.latest_frame: 213 | return 214 | 215 | if len(multi_pose.shape) != 3: 216 | return 217 | 218 | score_order = (-multi_pose[:, :, 2].sum(axis=1)).argsort(axis=0) 219 | for p in multi_pose[score_order]: 220 | 221 | # match existing traces 222 | matching_trace = None 223 | matching_dis = None 224 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 225 | # trace.shape: (num_frame, num_joint, 3) 226 | if current_frame <= latest_frame: 227 | continue 228 | mean_dis, is_close = self.get_dis(trace, p) 229 | if is_close: 230 | if matching_trace is None: 231 | matching_trace = trace_index 232 | matching_dis = mean_dis 233 | elif matching_dis > mean_dis: 234 | matching_trace = trace_index 235 | matching_dis = mean_dis 236 | 237 | # update trace information 238 | if matching_trace is not None: 239 | trace, latest_frame = self.trace_info[matching_trace] 240 | 241 | # padding zero if the trace is fractured 242 | pad_mode = 'interp' if latest_frame == self.latest_frame else 'zero' 243 | pad = current_frame-latest_frame-1 244 | new_trace = self.cat_pose(trace, p, pad, pad_mode) 245 | self.trace_info[matching_trace] = (new_trace, current_frame) 246 | 247 | else: 248 | new_trace = np.array([p]) 249 | self.trace_info.append((new_trace, current_frame)) 250 | 251 | self.latest_frame = current_frame 252 | 253 | def get_skeleton_sequence(self): 254 | 255 | # remove old traces 256 | valid_trace_index = [] 257 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 258 | if self.latest_frame - latest_frame < self.data_frame: 259 | valid_trace_index.append(trace_index) 260 | self.trace_info = [self.trace_info[v] for v in valid_trace_index] 261 | 262 | num_trace = len(self.trace_info) 263 | if num_trace == 0: 264 | return None 265 | 266 | data = np.zeros((3, self.data_frame, self.num_joint, num_trace)) 267 | for trace_index, (trace, latest_frame) in enumerate(self.trace_info): 268 | end = self.data_frame - (self.latest_frame - latest_frame) 269 | d = trace[-end:] 270 | beg = end - len(d) 271 | data[:, beg:end, :, trace_index] = d.transpose((2, 0, 1)) 272 | 273 | return data 274 | 275 | # concatenate pose to a trace 276 | def cat_pose(self, trace, pose, pad, pad_mode): 277 | # trace.shape: (num_frame, num_joint, 3) 278 | num_joint = pose.shape[0] 279 | num_channel = pose.shape[1] 280 | if pad != 0: 281 | if pad_mode == 'zero': 282 | trace = np.concatenate( 283 | (trace, np.zeros((pad, num_joint, 3))), 0) 284 | elif pad_mode == 'interp': 285 | last_pose = trace[-1] 286 | coeff = [(p+1)/(pad+1) for p in range(pad)] 287 | interp_pose = [(1-c)*last_pose + c*pose for c in coeff] 288 | trace = np.concatenate((trace, interp_pose), 0) 289 | new_trace = np.concatenate((trace, [pose]), 0) 290 | return new_trace 291 | 292 | # calculate the distance between a existing trace and the input pose 293 | 294 | def get_dis(self, trace, pose): 295 | last_pose_xy = trace[-1, :, 0:2] 296 | curr_pose_xy = pose[:, 0:2] 297 | 298 | mean_dis = ((((last_pose_xy - curr_pose_xy)**2).sum(1))**0.5).mean() 299 | wh = last_pose_xy.max(0) - last_pose_xy.min(0) 300 | scale = (wh[0] * wh[1]) ** 0.5 + 0.0001 301 | 
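        # a candidate pose matches a trace when its mean joint displacement is small
        # relative to the person's bounding-box scale; with the default
        # max_frame_dis=np.inf every candidate counts as close, so matching reduces
        # to picking the trace with the smallest mean distance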
is_close = mean_dis < scale * self.max_frame_dis 302 | return mean_dis, is_close 303 | -------------------------------------------------------------------------------- /processor/io.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # pylint: disable=W0201 3 | import sys 4 | import argparse 5 | import yaml 6 | import numpy as np 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | 12 | # torchlight 13 | import torchlight 14 | from torchlight import str2bool 15 | from torchlight import DictAction 16 | from torchlight import import_class 17 | 18 | class IO(): 19 | """ 20 | IO Processor 21 | """ 22 | 23 | def __init__(self, argv=None): 24 | 25 | self.load_arg(argv) 26 | self.init_environment() 27 | self.load_model() 28 | self.load_weights() 29 | self.gpu() 30 | 31 | def load_arg(self, argv=None): 32 | parser = self.get_parser() 33 | 34 | # load arg form config file 35 | p = parser.parse_args(argv) 36 | if p.config is not None: 37 | # load config file 38 | with open(p.config, 'r') as f: 39 | default_arg = yaml.load(f, Loader=yaml.FullLoader) 40 | 41 | # update parser from config file 42 | key = vars(p).keys() 43 | for k in default_arg.keys(): 44 | if k not in key: 45 | print('Unknown Arguments: {}'.format(k)) 46 | assert k in key 47 | 48 | parser.set_defaults(**default_arg) 49 | 50 | self.arg = parser.parse_args(argv) 51 | 52 | def init_environment(self): 53 | self.io = torchlight.IO( 54 | self.arg.work_dir, 55 | save_log=self.arg.save_log, 56 | print_log=self.arg.print_log) 57 | self.io.save_arg(self.arg) 58 | 59 | # gpu 60 | if self.arg.use_gpu: 61 | gpus = torchlight.visible_gpu(self.arg.device) 62 | torchlight.occupy_gpu(gpus) 63 | self.gpus = gpus 64 | self.dev = "cuda:0" 65 | else: 66 | self.dev = "cpu" 67 | 68 | def load_model(self): 69 | self.model = self.io.load_model(self.arg.model, 70 | **(self.arg.model_args)) 71 | 72 | def load_weights(self): 73 | if self.arg.weights: 74 | self.model = self.io.load_weights(self.model, self.arg.weights, 75 | self.arg.ignore_weights) 76 | 77 | def gpu(self): 78 | # move modules to gpu 79 | self.model = self.model.to(self.dev) 80 | for name, value in vars(self).items(): 81 | cls_name = str(value.__class__) 82 | if cls_name.find('torch.nn.modules') != -1: 83 | setattr(self, name, value.to(self.dev)) 84 | 85 | # model parallel 86 | if self.arg.use_gpu and len(self.gpus) > 1: 87 | self.model = nn.DataParallel(self.model, device_ids=self.gpus) 88 | 89 | def start(self): 90 | self.io.print_log('Parameters:\n{}\n'.format(str(vars(self.arg)))) 91 | 92 | @staticmethod 93 | def get_parser(add_help=False): 94 | 95 | #region arguments yapf: disable 96 | # parameter priority: command line > config > default 97 | parser = argparse.ArgumentParser( add_help=add_help, description='IO Processor') 98 | 99 | parser.add_argument('-w', '--work_dir', default='./work_dir/tmp', help='the work folder for storing results') 100 | parser.add_argument('-c', '--config', default=None, help='path to the configuration file') 101 | 102 | # processor 103 | parser.add_argument('--use_gpu', type=str2bool, default=True, help='use GPUs or not') 104 | parser.add_argument('--device', type=int, default=0, nargs='+', help='the indexes of GPUs for training or testing') 105 | 106 | # visulize and debug 107 | parser.add_argument('--print_log', type=str2bool, default=True, help='print logging or not') 108 | parser.add_argument('--save_log', type=str2bool, default=True, help='save logging or not') 109 | 110 | # model 111 | 
parser.add_argument('--model', default=None, help='the model will be used') 112 | parser.add_argument('--model_args', action=DictAction, default=dict(), help='the arguments of model') 113 | parser.add_argument('--weights', default=None, help='the weights for network initialization') 114 | parser.add_argument('--ignore_weights', type=str, default=[], nargs='+', help='the name of weights which will be ignored in the initialization') 115 | #endregion yapf: enable 116 | 117 | return parser 118 | -------------------------------------------------------------------------------- /processor/processor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # pylint: disable=W0201 3 | import sys 4 | import argparse 5 | import yaml 6 | import numpy as np 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | 13 | # torchlight 14 | import torchlight 15 | from torchlight import str2bool 16 | from torchlight import DictAction 17 | from torchlight import import_class 18 | 19 | from .io import IO 20 | 21 | class Processor(IO): 22 | """ 23 | Base Processor 24 | """ 25 | 26 | def __init__(self, argv=None): 27 | 28 | self.load_arg(argv) 29 | self.init_environment() 30 | self.load_model() 31 | self.load_weights() 32 | self.gpu() 33 | self.load_data() 34 | self.load_optimizer() 35 | 36 | def init_environment(self): 37 | 38 | super().init_environment() 39 | self.result = dict() 40 | self.iter_info = dict() 41 | self.epoch_info = dict() 42 | self.meta_info = dict(epoch=0, iter=0) 43 | 44 | def load_optimizer(self): 45 | pass 46 | 47 | def load_data(self): 48 | Feeder = import_class(self.arg.feeder) 49 | if 'debug' not in self.arg.train_feeder_args: 50 | self.arg.train_feeder_args['debug'] = self.arg.debug 51 | self.data_loader = dict() 52 | if self.arg.phase == 'train': 53 | self.data_loader['train'] = torch.utils.data.DataLoader( 54 | dataset=Feeder(**self.arg.train_feeder_args), 55 | batch_size=self.arg.batch_size, 56 | shuffle=True, 57 | num_workers=self.arg.num_worker * torchlight.ngpu( 58 | self.arg.device), 59 | drop_last=True) 60 | if self.arg.test_feeder_args: 61 | self.data_loader['test'] = torch.utils.data.DataLoader( 62 | dataset=Feeder(**self.arg.test_feeder_args), 63 | batch_size=self.arg.test_batch_size, 64 | shuffle=False, 65 | num_workers=self.arg.num_worker * torchlight.ngpu( 66 | self.arg.device)) 67 | 68 | def show_epoch_info(self): 69 | for k, v in self.epoch_info.items(): 70 | self.io.print_log('\t{}: {}'.format(k, v)) 71 | if self.arg.pavi_log: 72 | self.io.log('train', self.meta_info['iter'], self.epoch_info) 73 | 74 | def show_iter_info(self): 75 | if self.meta_info['iter'] % self.arg.log_interval == 0: 76 | info ='\tIter {} Done.'.format(self.meta_info['iter']) 77 | for k, v in self.iter_info.items(): 78 | if isinstance(v, float): 79 | info = info + ' | {}: {:.4f}'.format(k, v) 80 | else: 81 | info = info + ' | {}: {}'.format(k, v) 82 | 83 | self.io.print_log(info) 84 | 85 | if self.arg.pavi_log: 86 | self.io.log('train', self.meta_info['iter'], self.iter_info) 87 | 88 | def train(self): 89 | for _ in range(100): 90 | self.iter_info['loss'] = 0 91 | self.show_iter_info() 92 | self.meta_info['iter'] += 1 93 | self.epoch_info['mean loss'] = 0 94 | self.show_epoch_info() 95 | 96 | def test(self): 97 | for _ in range(100): 98 | self.iter_info['loss'] = 1 99 | self.show_iter_info() 100 | self.epoch_info['mean loss'] = 1 101 | self.show_epoch_info() 102 | 103 | def start(self): 104 | 
self.io.print_log('Parameters:\n{}\n'.format(str(vars(self.arg)))) 105 | 106 | # training phase 107 | if self.arg.phase == 'train': 108 | for epoch in range(self.arg.start_epoch, self.arg.num_epoch): 109 | self.meta_info['epoch'] = epoch 110 | 111 | # training 112 | self.io.print_log('Training epoch: {}'.format(epoch)) 113 | self.train() 114 | self.io.print_log('Done.') 115 | 116 | # save model 117 | if ((epoch + 1) % self.arg.save_interval == 0) or ( 118 | epoch + 1 == self.arg.num_epoch): 119 | filename = 'epoch{}_model.pt'.format(epoch + 1) 120 | self.io.save_model(self.model, filename) 121 | 122 | # evaluation 123 | if ((epoch + 1) % self.arg.eval_interval == 0) or ( 124 | epoch + 1 == self.arg.num_epoch): 125 | self.io.print_log('Eval epoch: {}'.format(epoch)) 126 | self.test() 127 | self.io.print_log('Done.') 128 | # test phase 129 | elif self.arg.phase == 'test': 130 | 131 | # the path of weights must be appointed 132 | if self.arg.weights is None: 133 | raise ValueError('Please appoint --weights.') 134 | self.io.print_log('Model: {}.'.format(self.arg.model)) 135 | self.io.print_log('Weights: {}.'.format(self.arg.weights)) 136 | 137 | # evaluation 138 | self.io.print_log('Evaluation Start:') 139 | self.test() 140 | self.io.print_log('Done.\n') 141 | 142 | # save the output of model 143 | if self.arg.save_result: 144 | result_dict = dict( 145 | zip(self.data_loader['test'].dataset.sample_name, 146 | self.result)) 147 | self.io.save_pkl(result_dict, 'test_result.pkl') 148 | 149 | @staticmethod 150 | def get_parser(add_help=False): 151 | 152 | #region arguments yapf: disable 153 | # parameter priority: command line > config > default 154 | parser = argparse.ArgumentParser( add_help=add_help, description='Base Processor') 155 | 156 | parser.add_argument('-w', '--work_dir', default='./work_dir/tmp', help='the work folder for storing results') 157 | parser.add_argument('-c', '--config', default=None, help='path to the configuration file') 158 | 159 | # processor 160 | parser.add_argument('--phase', default='train', help='must be train or test') 161 | parser.add_argument('--save_result', type=str2bool, default=False, help='if ture, the output of the model will be stored') 162 | parser.add_argument('--start_epoch', type=int, default=0, help='start training from which epoch') 163 | parser.add_argument('--num_epoch', type=int, default=80, help='stop training in which epoch') 164 | parser.add_argument('--use_gpu', type=str2bool, default=True, help='use GPUs or not') 165 | parser.add_argument('--device', type=int, default=0, nargs='+', help='the indexes of GPUs for training or testing') 166 | 167 | # visulize and debug 168 | parser.add_argument('--log_interval', type=int, default=100, help='the interval for printing messages (#iteration)') 169 | parser.add_argument('--save_interval', type=int, default=10, help='the interval for storing models (#iteration)') 170 | parser.add_argument('--eval_interval', type=int, default=5, help='the interval for evaluating models (#iteration)') 171 | parser.add_argument('--save_log', type=str2bool, default=True, help='save logging or not') 172 | parser.add_argument('--print_log', type=str2bool, default=True, help='print logging or not') 173 | parser.add_argument('--pavi_log', type=str2bool, default=False, help='logging on pavi or not') 174 | 175 | # feeder 176 | parser.add_argument('--feeder', default='feeder.feeder', help='data loader will be used') 177 | parser.add_argument('--num_worker', type=int, default=4, help='the number of worker per gpu for data 
loader') 178 | parser.add_argument('--train_feeder_args', action=DictAction, default=dict(), help='the arguments of data loader for training') 179 | parser.add_argument('--test_feeder_args', action=DictAction, default=dict(), help='the arguments of data loader for test') 180 | parser.add_argument('--batch_size', type=int, default=256, help='training batch size') 181 | parser.add_argument('--test_batch_size', type=int, default=256, help='test batch size') 182 | parser.add_argument('--debug', action="store_true", help='less data, faster loading') 183 | 184 | # model 185 | parser.add_argument('--model', default=None, help='the model will be used') 186 | parser.add_argument('--model_args', action=DictAction, default=dict(), help='the arguments of model') 187 | parser.add_argument('--weights', default=None, help='the weights for network initialization') 188 | parser.add_argument('--ignore_weights', type=str, default=[], nargs='+', help='the name of weights which will be ignored in the initialization') 189 | #endregion yapf: enable 190 | 191 | return parser 192 | -------------------------------------------------------------------------------- /processor/recognition.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # pylint: disable=W0201 3 | import sys 4 | import argparse 5 | import yaml 6 | import numpy as np 7 | 8 | # torch 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | 13 | # torchlight 14 | import torchlight 15 | from torchlight import str2bool 16 | from torchlight import DictAction 17 | from torchlight import import_class 18 | 19 | from .processor import Processor 20 | 21 | def weights_init(m): 22 | classname = m.__class__.__name__ 23 | if classname.find('Conv1d') != -1: 24 | m.weight.data.normal_(0.0, 0.02) 25 | if m.bias is not None: 26 | m.bias.data.fill_(0) 27 | elif classname.find('Conv2d') != -1: 28 | m.weight.data.normal_(0.0, 0.02) 29 | if m.bias is not None: 30 | m.bias.data.fill_(0) 31 | elif classname.find('BatchNorm') != -1: 32 | m.weight.data.normal_(1.0, 0.02) 33 | m.bias.data.fill_(0) 34 | 35 | class REC_Processor(Processor): 36 | """ 37 | Processor for Skeleton-based Action Recgnition 38 | """ 39 | 40 | def load_model(self): 41 | self.model = self.io.load_model(self.arg.model, 42 | **(self.arg.model_args)) 43 | self.model.apply(weights_init) 44 | self.loss = nn.CrossEntropyLoss() 45 | 46 | def load_optimizer(self): 47 | if self.arg.optimizer == 'SGD': 48 | self.optimizer = optim.SGD( 49 | self.model.parameters(), 50 | lr=self.arg.base_lr, 51 | momentum=0.9, 52 | nesterov=self.arg.nesterov, 53 | weight_decay=self.arg.weight_decay) 54 | elif self.arg.optimizer == 'Adam': 55 | self.optimizer = optim.Adam( 56 | self.model.parameters(), 57 | lr=self.arg.base_lr, 58 | weight_decay=self.arg.weight_decay) 59 | else: 60 | raise ValueError() 61 | 62 | def adjust_lr(self): 63 | if self.arg.optimizer == 'SGD' and self.arg.step: 64 | lr = self.arg.base_lr * ( 65 | 0.1**np.sum(self.meta_info['epoch']>= np.array(self.arg.step))) 66 | for param_group in self.optimizer.param_groups: 67 | param_group['lr'] = lr 68 | self.lr = lr 69 | else: 70 | self.lr = self.arg.base_lr 71 | 72 | def show_topk(self, k): 73 | rank = self.result.argsort() 74 | hit_top_k = [l in rank[i, -k:] for i, l in enumerate(self.label)] 75 | accuracy = sum(hit_top_k) * 1.0 / len(hit_top_k) 76 | self.io.print_log('\tTop{}: {:.2f}%'.format(k, 100 * accuracy)) 77 | 78 | def train(self): 79 | self.model.train() 80 | 
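        # adjust_lr implements the step schedule: for SGD, the learning rate is
        # base_lr scaled by 0.1 for every milestone in --step that the current
        # epoch has reached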
self.adjust_lr() 81 | loader = self.data_loader['train'] 82 | loss_value = [] 83 | 84 | for data, label in loader: 85 | 86 | # get data 87 | data = data.float().to(self.dev) 88 | label = label.long().to(self.dev) 89 | 90 | # forward 91 | output = self.model(data) 92 | loss = self.loss(output, label) 93 | 94 | # backward 95 | self.optimizer.zero_grad() 96 | loss.backward() 97 | self.optimizer.step() 98 | 99 | # statistics 100 | self.iter_info['loss'] = loss.data.item() 101 | self.iter_info['lr'] = '{:.6f}'.format(self.lr) 102 | loss_value.append(self.iter_info['loss']) 103 | self.show_iter_info() 104 | self.meta_info['iter'] += 1 105 | 106 | self.epoch_info['mean_loss']= np.mean(loss_value) 107 | self.show_epoch_info() 108 | self.io.print_timer() 109 | 110 | def test(self, evaluation=True): 111 | 112 | self.model.eval() 113 | loader = self.data_loader['test'] 114 | loss_value = [] 115 | result_frag = [] 116 | label_frag = [] 117 | 118 | for data, label in loader: 119 | 120 | # get data 121 | data = data.float().to(self.dev) 122 | label = label.long().to(self.dev) 123 | 124 | # inference 125 | with torch.no_grad(): 126 | output = self.model(data) 127 | result_frag.append(output.data.cpu().numpy()) 128 | 129 | # get loss 130 | if evaluation: 131 | loss = self.loss(output, label) 132 | loss_value.append(loss.item()) 133 | label_frag.append(label.data.cpu().numpy()) 134 | 135 | self.result = np.concatenate(result_frag) 136 | if evaluation: 137 | self.label = np.concatenate(label_frag) 138 | self.epoch_info['mean_loss']= np.mean(loss_value) 139 | self.show_epoch_info() 140 | 141 | # show top-k accuracy 142 | for k in self.arg.show_topk: 143 | self.show_topk(k) 144 | 145 | @staticmethod 146 | def get_parser(add_help=False): 147 | 148 | # parameter priority: command line > config > default 149 | parent_parser = Processor.get_parser(add_help=False) 150 | parser = argparse.ArgumentParser( 151 | add_help=add_help, 152 | parents=[parent_parser], 153 | description='Spatial Temporal Graph Convolution Network') 154 | 155 | # region arguments yapf: disable 156 | # evaluation 157 | parser.add_argument('--show_topk', type=int, default=[1, 5], nargs='+', help='which Top K accuracy will be shown') 158 | # optim 159 | parser.add_argument('--base_lr', type=float, default=0.01, help='initial learning rate') 160 | parser.add_argument('--step', type=int, default=[], nargs='+', help='the epoch where optimizer reduce the learning rate') 161 | parser.add_argument('--optimizer', default='SGD', help='type of optimizer') 162 | parser.add_argument('--nesterov', type=str2bool, default=True, help='use nesterov or not') 163 | parser.add_argument('--weight_decay', type=float, default=0.0001, help='weight decay for optimizer') 164 | # endregion yapf: enable 165 | 166 | return parser 167 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyyaml 2 | argparse 3 | numpy 4 | h5py 5 | opencv-python 6 | imageio 7 | scikit-video 8 | torch 9 | torchvision -------------------------------------------------------------------------------- /resource/NTU-RGB-D/samples_with_missing_skeletons.txt: -------------------------------------------------------------------------------- 1 | S001C002P005R002A008 2 | S001C002P006R001A008 3 | S001C003P002R001A055 4 | S001C003P002R002A012 5 | S001C003P005R002A004 6 | S001C003P005R002A005 7 | S001C003P005R002A006 8 | S001C003P006R002A008 9 | S002C002P011R002A030 10 | 
S002C003P008R001A020 11 | S002C003P010R002A010 12 | S002C003P011R002A007 13 | S002C003P011R002A011 14 | S002C003P014R002A007 15 | S003C001P019R001A055 16 | S003C002P002R002A055 17 | S003C002P018R002A055 18 | S003C003P002R001A055 19 | S003C003P016R001A055 20 | S003C003P018R002A024 21 | S004C002P003R001A013 22 | S004C002P008R001A009 23 | S004C002P020R001A003 24 | S004C002P020R001A004 25 | S004C002P020R001A012 26 | S004C002P020R001A020 27 | S004C002P020R001A021 28 | S004C002P020R001A036 29 | S005C002P004R001A001 30 | S005C002P004R001A003 31 | S005C002P010R001A016 32 | S005C002P010R001A017 33 | S005C002P010R001A048 34 | S005C002P010R001A049 35 | S005C002P016R001A009 36 | S005C002P016R001A010 37 | S005C002P018R001A003 38 | S005C002P018R001A028 39 | S005C002P018R001A029 40 | S005C003P016R002A009 41 | S005C003P018R002A013 42 | S005C003P021R002A057 43 | S006C001P001R002A055 44 | S006C002P007R001A005 45 | S006C002P007R001A006 46 | S006C002P016R001A043 47 | S006C002P016R001A051 48 | S006C002P016R001A052 49 | S006C002P022R001A012 50 | S006C002P023R001A020 51 | S006C002P023R001A021 52 | S006C002P023R001A022 53 | S006C002P023R001A023 54 | S006C002P024R001A018 55 | S006C002P024R001A019 56 | S006C003P001R002A013 57 | S006C003P007R002A009 58 | S006C003P007R002A010 59 | S006C003P007R002A025 60 | S006C003P016R001A060 61 | S006C003P017R001A055 62 | S006C003P017R002A013 63 | S006C003P017R002A014 64 | S006C003P017R002A015 65 | S006C003P022R002A013 66 | S007C001P018R002A050 67 | S007C001P025R002A051 68 | S007C001P028R001A050 69 | S007C001P028R001A051 70 | S007C001P028R001A052 71 | S007C002P008R002A008 72 | S007C002P015R002A055 73 | S007C002P026R001A008 74 | S007C002P026R001A009 75 | S007C002P026R001A010 76 | S007C002P026R001A011 77 | S007C002P026R001A012 78 | S007C002P026R001A050 79 | S007C002P027R001A011 80 | S007C002P027R001A013 81 | S007C002P028R002A055 82 | S007C003P007R001A002 83 | S007C003P007R001A004 84 | S007C003P019R001A060 85 | S007C003P027R002A001 86 | S007C003P027R002A002 87 | S007C003P027R002A003 88 | S007C003P027R002A004 89 | S007C003P027R002A005 90 | S007C003P027R002A006 91 | S007C003P027R002A007 92 | S007C003P027R002A008 93 | S007C003P027R002A009 94 | S007C003P027R002A010 95 | S007C003P027R002A011 96 | S007C003P027R002A012 97 | S007C003P027R002A013 98 | S008C002P001R001A009 99 | S008C002P001R001A010 100 | S008C002P001R001A014 101 | S008C002P001R001A015 102 | S008C002P001R001A016 103 | S008C002P001R001A018 104 | S008C002P001R001A019 105 | S008C002P008R002A059 106 | S008C002P025R001A060 107 | S008C002P029R001A004 108 | S008C002P031R001A005 109 | S008C002P031R001A006 110 | S008C002P032R001A018 111 | S008C002P034R001A018 112 | S008C002P034R001A019 113 | S008C002P035R001A059 114 | S008C002P035R002A002 115 | S008C002P035R002A005 116 | S008C003P007R001A009 117 | S008C003P007R001A016 118 | S008C003P007R001A017 119 | S008C003P007R001A018 120 | S008C003P007R001A019 121 | S008C003P007R001A020 122 | S008C003P007R001A021 123 | S008C003P007R001A022 124 | S008C003P007R001A023 125 | S008C003P007R001A025 126 | S008C003P007R001A026 127 | S008C003P007R001A028 128 | S008C003P007R001A029 129 | S008C003P007R002A003 130 | S008C003P008R002A050 131 | S008C003P025R002A002 132 | S008C003P025R002A011 133 | S008C003P025R002A012 134 | S008C003P025R002A016 135 | S008C003P025R002A020 136 | S008C003P025R002A022 137 | S008C003P025R002A023 138 | S008C003P025R002A030 139 | S008C003P025R002A031 140 | S008C003P025R002A032 141 | S008C003P025R002A033 142 | S008C003P025R002A049 143 | S008C003P025R002A060 144 | S008C003P031R001A001 145 
| S008C003P031R002A004 146 | S008C003P031R002A014 147 | S008C003P031R002A015 148 | S008C003P031R002A016 149 | S008C003P031R002A017 150 | S008C003P032R002A013 151 | S008C003P033R002A001 152 | S008C003P033R002A011 153 | S008C003P033R002A012 154 | S008C003P034R002A001 155 | S008C003P034R002A012 156 | S008C003P034R002A022 157 | S008C003P034R002A023 158 | S008C003P034R002A024 159 | S008C003P034R002A044 160 | S008C003P034R002A045 161 | S008C003P035R002A016 162 | S008C003P035R002A017 163 | S008C003P035R002A018 164 | S008C003P035R002A019 165 | S008C003P035R002A020 166 | S008C003P035R002A021 167 | S009C002P007R001A001 168 | S009C002P007R001A003 169 | S009C002P007R001A014 170 | S009C002P008R001A014 171 | S009C002P015R002A050 172 | S009C002P016R001A002 173 | S009C002P017R001A028 174 | S009C002P017R001A029 175 | S009C003P017R002A030 176 | S009C003P025R002A054 177 | S010C001P007R002A020 178 | S010C002P016R002A055 179 | S010C002P017R001A005 180 | S010C002P017R001A018 181 | S010C002P017R001A019 182 | S010C002P019R001A001 183 | S010C002P025R001A012 184 | S010C003P007R002A043 185 | S010C003P008R002A003 186 | S010C003P016R001A055 187 | S010C003P017R002A055 188 | S011C001P002R001A008 189 | S011C001P018R002A050 190 | S011C002P008R002A059 191 | S011C002P016R002A055 192 | S011C002P017R001A020 193 | S011C002P017R001A021 194 | S011C002P018R002A055 195 | S011C002P027R001A009 196 | S011C002P027R001A010 197 | S011C002P027R001A037 198 | S011C003P001R001A055 199 | S011C003P002R001A055 200 | S011C003P008R002A012 201 | S011C003P015R001A055 202 | S011C003P016R001A055 203 | S011C003P019R001A055 204 | S011C003P025R001A055 205 | S011C003P028R002A055 206 | S012C001P019R001A060 207 | S012C001P019R002A060 208 | S012C002P015R001A055 209 | S012C002P017R002A012 210 | S012C002P025R001A060 211 | S012C003P008R001A057 212 | S012C003P015R001A055 213 | S012C003P015R002A055 214 | S012C003P016R001A055 215 | S012C003P017R002A055 216 | S012C003P018R001A055 217 | S012C003P018R001A057 218 | S012C003P019R002A011 219 | S012C003P019R002A012 220 | S012C003P025R001A055 221 | S012C003P027R001A055 222 | S012C003P027R002A009 223 | S012C003P028R001A035 224 | S012C003P028R002A055 225 | S013C001P015R001A054 226 | S013C001P017R002A054 227 | S013C001P018R001A016 228 | S013C001P028R001A040 229 | S013C002P015R001A054 230 | S013C002P017R002A054 231 | S013C002P028R001A040 232 | S013C003P008R002A059 233 | S013C003P015R001A054 234 | S013C003P017R002A054 235 | S013C003P025R002A022 236 | S013C003P027R001A055 237 | S013C003P028R001A040 238 | S014C001P027R002A040 239 | S014C002P015R001A003 240 | S014C002P019R001A029 241 | S014C002P025R002A059 242 | S014C002P027R002A040 243 | S014C002P039R001A050 244 | S014C003P007R002A059 245 | S014C003P015R002A055 246 | S014C003P019R002A055 247 | S014C003P025R001A048 248 | S014C003P027R002A040 249 | S015C001P008R002A040 250 | S015C001P016R001A055 251 | S015C001P017R001A055 252 | S015C001P017R002A055 253 | S015C002P007R001A059 254 | S015C002P008R001A003 255 | S015C002P008R001A004 256 | S015C002P008R002A040 257 | S015C002P015R001A002 258 | S015C002P016R001A001 259 | S015C002P016R002A055 260 | S015C003P008R002A007 261 | S015C003P008R002A011 262 | S015C003P008R002A012 263 | S015C003P008R002A028 264 | S015C003P008R002A040 265 | S015C003P025R002A012 266 | S015C003P025R002A017 267 | S015C003P025R002A020 268 | S015C003P025R002A021 269 | S015C003P025R002A030 270 | S015C003P025R002A033 271 | S015C003P025R002A034 272 | S015C003P025R002A036 273 | S015C003P025R002A037 274 | S015C003P025R002A044 275 | S016C001P019R002A040 276 | 
S016C001P025R001A011 277 | S016C001P025R001A012 278 | S016C001P025R001A060 279 | S016C001P040R001A055 280 | S016C001P040R002A055 281 | S016C002P008R001A011 282 | S016C002P019R002A040 283 | S016C002P025R002A012 284 | S016C003P008R001A011 285 | S016C003P008R002A002 286 | S016C003P008R002A003 287 | S016C003P008R002A004 288 | S016C003P008R002A006 289 | S016C003P008R002A009 290 | S016C003P019R002A040 291 | S016C003P039R002A016 292 | S017C001P016R002A031 293 | S017C002P007R001A013 294 | S017C002P008R001A009 295 | S017C002P015R001A042 296 | S017C002P016R002A031 297 | S017C002P016R002A055 298 | S017C003P007R002A013 299 | S017C003P008R001A059 300 | S017C003P016R002A031 301 | S017C003P017R001A055 302 | S017C003P020R001A059 303 | -------------------------------------------------------------------------------- /resource/demo_asset/attention+prediction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/attention+prediction.png -------------------------------------------------------------------------------- /resource/demo_asset/attention+rgb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/attention+rgb.png -------------------------------------------------------------------------------- /resource/demo_asset/original_video.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/original_video.png -------------------------------------------------------------------------------- /resource/demo_asset/pose_estimation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/demo_asset/pose_estimation.png -------------------------------------------------------------------------------- /resource/info/S001C001P001R001A044_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S001C001P001R001A044_w.gif -------------------------------------------------------------------------------- /resource/info/S001C001P001R001A051_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S001C001P001R001A051_w.gif -------------------------------------------------------------------------------- /resource/info/S002C001P010R001A017_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S002C001P010R001A017_w.gif -------------------------------------------------------------------------------- /resource/info/S003C001P008R001A002_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S003C001P008R001A002_w.gif -------------------------------------------------------------------------------- /resource/info/S003C001P008R001A008_w.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/S003C001P008R001A008_w.gif -------------------------------------------------------------------------------- /resource/info/clean_and_jerk_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/clean_and_jerk_w.gif -------------------------------------------------------------------------------- /resource/info/demo_video.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/demo_video.gif -------------------------------------------------------------------------------- /resource/info/hammer_throw_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/hammer_throw_w.gif -------------------------------------------------------------------------------- /resource/info/juggling_balls_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/juggling_balls_w.gif -------------------------------------------------------------------------------- /resource/info/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/pipeline.png -------------------------------------------------------------------------------- /resource/info/pull_ups_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/pull_ups_w.gif -------------------------------------------------------------------------------- /resource/info/tai_chi_w.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/info/tai_chi_w.gif -------------------------------------------------------------------------------- /resource/kinetics-motion.txt: -------------------------------------------------------------------------------- 1 | belly dancing 2 | punching bag 3 | capoeira 4 | squat 5 | windsurfing 6 | skipping rope 7 | swimming backstroke 8 | hammer throw 9 | throwing discus 10 | tobogganing 11 | hopscotch 12 | hitting baseball 13 | roller skating 14 | arm wrestling 15 | snatch weight lifting 16 | tai chi 17 | riding mechanical bull 18 | salsa dancing 19 | hurling (sport) 20 | lunge 21 | skateboarding 22 | country line dancing 23 | juggling balls 24 | surfing crowd 25 | deadlifting 26 | clean and jerk 27 | crawling baby 28 | push up 29 | front raises 30 | pull ups -------------------------------------------------------------------------------- /resource/kinetics_skeleton/label_name.txt: -------------------------------------------------------------------------------- 1 | abseiling 2 | air drumming 3 | answering questions 4 | applauding 5 | applying cream 6 | archery 7 | arm wrestling 8 | arranging flowers 9 | assembling computer 10 | auctioning 11 
| baby waking up 12 | baking cookies 13 | balloon blowing 14 | bandaging 15 | barbequing 16 | bartending 17 | beatboxing 18 | bee keeping 19 | belly dancing 20 | bench pressing 21 | bending back 22 | bending metal 23 | biking through snow 24 | blasting sand 25 | blowing glass 26 | blowing leaves 27 | blowing nose 28 | blowing out candles 29 | bobsledding 30 | bookbinding 31 | bouncing on trampoline 32 | bowling 33 | braiding hair 34 | breading or breadcrumbing 35 | breakdancing 36 | brush painting 37 | brushing hair 38 | brushing teeth 39 | building cabinet 40 | building shed 41 | bungee jumping 42 | busking 43 | canoeing or kayaking 44 | capoeira 45 | carrying baby 46 | cartwheeling 47 | carving pumpkin 48 | catching fish 49 | catching or throwing baseball 50 | catching or throwing frisbee 51 | catching or throwing softball 52 | celebrating 53 | changing oil 54 | changing wheel 55 | checking tires 56 | cheerleading 57 | chopping wood 58 | clapping 59 | clay pottery making 60 | clean and jerk 61 | cleaning floor 62 | cleaning gutters 63 | cleaning pool 64 | cleaning shoes 65 | cleaning toilet 66 | cleaning windows 67 | climbing a rope 68 | climbing ladder 69 | climbing tree 70 | contact juggling 71 | cooking chicken 72 | cooking egg 73 | cooking on campfire 74 | cooking sausages 75 | counting money 76 | country line dancing 77 | cracking neck 78 | crawling baby 79 | crossing river 80 | crying 81 | curling hair 82 | cutting nails 83 | cutting pineapple 84 | cutting watermelon 85 | dancing ballet 86 | dancing charleston 87 | dancing gangnam style 88 | dancing macarena 89 | deadlifting 90 | decorating the christmas tree 91 | digging 92 | dining 93 | disc golfing 94 | diving cliff 95 | dodgeball 96 | doing aerobics 97 | doing laundry 98 | doing nails 99 | drawing 100 | dribbling basketball 101 | drinking 102 | drinking beer 103 | drinking shots 104 | driving car 105 | driving tractor 106 | drop kicking 107 | drumming fingers 108 | dunking basketball 109 | dying hair 110 | eating burger 111 | eating cake 112 | eating carrots 113 | eating chips 114 | eating doughnuts 115 | eating hotdog 116 | eating ice cream 117 | eating spaghetti 118 | eating watermelon 119 | egg hunting 120 | exercising arm 121 | exercising with an exercise ball 122 | extinguishing fire 123 | faceplanting 124 | feeding birds 125 | feeding fish 126 | feeding goats 127 | filling eyebrows 128 | finger snapping 129 | fixing hair 130 | flipping pancake 131 | flying kite 132 | folding clothes 133 | folding napkins 134 | folding paper 135 | front raises 136 | frying vegetables 137 | garbage collecting 138 | gargling 139 | getting a haircut 140 | getting a tattoo 141 | giving or receiving award 142 | golf chipping 143 | golf driving 144 | golf putting 145 | grinding meat 146 | grooming dog 147 | grooming horse 148 | gymnastics tumbling 149 | hammer throw 150 | headbanging 151 | headbutting 152 | high jump 153 | high kick 154 | hitting baseball 155 | hockey stop 156 | holding snake 157 | hopscotch 158 | hoverboarding 159 | hugging 160 | hula hooping 161 | hurdling 162 | hurling (sport) 163 | ice climbing 164 | ice fishing 165 | ice skating 166 | ironing 167 | javelin throw 168 | jetskiing 169 | jogging 170 | juggling balls 171 | juggling fire 172 | juggling soccer ball 173 | jumping into pool 174 | jumpstyle dancing 175 | kicking field goal 176 | kicking soccer ball 177 | kissing 178 | kitesurfing 179 | knitting 180 | krumping 181 | laughing 182 | laying bricks 183 | long jump 184 | lunge 185 | making a cake 186 | making a sandwich 
187 | making bed 188 | making jewelry 189 | making pizza 190 | making snowman 191 | making sushi 192 | making tea 193 | marching 194 | massaging back 195 | massaging feet 196 | massaging legs 197 | massaging person's head 198 | milking cow 199 | mopping floor 200 | motorcycling 201 | moving furniture 202 | mowing lawn 203 | news anchoring 204 | opening bottle 205 | opening present 206 | paragliding 207 | parasailing 208 | parkour 209 | passing American football (in game) 210 | passing American football (not in game) 211 | peeling apples 212 | peeling potatoes 213 | petting animal (not cat) 214 | petting cat 215 | picking fruit 216 | planting trees 217 | plastering 218 | playing accordion 219 | playing badminton 220 | playing bagpipes 221 | playing basketball 222 | playing bass guitar 223 | playing cards 224 | playing cello 225 | playing chess 226 | playing clarinet 227 | playing controller 228 | playing cricket 229 | playing cymbals 230 | playing didgeridoo 231 | playing drums 232 | playing flute 233 | playing guitar 234 | playing harmonica 235 | playing harp 236 | playing ice hockey 237 | playing keyboard 238 | playing kickball 239 | playing monopoly 240 | playing organ 241 | playing paintball 242 | playing piano 243 | playing poker 244 | playing recorder 245 | playing saxophone 246 | playing squash or racquetball 247 | playing tennis 248 | playing trombone 249 | playing trumpet 250 | playing ukulele 251 | playing violin 252 | playing volleyball 253 | playing xylophone 254 | pole vault 255 | presenting weather forecast 256 | pull ups 257 | pumping fist 258 | pumping gas 259 | punching bag 260 | punching person (boxing) 261 | push up 262 | pushing car 263 | pushing cart 264 | pushing wheelchair 265 | reading book 266 | reading newspaper 267 | recording music 268 | riding a bike 269 | riding camel 270 | riding elephant 271 | riding mechanical bull 272 | riding mountain bike 273 | riding mule 274 | riding or walking with horse 275 | riding scooter 276 | riding unicycle 277 | ripping paper 278 | robot dancing 279 | rock climbing 280 | rock scissors paper 281 | roller skating 282 | running on treadmill 283 | sailing 284 | salsa dancing 285 | sanding floor 286 | scrambling eggs 287 | scuba diving 288 | setting table 289 | shaking hands 290 | shaking head 291 | sharpening knives 292 | sharpening pencil 293 | shaving head 294 | shaving legs 295 | shearing sheep 296 | shining shoes 297 | shooting basketball 298 | shooting goal (soccer) 299 | shot put 300 | shoveling snow 301 | shredding paper 302 | shuffling cards 303 | side kick 304 | sign language interpreting 305 | singing 306 | situp 307 | skateboarding 308 | ski jumping 309 | skiing (not slalom or crosscountry) 310 | skiing crosscountry 311 | skiing slalom 312 | skipping rope 313 | skydiving 314 | slacklining 315 | slapping 316 | sled dog racing 317 | smoking 318 | smoking hookah 319 | snatch weight lifting 320 | sneezing 321 | sniffing 322 | snorkeling 323 | snowboarding 324 | snowkiting 325 | snowmobiling 326 | somersaulting 327 | spinning poi 328 | spray painting 329 | spraying 330 | springboard diving 331 | squat 332 | sticking tongue out 333 | stomping grapes 334 | stretching arm 335 | stretching leg 336 | strumming guitar 337 | surfing crowd 338 | surfing water 339 | sweeping floor 340 | swimming backstroke 341 | swimming breast stroke 342 | swimming butterfly stroke 343 | swing dancing 344 | swinging legs 345 | swinging on something 346 | sword fighting 347 | tai chi 348 | taking a shower 349 | tango dancing 350 | tap dancing 351 | 
tapping guitar 352 | tapping pen 353 | tasting beer 354 | tasting food 355 | testifying 356 | texting 357 | throwing axe 358 | throwing ball 359 | throwing discus 360 | tickling 361 | tobogganing 362 | tossing coin 363 | tossing salad 364 | training dog 365 | trapezing 366 | trimming or shaving beard 367 | trimming trees 368 | triple jump 369 | tying bow tie 370 | tying knot (not on a tie) 371 | tying tie 372 | unboxing 373 | unloading truck 374 | using computer 375 | using remote controller (not gaming) 376 | using segway 377 | vault 378 | waiting in line 379 | walking the dog 380 | washing dishes 381 | washing feet 382 | washing hair 383 | washing hands 384 | water skiing 385 | water sliding 386 | watering plants 387 | waxing back 388 | waxing chest 389 | waxing eyebrows 390 | waxing legs 391 | weaving basket 392 | welding 393 | whistling 394 | windsurfing 395 | wrapping present 396 | wrestling 397 | writing 398 | yawning 399 | yoga 400 | zumba 401 | -------------------------------------------------------------------------------- /resource/media/clean_and_jerk.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/clean_and_jerk.mp4 -------------------------------------------------------------------------------- /resource/media/skateboarding.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/skateboarding.mp4 -------------------------------------------------------------------------------- /resource/media/ta_chi.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yysijie/st-gcn/221c0e152054b8da593774c0d483e59befdb9061/resource/media/ta_chi.mp4 -------------------------------------------------------------------------------- /resource/reference_model.txt: -------------------------------------------------------------------------------- 1 | st_gcn.kinetics.pt 2 | st_gcn.ntu-xview.pt 3 | st_gcn.ntu-xsub.pt -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import utils -------------------------------------------------------------------------------- /tools/get_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | out_path="models/" 4 | link="https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmskeleton/models/st-gcn/" 5 | reference_model="resource/reference_model.txt" 6 | 7 | mkdir -p $out_path 8 | while IFS='' read -r line || [[ -n "$line" ]]; do 9 | wget -c $link$line -O $out_path$line 10 | done < "$reference_model" 11 | 12 | 13 | # Downloading models for pose estimation 14 | OPENPOSE_URL="http://posefs1.perception.cs.cmu.edu/OpenPose/models/" 15 | POSE_FOLDER="pose/" 16 | 17 | # Body (COCO) 18 | COCO_FOLDER=${POSE_FOLDER}"coco/" 19 | OUT_FOLDER="models/${COCO_FOLDER}" 20 | COCO_MODEL=${COCO_FOLDER}"pose_iter_440000.caffemodel" 21 | wget -c ${OPENPOSE_URL}${COCO_MODEL} -P ${OUT_FOLDER} -------------------------------------------------------------------------------- /tools/kinetics_gendata.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import sys 4 | import pickle 5 | import argparse 6 | 7 | import numpy as np 8 | from numpy.lib.format import open_memmap 9 | 10 | sys.path.append( 11 | os.path.abspath(os.path.join(os.path.dirname(__file__), os.path.pardir))) 12 | from feeder.feeder_kinetics import Feeder_kinetics 13 | 14 | toolbar_width = 30 15 | 16 | def print_toolbar(rate, annotation=''): 17 | # setup toolbar 18 | sys.stdout.write("{}[".format(annotation)) 19 | for i in range(toolbar_width): 20 | if i * 1.0 / toolbar_width > rate: 21 | sys.stdout.write(' ') 22 | else: 23 | sys.stdout.write('-') 24 | sys.stdout.flush() 25 | sys.stdout.write(']\r') 26 | 27 | 28 | def end_toolbar(): 29 | sys.stdout.write("\n") 30 | 31 | 32 | def gendata( 33 | data_path, 34 | label_path, 35 | data_out_path, 36 | label_out_path, 37 | num_person_in=5, #observe the first 5 persons 38 | num_person_out=2, #then choose 2 persons with the highest score 39 | max_frame=300): 40 | 41 | feeder = Feeder_kinetics( 42 | data_path=data_path, 43 | label_path=label_path, 44 | num_person_in=num_person_in, 45 | num_person_out=num_person_out, 46 | window_size=max_frame) 47 | 48 | sample_name = feeder.sample_name 49 | sample_label = [] 50 | 51 | fp = open_memmap( 52 | data_out_path, 53 | dtype='float32', 54 | mode='w+', 55 | shape=(len(sample_name), 3, max_frame, 18, num_person_out)) 56 | 57 | for i, s in enumerate(sample_name): 58 | data, label = feeder[i] 59 | print_toolbar(i * 1.0 / len(sample_name), 60 | '({:>5}/{:<5}) Processing data: '.format( 61 | i + 1, len(sample_name))) 62 | fp[i, :, 0:data.shape[1], :, :] = data 63 | sample_label.append(label) 64 | 65 | with open(label_out_path, 'wb') as f: 66 | pickle.dump((sample_name, list(sample_label)), f) 67 | 68 | 69 | if __name__ == '__main__': 70 | parser = argparse.ArgumentParser( 71 | description='Kinetics-skeleton Data Converter.') 72 | parser.add_argument( 73 | '--data_path', default='data/Kinetics/kinetics-skeleton') 74 | parser.add_argument( 75 | '--out_folder', default='data/Kinetics/kinetics-skeleton') 76 | arg = parser.parse_args() 77 | 78 | part = ['train', 'val'] 79 | for p in part: 80 | data_path = '{}/kinetics_{}'.format(arg.data_path, p) 81 | label_path = '{}/kinetics_{}_label.json'.format(arg.data_path, p) 82 | data_out_path = '{}/{}_data.npy'.format(arg.out_folder, p) 83 | label_out_path = '{}/{}_label.pkl'.format(arg.out_folder, p) 84 | 85 | if not os.path.exists(arg.out_folder): 86 | 
os.makedirs(arg.out_folder) 87 | gendata(data_path, label_path, data_out_path, label_out_path) -------------------------------------------------------------------------------- /tools/ntu_gendata.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import pickle 4 | 5 | import argparse 6 | import numpy as np 7 | from numpy.lib.format import open_memmap 8 | 9 | from utils.ntu_read_skeleton import read_xyz 10 | 11 | training_subjects = [ 12 | 1, 2, 4, 5, 8, 9, 13, 14, 15, 16, 17, 18, 19, 25, 27, 28, 31, 34, 35, 38 13 | ] 14 | training_cameras = [2, 3] 15 | max_body = 2 16 | num_joint = 25 17 | max_frame = 300 18 | toolbar_width = 30 19 | 20 | def print_toolbar(rate, annotation=''): 21 | # setup toolbar 22 | sys.stdout.write("{}[".format(annotation)) 23 | for i in range(toolbar_width): 24 | if i * 1.0 / toolbar_width > rate: 25 | sys.stdout.write(' ') 26 | else: 27 | sys.stdout.write('-') 28 | sys.stdout.flush() 29 | sys.stdout.write(']\r') 30 | 31 | 32 | def end_toolbar(): 33 | sys.stdout.write("\n") 34 | 35 | 36 | def gendata(data_path, 37 | out_path, 38 | ignored_sample_path=None, 39 | benchmark='xview', 40 | part='eval'): 41 | if ignored_sample_path != None: 42 | with open(ignored_sample_path, 'r') as f: 43 | ignored_samples = [ 44 | line.strip() + '.skeleton' for line in f.readlines() 45 | ] 46 | else: 47 | ignored_samples = [] 48 | sample_name = [] 49 | sample_label = [] 50 | for filename in os.listdir(data_path): 51 | if filename in ignored_samples: 52 | continue 53 | action_class = int( 54 | filename[filename.find('A') + 1:filename.find('A') + 4]) 55 | subject_id = int( 56 | filename[filename.find('P') + 1:filename.find('P') + 4]) 57 | camera_id = int( 58 | filename[filename.find('C') + 1:filename.find('C') + 4]) 59 | 60 | if benchmark == 'xview': 61 | istraining = (camera_id in training_cameras) 62 | elif benchmark == 'xsub': 63 | istraining = (subject_id in training_subjects) 64 | else: 65 | raise ValueError() 66 | 67 | if part == 'train': 68 | issample = istraining 69 | elif part == 'val': 70 | issample = not (istraining) 71 | else: 72 | raise ValueError() 73 | 74 | if issample: 75 | sample_name.append(filename) 76 | sample_label.append(action_class - 1) 77 | 78 | with open('{}/{}_label.pkl'.format(out_path, part), 'wb') as f: 79 | pickle.dump((sample_name, list(sample_label)), f) 80 | # np.save('{}/{}_label.npy'.format(out_path, part), sample_label) 81 | 82 | fp = open_memmap( 83 | '{}/{}_data.npy'.format(out_path, part), 84 | dtype='float32', 85 | mode='w+', 86 | shape=(len(sample_label), 3, max_frame, num_joint, max_body)) 87 | 88 | for i, s in enumerate(sample_name): 89 | print_toolbar(i * 1.0 / len(sample_label), 90 | '({:>5}/{:<5}) Processing {:>5}-{:<5} data: '.format( 91 | i + 1, len(sample_name), benchmark, part)) 92 | data = read_xyz( 93 | os.path.join(data_path, s), max_body=max_body, num_joint=num_joint) 94 | fp[i, :, 0:data.shape[1], :, :] = data 95 | end_toolbar() 96 | 97 | 98 | if __name__ == '__main__': 99 | 100 | parser = argparse.ArgumentParser(description='NTU-RGB-D Data Converter.') 101 | parser.add_argument( 102 | '--data_path', default='data/NTU-RGB-D/nturgb+d_skeletons') 103 | parser.add_argument( 104 | '--ignored_sample_path', 105 | default='resource/NTU-RGB-D/samples_with_missing_skeletons.txt') 106 | parser.add_argument('--out_folder', default='data/NTU-RGB-D') 107 | 108 | benchmark = ['xsub', 'xview'] 109 | part = ['train', 'val'] 110 | arg = parser.parse_args() 111 | 112 | for b in 
benchmark: 113 | for p in part: 114 | out_path = os.path.join(arg.out_folder, b) 115 | if not os.path.exists(out_path): 116 | os.makedirs(out_path) 117 | gendata( 118 | arg.data_path, 119 | out_path, 120 | arg.ignored_sample_path, 121 | benchmark=b, 122 | part=p) 123 | -------------------------------------------------------------------------------- /tools/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import video 2 | from . import openpose 3 | from . import visualization -------------------------------------------------------------------------------- /tools/utils/ntu_read_skeleton.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | 4 | 5 | def read_skeleton(file): 6 | with open(file, 'r') as f: 7 | skeleton_sequence = {} 8 | skeleton_sequence['numFrame'] = int(f.readline()) 9 | skeleton_sequence['frameInfo'] = [] 10 | for t in range(skeleton_sequence['numFrame']): 11 | frame_info = {} 12 | frame_info['numBody'] = int(f.readline()) 13 | frame_info['bodyInfo'] = [] 14 | for m in range(frame_info['numBody']): 15 | body_info = {} 16 | body_info_key = [ 17 | 'bodyID', 'clipedEdges', 'handLeftConfidence', 18 | 'handLeftState', 'handRightConfidence', 'handRightState', 19 | 'isResticted', 'leanX', 'leanY', 'trackingState' 20 | ] 21 | body_info = { 22 | k: float(v) 23 | for k, v in zip(body_info_key, f.readline().split()) 24 | } 25 | body_info['numJoint'] = int(f.readline()) 26 | body_info['jointInfo'] = [] 27 | for v in range(body_info['numJoint']): 28 | joint_info_key = [ 29 | 'x', 'y', 'z', 'depthX', 'depthY', 'colorX', 'colorY', 30 | 'orientationW', 'orientationX', 'orientationY', 31 | 'orientationZ', 'trackingState' 32 | ] 33 | joint_info = { 34 | k: float(v) 35 | for k, v in zip(joint_info_key, f.readline().split()) 36 | } 37 | body_info['jointInfo'].append(joint_info) 38 | frame_info['bodyInfo'].append(body_info) 39 | skeleton_sequence['frameInfo'].append(frame_info) 40 | return skeleton_sequence 41 | 42 | 43 | def read_xyz(file, max_body=2, num_joint=25): 44 | seq_info = read_skeleton(file) 45 | data = np.zeros((3, seq_info['numFrame'], num_joint, max_body)) 46 | for n, f in enumerate(seq_info['frameInfo']): 47 | for m, b in enumerate(f['bodyInfo']): 48 | for j, v in enumerate(b['jointInfo']): 49 | if m < max_body and j < num_joint: 50 | data[:, n, j, m] = [v['x'], v['y'], v['z']] 51 | else: 52 | pass 53 | return data -------------------------------------------------------------------------------- /tools/utils/openpose.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import json 3 | 4 | def json_pack(snippets_dir, video_name, frame_width, frame_height, label='unknown', label_index=-1): 5 | sequence_info = [] 6 | p = Path(snippets_dir) 7 | for path in p.glob(video_name+'*.json'): 8 | json_path = str(path) 9 | print(path) 10 | frame_id = int(path.stem.split('_')[-2]) 11 | frame_data = {'frame_index': frame_id} 12 | data = json.load(open(json_path)) 13 | skeletons = [] 14 | for person in data['people']: 15 | score, coordinates = [], [] 16 | skeleton = {} 17 | keypoints = person['pose_keypoints_2d'] 18 | for i in range(0, len(keypoints), 3): 19 | coordinates += [keypoints[i]/frame_width, keypoints[i + 1]/frame_height] 20 | score += [keypoints[i + 2]] 21 | skeleton['pose'] = coordinates 22 | skeleton['score'] = score 23 | skeletons += [skeleton] 24 | frame_data['skeleton'] = skeletons 25 | 
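# each frame contributes {'frame_index': i, 'skeleton': [{'pose': [x1, y1, x2, y2, ...], 'score': [s1, s2, ...]}, ...]}
# (coordinates are already normalized by frame_width / frame_height above); this is the same
# per-frame layout that video_info_parsing in tools/utils/video.py reads back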
sequence_info += [frame_data] 26 | 27 | video_info = dict() 28 | video_info['data'] = sequence_info 29 | video_info['label'] = label 30 | video_info['label_index'] = label_index 31 | 32 | return video_info -------------------------------------------------------------------------------- /tools/utils/video.py: -------------------------------------------------------------------------------- 1 | import skvideo.io 2 | import numpy as np 3 | import cv2 4 | 5 | def video_info_parsing(video_info, num_person_in=5, num_person_out=2): 6 | data_numpy = np.zeros((3, len(video_info['data']), 18, num_person_in)) 7 | for frame_info in video_info['data']: 8 | frame_index = frame_info['frame_index'] 9 | for m, skeleton_info in enumerate(frame_info["skeleton"]): 10 | if m >= num_person_in: 11 | break 12 | pose = skeleton_info['pose'] 13 | score = skeleton_info['score'] 14 | data_numpy[0, frame_index, :, m] = pose[0::2] 15 | data_numpy[1, frame_index, :, m] = pose[1::2] 16 | data_numpy[2, frame_index, :, m] = score 17 | 18 | # centralization 19 | data_numpy[0:2] = data_numpy[0:2] - 0.5 20 | data_numpy[0][data_numpy[2] == 0] = 0 21 | data_numpy[1][data_numpy[2] == 0] = 0 22 | 23 | sort_index = (-data_numpy[2, :, :, :].sum(axis=1)).argsort(axis=1) 24 | for t, s in enumerate(sort_index): 25 | data_numpy[:, t, :, :] = data_numpy[:, t, :, s].transpose((1, 2, 26 | 0)) 27 | data_numpy = data_numpy[:, :, :, :num_person_out] 28 | 29 | label = video_info['label_index'] 30 | return data_numpy, label 31 | 32 | def get_video_frames(video_path): 33 | vread = skvideo.io.vread(video_path) 34 | video = [] 35 | for frame in vread: 36 | video.append(frame) 37 | return video 38 | 39 | def video_play(video_path, fps=30): 40 | cap = cv2.VideoCapture(video_path) 41 | 42 | while(cap.isOpened()): 43 | ret, frame = cap.read() 44 | if not ret: break # stop at the end of the stream 45 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 46 | 47 | cv2.imshow('frame',gray) 48 | if cv2.waitKey(int(1000 / fps)) & 0xFF == ord('q'): 49 | break 50 | 51 | cap.release() 52 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /tools/utils/visualization.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | 5 | def stgcn_visualize(pose, 6 | edge, 7 | feature, 8 | video, 9 | label=None, 10 | label_sequence=None, 11 | height=1080, 12 | fps=None): 13 | 14 | _, T, V, M = pose.shape 15 | T = len(video) 16 | pos_track = [None] * M 17 | for t in range(T): 18 | frame = video[t] 19 | 20 | # image resize 21 | H, W, c = frame.shape 22 | frame = cv2.resize(frame, (height * W // H // 2, height//2)) 23 | H, W, c = frame.shape 24 | scale_factor = 2 * height / 1080 25 | 26 | # draw skeleton 27 | skeleton = frame * 0 28 | text = frame * 0 29 | for m in range(M): 30 | 31 | score = pose[2, t, :, m].max() 32 | if score < 0.3: 33 | continue 34 | 35 | for i, j in edge: 36 | xi = pose[0, t, i, m] 37 | yi = pose[1, t, i, m] 38 | xj = pose[0, t, j, m] 39 | yj = pose[1, t, j, m] 40 | if xi + yi == 0 or xj + yj == 0: 41 | continue 42 | else: 43 | xi = int((xi + 0.5) * W) 44 | yi = int((yi + 0.5) * H) 45 | xj = int((xj + 0.5) * W) 46 | yj = int((yj + 0.5) * H) 47 | cv2.line(skeleton, (xi, yi), (xj, yj), (255, 255, 255), 48 | int(np.ceil(2 * scale_factor))) 49 | 50 | if label_sequence is not None: 51 | body_label = label_sequence[t // 4][m] 52 | else: 53 | body_label = '' 54 | x_nose = int((pose[0, t, 0, m] + 0.5) * W) 55 | y_nose = int((pose[1, t, 0, m] + 0.5) * H) 56 | x_neck = int((pose[0, t, 1, m] + 0.5) 
* W) 57 | y_neck = int((pose[1, t, 1, m] + 0.5) * H) 58 | 59 | half_head = int(((x_neck - x_nose)**2 + (y_neck - y_nose)**2)**0.5) 60 | pos = (x_nose + half_head, y_nose - half_head) 61 | if pos_track[m] is None: 62 | pos_track[m] = pos 63 | else: 64 | new_x = int(pos_track[m][0] + (pos[0] - pos_track[m][0]) * 0.2) 65 | new_y = int(pos_track[m][1] + (pos[1] - pos_track[m][1]) * 0.2) 66 | pos_track[m] = (new_x, new_y) 67 | cv2.putText(text, body_label, pos_track[m], 68 | cv2.FONT_HERSHEY_TRIPLEX, 0.5 * scale_factor, 69 | (255, 255, 255)) 70 | 71 | # generate mask 72 | mask = frame * 0 73 | feature = np.abs(feature) 74 | feature = feature / feature.mean() 75 | for m in range(M): 76 | score = pose[2, t, :, m].max() 77 | if score < 0.3: 78 | continue 79 | 80 | f = feature[t // 4, :, m]**5 81 | if f.mean() != 0: 82 | f = f / f.mean() 83 | for v in range(V): 84 | x = pose[0, t, v, m] 85 | y = pose[1, t, v, m] 86 | if x + y == 0: 87 | continue 88 | else: 89 | x = int((x + 0.5) * W) 90 | y = int((y + 0.5) * H) 91 | cv2.circle(mask, (x, y), 0, (255, 255, 255), 92 | int(np.ceil(f[v]**0.5 * 8 * scale_factor))) 93 | blurred_mask = cv2.blur(mask, (12, 12)) 94 | 95 | skeleton_result = blurred_mask.astype(float) * 0.75 96 | skeleton_result += skeleton.astype(float) * 0.25 97 | skeleton_result += text.astype(float) 98 | skeleton_result[skeleton_result > 255] = 255 99 | skeleton_result = skeleton_result.astype(np.uint8) 100 | 101 | rgb_result = blurred_mask.astype(float) * 0.75 102 | rgb_result += frame.astype(float) * 0.5 103 | rgb_result += skeleton.astype(float) * 0.25 104 | rgb_result[rgb_result > 255] = 255 105 | rgb_result = rgb_result.astype(np.uint8) 106 | 107 | put_text(skeleton, 'inputs of st-gcn', (0.15, 0.5)) 108 | 109 | text_1 = cv2.imread( 110 | './resource/demo_asset/original_video.png', cv2.IMREAD_UNCHANGED) 111 | text_2 = cv2.imread( 112 | './resource/demo_asset/pose_estimation.png', cv2.IMREAD_UNCHANGED) 113 | text_3 = cv2.imread( 114 | './resource/demo_asset/attention+prediction.png', cv2.IMREAD_UNCHANGED) 115 | text_4 = cv2.imread( 116 | './resource/demo_asset/attention+rgb.png', cv2.IMREAD_UNCHANGED) 117 | 118 | try: 119 | blend(frame, text_1) 120 | blend(skeleton, text_2) 121 | blend(skeleton_result, text_3) 122 | blend(rgb_result, text_4) 123 | except: # the demo_asset overlays are optional; skip them if they cannot be loaded or blended 124 | pass 125 | 126 | if label is not None: 127 | label_name = 'voting result: ' + label 128 | put_text(skeleton_result, label_name, (0.1, 0.5)) 129 | 130 | if fps is not None: 131 | put_text(skeleton, 'fps:{:.2f}'.format(fps), (0.9, 0.5)) 132 | 133 | img0 = np.concatenate((frame, skeleton), axis=1) 134 | img1 = np.concatenate((skeleton_result, rgb_result), axis=1) 135 | img = np.concatenate((img0, img1), axis=0) 136 | 137 | yield img 138 | 139 | 140 | def put_text(img, text, position, scale_factor=1): 141 | t_w, t_h = cv2.getTextSize( 142 | text, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, thickness=1)[0] 143 | H, W, _ = img.shape 144 | position = (int(W * position[1] - t_w * 0.5), 145 | int(H * position[0] - t_h * 0.5)) 146 | params = (position, cv2.FONT_HERSHEY_TRIPLEX, scale_factor, 147 | (255, 255, 255)) 148 | cv2.putText(img, text, *params) 149 | 150 | 151 | def blend(background, foreground, dx=20, dy=10, fy=0.7): 152 | 153 | foreground = cv2.resize(foreground, (0, 0), fx=fy, fy=fy) 154 | h, w = foreground.shape[:2] 155 | b, g, r, a = cv2.split(foreground) 156 | mask = np.dstack((a, a, a)) 157 | rgb = np.dstack((b, g, r)) 158 | 159 | canvas = background[-h-dy:-dy, dx:w+dx] 160 | imask = mask > 0 161 | canvas[imask] = rgb[imask] 162 | 
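# ----------------------------------------------------------------------
# Minimal usage sketch (hypothetical caller, not taken from the demo
# processors). It assumes `pose` is a (3, T, V, M) array of normalized
# joint coordinates and confidence scores, `edge` is a list of joint-index
# pairs such as the skeleton edges built in net/utils/graph.py, `feature`
# is a per-joint intensity map indexed as feature[t // 4, :, m], and
# `video` is a list of BGR frames, e.g. from get_video_frames in
# tools/utils/video.py:
#
#   for img in stgcn_visualize(pose, edge, feature, video, label='tai chi'):
#       cv2.imshow('ST-GCN demo', img)
#       if cv2.waitKey(1) & 0xFF == ord('q'):
#           break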
-------------------------------------------------------------------------------- /torchlight/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='torchlight', 5 | version='1.0', 6 | description='A mini framework for pytorch', 7 | packages=find_packages(), 8 | install_requires=[]) 9 | -------------------------------------------------------------------------------- /torchlight/torchlight/__init__.py: -------------------------------------------------------------------------------- 1 | from .io import IO 2 | from .io import str2bool 3 | from .io import str2dict 4 | from .io import DictAction 5 | from .io import import_class 6 | from .gpu import visible_gpu 7 | from .gpu import occupy_gpu 8 | from .gpu import ngpu 9 | -------------------------------------------------------------------------------- /torchlight/torchlight/gpu.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | 4 | 5 | def visible_gpu(gpus): 6 | """ 7 | set visible gpu. 8 | 9 | can be a single id, or a list 10 | 11 | return a list of new gpus ids 12 | """ 13 | gpus = [gpus] if isinstance(gpus, int) else list(gpus) 14 | os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(list(map(str, gpus))) 15 | return list(range(len(gpus))) 16 | 17 | 18 | def ngpu(gpus): 19 | """ 20 | count how many gpus used. 21 | """ 22 | gpus = [gpus] if isinstance(gpus, int) else list(gpus) 23 | return len(gpus) 24 | 25 | 26 | def occupy_gpu(gpus=None): 27 | """ 28 | make program appear on nvidia-smi. 29 | """ 30 | if gpus is None: 31 | torch.zeros(1).cuda() 32 | else: 33 | gpus = [gpus] if isinstance(gpus, int) else list(gpus) 34 | for g in gpus: 35 | torch.zeros(1).cuda(g) 36 | -------------------------------------------------------------------------------- /torchlight/torchlight/io.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | import sys 5 | import traceback 6 | import time 7 | import warnings 8 | import pickle 9 | from collections import OrderedDict 10 | import yaml 11 | import numpy as np 12 | # torch 13 | import torch 14 | import torch.nn as nn 15 | import torch.optim as optim 16 | from torch.autograd import Variable 17 | 18 | with warnings.catch_warnings(): 19 | warnings.filterwarnings("ignore",category=FutureWarning) 20 | import h5py 21 | 22 | class IO(): 23 | def __init__(self, work_dir, save_log=True, print_log=True): 24 | self.work_dir = work_dir 25 | self.save_log = save_log 26 | self.print_to_screen = print_log 27 | self.cur_time = time.time() 28 | self.split_timer = {} 29 | self.pavi_logger = None 30 | self.session_file = None 31 | self.model_text = '' 32 | 33 | # PaviLogger is removed in this version 34 | def log(self, *args, **kwargs): 35 | pass 36 | # try: 37 | # if self.pavi_logger is None: 38 | # from torchpack.runner.hooks import PaviLogger 39 | # url = 'http://pavi.parrotsdnn.org/log' 40 | # with open(self.session_file, 'r') as f: 41 | # info = dict( 42 | # session_file=self.session_file, 43 | # session_text=f.read(), 44 | # model_text=self.model_text) 45 | # self.pavi_logger = PaviLogger(url) 46 | # self.pavi_logger.connect(self.work_dir, info=info) 47 | # self.pavi_logger.log(*args, **kwargs) 48 | # except: #pylint: disable=W0702 49 | # pass 50 | 51 | def load_model(self, model, **model_args): 52 | Model = import_class(model) 53 | model = Model(**model_args) 54 | 
self.model_text += '\n\n' + str(model) 55 | return model 56 | 57 | def load_weights(self, model, weights_path, ignore_weights=None): 58 | if ignore_weights is None: 59 | ignore_weights = [] 60 | if isinstance(ignore_weights, str): 61 | ignore_weights = [ignore_weights] 62 | 63 | self.print_log('Load weights from {}.'.format(weights_path)) 64 | weights = torch.load(weights_path) 65 | weights = OrderedDict([[k.split('module.')[-1], 66 | v.cpu()] for k, v in weights.items()]) 67 | 68 | # filter weights 69 | for i in ignore_weights: 70 | ignore_name = list() 71 | for w in weights: 72 | if w.find(i) == 0: 73 | ignore_name.append(w) 74 | for n in ignore_name: 75 | weights.pop(n) 76 | self.print_log('Filter [{}] remove weights [{}].'.format(i,n)) 77 | 78 | for w in weights: 79 | self.print_log('Load weights [{}].'.format(w)) 80 | 81 | try: 82 | model.load_state_dict(weights) 83 | except (KeyError, RuntimeError): 84 | state = model.state_dict() 85 | diff = list(set(state.keys()).difference(set(weights.keys()))) 86 | for d in diff: 87 | self.print_log('Can not find weights [{}].'.format(d)) 88 | state.update(weights) 89 | model.load_state_dict(state) 90 | return model 91 | 92 | def save_pkl(self, result, filename): 93 | with open('{}/{}'.format(self.work_dir, filename), 'wb') as f: 94 | pickle.dump(result, f) 95 | 96 | def save_h5(self, result, filename): 97 | with h5py.File('{}/{}'.format(self.work_dir, filename), 'w') as f: 98 | for k in result.keys(): 99 | f[k] = result[k] 100 | 101 | def save_model(self, model, name): 102 | model_path = '{}/{}'.format(self.work_dir, name) 103 | state_dict = model.state_dict() 104 | weights = OrderedDict([[''.join(k.split('module.')), 105 | v.cpu()] for k, v in state_dict.items()]) 106 | torch.save(weights, model_path) 107 | self.print_log('The model has been saved as {}.'.format(model_path)) 108 | 109 | def save_arg(self, arg): 110 | 111 | self.session_file = '{}/config.yaml'.format(self.work_dir) 112 | 113 | # save arg 114 | arg_dict = vars(arg) 115 | if not os.path.exists(self.work_dir): 116 | os.makedirs(self.work_dir) 117 | with open(self.session_file, 'w') as f: 118 | f.write('# command line: {}\n\n'.format(' '.join(sys.argv))) 119 | yaml.dump(arg_dict, f, default_flow_style=False, indent=4) 120 | 121 | def print_log(self, str, print_time=True): 122 | if print_time: 123 | # localtime = time.asctime(time.localtime(time.time())) 124 | str = time.strftime("[%m.%d.%y|%X] ", time.localtime()) + str 125 | 126 | if self.print_to_screen: 127 | print(str) 128 | if self.save_log: 129 | with open('{}/log.txt'.format(self.work_dir), 'a') as f: 130 | print(str, file=f) 131 | 132 | def init_timer(self, *name): 133 | self.record_time() 134 | self.split_timer = {k: 0.0000001 for k in name} 135 | 136 | def check_time(self, name): 137 | self.split_timer[name] += self.split_time() 138 | 139 | def record_time(self): 140 | self.cur_time = time.time() 141 | return self.cur_time 142 | 143 | def split_time(self): 144 | split_time = time.time() - self.cur_time 145 | self.record_time() 146 | return split_time 147 | 148 | def print_timer(self): 149 | proportion = { 150 | k: '{:02d}%'.format(int(round(v * 100 / sum(self.split_timer.values())))) 151 | for k, v in self.split_timer.items() 152 | } 153 | self.print_log('Time consumption:') 154 | for k in proportion: 155 | self.print_log( 156 | '\t[{}][{}]: {:.4f}'.format(k, proportion[k],self.split_timer[k]) 157 | ) 158 | 159 | 160 | def str2bool(v): 161 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 162 | return True 163 | elif 
v.lower() in ('no', 'false', 'f', 'n', '0'): 164 | return False 165 | else: 166 | raise argparse.ArgumentTypeError('Boolean value expected.') 167 | 168 | 169 | def str2dict(v): 170 | return eval('dict({})'.format(v)) #pylint: disable=W0123 171 | 172 | 173 | def _import_class_0(name): 174 | components = name.split('.') 175 | mod = __import__(components[0]) 176 | for comp in components[1:]: 177 | mod = getattr(mod, comp) 178 | return mod 179 | 180 | 181 | def import_class(import_str): 182 | mod_str, _sep, class_str = import_str.rpartition('.') 183 | __import__(mod_str) 184 | try: 185 | return getattr(sys.modules[mod_str], class_str) 186 | except AttributeError: 187 | raise ImportError('Class %s cannot be found (%s)' % 188 | (class_str, 189 | traceback.format_exception(*sys.exc_info()))) 190 | 191 | 192 | class DictAction(argparse.Action): 193 | def __init__(self, option_strings, dest, nargs=None, **kwargs): 194 | if nargs is not None: 195 | raise ValueError("nargs not allowed") 196 | super(DictAction, self).__init__(option_strings, dest, **kwargs) 197 | 198 | def __call__(self, parser, namespace, values, option_string=None): 199 | input_dict = eval('dict({})'.format(values)) #pylint: disable=W0123 200 | output_dict = getattr(namespace, self.dest) 201 | for k in input_dict: 202 | output_dict[k] = input_dict[k] 203 | setattr(namespace, self.dest, output_dict) 204 | --------------------------------------------------------------------------------