├── .github └── workflows │ └── lint.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── configs ├── aagcn │ ├── README.md │ ├── aagcn_pyskl_ntu120_xset_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── aagcn_pyskl_ntu120_xset_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── aagcn_pyskl_ntu120_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── aagcn_pyskl_ntu120_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── aagcn_pyskl_ntu60_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── aagcn_pyskl_ntu60_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── aagcn_pyskl_ntu60_xview_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ └── aagcn_pyskl_ntu60_xview_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py ├── ctrgcn │ ├── README.md │ ├── ctrgcn_pyskl_ntu120_xset_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ctrgcn_pyskl_ntu120_xset_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ctrgcn_pyskl_ntu120_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ctrgcn_pyskl_ntu120_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ctrgcn_pyskl_ntu60_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ctrgcn_pyskl_ntu60_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ctrgcn_pyskl_ntu60_xview_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ └── ctrgcn_pyskl_ntu60_xview_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py ├── dgstgcn │ ├── README.md │ ├── ntu120_xset_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ntu120_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── ntu60_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ └── ntu60_xview_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py ├── msg3d │ ├── README.md │ ├── 
msg3d_pyskl_ntu120_xset_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── msg3d_pyskl_ntu120_xset_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── msg3d_pyskl_ntu120_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── msg3d_pyskl_ntu120_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── msg3d_pyskl_ntu60_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── msg3d_pyskl_ntu60_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── msg3d_pyskl_ntu60_xview_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ └── msg3d_pyskl_ntu60_xview_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py ├── posec3d │ ├── README.md │ ├── c3d_light_gym │ │ ├── joint.py │ │ └── limb.py │ ├── c3d_light_ntu60_xsub │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_346_k400 │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_463_k400 │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_diving48 │ │ └── joint.py │ ├── slowonly_r50_gym │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_hmdb51_k400p │ │ └── s1_joint.py │ ├── slowonly_r50_ntu120_xset │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_ntu120_xsub │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_ntu60_xsub │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_ntu60_xview │ │ ├── joint.py │ │ └── limb.py │ ├── slowonly_r50_ucf101_k400p │ │ └── s1_joint.py │ ├── x3d_shallow_gym │ │ ├── joint.py │ │ └── limb.py │ └── x3d_shallow_ntu60_xsub │ │ ├── joint.py │ │ └── limb.py ├── rgbpose_conv3d │ ├── README.md │ ├── compress_nturgbd.py │ ├── merge_pretrain.ipynb │ ├── pose_only.py │ ├── rgb_only.py │ └── rgbpose_conv3d.py ├── stgcn++ │ ├── README.md │ ├── stgcn++_ntu120_xset_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn++_ntu120_xset_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn++_ntu120_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── 
stgcn++_ntu120_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn++_ntu60_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn++_ntu60_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn++_ntu60_xview_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ └── stgcn++_ntu60_xview_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py ├── stgcn │ ├── README.md │ ├── stgcn_pyskl_ntu120_xset_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_pyskl_ntu120_xset_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_pyskl_ntu120_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_pyskl_ntu120_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_pyskl_ntu60_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_pyskl_ntu60_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_pyskl_ntu60_xview_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_pyskl_ntu60_xview_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_vanilla_ntu60_xsub_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_vanilla_ntu60_xsub_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ ├── stgcn_vanilla_ntu60_xview_3dkp │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py │ └── stgcn_vanilla_ntu60_xview_hrnet │ │ ├── b.py │ │ ├── bm.py │ │ ├── j.py │ │ └── jm.py └── strong_aug │ ├── README.md │ ├── ntu120_xset_3dkp │ ├── b.py │ ├── bm.py │ ├── j.py │ └── jm.py │ ├── ntu120_xsub_3dkp │ ├── b.py │ ├── bm.py │ ├── j.py │ └── jm.py │ ├── ntu60_xsub_3dkp │ ├── b.py │ ├── bm.py │ ├── j.py │ └── jm.py │ └── ntu60_xview_3dkp │ ├── b.py │ ├── bm.py │ ├── j.py │ └── jm.py ├── demo ├── demo.md ├── demo_gesture.py ├── demo_skeleton.py ├── faster_rcnn_r50_fpn_1x_coco-person.py ├── hagrid.pth ├── hrnet_w32_coco_256x192.py ├── ntu_sample.avi 
├── stgcnpp_gesture.py ├── vis_skeleton.ipynb └── visualize_heatmap_volume.ipynb ├── examples ├── extract_diving48_skeleton │ └── diving48_example.ipynb └── inference_speed.ipynb ├── pyskl.yaml ├── pyskl ├── __init__.py ├── apis │ ├── __init__.py │ ├── inference.py │ └── train.py ├── core │ ├── __init__.py │ ├── evaluation.py │ └── hooks.py ├── datasets │ ├── __init__.py │ ├── base.py │ ├── builder.py │ ├── dataset_wrappers.py │ ├── gesture_dataset.py │ ├── pipelines │ │ ├── __init__.py │ │ ├── augmentations.py │ │ ├── compose.py │ │ ├── formatting.py │ │ ├── heatmap_related.py │ │ ├── loading.py │ │ ├── multi_modality.py │ │ ├── pose_related.py │ │ └── sampling.py │ ├── pose_dataset.py │ ├── samplers │ │ ├── __init__.py │ │ └── distributed_sampler.py │ └── video_dataset.py ├── models │ ├── __init__.py │ ├── builder.py │ ├── cnns │ │ ├── __init__.py │ │ ├── c3d.py │ │ ├── potion.py │ │ ├── resnet.py │ │ ├── resnet3d.py │ │ ├── resnet3d_slowfast.py │ │ ├── resnet3d_slowonly.py │ │ ├── rgbposeconv3d.py │ │ └── x3d.py │ ├── gcns │ │ ├── __init__.py │ │ ├── aagcn.py │ │ ├── ctrgcn.py │ │ ├── dgstgcn.py │ │ ├── msg3d.py │ │ ├── sgn.py │ │ ├── stgcn.py │ │ └── utils │ │ │ ├── __init__.py │ │ │ ├── gcn.py │ │ │ ├── init_func.py │ │ │ ├── msg3d_utils.py │ │ │ └── tcn.py │ ├── heads │ │ ├── __init__.py │ │ ├── base.py │ │ ├── rgbpose_head.py │ │ └── simple_head.py │ ├── losses │ │ ├── __init__.py │ │ ├── base.py │ │ └── cross_entropy_loss.py │ └── recognizers │ │ ├── __init__.py │ │ ├── base.py │ │ ├── mm_recognizer3d.py │ │ ├── recognizer2d.py │ │ ├── recognizer3d.py │ │ └── recognizergcn.py ├── smp.py ├── utils │ ├── __init__.py │ ├── collect_env.py │ ├── graph.py │ ├── misc.py │ └── visualize.py └── version.py ├── pyskl_310.yaml ├── requirements.txt ├── setup.cfg ├── setup.py └── tools ├── data ├── README.md ├── custom_2d_skeleton.py ├── label_map │ ├── diving48.txt │ ├── gym.txt │ ├── hmdb51.txt │ ├── k400.txt │ ├── nturgbd_120.txt │ └── ucf101.txt ├── 
ntu120_missing.txt └── ntu_preproc.py ├── dist_run.sh ├── dist_test.sh ├── dist_train.sh ├── test.py └── train.py /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: lint 2 | 3 | on: [push, pull_request] 4 | 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | jobs: 10 | lint: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Set up Python 3.7 15 | uses: actions/setup-python@v2 16 | with: 17 | python-version: 3.7 18 | - name: Install pre-commit hook 19 | run: | 20 | pip install pre-commit 21 | pre-commit install 22 | - name: Linting 23 | run: pre-commit run --all-files 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | **/*.pyc 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | db.sqlite3 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # Environments 86 | .env 87 | .venv 88 | env/ 89 | venv/ 90 | ENV/ 91 | env.bak/ 92 | venv.bak/ 93 | 94 | # mkdocs documentation 95 | /site 96 | 97 | # mypy 98 | .mypy_cache/ 99 | 100 | # custom 101 | /data 102 | .vscode 103 | .idea 104 | *.pkl 105 | *.pkl.json 106 | *.log.json 107 | benchlist.txt 108 | work_dirs/ 109 | .cache/ 110 | 111 | # Profile 112 | *.prof 113 | 114 | # lmdb 115 | *.mdb 116 | 117 | # avoid soft links created by MIM 118 | pyskl/configs/* 119 | pyskl/tools/* 120 | 121 | # unignore ipython notebook files in demo 122 | !demo/*.ipynb 123 | pyskl/.mim 124 | .VScodeCounter 125 | tmp/* 126 | 127 | examples/extract_diving48_skeleton/*json 128 | examples/extract_diving48_skeleton/*list 129 | examples/extract_diving48_skeleton/*pkl 130 | configs/gen_config.ipynb 131 | dev.ipynb 132 | test.ipynb 133 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | exclude: > 2 | (?x)^( 3 | ^tests/data/ 4 | ^configs/ 5 | )$ 6 | repos: 7 | - repo: https://github.com/PyCQA/flake8 8 | rev: 5.0.4 9 | hooks: 10 | - id: 
flake8 11 | args: ["--max-line-length=120"] 12 | exclude: ^configs/ 13 | - repo: https://github.com/PyCQA/isort 14 | rev: 5.11.5 15 | hooks: 16 | - id: isort 17 | - repo: https://github.com/pre-commit/mirrors-yapf 18 | rev: v0.30.0 19 | hooks: 20 | - id: yapf 21 | args: ["--style={column_limit=120}"] 22 | - repo: https://github.com/pre-commit/pre-commit-hooks 23 | rev: v3.1.0 24 | hooks: 25 | - id: trailing-whitespace 26 | - id: check-yaml 27 | - id: end-of-file-fixer 28 | - id: requirements-txt-fixer 29 | - id: double-quote-string-fixer 30 | - id: check-merge-conflict 31 | - id: fix-encoding-pragma 32 | args: ["--remove"] 33 | - id: mixed-line-ending 34 | args: ["--fix=lf"] 35 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xset_3dkp/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='nturgb+d', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 
| dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xset_3dkp/b' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xset_3dkp/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='nturgb+d', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 13 | 
dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = 
'./work_dirs/aagcn/aagcn_pyskl_ntu120_xset_3dkp/j' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, 
pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | 
dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', 
feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir 
= './work_dirs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, 
ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xset_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xsub_3dkp/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='nturgb+d', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | 
test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xsub_3dkp/b' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xsub_3dkp/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='nturgb+d', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | 
dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # 
runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xsub_3dkp/j' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 
45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', 
keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | 
dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, 
hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, 
ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu120_xsub_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xsub_3dkp/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='nturgb+d', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', 
keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xsub_3dkp/b' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xsub_3dkp/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='nturgb+d', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 
'data/nturgbd/ntu60_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, 
metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xsub_3dkp/j' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | 
times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', 
num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 
'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, 
metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 
43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xsub_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', 
num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type 
= 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = 
dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | 
type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='AAGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | 
dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/aagcn/aagcn_pyskl_ntu60_xview_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, 
in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = 
dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | 
test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', 
clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', 
mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, 
by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xset_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | 
videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', 
feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 
5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = 
dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', 
keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu120_xsub_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | 
dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 
| backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning 
policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', 
keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | 
dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xsub_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 
3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning 
policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | 
dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | 
dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | 
model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='CTRGCN', 5 | graph_cfg=dict(layout='coco', mode='spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | 
optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/ctrgcn/ctrgcn_pyskl_ntu60_xview_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/dgstgcn/ntu60_xview_3dkp/b.py: -------------------------------------------------------------------------------- 1 | modality = 'b' 2 | graph = 'nturgb+d' 3 | work_dir = f'./work_dirs/dgstgcn/ntu60_xview_3dkp/{modality}' 4 | 5 | model = dict( 6 | type='RecognizerGCN', 7 | backbone=dict( 8 | type='DGSTGCN', 9 | gcn_ratio=0.125, 10 | gcn_ctr='T', 11 | gcn_ada='T', 12 | tcn_ms_cfg=[(3, 1), (3, 2), (3, 3), (3, 4), ('max', 3), '1x1'], 13 | graph_cfg=dict(layout=graph, mode='random', num_filter=8, init_off=.04, init_std=.02)), 14 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 15 | 16 | dataset_type = 'PoseDataset' 17 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 18 | train_pipeline = [ 19 | dict(type='PreNormalize3D'), 20 | dict(type='RandomRot', theta=0.3), 21 | dict(type='GenSkeFeat', feats=[modality]), 22 | dict(type='UniformSampleDecode', clip_len=100), 23 | dict(type='FormatGCNInput'), 24 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 25 | dict(type='ToTensor', keys=['keypoint']) 26 | ] 27 | val_pipeline = [ 28 | dict(type='PreNormalize3D'), 29 | dict(type='GenSkeFeat', feats=[modality]), 30 | dict(type='UniformSampleDecode', clip_len=100, num_clips=1), 31 | dict(type='FormatGCNInput'), 32 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 33 | dict(type='ToTensor', keys=['keypoint']) 34 | ] 35 | test_pipeline = [ 36 | dict(type='PreNormalize3D'), 37 | 
dict(type='GenSkeFeat', feats=[modality]), 38 | dict(type='UniformSampleDecode', clip_len=100, num_clips=10), 39 | dict(type='FormatGCNInput'), 40 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 41 | dict(type='ToTensor', keys=['keypoint']) 42 | ] 43 | data = dict( 44 | videos_per_gpu=16, 45 | workers_per_gpu=4, 46 | test_dataloader=dict(videos_per_gpu=1), 47 | train=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train'), 48 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 49 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 50 | 51 | # optimizer, 4GPU 52 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 53 | optimizer_config = dict(grad_clip=None) 54 | # learning policy 55 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 56 | total_epochs = 150 57 | checkpoint_config = dict(interval=1) 58 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 59 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 60 | -------------------------------------------------------------------------------- /configs/dgstgcn/ntu60_xview_3dkp/bm.py: -------------------------------------------------------------------------------- 1 | modality = 'bm' 2 | graph = 'nturgb+d' 3 | work_dir = f'./work_dirs/dgstgcn/ntu60_xview_3dkp/{modality}' 4 | 5 | model = dict( 6 | type='RecognizerGCN', 7 | backbone=dict( 8 | type='DGSTGCN', 9 | gcn_ratio=0.125, 10 | gcn_ctr='T', 11 | gcn_ada='T', 12 | tcn_ms_cfg=[(3, 1), (3, 2), (3, 3), (3, 4), ('max', 3), '1x1'], 13 | graph_cfg=dict(layout=graph, mode='random', num_filter=8, init_off=.04, init_std=.02)), 14 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 15 | 16 | dataset_type = 'PoseDataset' 17 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 18 | train_pipeline = [ 19 | dict(type='PreNormalize3D'), 20 | dict(type='RandomRot', 
theta=0.3), 21 | dict(type='GenSkeFeat', feats=[modality]), 22 | dict(type='UniformSampleDecode', clip_len=100), 23 | dict(type='FormatGCNInput'), 24 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 25 | dict(type='ToTensor', keys=['keypoint']) 26 | ] 27 | val_pipeline = [ 28 | dict(type='PreNormalize3D'), 29 | dict(type='GenSkeFeat', feats=[modality]), 30 | dict(type='UniformSampleDecode', clip_len=100, num_clips=1), 31 | dict(type='FormatGCNInput'), 32 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 33 | dict(type='ToTensor', keys=['keypoint']) 34 | ] 35 | test_pipeline = [ 36 | dict(type='PreNormalize3D'), 37 | dict(type='GenSkeFeat', feats=[modality]), 38 | dict(type='UniformSampleDecode', clip_len=100, num_clips=10), 39 | dict(type='FormatGCNInput'), 40 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 41 | dict(type='ToTensor', keys=['keypoint']) 42 | ] 43 | data = dict( 44 | videos_per_gpu=16, 45 | workers_per_gpu=4, 46 | test_dataloader=dict(videos_per_gpu=1), 47 | train=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train'), 48 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 49 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 50 | 51 | # optimizer, 4GPU 52 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 53 | optimizer_config = dict(grad_clip=None) 54 | # learning policy 55 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 56 | total_epochs = 150 57 | checkpoint_config = dict(interval=1) 58 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 59 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 60 | -------------------------------------------------------------------------------- /configs/dgstgcn/ntu60_xview_3dkp/j.py: -------------------------------------------------------------------------------- 
1 | modality = 'j' 2 | graph = 'nturgb+d' 3 | work_dir = f'./work_dirs/dgstgcn/ntu60_xview_3dkp/{modality}' 4 | 5 | model = dict( 6 | type='RecognizerGCN', 7 | backbone=dict( 8 | type='DGSTGCN', 9 | gcn_ratio=0.125, 10 | gcn_ctr='T', 11 | gcn_ada='T', 12 | tcn_ms_cfg=[(3, 1), (3, 2), (3, 3), (3, 4), ('max', 3), '1x1'], 13 | graph_cfg=dict(layout=graph, mode='random', num_filter=8, init_off=.04, init_std=.02)), 14 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 15 | 16 | dataset_type = 'PoseDataset' 17 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 18 | train_pipeline = [ 19 | dict(type='PreNormalize3D'), 20 | dict(type='RandomRot', theta=0.3), 21 | dict(type='GenSkeFeat', feats=[modality]), 22 | dict(type='UniformSampleDecode', clip_len=100), 23 | dict(type='FormatGCNInput'), 24 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 25 | dict(type='ToTensor', keys=['keypoint']) 26 | ] 27 | val_pipeline = [ 28 | dict(type='PreNormalize3D'), 29 | dict(type='GenSkeFeat', feats=[modality]), 30 | dict(type='UniformSampleDecode', clip_len=100, num_clips=1), 31 | dict(type='FormatGCNInput'), 32 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 33 | dict(type='ToTensor', keys=['keypoint']) 34 | ] 35 | test_pipeline = [ 36 | dict(type='PreNormalize3D'), 37 | dict(type='GenSkeFeat', feats=[modality]), 38 | dict(type='UniformSampleDecode', clip_len=100, num_clips=10), 39 | dict(type='FormatGCNInput'), 40 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 41 | dict(type='ToTensor', keys=['keypoint']) 42 | ] 43 | data = dict( 44 | videos_per_gpu=16, 45 | workers_per_gpu=4, 46 | test_dataloader=dict(videos_per_gpu=1), 47 | train=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train'), 48 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 49 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 50 | 51 | # 
optimizer, 4GPU 52 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 53 | optimizer_config = dict(grad_clip=None) 54 | # learning policy 55 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 56 | total_epochs = 150 57 | checkpoint_config = dict(interval=1) 58 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 59 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 60 | -------------------------------------------------------------------------------- /configs/dgstgcn/ntu60_xview_3dkp/jm.py: -------------------------------------------------------------------------------- 1 | modality = 'jm' 2 | graph = 'nturgb+d' 3 | work_dir = f'./work_dirs/dgstgcn/ntu60_xview_3dkp/{modality}' 4 | 5 | model = dict( 6 | type='RecognizerGCN', 7 | backbone=dict( 8 | type='DGSTGCN', 9 | gcn_ratio=0.125, 10 | gcn_ctr='T', 11 | gcn_ada='T', 12 | tcn_ms_cfg=[(3, 1), (3, 2), (3, 3), (3, 4), ('max', 3), '1x1'], 13 | graph_cfg=dict(layout=graph, mode='random', num_filter=8, init_off=.04, init_std=.02)), 14 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 15 | 16 | dataset_type = 'PoseDataset' 17 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 18 | train_pipeline = [ 19 | dict(type='PreNormalize3D'), 20 | dict(type='RandomRot', theta=0.3), 21 | dict(type='GenSkeFeat', feats=[modality]), 22 | dict(type='UniformSampleDecode', clip_len=100), 23 | dict(type='FormatGCNInput'), 24 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 25 | dict(type='ToTensor', keys=['keypoint']) 26 | ] 27 | val_pipeline = [ 28 | dict(type='PreNormalize3D'), 29 | dict(type='GenSkeFeat', feats=[modality]), 30 | dict(type='UniformSampleDecode', clip_len=100, num_clips=1), 31 | dict(type='FormatGCNInput'), 32 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 33 | dict(type='ToTensor', keys=['keypoint']) 34 | ] 35 | test_pipeline = [ 36 | dict(type='PreNormalize3D'), 37 | dict(type='GenSkeFeat', 
feats=[modality]), 38 | dict(type='UniformSampleDecode', clip_len=100, num_clips=10), 39 | dict(type='FormatGCNInput'), 40 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 41 | dict(type='ToTensor', keys=['keypoint']) 42 | ] 43 | data = dict( 44 | videos_per_gpu=16, 45 | workers_per_gpu=4, 46 | test_dataloader=dict(videos_per_gpu=1), 47 | train=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train'), 48 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 49 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 50 | 51 | # optimizer, 4GPU 52 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 53 | optimizer_config = dict(grad_clip=None) 54 | # learning policy 55 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 56 | total_epochs = 150 57 | checkpoint_config = dict(interval=1) 58 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 59 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 60 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline 
= [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | 
model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | 
optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', 
keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', 
keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xset_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/b.py: 
-------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = 
dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 
| dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', 
keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/j' 61 | -------------------------------------------------------------------------------- 
/configs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, 
split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu120_xsub_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xsub_3dkp/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='nturgb+d', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 31 | 
dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xsub_3dkp/b' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xsub_3dkp/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='nturgb+d', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize3D'), 12 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | 
dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize3D'), 21 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize3D'), 30 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xsub_3dkp/j' 61 | 
-------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | 
test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | 
dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | 
dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = 
'./work_dirs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, 
pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xsub_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | 
dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', 
dataset='coco', feats=['bm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level 
= 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/bm' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | 
val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='MSG3D', 5 | graph_cfg=dict(layout='coco', mode='binary_adj')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=384)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', 
keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xview_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xview_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xview_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/msg3d/msg3d_pyskl_ntu60_xview_hrnet/jm' 61 | -------------------------------------------------------------------------------- /configs/rgbpose_conv3d/compress_nturgbd.py: -------------------------------------------------------------------------------- 1 | import multiprocessing as mp 2 | import os 3 | import subprocess 4 | 5 | from pyskl.smp import * 6 | 7 | 8 | def get_shape(vid): 9 | cmd = 'ffprobe -v error -select_streams v:0 -show_entries stream=width,height -of csv=s=x:p=0 \"{}\"'.format(vid) 10 | w, h = subprocess.check_output(cmd, shell=True).decode('utf-8').split('x') 11 | 
return int(w), int(h) 12 | 13 | 14 | def compress(src, dest, shape=None, target_size=540, fps=-1): 15 | if shape is None: 16 | shape = get_shape(src) 17 | w, h = shape 18 | scale_str = f'-vf scale=-2:{target_size}' if w >= h else f'-vf scale={target_size}:-2' 19 | fps_str = f'-r {fps}' if fps > 0 else '' 20 | quality_str = '-q:v 1' 21 | vcodec_str = '-c:v libx264' 22 | cmd = f'ffmpeg -y -loglevel error -i {src} -threads 1 {quality_str} {scale_str} {fps_str} {vcodec_str} {dest}' 23 | os.system(cmd) 24 | 25 | 26 | def compress_nturgbd(name): 27 | src = name 28 | dest = src.replace('nturgbd_raw', 'nturgbd_videos').replace('_rgb.avi', '.mp4') 29 | shape = (1920, 1080) 30 | compress(src, dest, shape) 31 | 32 | os.makedirs('../../data/nturgbd_videos', exist_ok=True) 33 | files = ls('../../data/nturgbd_raw', match='.avi') 34 | pool = mp.Pool(32) 35 | pool.map(compress_nturgbd, files) 36 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_pyskl_ntu120_xset_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=120, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu120_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | 
dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xset_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xset_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xset_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/stgcn/stgcn_pyskl_ntu120_xset_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_pyskl_ntu60_xsub_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, 
in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 
55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/stgcn/stgcn_pyskl_ntu60_xsub_hrnet/b' 61 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_pyskl_ntu60_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 6 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 7 | 8 | dataset_type = 'PoseDataset' 9 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 10 | train_pipeline = [ 11 | dict(type='PreNormalize2D'), 12 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 13 | dict(type='UniformSample', clip_len=100), 14 | dict(type='PoseDecode'), 15 | dict(type='FormatGCNInput', num_person=2), 16 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 17 | dict(type='ToTensor', keys=['keypoint']) 18 | ] 19 | val_pipeline = [ 20 | dict(type='PreNormalize2D'), 21 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 22 | dict(type='UniformSample', clip_len=100, num_clips=1), 23 | dict(type='PoseDecode'), 24 | dict(type='FormatGCNInput', num_person=2), 25 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 26 | dict(type='ToTensor', keys=['keypoint']) 27 | ] 28 | test_pipeline = [ 29 | dict(type='PreNormalize2D'), 30 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 31 | dict(type='UniformSample', clip_len=100, num_clips=10), 32 | dict(type='PoseDecode'), 33 | dict(type='FormatGCNInput', num_person=2), 34 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 35 | dict(type='ToTensor', keys=['keypoint']) 36 | ] 37 | data = dict( 38 | videos_per_gpu=16, 39 | workers_per_gpu=2, 40 | test_dataloader=dict(videos_per_gpu=1), 41 | 
train=dict( 42 | type='RepeatDataset', 43 | times=5, 44 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=train_pipeline, split='xsub_train')), 45 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=val_pipeline, split='xsub_val'), 46 | test=dict(type=dataset_type, ann_file=ann_file, pipeline=test_pipeline, split='xsub_val')) 47 | 48 | # optimizer 49 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0005, nesterov=True) 50 | optimizer_config = dict(grad_clip=None) 51 | # learning policy 52 | lr_config = dict(policy='CosineAnnealing', min_lr=0, by_epoch=False) 53 | total_epochs = 16 54 | checkpoint_config = dict(interval=1) 55 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 56 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 57 | 58 | # runtime settings 59 | log_level = 'INFO' 60 | work_dir = './work_dirs/stgcn/stgcn_pyskl_ntu60_xsub_hrnet/j' 61 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | 
dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/b' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['bm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, 
split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/bm' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['j']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | 
optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/j' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['jm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = 
dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_3dkp/jm' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime 
settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/b' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/bm' 44 | 
-------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/j' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/jm.py: 
-------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xsub_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xsub_hrnet/jm' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | 
tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['b']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/b' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | 
dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['bm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/bm' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', 
dataset='nturgb+d', feats=['j']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/j' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='nturgb+d', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_3danno.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize3D'), 13 | dict(type='GenSkeFeat', dataset='nturgb+d', feats=['jm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 
| dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_3dkp/jm' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/b.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['b']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | 
workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/b' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/bm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['bm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, 
ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/bm' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/j.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['j']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = 
data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/j' 44 | -------------------------------------------------------------------------------- /configs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/jm.py: -------------------------------------------------------------------------------- 1 | model = dict( 2 | type='RecognizerGCN', 3 | backbone=dict( 4 | type='STGCN', 5 | tcn_dropout=0.5, 6 | graph_cfg=dict(layout='coco', mode='stgcn_spatial')), 7 | cls_head=dict(type='GCNHead', num_classes=60, in_channels=256)) 8 | 9 | dataset_type = 'PoseDataset' 10 | ann_file = 'data/nturgbd/ntu60_hrnet.pkl' 11 | pipeline = [ 12 | dict(type='PreNormalize2D'), 13 | dict(type='GenSkeFeat', dataset='coco', feats=['jm']), 14 | dict(type='PadTo', length=300, mode='zero'), 15 | dict(type='PoseDecode'), 16 | dict(type='FormatGCNInput', num_person=2), 17 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 18 | dict(type='ToTensor', keys=['keypoint']) 19 | ] 20 | data = dict( 21 | videos_per_gpu=16, 22 | workers_per_gpu=2, 23 | test_dataloader=dict(videos_per_gpu=1), 24 | train=dict( 25 | type='RepeatDataset', 26 | times=5, 27 | dataset=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_train')), 28 | val=dict(type=dataset_type, ann_file=ann_file, pipeline=pipeline, split='xview_val')) 29 | data['test'] = data['val'] 30 | 31 | # optimizer 32 | optimizer = dict(type='SGD', lr=0.1, momentum=0.9, weight_decay=0.0001) 33 | optimizer_config = dict(grad_clip=None) 34 | # learning 
policy 35 | lr_config = dict(policy='step', step=[2, 10]) 36 | total_epochs = 16 37 | checkpoint_config = dict(interval=1) 38 | evaluation = dict(interval=1, metrics=['top_k_accuracy']) 39 | log_config = dict(interval=100, hooks=[dict(type='TextLoggerHook')]) 40 | 41 | # runtime settings 42 | log_level = 'INFO' 43 | work_dir = './work_dirs/stgcn/stgcn_vanilla_ntu60_xview_hrnet/jm' 44 | -------------------------------------------------------------------------------- /demo/hagrid.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kennymckormick/pyskl/3f6795f2573953342457abdb338f8943542f13c4/demo/hagrid.pth -------------------------------------------------------------------------------- /demo/ntu_sample.avi: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kennymckormick/pyskl/3f6795f2573953342457abdb338f8943542f13c4/demo/ntu_sample.avi -------------------------------------------------------------------------------- /demo/stgcnpp_gesture.py: -------------------------------------------------------------------------------- 1 | graph = 'handmp' 2 | modality = 'j' 3 | 4 | model = dict( 5 | type='RecognizerGCN', 6 | backbone=dict( 7 | type='STGCN', 8 | in_channels=2, 9 | gcn_adaptive='init', 10 | gcn_with_res=True, 11 | tcn_type='mstcn', 12 | num_stages=6, 13 | down_stages=[6], 14 | inflate_stages=[6], 15 | graph_cfg=dict(layout=graph, mode='spatial')), 16 | cls_head=dict(type='GCNHead', num_classes=40, in_channels=128)) 17 | 18 | test_pipeline = [ 19 | dict(type='PreNormalize2D', threshold=0, mode='auto'), 20 | dict(type='GenSkeFeat', dataset=graph, feats=[modality]), 21 | dict(type='UniformSample', clip_len=10, num_clips=1), 22 | dict(type='PoseDecode'), 23 | dict(type='FormatGCNInput', num_person=1), 24 | dict(type='Collect', keys=['keypoint', 'label'], meta_keys=[]), 25 | dict(type='ToTensor', keys=['keypoint']) 26 | ] 27 | 
-------------------------------------------------------------------------------- /pyskl/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | import mmcv 3 | from mmcv import digit_version 4 | 5 | from .version import __version__ 6 | 7 | mmcv_minimum_version = '1.3.6' 8 | mmcv_maximum_version = '2.1.0' 9 | mmcv_version = digit_version(mmcv.__version__) 10 | 11 | assert (digit_version(mmcv_minimum_version) <= mmcv_version 12 | <= digit_version(mmcv_maximum_version)), \ 13 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 14 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 15 | 16 | __all__ = ['__version__'] 17 | -------------------------------------------------------------------------------- /pyskl/apis/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.engine import multi_gpu_test, single_gpu_test 3 | 4 | from .inference import inference_recognizer, init_recognizer 5 | from .train import init_random_seed, train_model 6 | 7 | __all__ = [ 8 | 'train_model', 'init_recognizer', 'inference_recognizer', 'multi_gpu_test', 9 | 'single_gpu_test', 'init_random_seed' 10 | ] 11 | -------------------------------------------------------------------------------- /pyskl/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .evaluation import * # noqa: F401, F403 3 | from .hooks import * # noqa: F401, F403 4 | -------------------------------------------------------------------------------- /pyskl/core/hooks.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
# Copyright (c) OpenMMLab. All rights reserved.
import functools
import torch
import warnings


class OutputHook:
    """Record the forward outputs of selected layers inside a module.

    Args:
        module (nn.Module): The whole module to get layers.
        outputs (tuple[str] | list[str]): Layer name to output. Default: None.
        as_tensor (bool): Determine to return a tensor or a numpy array.
            Default: False.
    """

    def __init__(self, module, outputs=None, as_tensor=False):
        self.outputs = outputs
        self.as_tensor = as_tensor
        self.layer_outputs = {}
        self.handles = []
        self.register(module)

    def register(self, module):
        """Attach a forward hook to every layer named in ``self.outputs``."""

        def make_hook(name):

            def hook(model, input, output):
                if not isinstance(output, torch.Tensor):
                    # Non-tensor outputs are stored verbatim with a warning.
                    warnings.warn(f'Directly return the output from {name}, '
                                  f'since it is not a tensor')
                    self.layer_outputs[name] = output
                elif self.as_tensor:
                    self.layer_outputs[name] = output
                else:
                    self.layer_outputs[name] = output.detach().cpu().numpy()

            return hook

        if isinstance(self.outputs, (list, tuple)):
            for name in self.outputs:
                try:
                    target = rgetattr(module, name)
                    handle = target.register_forward_hook(make_hook(name))
                except AttributeError:
                    raise AttributeError(f'Module {name} not found')
                self.handles.append(handle)

    def remove(self):
        """Detach every registered forward hook."""
        for handle in self.handles:
            handle.remove()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.remove()


# using wonder's beautiful simplification:
# https://stackoverflow.com/questions/31174295/getattr-and-setattr-on-nested-objects
def rgetattr(obj, attr, *args):
    """``getattr`` that follows a dotted attribute path (e.g. ``'a.b.c'``)."""

    def _getattr(node, field):
        return getattr(node, field, *args)

    return functools.reduce(_getattr, [obj] + attr.split('.'))
# Copyright (c) OpenMMLab. All rights reserved.
from collections.abc import Sequence
from mmcv.utils import build_from_cfg

from ..builder import PIPELINES


@PIPELINES.register_module()
class Compose:
    """Compose a data pipeline with a sequence of transforms.

    Args:
        transforms (list[dict | callable]):
            Either config dicts of transforms or transform objects.
    """

    def __init__(self, transforms):
        assert isinstance(transforms, Sequence)
        self.transforms = []
        for item in transforms:
            # Config dicts are built through the registry; ready-made
            # callables are taken as-is.
            if isinstance(item, dict):
                self.transforms.append(build_from_cfg(item, PIPELINES))
            elif callable(item):
                self.transforms.append(item)
            else:
                raise TypeError(f'transform must be callable or a dict, '
                                f'but got {type(item)}')

    def __call__(self, data):
        """Apply the transforms in order.

        Args:
            data (dict): A result dict contains the data to transform.

        Returns:
            dict: Transformed data, or ``None`` as soon as any transform
                returns ``None``.
        """
        for transform in self.transforms:
            data = transform(data)
            if data is None:
                return None
        return data

    def __repr__(self):
        body = ''.join(f'\n    {t}' for t in self.transforms)
        return f'{self.__class__.__name__}({body}\n)'
def load_annotations(self):
    """Load annotation file to get video information.

    Each non-JSON annotation line is ``<relative/path> <label...>``; the
    returned list holds one dict per sample with an absolute ``filename``
    and an int (or, for multi-class, list-of-int) ``label``.
    """
    # JSON annotations are handled by the generic loader on the base class.
    if self.ann_file.endswith('.json'):
        return self.load_json_annotations()

    video_infos = []
    with open(self.ann_file, 'r') as f:
        for raw_line in f:
            parts = raw_line.strip().split()
            if self.multi_class:
                # Multi-label sample: every field after the path is a label.
                assert self.num_classes is not None
                filename, label = parts[0], [int(x) for x in parts[1:]]
            else:
                filename, label = parts
                label = int(label)
            video_infos.append(
                dict(filename=osp.join(self.data_prefix, filename),
                     label=label))
    return video_infos
Copyright (c) OpenMMLab. All rights reserved. 2 | from .builder import * # noqa: F401, F403 3 | from .cnns import * # noqa: F401, F403 4 | from .gcns import * # noqa: F401, F403 5 | from .heads import * # noqa: F401, F403 6 | from .losses import * # noqa: F401, F403 7 | from .recognizers import * # noqa: F401, F403 8 | -------------------------------------------------------------------------------- /pyskl/models/builder.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.cnn import MODELS as MMCV_MODELS 3 | from mmcv.utils import Registry 4 | 5 | MODELS = Registry('models', parent=MMCV_MODELS) 6 | BACKBONES = MODELS 7 | HEADS = MODELS 8 | RECOGNIZERS = MODELS 9 | LOSSES = MODELS 10 | 11 | 12 | def build_backbone(cfg): 13 | """Build backbone.""" 14 | return BACKBONES.build(cfg) 15 | 16 | 17 | def build_head(cfg): 18 | """Build head.""" 19 | return HEADS.build(cfg) 20 | 21 | 22 | def build_recognizer(cfg): 23 | """Build recognizer.""" 24 | return RECOGNIZERS.build(cfg) 25 | 26 | 27 | def build_loss(cfg): 28 | """Build loss.""" 29 | return LOSSES.build(cfg) 30 | 31 | 32 | def build_model(cfg): 33 | """Build model.""" 34 | args = cfg.copy() 35 | obj_type = args.pop('type') 36 | if obj_type in RECOGNIZERS: 37 | return build_recognizer(cfg) 38 | raise ValueError(f'{obj_type} is not registered') 39 | -------------------------------------------------------------------------------- /pyskl/models/cnns/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
2 | from .c3d import C3D 3 | from .potion import PoTion 4 | from .resnet import ResNet 5 | from .resnet3d import ResNet3d 6 | from .resnet3d_slowfast import ResNet3dSlowFast 7 | from .resnet3d_slowonly import ResNet3dSlowOnly 8 | from .rgbposeconv3d import RGBPoseConv3D 9 | from .x3d import X3D 10 | 11 | __all__ = [ 12 | 'C3D', 'X3D', 'ResNet', 'ResNet3d', 'ResNet3dSlowFast', 'ResNet3dSlowOnly', 'RGBPoseConv3D', 'PoTion' 13 | ] 14 | -------------------------------------------------------------------------------- /pyskl/models/cnns/resnet3d_slowonly.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from ..builder import BACKBONES 3 | from .resnet3d import ResNet3d 4 | 5 | 6 | @BACKBONES.register_module() 7 | class ResNet3dSlowOnly(ResNet3d): 8 | """SlowOnly backbone based on ResNet3d. 9 | 10 | Args: 11 | conv1_kernel (tuple[int]): Kernel size of the first conv layer. Default: (1, 7, 7). 12 | inflate (tuple[int]): Inflate Dims of each block. Default: (0, 0, 1, 1). 13 | **kwargs (keyword arguments): Other keywords arguments for 'ResNet3d'. 
import math
import torch.nn as nn


def conv_branch_init(conv, branches):
    """Initialize a conv layer that feeds one of ``branches`` parallel paths.

    The normal std is scaled down by the branch count so that the summed
    branch outputs keep a comparable magnitude.

    Args:
        conv (nn.Module): Convolution layer to initialize in place.
        branches (int): Number of parallel branches the layer belongs to.
    """
    weight = conv.weight
    n = weight.size(0)
    k1 = weight.size(1)
    k2 = weight.size(2)
    nn.init.normal_(weight, 0, math.sqrt(2. / (n * k1 * k2 * branches)))
    # Fix: convs followed by BN are commonly built with bias=False; calling
    # constant_ on a missing bias raised instead of being a no-op.
    if conv.bias is not None:
        nn.init.constant_(conv.bias, 0)


def conv_init(conv):
    """Kaiming-initialize a conv layer's weight; zero the bias when present."""
    nn.init.kaiming_normal_(conv.weight, mode='fan_out')
    if conv.bias is not None:
        nn.init.constant_(conv.bias, 0)


def bn_init(bn, scale):
    """Initialize a batch-norm layer: weight to ``scale``, bias to zero."""
    nn.init.constant_(bn.weight, scale)
    nn.init.constant_(bn.bias, 0)
15 | """ 16 | 17 | def __init__(self, loss_weight=1.0): 18 | super().__init__() 19 | self.loss_weight = loss_weight 20 | 21 | @abstractmethod 22 | def _forward(self, *args, **kwargs): 23 | pass 24 | 25 | def forward(self, *args, **kwargs): 26 | """Defines the computation performed at every call. 27 | 28 | Args: 29 | *args: The positional arguments for the corresponding 30 | loss. 31 | **kwargs: The keyword arguments for the corresponding 32 | loss. 33 | 34 | Returns: 35 | torch.Tensor: The calculated loss. 36 | """ 37 | ret = self._forward(*args, **kwargs) 38 | if isinstance(ret, dict): 39 | for k in ret: 40 | if 'loss' in k: 41 | ret[k] *= self.loss_weight 42 | else: 43 | ret *= self.loss_weight 44 | return ret 45 | -------------------------------------------------------------------------------- /pyskl/models/recognizers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .mm_recognizer3d import MMRecognizer3D 3 | from .recognizer2d import Recognizer2D 4 | from .recognizer3d import Recognizer3D 5 | from .recognizergcn import RecognizerGCN 6 | 7 | __all__ = ['Recognizer2D', 'Recognizer3D', 'RecognizerGCN', 'MMRecognizer3D'] 8 | -------------------------------------------------------------------------------- /pyskl/models/recognizers/recognizer2d.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 
from torch import nn

from ..builder import RECOGNIZERS
from .base import BaseRecognizer


@RECOGNIZERS.register_module()
class Recognizer2D(BaseRecognizer):
    """2D recognizer model framework."""

    def forward_train(self, imgs, label, **kwargs):
        """Defines the computation performed at every call when training."""
        # NOTE(review): assumes imgs is laid out as (batch, num_segs, C, H, W)
        # so that folding dim 1 into the batch is meaningful — TODO confirm.
        assert self.with_cls_head
        batches = imgs.shape[0]
        # Fold the segment axis into the batch axis for the 2D backbone.
        imgs = imgs.reshape((-1, ) + imgs.shape[2:])
        # Segments per sample, recovered from the flattened batch size.
        num_segs = imgs.shape[0] // batches

        losses = dict()

        x = self.extract_feat(imgs)
        # Restore the (batch, num_segs, ...) layout before the cls head.
        x = x.reshape((batches, num_segs) + x.shape[1:])

        cls_score = self.cls_head(x)
        gt_label = label.squeeze()
        loss_cls = self.cls_head.loss(cls_score, gt_label, **kwargs)
        losses.update(loss_cls)

        return losses

    def forward_test(self, imgs, **kwargs):
        """Defines the computation performed at every call when evaluation and testing."""
        batches = imgs.shape[0]
        # Fold clips/crops/segments into the batch axis for the 2D backbone.
        imgs = imgs.reshape((-1, ) + imgs.shape[2:])

        x = self.extract_feat(imgs)
        # 'num_segs' must be supplied via test_cfg; the crop count is then
        # inferred from the flattened feature batch below.
        assert 'num_segs' in self.test_cfg
        num_segs = self.test_cfg['num_segs']
        assert x.shape[0] % (batches * num_segs) == 0
        num_crops = x.shape[0] // (batches * num_segs)

        if self.test_cfg.get('feat_ext', False):
            # Feature-extraction mode: return pooled features, no cls head.
            # perform spatial pooling
            avg_pool = nn.AdaptiveAvgPool2d(1)
            x = avg_pool(x)
            # squeeze dimensions
            x = x.reshape((batches, num_crops, num_segs, -1))
            # temporal average pooling
            # (first mean over crops, then over segments)
            x = x.mean(axis=1).mean(axis=1)
            return x.cpu().numpy()

        x = x.reshape((batches * num_crops, num_segs) + x.shape[1:])
        cls_score = self.cls_head(x)
        cls_score = cls_score.reshape(batches, num_crops, cls_score.shape[-1])
        # NOTE(review): average_clip presumably fuses the per-crop scores
        # according to test_cfg; it is defined on the base class — verify.
        cls_score = self.average_clip(cls_score)
        return cls_score.cpu().numpy()
/pyskl/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from .collect_env import * # noqa: F401, F403 3 | from .graph import * # noqa: F401, F403 4 | from .misc import * # noqa: F401, F403 5 | 6 | try: 7 | from .visualize import * # noqa: F401, F403 8 | except ImportError: 9 | pass 10 | -------------------------------------------------------------------------------- /pyskl/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) OpenMMLab. All rights reserved. 2 | from mmcv.utils import collect_env as collect_basic_env 3 | from mmcv.utils import get_git_hash 4 | 5 | import pyskl 6 | 7 | 8 | def collect_env(): 9 | env_info = collect_basic_env() 10 | env_info['pyskl'] = ( 11 | pyskl.__version__ + '+' + get_git_hash(digits=7)) 12 | return env_info 13 | 14 | 15 | if __name__ == '__main__': 16 | for name, val in collect_env().items(): 17 | print(f'{name}: {val}') 18 | -------------------------------------------------------------------------------- /pyskl/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | 3 | __version__ = '0.1.0' 4 | 5 | 6 | def parse_version_info(version_str): 7 | version_info = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | version_info.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | version_info.append(int(patch_version[0])) 14 | version_info.append(f'rc{patch_version[1]}') 15 | return tuple(version_info) 16 | 17 | 18 | version_info = parse_version_info(__version__) 19 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | decord>=0.6.0 2 | fvcore 3 | matplotlib 4 | mmcv-full==1.5.0 5 | mmdet==2.23.0 6 | mmpose==0.24.0 7 | moviepy 8 | numpy>=1.19.5 9 | opencv-contrib-python 10 | opencv-python 11 | pymemcache 12 | scipy 13 | torch>=1.5 14 | tqdm 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [bdist_wheel] 2 | universal=1 3 | 4 | [aliases] 5 | test=pytest 6 | 7 | [tool:pytest] 8 | addopts=tests/ 9 | 10 | [yapf] 11 | based_on_style = pep8 12 | blank_line_before_nested_class_or_def = true 13 | split_before_expression_after_opening_paren = true 14 | split_penalty_import_names=0 15 | SPLIT_PENALTY_AFTER_OPENING_BRACKET=800 16 | 17 | [isort] 18 | line_length = 119 19 | multi_line_output = 0 20 | known_standard_library = pkg_resources,setuptools 21 | known_first_party = pyskl 22 | known_third_party = cv2,decord,fvcore,matplotlib,mmcv,moviepy,numpy,requests,scipy,torch,tqdm 23 | no_lines_before = STDLIB,LOCALFOLDER 24 | default_section = THIRDPARTY 25 | -------------------------------------------------------------------------------- /tools/data/label_map/diving48.txt: -------------------------------------------------------------------------------- 1 | Back+15som+05Twis+FREE 2 | Back+15som+15Twis+FREE 3 | Back+15som+25Twis+FREE 4 | 
Back+15som+NoTwis+PIKE 5 | Back+15som+NoTwis+TUCK 6 | Back+25som+15Twis+PIKE 7 | Back+25som+25Twis+PIKE 8 | Back+25som+NoTwis+PIKE 9 | Back+25som+NoTwis+TUCK 10 | Back+2som+15Twis+FREE 11 | Back+2som+25Twis+FREE 12 | Back+35som+NoTwis+PIKE 13 | Back+35som+NoTwis+TUCK 14 | Back+3som+NoTwis+PIKE 15 | Back+3som+NoTwis+TUCK 16 | Back+Dive+NoTwis+PIKE 17 | Back+Dive+NoTwis+TUCK 18 | Forward+15som+1Twis+FREE 19 | Forward+15som+2Twis+FREE 20 | Forward+15som+NoTwis+PIKE 21 | Forward+1som+NoTwis+PIKE 22 | Forward+25som+1Twis+PIKE 23 | Forward+25som+2Twis+PIKE 24 | Forward+25som+3Twis+PIKE 25 | Forward+25som+NoTwis+PIKE 26 | Forward+25som+NoTwis+TUCK 27 | Forward+35som+NoTwis+PIKE 28 | Forward+35som+NoTwis+TUCK 29 | Forward+45som+NoTwis+TUCK 30 | Forward+Dive+NoTwis+PIKE 31 | Forward+Dive+NoTwis+STR 32 | Inward+15som+NoTwis+PIKE 33 | Inward+15som+NoTwis+TUCK 34 | Inward+25som+NoTwis+PIKE 35 | Inward+25som+NoTwis+TUCK 36 | Inward+35som+NoTwis+TUCK 37 | Inward+Dive+NoTwis+PIKE 38 | Reverse+15som+05Twis+FREE 39 | Reverse+15som+15Twis+FREE 40 | Reverse+15som+25Twis+FREE 41 | Reverse+15som+35Twis+FREE 42 | Reverse+15som+NoTwis+PIKE 43 | Reverse+25som+15Twis+PIKE 44 | Reverse+25som+NoTwis+PIKE 45 | Reverse+25som+NoTwis+TUCK 46 | Reverse+35som+NoTwis+TUCK 47 | Reverse+Dive+NoTwis+PIKE 48 | Reverse+Dive+NoTwis+TUCK 49 | -------------------------------------------------------------------------------- /tools/data/label_map/hmdb51.txt: -------------------------------------------------------------------------------- 1 | brush_hair 2 | cartwheel 3 | catch 4 | chew 5 | clap 6 | climb 7 | climb_stairs 8 | dive 9 | draw_sword 10 | dribble 11 | drink 12 | eat 13 | fall_floor 14 | fencing 15 | flic_flac 16 | golf 17 | handstand 18 | hit 19 | hug 20 | jump 21 | kick 22 | kick_ball 23 | kiss 24 | laugh 25 | pick 26 | pour 27 | pullup 28 | punch 29 | push 30 | pushup 31 | ride_bike 32 | ride_horse 33 | run 34 | shake_hands 35 | shoot_ball 36 | shoot_bow 37 | shoot_gun 38 | sit 39 | situp 40 | 
smile 41 | smoke 42 | somersault 43 | stand 44 | swing_baseball 45 | sword 46 | sword_exercise 47 | talk 48 | throw 49 | turn 50 | walk 51 | wave 52 | -------------------------------------------------------------------------------- /tools/data/label_map/ucf101.txt: -------------------------------------------------------------------------------- 1 | ApplyEyeMakeup 2 | ApplyLipstick 3 | Archery 4 | BabyCrawling 5 | BalanceBeam 6 | BandMarching 7 | BaseballPitch 8 | Basketball 9 | BasketballDunk 10 | BenchPress 11 | Biking 12 | Billiards 13 | BlowDryHair 14 | BlowingCandles 15 | BodyWeightSquats 16 | Bowling 17 | BoxingPunchingBag 18 | BoxingSpeedBag 19 | BreastStroke 20 | BrushingTeeth 21 | CleanAndJerk 22 | CliffDiving 23 | CricketBowling 24 | CricketShot 25 | CuttingInKitchen 26 | Diving 27 | Drumming 28 | Fencing 29 | FieldHockeyPenalty 30 | FloorGymnastics 31 | FrisbeeCatch 32 | FrontCrawl 33 | GolfSwing 34 | Haircut 35 | Hammering 36 | HammerThrow 37 | HandstandPushups 38 | HandstandWalking 39 | HeadMassage 40 | HighJump 41 | HorseRace 42 | HorseRiding 43 | HulaHoop 44 | IceDancing 45 | JavelinThrow 46 | JugglingBalls 47 | JumpingJack 48 | JumpRope 49 | Kayaking 50 | Knitting 51 | LongJump 52 | Lunges 53 | MilitaryParade 54 | Mixing 55 | MoppingFloor 56 | Nunchucks 57 | ParallelBars 58 | PizzaTossing 59 | PlayingCello 60 | PlayingDaf 61 | PlayingDhol 62 | PlayingFlute 63 | PlayingGuitar 64 | PlayingPiano 65 | PlayingSitar 66 | PlayingTabla 67 | PlayingViolin 68 | PoleVault 69 | PommelHorse 70 | PullUps 71 | Punch 72 | PushUps 73 | Rafting 74 | RockClimbingIndoor 75 | RopeClimbing 76 | Rowing 77 | SalsaSpin 78 | ShavingBeard 79 | Shotput 80 | SkateBoarding 81 | Skiing 82 | Skijet 83 | SkyDiving 84 | SoccerJuggling 85 | SoccerPenalty 86 | StillRings 87 | SumoWrestling 88 | Surfing 89 | Swing 90 | TableTennisShot 91 | TaiChi 92 | TennisSwing 93 | ThrowDiscus 94 | TrampolineJumping 95 | Typing 96 | UnevenBars 97 | VolleyballSpiking 98 | WalkingWithDog 99 | 
WallPushups 100 | WritingOnBoard 101 | YoYo 102 | -------------------------------------------------------------------------------- /tools/dist_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export MASTER_PORT=$((12000 + $RANDOM % 20000)) 4 | set -x 5 | 6 | SCRIPT=$1 7 | GPUS=$2 8 | 9 | MKL_SERVICE_FORCE_INTEL=1 PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$MASTER_PORT $SCRIPT ${@:3} 11 | # Any arguments from the third one are captured by ${@:3} 12 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export MASTER_PORT=$((12000 + $RANDOM % 20000)) 4 | set -x 5 | 6 | CONFIG=$1 7 | CHECKPOINT=$2 8 | GPUS=$3 9 | 10 | MKL_SERVICE_FORCE_INTEL=1 PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 11 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$MASTER_PORT \ 12 | $(dirname "$0")/test.py $CONFIG -C $CHECKPOINT --launcher pytorch ${@:4} 13 | # Arguments starting from the fourth one are captured by ${@:4} 14 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | export MASTER_PORT=$((12000 + $RANDOM % 20000)) 4 | set -x 5 | 6 | CONFIG=$1 7 | GPUS=$2 8 | 9 | MKL_SERVICE_FORCE_INTEL=1 PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 10 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$MASTER_PORT \ 11 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 12 | # Any arguments from the third one are captured by ${@:3} 13 | --------------------------------------------------------------------------------