├── requirements
│   ├── docs.txt
│   ├── build.txt
│   ├── optional.txt
│   ├── tests.txt
│   ├── readthedocs.txt
│   └── runtime.txt
├── tools
│   ├── torchstat
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── analyzer.cpython-37.pyc
│   │   │   ├── analyzer.cpython-38.pyc
│   │   │   ├── reporter.cpython-37.pyc
│   │   │   ├── reporter.cpython-38.pyc
│   │   │   ├── stat_tree.cpython-37.pyc
│   │   │   ├── stat_tree.cpython-38.pyc
│   │   │   ├── compute_madd.cpython-37.pyc
│   │   │   ├── compute_madd.cpython-38.pyc
│   │   │   ├── compute_flops.cpython-37.pyc
│   │   │   ├── compute_flops.cpython-38.pyc
│   │   │   ├── compute_memory.cpython-37.pyc
│   │   │   └── compute_memory.cpython-38.pyc
│   │   ├── README.md
│   │   ├── compute_memory.py
│   │   ├── reporter.py
│   │   ├── compute_madd.py
│   │   ├── stat_tree.py
│   │   ├── analyzer.py
│   │   └── compute_flops.py
│   ├── __pycache__
│   │   ├── torchstat_utils.cpython-37.pyc
│   │   └── torchstat_utils.cpython-38.pyc
│   ├── dist_train.sh
│   ├── dist_test.sh
│   ├── summary_network.py
│   ├── torchstat_utils.py
│   ├── test.py
│   └── train.py
├── models
│   ├── backbones
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── xhrnet.cpython-38.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── hwhrnet.cpython-37.pyc
│   │   │   ├── hwhrnet.cpython-38.pyc
│   │   │   ├── whthrnet.cpython-37.pyc
│   │   │   ├── whthrnet.cpython-38.pyc
│   │   │   ├── litehrnet.cpython-37.pyc
│   │   │   └── litehrnet.cpython-38.pyc
│   │   └── xhrnet.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   └── builder.cpython-38.pyc
│   ├── __init__.py
│   └── builder.py
├── requirements.txt
├── configs
│   └── xhrnet
│       ├── pxhrnet_30_coco_256x192.py
│       ├── pxhrnet_30_coco_384x288.py
│       ├── sxhrnet_18_coco_256x192.py
│       ├── sxhrnet_18_coco_384x288.py
│       ├── sxhrnet_30_coco_256x192.py
│       └── sxhrnet_30_coco_384x288.py
└── README.md

/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | recommonmark
2 | sphinx
3 | sphinx_markdown_tables
4 | sphinx_rtd_theme
--------------------------------------------------------------------------------
/requirements/build.txt:
--------------------------------------------------------------------------------
1 | # These must be installed before building mmpose
2 | numpy
3 | torch>=1.3
--------------------------------------------------------------------------------
/tools/torchstat/__init__.py:
--------------------------------------------------------------------------------
1 | from .analyzer import analyze
2 | 
3 | __all__ = [
4 |     'analyze',
5 | ]
--------------------------------------------------------------------------------
/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .xhrnet import XHRNet
2 | 
3 | __all__ = [
4 |     'XHRNet',
5 | ]
6 | 
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/builder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/__pycache__/builder.cpython-38.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements/build.txt
2 | -r 
requirements/runtime.txt 3 | -r requirements/tests.txt 4 | -r requirements/optional.txt 5 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | onnx 3 | onnxruntime 4 | poseval@git+https://github.com/svenkreiss/poseval.git 5 | smplx 6 | -------------------------------------------------------------------------------- /models/backbones/__pycache__/xhrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/xhrnet.cpython-38.pyc -------------------------------------------------------------------------------- /tools/__pycache__/torchstat_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/__pycache__/torchstat_utils.cpython-37.pyc -------------------------------------------------------------------------------- /tools/__pycache__/torchstat_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/__pycache__/torchstat_utils.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/hwhrnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/hwhrnet.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/hwhrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/hwhrnet.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/whthrnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/whthrnet.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/whthrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/whthrnet.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/analyzer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/analyzer.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/analyzer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/analyzer.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/reporter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/reporter.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/reporter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/reporter.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/stat_tree.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/stat_tree.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/stat_tree.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/stat_tree.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/litehrnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/litehrnet.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/litehrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/litehrnet.cpython-38.pyc -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | coverage 2 | flake8 3 | interrogate 4 | isort==4.3.21 5 | pytest 6 | pytest-runner 7 | smplx 8 | xdoctest >= 0.10.0 9 | yapf 10 | -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/compute_madd.cpython-37.pyc: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_madd.cpython-37.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_madd.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_madd.cpython-38.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_flops.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_flops.cpython-37.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_flops.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_flops.cpython-38.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_memory.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_memory.cpython-37.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_memory.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_memory.cpython-38.pyc
--------------------------------------------------------------------------------
/requirements/readthedocs.txt:
--------------------------------------------------------------------------------
1 | mmcv-full
2 | munkres
3 | poseval@git+https://github.com/svenkreiss/poseval.git
4 | scipy
5 | titlecase
6 | torch
7 | torchvision
8 | xtcocotools>=1.6
--------------------------------------------------------------------------------
/requirements/runtime.txt:
--------------------------------------------------------------------------------
1 | chumpy
2 | dataclasses; python_version == '3.6'
3 | json_tricks
4 | matplotlib
5 | munkres
6 | numpy
7 | opencv-python
8 | pillow
9 | scipy
10 | torchvision
11 | xtcocotools>=1.6
12 | future
13 | tensorboard
14 | mmpose
--------------------------------------------------------------------------------
/tools/torchstat/README.md:
--------------------------------------------------------------------------------
1 | # Credits
2 | 
3 | Code in this folder is almost as-is from the torchstat repository located at https://github.com/Swall0w/torchstat. 
4 | 5 | Additional merges are from: 6 | - https://github.com/kenshohara/torchstat 7 | - https://github.com/lyakaap/torchstat -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa 2 | from .builder import (build_backbone, build_head, build_loss, build_neck, 3 | build_posenet) 4 | 5 | __all__ = [ 6 | 'build_backbone', 'build_head', 7 | 'build_loss', 'build_posenet', 'build_neck' 8 | ] 9 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29600} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /models/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import build_from_cfg 2 | from torch import nn 3 | 4 | from mmpose.models.builder import BACKBONES, HEADS, LOSSES, NECKS, POSENETS 5 | 6 | 7 | def build(cfg, registry, default_args=None): 8 | """Build a module. 9 | 10 | Args: 11 | cfg (dict, list[dict]): The config of modules, it is either a dict 12 | or a list of configs. 13 | registry (:obj:`Registry`): A registry the module belongs to. 14 | default_args (dict, optional): Default arguments to build the module. 15 | Defaults to None. 16 | 17 | Returns: 18 | nn.Module: A built nn module. 
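Example:
    A hypothetical pair of registered configs: ``build([cfg_a, cfg_b],
    BACKBONES)`` builds both via ``build_from_cfg`` and wraps them in
    ``nn.Sequential``; a single dict yields a single module.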
19 |     """
20 | 
21 |     if isinstance(cfg, list):
22 |         modules = [
23 |             build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
24 |         ]
25 |         return nn.Sequential(*modules)
26 | 
27 |     return build_from_cfg(cfg, registry, default_args)
28 | 
29 | 
30 | def build_backbone(cfg):
31 |     """Build backbone."""
32 |     return build(cfg, BACKBONES)
33 | 
34 | 
35 | def build_neck(cfg):
36 |     """Build neck."""
37 |     return build(cfg, NECKS)
38 | 
39 | 
40 | def build_head(cfg):
41 |     """Build head."""
42 |     return build(cfg, HEADS)
43 | 
44 | 
45 | def build_loss(cfg):
46 |     """Build loss."""
47 |     return build(cfg, LOSSES)
48 | 
49 | 
50 | def build_posenet(cfg):
51 |     """Build posenet."""
52 |     return build(cfg, POSENETS)
53 | 
--------------------------------------------------------------------------------
/tools/summary_network.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | # import tensorwatch as tw
3 | 
4 | from mmcv import Config
5 | from mmcv.cnn import get_model_complexity_info
6 | from torchstat_utils import model_stats
7 | 
8 | import sys
9 | sys.path.append('.')
10 | from models import build_posenet
11 | 
12 | 
13 | def parse_args():
14 |     parser = argparse.ArgumentParser(description='Summarize a network')
15 |     parser.add_argument('config', help='train config file path')
16 |     parser.add_argument(
17 |         '--shape',
18 |         type=int,
19 |         nargs='+',
20 |         default=[256, 192],
21 |         help='input image size')
22 |     parser.add_argument(
23 |         '--method',
24 |         type=str,
25 |         choices=['torchstat', 'mmcv'],
26 |         default='torchstat',
27 |     )
28 |     parser.add_argument('--out-file', type=str,
29 |                         help='Output file name')
30 |     args = parser.parse_args()
31 |     return args
32 | 
33 | 
34 | def main():
35 | 
36 |     args = parse_args()
37 | 
38 |     if len(args.shape) == 1:
39 |         input_shape = (1, 3, args.shape[0], args.shape[0])
40 |     elif len(args.shape) == 2:
41 |         input_shape = (1, 3, ) + tuple(args.shape)
42 |     else:
43 |         raise ValueError('invalid input shape')
44 | 
45 |     cfg = Config.fromfile(args.config)
46 |     model = build_posenet(cfg.model)
47 |     model.eval()
48 | 
49 |     if hasattr(model, 'forward_dummy'):
50 |         model.forward = model.forward_dummy
51 |     else:
52 |         raise NotImplementedError(
53 |             'FLOPs counter is currently not supported with {}'.
54 |             format(model.__class__.__name__))
55 | 
56 |     if args.method == 'torchstat':
57 |         df = model_stats(model, input_shape)
58 |         print(df)
59 |         if args.out_file:
60 |             df.to_html(args.out_file + '.html')
61 |             df.to_csv(args.out_file + '.csv')
62 |     elif args.method == 'mmcv':
63 |         flops, params = get_model_complexity_info(model, input_shape[1:], False)
64 |         split_line = '=' * 30
65 |         print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
66 |             split_line, input_shape, flops, params))
67 | 
68 |     print('!!!Please be cautious if you use the results in papers. 
' 69 | 'You may need to check if all ops are supported and verify that the ' 70 | 'flops computation is correct.') 71 | 72 | 73 | if __name__ == '__main__': 74 | main() 75 | -------------------------------------------------------------------------------- /tools/torchstat/compute_memory.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | 5 | 6 | def compute_memory(module, inp, out): 7 | if isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU)): 8 | return compute_ReLU_memory(module, inp[0], out[0]) 9 | elif isinstance(module, nn.PReLU): 10 | return compute_PReLU_memory(module, inp[0], out[0]) 11 | elif isinstance(module, nn.Conv2d): 12 | return compute_Conv2d_memory(module, inp[0], out[0]) 13 | elif isinstance(module, nn.BatchNorm2d): 14 | return compute_BatchNorm2d_memory(module, inp[0], out[0]) 15 | elif isinstance(module, nn.Linear): 16 | return compute_Linear_memory(module, inp[0], out[0]) 17 | elif isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)): 18 | return compute_Pool2d_memory(module, inp[0], out[0]) 19 | else: 20 | #print(f"[Memory]: {type(module).__name__} is not supported!") 21 | return 0, 0 22 | pass 23 | 24 | 25 | def num_params(module): 26 | return sum(p.numel() for p in module.parameters() if p.requires_grad) 27 | 28 | 29 | def compute_ReLU_memory(module, inp, out): 30 | assert isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU)) 31 | 32 | mread = inp.numel() 33 | mwrite = out.numel() 34 | 35 | return mread * inp.element_size(), mwrite * out.element_size() 36 | 37 | 38 | def compute_PReLU_memory(module, inp, out): 39 | assert isinstance(module, nn.PReLU) 40 | 41 | batch_size = inp.size()[0] 42 | mread = batch_size * (inp[0].numel() + num_params(module)) 43 | mwrite = out.numel() 44 | 45 | return mread * inp.element_size(), mwrite * out.element_size() 46 | 47 | 48 | def compute_Conv2d_memory(module, inp, out): 49 | # Can have multiple inputs, getting the first one 50 | assert isinstance(module, nn.Conv2d) 51 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 52 | 53 | batch_size = inp.size()[0] 54 | 55 | # This includes weights with bias if the module contains it. 56 | mread = batch_size * (inp[0].numel() + num_params(module)) 57 | mwrite = out.numel() 58 | 59 | return mread * inp.element_size(), mwrite * out.element_size() 60 | 61 | 62 | def compute_BatchNorm2d_memory(module, inp, out): 63 | assert isinstance(module, nn.BatchNorm2d) 64 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 65 | 66 | batch_size, in_c, in_h, in_w = inp.size() 67 | mread = batch_size * (inp[0].numel() + 2 * in_c) 68 | mwrite = out.numel() 69 | 70 | return mread * inp.element_size(), mwrite * out.element_size() 71 | 72 | 73 | def compute_Linear_memory(module, inp, out): 74 | assert isinstance(module, nn.Linear) 75 | assert len(inp.size()) == 2 and len(out.size()) == 2 76 | 77 | batch_size = inp.size()[0] 78 | 79 | # This includes weights with bias if the module contains it. 
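# Worked example (illustrative): for a float32 nn.Linear(1024, 1000) with bias on
# an (8, 1024) input, mread = 8 * (1024 + 1_025_000) elements, i.e. ~32.8 MB once
# scaled by element_size() in the return below.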
80 |     mread = batch_size * (inp[0].numel() + num_params(module))
81 |     mwrite = out.numel()
82 | 
83 |     return mread * inp.element_size(), mwrite * out.element_size()
84 | 
85 | 
86 | def compute_Pool2d_memory(module, inp, out):
87 |     assert isinstance(module, (nn.MaxPool2d, nn.AvgPool2d))
88 |     assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())
89 | 
90 |     mread = inp.numel()
91 |     mwrite = out.numel()
92 | 
93 |     return mread * inp.element_size(), mwrite * out.element_size()
94 | 
--------------------------------------------------------------------------------
/tools/torchstat/reporter.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | pd.set_option('display.width', 1000)
4 | pd.set_option('display.max_rows', 10000)
5 | pd.set_option('display.max_columns', 10000)
6 | 
7 | 
8 | def round_value(value, binary=False):
9 |     divisor = 1024. if binary else 1000.
10 | 
11 |     if value // divisor**4 > 0:
12 |         return str(round(value / divisor**4, 2)) + 'T'
13 |     elif value // divisor**3 > 0:
14 |         return str(round(value / divisor**3, 2)) + 'G'
15 |     elif value // divisor**2 > 0:
16 |         return str(round(value / divisor**2, 2)) + 'M'
17 |     elif value // divisor > 0:
18 |         return str(round(value / divisor, 2)) + 'K'
19 |     return str(value)
20 | 
21 | 
22 | def report_format(collected_nodes):
23 |     data = list()
24 |     for node in collected_nodes:
25 |         name = node.name
26 |         input_shape = ' '.join(
27 |             ['{:>3d}'] *
28 |             len(node.input_shape)).format(*[e for e in node.input_shape])
29 |         output_shape = ' '.join(
30 |             ['{:>3d}'] *
31 |             len(node.output_shape)).format(*[e for e in node.output_shape])
32 |         parameter_quantity = node.parameter_quantity
33 |         inference_memory = node.inference_memory
34 |         MAdd = node.MAdd
35 |         Flops = node.Flops
36 |         mread, mwrite = [i for i in node.Memory]
37 |         duration = node.duration
38 |         data.append([
39 |             name, input_shape, output_shape, parameter_quantity,
40 |             inference_memory, MAdd, duration, Flops, mread, mwrite
41 |         ])
42 |     df = pd.DataFrame(data)
43 |     df.columns = [
44 |         'module name', 'input shape', 'output shape', 'params', 'memory(MB)',
45 |         'MAdd', 'duration', 'Flops', 'MemRead(B)', 'MemWrite(B)'
46 |     ]
47 |     df['duration[%]'] = df['duration'] / (df['duration'].sum() + 1e-7)
48 |     df['MemR+W(B)'] = df['MemRead(B)'] + df['MemWrite(B)']
49 |     total_parameters_quantity = df['params'].sum()
50 |     total_memory = df['memory(MB)'].sum()
51 |     total_operation_quantity = df['MAdd'].sum()
52 |     total_flops = df['Flops'].sum()
53 |     total_duration = df['duration[%]'].sum()
54 |     total_mread = df['MemRead(B)'].sum()
55 |     total_mwrite = df['MemWrite(B)'].sum()
56 |     total_memrw = df['MemR+W(B)'].sum()
57 |     del df['duration']
58 | 
59 |     # Add Total row (use the accumulated totals, not the last node's values)
60 |     total_df = pd.Series([
61 |         total_parameters_quantity, total_memory, total_operation_quantity,
62 |         total_flops, total_duration, total_mread, total_mwrite, total_memrw
63 |     ],
64 |                          index=[
65 |                              'params', 'memory(MB)', 'MAdd', 'Flops',
66 |                              'duration[%]', 'MemRead(B)', 'MemWrite(B)',
67 |                              'MemR+W(B)'
68 |                          ],
69 |                          name='total')
70 |     df = df.append(total_df)
71 | 
72 |     df = df.fillna(' ')
73 |     df['memory(MB)'] = df['memory(MB)'].apply(lambda x: '{:.2f}'.format(x))
74 |     df['duration[%]'] = df['duration[%]'].apply(lambda x: '{:.2%}'.format(x))
75 |     df['MAdd'] = df['MAdd'].apply(lambda x: '{:,}'.format(x))
76 |     df['Flops'] = df['Flops'].apply(lambda x: '{:,}'.format(x))
77 | 
78 |     summary = str(df) + '\n'
79 |     summary += "=" * len(str(df).split('\n')[0])
80 |     summary += '\n'
81 |     summary += "Total params: 
{:,}\n".format(total_parameters_quantity) 82 | 83 | summary += "-" * len(str(df).split('\n')[0]) 84 | summary += '\n' 85 | summary += "Total memory: {:.2f}MB\n".format(total_memory) 86 | summary += "Total MAdd: {}MAdd\n".format( 87 | round_value(total_operation_quantity)) 88 | summary += "Total Flops: {}Flops\n".format(round_value(total_flops)) 89 | summary += "Total MemR+W: {}B\n".format(round_value(total_memrw, True)) 90 | return summary 91 | -------------------------------------------------------------------------------- /tools/torchstat_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from torchstat import analyze 5 | import pandas as pd 6 | import copy 7 | 8 | 9 | class LayerStats: 10 | 11 | def __init__(self, node) -> None: 12 | self.name = node.name 13 | self.input_shape = node.input_shape 14 | self.output_shape = node.output_shape 15 | self.parameters = node.parameter_quantity 16 | self.inference_memory = node.inference_memory 17 | self.MAdd = node.MAdd 18 | self.Flops = node.Flops 19 | self.mread, self.mwrite = node.Memory[0], node.Memory[1] 20 | self.duration = node.duration 21 | 22 | 23 | class ModelStats(LayerStats): 24 | 25 | def __init__(self, model, input_shape, clone_model=False) -> None: 26 | if clone_model: 27 | model = copy.deepcopy(model) 28 | collected_nodes = analyze(model, input_shape, 1) 29 | self.layer_stats = [] 30 | for node in collected_nodes: 31 | self.layer_stats.append(LayerStats(node)) 32 | 33 | self.name = 'Model' 34 | self.input_shape = input_shape 35 | self.output_shape = self.layer_stats[-1].output_shape 36 | self.parameters = sum((l.parameters for l in self.layer_stats)) 37 | self.inference_memory = sum( 38 | (l.inference_memory for l in self.layer_stats)) 39 | self.MAdd = sum((l.MAdd for l in self.layer_stats)) 40 | self.Flops = sum((l.Flops for l in self.layer_stats)) 41 | self.mread = sum((l.mread for l in self.layer_stats)) 42 | self.mwrite = sum((l.mwrite for l in self.layer_stats)) 43 | self.duration = sum((l.duration for l in self.layer_stats)) 44 | 45 | 46 | def model_stats(model, input_shape): 47 | ms = ModelStats(model, input_shape) 48 | return model_stats2df(ms) 49 | 50 | 51 | def _round_value(value, binary=False): 52 | divisor = 1024. if binary else 1000. 
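# e.g., _round_value(3_507_000) -> '3.51M'; _round_value(2048, binary=True) -> '2.0K'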
53 | 
54 |     if value // divisor**4 > 0:
55 |         return str(round(value / divisor**4, 2)) + 'T'
56 |     elif value // divisor**3 > 0:
57 |         return str(round(value / divisor**3, 2)) + 'G'
58 |     elif value // divisor**2 > 0:
59 |         return str(round(value / divisor**2, 2)) + 'M'
60 |     elif value // divisor > 0:
61 |         return str(round(value / divisor, 2)) + 'K'
62 |     return str(value)
63 | 
64 | 
65 | def model_stats2df(model_stats: ModelStats):
66 |     pd.set_option('display.width', 1000)
67 |     pd.set_option('display.max_rows', 10000)
68 |     pd.set_option('display.max_columns', 10000)
69 | 
70 |     df = pd.DataFrame([l.__dict__ for l in model_stats.layer_stats])
71 |     total_df = pd.Series(model_stats.__dict__, name='Total')
72 |     df = df.append(total_df[df.columns], ignore_index=True)
73 | 
74 |     df = df.fillna(' ')
75 |     # df['memory(MB)'] = df['memory(MB)'].apply(
76 |     #     lambda x: '{:.2f}'.format(x))
77 |     # df['duration[%]'] = df['duration[%]'].apply(lambda x: '{:.2%}'.format(x))
78 |     for c in [
79 |             'MAdd', 'Flops', 'parameters', 'inference_memory', 'mread',
80 |             'mwrite'
81 |     ]:
82 |         if c == 'Flops':
83 |             df[c] = df[c].apply(lambda x: _round_value(x, True))
84 |         elif c == 'parameters':
85 |             df[c] = df[c].apply(lambda x: _round_value(x))
86 |         else:
87 |             df[c] = df[c].apply(lambda x: '{:,}'.format(x))
88 | 
89 |     df.rename(
90 |         columns={
91 |             'name': 'module name',
92 |             'input_shape': 'input shape',
93 |             'output_shape': 'output shape',
94 |             'inference_memory': 'infer memory(MB)',
95 |             'mread': 'MemRead(B)',
96 |             'mwrite': 'MemWrite(B)'
97 |         },
98 |         inplace=True)
99 | 
100 |     #summary = "Total params: {:,}\n".format(total_parameters_quantity)
101 | 
102 |     #summary += "-" * len(str(df).split('\n')[0])
103 |     #summary += '\n'
104 |     #summary += "Total memory: {:.2f}MB\n".format(total_memory)
105 |     #summary += "Total MAdd: {}MAdd\n".format(_round_value(total_operation_quantity))
106 |     #summary += "Total Flops: {}Flops\n".format(_round_value(total_flops))
107 |     #summary += "Total MemR+W: {}B\n".format(_round_value(total_memrw, True))
108 |     return df
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import os.path as osp
4 | 
5 | import mmcv
6 | import torch
7 | from mmcv import Config, DictAction
8 | from mmcv.cnn import fuse_conv_bn
9 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
10 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint
11 | 
12 | from mmpose.apis import multi_gpu_test, single_gpu_test
13 | from mmpose.core import wrap_fp16_model
14 | from mmpose.datasets import build_dataloader, build_dataset
15 | from models import build_posenet
16 | 
17 | 
18 | def parse_args():
19 |     parser = argparse.ArgumentParser(description='mmpose test model')
20 |     parser.add_argument('config', help='test config file path')
21 |     parser.add_argument('checkpoint', help='checkpoint file')
22 |     parser.add_argument('--out', help='output result file')
23 |     parser.add_argument(
24 |         '--fuse-conv-bn',
25 |         action='store_true',
26 |         help='Whether to fuse conv and bn, this will slightly increase '
27 |         'the inference speed')
28 |     parser.add_argument(
29 |         '--eval',
30 |         default=None,
31 |         nargs='+',
32 |         help='evaluation metric, which depends on the dataset,'
33 |         ' e.g., "mAP" for MSCOCO')
34 |     parser.add_argument(
35 |         '--gpu_collect',
36 |         action='store_true',
37 |         help='whether to use gpu to collect results')
38 |     parser.add_argument('--tmpdir', help='tmp 
dir for writing some results') 39 | parser.add_argument( 40 | '--cfg-options', 41 | nargs='+', 42 | action=DictAction, 43 | default={}, 44 | help='override some settings in the used config, the key-value pair ' 45 | 'in xxx=yyy format will be merged into config file. For example, ' 46 | "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'") 47 | parser.add_argument( 48 | '--launcher', 49 | choices=['none', 'pytorch', 'slurm', 'mpi'], 50 | default='none', 51 | help='job launcher') 52 | parser.add_argument('--local_rank', type=int, default=0) 53 | args = parser.parse_args() 54 | if 'LOCAL_RANK' not in os.environ: 55 | os.environ['LOCAL_RANK'] = str(args.local_rank) 56 | return args 57 | 58 | 59 | def merge_configs(cfg1, cfg2): 60 | # Merge cfg2 into cfg1 61 | # Overwrite cfg1 if repeated, ignore if value is None. 62 | cfg1 = {} if cfg1 is None else cfg1.copy() 63 | cfg2 = {} if cfg2 is None else cfg2 64 | for k, v in cfg2.items(): 65 | if v: 66 | cfg1[k] = v 67 | return cfg1 68 | 69 | 70 | def main(): 71 | args = parse_args() 72 | 73 | cfg = Config.fromfile(args.config) 74 | 75 | if args.cfg_options is not None: 76 | cfg.merge_from_dict(args.cfg_options) 77 | 78 | # set cudnn_benchmark 79 | if cfg.get('cudnn_benchmark', False): 80 | torch.backends.cudnn.benchmark = True 81 | cfg.model.pretrained = None 82 | cfg.data.test.test_mode = True 83 | 84 | # args.work_dir = osp.join('./work_dirs', 85 | # osp.splitext(osp.basename(args.config))[0]) 86 | args.work_dir = cfg.work_dir 87 | mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) 88 | 89 | # init distributed env first, since logger depends on the dist info. 90 | if args.launcher == 'none': 91 | distributed = False 92 | else: 93 | distributed = True 94 | init_dist(args.launcher, **cfg.dist_params) 95 | 96 | # build the dataloader 97 | dataset = build_dataset(cfg.data.test, dict(test_mode=True)) 98 | dataloader_setting = dict( 99 | samples_per_gpu=1, 100 | workers_per_gpu=cfg.data.get('workers_per_gpu', 1), 101 | dist=distributed, 102 | shuffle=False, 103 | drop_last=False) 104 | dataloader_setting = dict(dataloader_setting, 105 | **cfg.data.get('test_dataloader', {})) 106 | data_loader = build_dataloader(dataset, **dataloader_setting) 107 | 108 | # build the model and load checkpoint 109 | model = build_posenet(cfg.model) 110 | fp16_cfg = cfg.get('fp16', None) 111 | if fp16_cfg is not None: 112 | wrap_fp16_model(model) 113 | load_checkpoint(model, args.checkpoint, map_location='cpu') 114 | 115 | if args.fuse_conv_bn: 116 | model = fuse_conv_bn(model) 117 | 118 | if not distributed: 119 | model = MMDataParallel(model, device_ids=[0]) 120 | outputs = single_gpu_test(model, data_loader) 121 | else: 122 | model = MMDistributedDataParallel( 123 | model.cuda(), 124 | device_ids=[torch.cuda.current_device()], 125 | broadcast_buffers=False) 126 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 127 | args.gpu_collect) 128 | 129 | rank, _ = get_dist_info() 130 | eval_config = cfg.get('evaluation', {}) 131 | eval_config = merge_configs(eval_config, dict(metric=args.eval)) 132 | 133 | if rank == 0: 134 | if args.out: 135 | print(f'\nwriting results to {args.out}') 136 | mmcv.dump(outputs, args.out) 137 | 138 | print(dataset.evaluate(outputs, args.work_dir, **eval_config)) 139 | 140 | 141 | if __name__ == '__main__': 142 | main() 143 | -------------------------------------------------------------------------------- /tools/torchstat/compute_madd.py: -------------------------------------------------------------------------------- 1 | """ 
2 | compute Multiply-Adds(MAdd) of each leaf module 3 | """ 4 | 5 | import torch.nn as nn 6 | 7 | 8 | def compute_Conv2d_madd(module, inp, out): 9 | assert isinstance(module, nn.Conv2d) 10 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 11 | 12 | in_c = inp.size()[1] 13 | k_h, k_w = module.kernel_size 14 | out_c, out_h, out_w = out.size()[1:] 15 | groups = module.groups 16 | 17 | # ops per output element 18 | kernel_mul = k_h * k_w * (in_c // groups) 19 | kernel_add = kernel_mul - 1 + (0 if module.bias is None else 1) 20 | 21 | kernel_mul_group = kernel_mul * out_h * out_w * (out_c // groups) 22 | kernel_add_group = kernel_add * out_h * out_w * (out_c // groups) 23 | 24 | total_mul = kernel_mul_group * groups 25 | total_add = kernel_add_group * groups 26 | 27 | return total_mul + total_add 28 | 29 | 30 | def compute_ConvTranspose2d_madd(module, inp, out): 31 | assert isinstance(module, nn.ConvTranspose2d) 32 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 33 | 34 | in_c, in_h, in_w = inp.size()[1:] 35 | k_h, k_w = module.kernel_size 36 | out_c, out_h, out_w = out.size()[1:] 37 | groups = module.groups 38 | 39 | kernel_mul = k_h * k_w * (in_c // groups) 40 | kernel_add = kernel_mul - 1 + (0 if module.bias is None else 1) 41 | 42 | kernel_mul_group = kernel_mul * in_h * in_w * (out_c // groups) 43 | kernel_add_group = kernel_add * in_h * in_w * (out_c // groups) 44 | 45 | total_mul = kernel_mul_group * groups 46 | total_add = kernel_add_group * groups 47 | 48 | return total_mul + total_add 49 | 50 | 51 | def compute_BatchNorm2d_madd(module, inp, out): 52 | assert isinstance(module, nn.BatchNorm2d) 53 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 54 | 55 | in_c, in_h, in_w = inp.size()[1:] 56 | 57 | # 1. sub mean 58 | # 2. div standard deviation 59 | # 3. mul alpha 60 | # 4. 
add beta 61 | return 4 * in_c * in_h * in_w 62 | 63 | 64 | def compute_MaxPool2d_madd(module, inp, out): 65 | assert isinstance(module, nn.MaxPool2d) 66 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 67 | 68 | if isinstance(module.kernel_size, (tuple, list)): 69 | k_h, k_w = module.kernel_size 70 | else: 71 | k_h, k_w = module.kernel_size, module.kernel_size 72 | out_c, out_h, out_w = out.size()[1:] 73 | 74 | return (k_h * k_w - 1) * out_h * out_w * out_c 75 | 76 | 77 | def compute_AvgPool2d_madd(module, inp, out): 78 | assert isinstance(module, nn.AvgPool2d) 79 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 80 | 81 | if isinstance(module.kernel_size, (tuple, list)): 82 | k_h, k_w = module.kernel_size 83 | else: 84 | k_h, k_w = module.kernel_size, module.kernel_size 85 | out_c, out_h, out_w = out.size()[1:] 86 | 87 | kernel_add = k_h * k_w - 1 88 | kernel_avg = 1 89 | 90 | return (kernel_add + kernel_avg) * (out_h * out_w) * out_c 91 | 92 | 93 | def compute_ReLU_madd(module, inp, out): 94 | assert isinstance(module, (nn.ReLU, nn.ReLU6)) 95 | 96 | count = 1 97 | for i in inp.size()[1:]: 98 | count *= i 99 | return count 100 | 101 | 102 | def compute_Softmax_madd(module, inp, out): 103 | assert isinstance(module, nn.Softmax) 104 | assert len(inp.size()) > 1 105 | 106 | count = 1 107 | for s in inp.size()[1:]: 108 | count *= s 109 | exp = count 110 | add = count - 1 111 | div = count 112 | return exp + add + div 113 | 114 | 115 | def compute_Linear_madd(module, inp, out): 116 | assert isinstance(module, nn.Linear) 117 | assert len(inp.size()) == 2 and len(out.size()) == 2 118 | 119 | num_in_features = inp.size()[1] 120 | num_out_features = out.size()[1] 121 | 122 | mul = num_in_features 123 | add = num_in_features - 1 124 | return num_out_features * (mul + add) 125 | 126 | 127 | def compute_Bilinear_madd(module, inp1, inp2, out): 128 | assert isinstance(module, nn.Bilinear) 129 | assert len(inp1.size()) == 2 and len(inp2.size()) == 2 and len( 130 | out.size()) == 2 131 | 132 | num_in_features_1 = inp1.size()[1] 133 | num_in_features_2 = inp2.size()[1] 134 | num_out_features = out.size()[1] 135 | 136 | mul = num_in_features_1 * num_in_features_2 + num_in_features_2 137 | add = num_in_features_1 * num_in_features_2 + num_in_features_2 - 1 138 | return num_out_features * (mul + add) 139 | 140 | 141 | def compute_madd(module, inp, out): 142 | if isinstance(module, nn.Conv2d): 143 | return compute_Conv2d_madd(module, inp[0], out[0]) 144 | elif isinstance(module, nn.ConvTranspose2d): 145 | return compute_ConvTranspose2d_madd(module, inp[0], out[0]) 146 | elif isinstance(module, nn.BatchNorm2d): 147 | return compute_BatchNorm2d_madd(module, inp[0], out[0]) 148 | elif isinstance(module, nn.MaxPool2d): 149 | return compute_MaxPool2d_madd(module, inp[0], out[0]) 150 | elif isinstance(module, nn.AvgPool2d): 151 | return compute_AvgPool2d_madd(module, inp[0], out[0]) 152 | elif isinstance(module, (nn.ReLU, nn.ReLU6)): 153 | return compute_ReLU_madd(module, inp[0], out[0]) 154 | elif isinstance(module, nn.Softmax): 155 | return compute_Softmax_madd(module, inp[0], out[0]) 156 | elif isinstance(module, nn.Linear): 157 | return compute_Linear_madd(module, inp[0], out[0]) 158 | elif isinstance(module, nn.Bilinear): 159 | return compute_Bilinear_madd(module, inp[0], inp[1], out) 160 | else: 161 | #print(f"[MAdd]: {type(module).__name__} is not supported!") 162 | return 0 163 | -------------------------------------------------------------------------------- 
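Since compute_Conv2d_madd above is the template for the other counters, here is a quick numeric sanity check: a minimal sketch, assuming PyTorch is installed and tools/torchstat is importable as the ``torchstat`` package; the layer and tensor shapes are illustrative, not taken from this repo's configs. Note that multiplies and adds are counted separately, so one multiply-accumulate contributes two MAdds.

import torch
import torch.nn as nn

from torchstat.compute_madd import compute_Conv2d_madd  # assumes tools/ is on sys.path

# 3x3 depthwise conv: groups == in_c == out_c == 16, no bias
dw = nn.Conv2d(16, 16, kernel_size=3, padding=1, groups=16, bias=False)
x = torch.randn(1, 16, 8, 8)
y = dw(x)

muls_per_elem = 3 * 3 * (16 // 16)                    # 9 multiplies per output element
madds_per_elem = muls_per_elem + (muls_per_elem - 1)  # 17 MAdds (no bias term)
expected = madds_per_elem * y.numel()                 # 17 * 1024 = 17408

assert compute_Conv2d_madd(dw, x, y) == expected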
/configs/xhrnet/pxhrnet_30_coco_256x192.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/pxhrnet_30_coco_256x192" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(3, 8, 3), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('parallel', 'parallel', 'parallel'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[192, 256], 85 | heatmap_size=[48, 64], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[192, 256], 103 | heatmap_size=[48, 64], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | 
type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=2), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/pxhrnet_30_coco_384x288.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/pxhrnet_30_coco_384x288" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(2, 4, 2), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('parallel', 'parallel', 'parallel'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 
80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[288, 384], 85 | heatmap_size=[72, 96], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[288, 384], 103 | heatmap_size=[72, 96], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=3), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | 
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_18_coco_256x192.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_18_coco_256x192" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(2, 4, 2), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[192, 256], 85 | heatmap_size=[48, 64], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[192, 256], 103 | heatmap_size=[48, 64], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 
'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=2), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_18_coco_384x288.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_18_coco_384x288" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 
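# (assumption: down_ratio=4 below means the stem downsamples the input
# resolution 4x before stage 1, as in HRNet-style stems; see
# models/backbones/xhrnet.py)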
51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(2, 4, 2), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[288, 384], 85 | heatmap_size=[72, 96], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[288, 384], 103 | heatmap_size=[72, 96], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=3), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | 
type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_30_coco_256x192.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_30_coco_256x192" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(3, 8, 3), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[192, 256], 85 | heatmap_size=[48, 64], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[192, 256], 103 | heatmap_size=[48, 64], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | 
inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=2), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_30_coco_384x288.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_30_coco_384x288" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(3, 8, 3), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[288, 384], 85 | heatmap_size=[72, 96], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[288, 384], 103 | heatmap_size=[72, 96], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=3), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4,
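    # Effective batch size = samples_per_gpu x num_gpus (64 x 8 = 512 with the
    # 8-GPU setup described in the README); per the Linear Scaling Rule cited
    # there, adjust lr in the optimizer above if this product changes.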
167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /tools/torchstat/stat_tree.py: -------------------------------------------------------------------------------- 1 | import queue 2 | 3 | 4 | class StatTree(object): 5 | 6 | def __init__(self, root_node): 7 | assert isinstance(root_node, StatNode) 8 | 9 | self.root_node = root_node 10 | 11 | def get_same_level_max_node_depth(self, query_node): 12 | if query_node.name == self.root_node.name: 13 | return 0 14 | same_level_depth = max( 15 | [child.depth for child in query_node.parent.children]) 16 | return same_level_depth 17 | 18 | def update_stat_nodes_granularity(self): 19 | q = queue.Queue() 20 | q.put(self.root_node) 21 | while not q.empty(): 22 | node = q.get() 23 | node.granularity = self.get_same_level_max_node_depth(node) 24 | for child in node.children: 25 | q.put(child) 26 | 27 | def get_collected_stat_nodes(self, query_granularity): 28 | self.update_stat_nodes_granularity() 29 | 30 | collected_nodes = [] 31 | stack = list() 32 | stack.append(self.root_node) 33 | while len(stack) > 0: 34 | node = stack.pop() 35 | for child in reversed(node.children): 36 | stack.append(child) 37 | if node.depth == query_granularity: 38 | collected_nodes.append(node) 39 | if node.depth < query_granularity <= node.granularity: 40 | collected_nodes.append(node) 41 | return collected_nodes 42 | 43 | 44 | class StatNode(object): 45 | 46 | def __init__(self, name=str(), parent=None): 47 | self._name = name 48 | self._input_shape = None 49 | self._output_shape = None 50 | self._parameter_quantity = 0 51 | self._inference_memory = 0 52 | self._MAdd = 0 53 | self._Memory = (0, 0) 54 | self._Flops = 0 55 | self._duration = 0 56 | self._duration_percent = 0 57 | 58 | self._granularity = 1 59 | self._depth = 1 60 | self.parent = parent 61 | self.children = list() 62 | 63 | @property 64 | def name(self): 65 | return self._name 66 | 67 | @name.setter 68 | def name(self, name): 69 | self._name = name 70 | 71 | @property 72 | def granularity(self): 73 | return self._granularity 74 | 75 | @granularity.setter 76 | def granularity(self, g): 77 | self._granularity = g 78 | 79 | @property 80 | def depth(self): 81 | d = self._depth 82 | if len(self.children) > 0: 83 | d += max([child.depth for child in self.children]) 84 | return d 85 | 86 | @property 87 | def input_shape(self): 88 | if len(self.children) == 0: # leaf 89 | return self._input_shape 90 | else: 91 | return self.children[0].input_shape 92 | 93 | @input_shape.setter 94 | def input_shape(self, input_shape): 95 | assert isinstance(input_shape, (list, tuple)) 96 | self._input_shape = input_shape 97 | 98 | @property 99 | def output_shape(self): 100 | if len(self.children) == 0: # leaf 101 | return self._output_shape 102 | else: 103 | return self.children[-1].output_shape 104 | 
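    # For composite (non-leaf) nodes, shapes are delegated to the children
    # (first child's input shape, last child's output shape), while the
    # quantities below (params, memory, MAdd, Flops, duration) are summed
    # recursively over the subtree.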
105 | @output_shape.setter 106 | def output_shape(self, output_shape): 107 | assert isinstance(output_shape, (list, tuple)) 108 | self._output_shape = output_shape 109 | 110 | @property 111 | def parameter_quantity(self): 112 | # return self.parameters_quantity 113 | total_parameter_quantity = self._parameter_quantity 114 | for child in self.children: 115 | total_parameter_quantity += child.parameter_quantity 116 | return total_parameter_quantity 117 | 118 | @parameter_quantity.setter 119 | def parameter_quantity(self, parameter_quantity): 120 | assert parameter_quantity >= 0 121 | self._parameter_quantity = parameter_quantity 122 | 123 | @property 124 | def inference_memory(self): 125 | total_inference_memory = self._inference_memory 126 | for child in self.children: 127 | total_inference_memory += child.inference_memory 128 | return total_inference_memory 129 | 130 | @inference_memory.setter 131 | def inference_memory(self, inference_memory): 132 | self._inference_memory = inference_memory 133 | 134 | @property 135 | def MAdd(self): 136 | total_MAdd = self._MAdd 137 | for child in self.children: 138 | total_MAdd += child.MAdd 139 | return total_MAdd 140 | 141 | @MAdd.setter 142 | def MAdd(self, MAdd): 143 | self._MAdd = MAdd 144 | 145 | @property 146 | def Flops(self): 147 | total_Flops = self._Flops 148 | for child in self.children: 149 | total_Flops += child.Flops 150 | return total_Flops 151 | 152 | @Flops.setter 153 | def Flops(self, Flops): 154 | self._Flops = Flops 155 | 156 | @property 157 | def Memory(self): 158 | # copy to a list: the stored value is a tuple, which does not support
item assignment 159 | total_Memory = list(self._Memory) 160 | for child in self.children: 161 | total_Memory[0] += child.Memory[0] 162 | total_Memory[1] += child.Memory[1] 163 | return tuple(total_Memory) 164 | 165 | @Memory.setter 166 | def Memory(self, Memory): 167 | assert isinstance(Memory, (list, tuple)) 168 | self._Memory = Memory 169 | 170 | @property 171 | def duration(self): 172 | total_duration = self._duration 173 | for child in self.children: 174 | total_duration += child.duration 175 | return total_duration 176 | 177 | @duration.setter 178 | def duration(self, duration): 179 | self._duration = duration 180 | 181 | def find_child_index(self, child_name): 182 | assert isinstance(child_name, str) 183 | 184 | index = -1 185 | for i in range(len(self.children)): 186 | if child_name == self.children[i].name: 187 | index = i 188 | return index 189 | 190 | def add_child(self, node): 191 | assert isinstance(node, StatNode) 192 | 193 | if self.find_child_index(node.name) == -1: # not exist 194 | self.children.append(node) 195 |
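# Minimal usage sketch (illustrative only; this block is not used anywhere in
# the repo and the node names below are hypothetical):
if __name__ == '__main__':
    root = StatNode(name='root')
    conv = StatNode(name='root.conv', parent=root)
    root.add_child(conv)
    conv.parameter_quantity = 64  # leaf statistics set directly
    conv.Flops = 1000
    tree = StatTree(root)
    assert root.parameter_quantity == 64  # aggregated from the leaf
    print([n.name for n in tree.get_collected_stat_nodes(1)])  # ['root.conv']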
-------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import os.path as osp 5 | import time 6 | 7 | import mmcv 8 | import torch 9 | from mmcv import Config, DictAction 10 | from mmcv.runner import init_dist, set_random_seed 11 | from mmcv.utils import get_git_hash 12 | 13 | from mmpose import __version__ 14 | from mmpose.apis import train_model 15 | from mmpose.datasets import build_dataset 16 | from models import build_posenet 17 | from mmpose.utils import collect_env, get_root_logger 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description='Train a pose model') 22 | parser.add_argument('config', help='train config file path') 23 | parser.add_argument('--work-dir', help='the dir to save logs and models') 24 | parser.add_argument( 25 | '--resume-from', help='the checkpoint file to resume from') 26 | parser.add_argument( 27 | '--no-validate', 28 | action='store_true', 29 | help='whether not to evaluate the checkpoint during training') 30 | group_gpus = parser.add_mutually_exclusive_group() 31 | group_gpus.add_argument( 32 | '--gpus', 33 | type=int, 34 | help='number of gpus to use ' 35 | '(only applicable to non-distributed training)') 36 | group_gpus.add_argument( 37 | '--gpu-ids', 38 | type=int, 39 | nargs='+', 40 | help='ids of gpus to use ' 41 | '(only applicable to non-distributed training)') 42 | parser.add_argument('--seed', type=int, default=None, help='random seed') 43 | parser.add_argument( 44 | '--deterministic', 45 | action='store_true', 46 | help='whether to set deterministic options for CUDNN backend.') 47 | parser.add_argument( 48 | '--cfg-options', 49 | nargs='+', 50 | action=DictAction, 51 | default=None, 52 | help='override some settings in the used config, the key-value pair ' 53 | 'in xxx=yyy format will be merged into config file. For example, ' 54 | "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'") 55 | parser.add_argument( 56 | '--launcher', 57 | choices=['none', 'pytorch', 'slurm', 'mpi'], 58 | default='none', 59 | help='job launcher') 60 | parser.add_argument('--local_rank', type=int, default=0) 61 | parser.add_argument( 62 | '--autoscale-lr', 63 | action='store_true', 64 | help='automatically scale lr with the number of gpus') 65 | args = parser.parse_args() 66 | if 'LOCAL_RANK' not in os.environ: 67 | os.environ['LOCAL_RANK'] = str(args.local_rank) 68 | 69 | return args 70 | 71 | 72 | def main(): 73 | args = parse_args() 74 | 75 | cfg = Config.fromfile(args.config) 76 | 77 | if args.cfg_options is not None: 78 | cfg.merge_from_dict(args.cfg_options) 79 | 80 | # set cudnn_benchmark 81 | if cfg.get('cudnn_benchmark', False): 82 | torch.backends.cudnn.benchmark = True 83 | 84 | # work_dir is determined in this priority: CLI > segment in file > filename 85 | if args.work_dir is not None: 86 | # update configs according to CLI args if args.work_dir is not None 87 | cfg.work_dir = args.work_dir 88 | elif cfg.get('work_dir', None) is None: 89 | # use config filename as default work_dir if cfg.work_dir is None 90 | cfg.work_dir = osp.join('./work_dirs', 91 | osp.splitext(osp.basename(args.config))[0]) 92 | if args.resume_from is not None: 93 | cfg.resume_from = args.resume_from 94 | if args.gpu_ids is not None: 95 | cfg.gpu_ids = args.gpu_ids 96 | else: 97 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 98 | 99 | if args.autoscale_lr: 100 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) 101 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 102 | 103 | # init distributed env first, since logger depends on the dist info.
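    # When '--launcher pytorch' is used, this script is typically spawned once
    # per GPU (e.g. through torch.distributed.launch, which tools/dist_train.sh
    # is expected to wrap), and each process reads its LOCAL_RANK from the
    # environment populated in parse_args() above.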
104 | if args.launcher == 'none': 105 | distributed = False 106 | else: 107 | distributed = True 108 | init_dist(args.launcher, **cfg.dist_params) 109 | 110 | # create work_dir 111 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 112 | # init the logger before other steps 113 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 114 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 115 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 116 | 117 | # init the meta dict to record some important information such as 118 | # environment info and seed, which will be logged 119 | meta = dict() 120 | # log env info 121 | env_info_dict = collect_env() 122 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 123 | dash_line = '-' * 60 + '\n' 124 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 125 | dash_line) 126 | meta['env_info'] = env_info 127 | 128 | # log some basic info 129 | logger.info(f'Distributed training: {distributed}') 130 | logger.info(f'Config:\n{cfg.pretty_text}') 131 | 132 | # set random seeds 133 | if args.seed is not None: 134 | logger.info(f'Set random seed to {args.seed}, ' 135 | f'deterministic: {args.deterministic}') 136 | set_random_seed(args.seed, deterministic=args.deterministic) 137 | cfg.seed = args.seed 138 | meta['seed'] = args.seed 139 | 140 | model = build_posenet(cfg.model) 141 | datasets = [build_dataset(cfg.data.train)] 142 | 143 | if len(cfg.workflow) == 2: 144 | val_dataset = copy.deepcopy(cfg.data.val) 145 | val_dataset.pipeline = cfg.data.train.pipeline 146 | datasets.append(build_dataset(val_dataset)) 147 | 148 | if cfg.checkpoint_config is not None: 149 | # save mmpose version and config file content in 150 | # checkpoints as meta data 151 | cfg.checkpoint_config.meta = dict( 152 | mmpose_version=__version__ + get_git_hash(digits=7), 153 | config=cfg.pretty_text, 154 | ) 155 | train_model( 156 | model, 157 | datasets, 158 | cfg, 159 | distributed=distributed, 160 | validate=(not args.no_validate), 161 | timestamp=timestamp, 162 | meta=meta) 163 | 164 | 165 | if __name__ == '__main__': 166 | main() 167 | -------------------------------------------------------------------------------- /tools/torchstat/analyzer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import OrderedDict 3 | from typing import Dict, Sequence 4 | import functools 5 | import itertools 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from .compute_madd import compute_madd 12 | from .compute_flops import compute_flops 13 | from .compute_memory import compute_memory 14 | from .stat_tree import StatTree, StatNode 15 | from .reporter import report_format 16 | 17 | 18 | class ModuleStats: 19 | 20 | def __init__(self, name) -> None: 21 | self.name = name 22 | self.start_time = 0.0 23 | self.end_time = 0.0 24 | self.inference_memory = 0 25 | self.input_shape: Sequence[int] = [] 26 | self.output_shape: Sequence[int] = [] 27 | self.MAdd = 0 28 | self.duration = 0.0 29 | self.Flops = 0 30 | self.Memory = 0, 0 31 | self.parameter_quantity = 0 32 | self.done = False 33 | 34 | 35 | def print_report(collected_nodes): 36 | report = report_format(collected_nodes) 37 | print(report) 38 | 39 | 40 | def analyze(model: nn.Module, input_size, query_granularity: int): 41 | assert isinstance(model, nn.Module) 42 | assert isinstance(input_size, (list, tuple)) 43 | 44 | pre_hooks, post_hooks = [], [] 45 | stats: OrderedDict[str,
ModuleStats] = OrderedDict() 46 | 47 | try: 48 | _for_leaf(model, _register_hooks, pre_hooks, post_hooks, stats) 49 | 50 | x = torch.rand(*input_size) # add module duration time 51 | x = x.to(next(model.parameters()).device) 52 | model.eval() 53 | model(x) 54 | 55 | stat_tree = _convert_leaf_modules_to_stat_tree(stats) 56 | 57 | return stat_tree.get_collected_stat_nodes(query_granularity) 58 | 59 | finally: 60 | for stat in stats.values(): 61 | stat.done = True 62 | for hook in itertools.chain(pre_hooks, post_hooks): 63 | hook.remove() 64 | 65 | 66 | def _for_leaf(model, fn, *args): 67 | for name, module in model.named_modules(): 68 | if len(list(module.children())) == 0: 69 | fn(name, module, *args) 70 | 71 | 72 | def _register_hooks(name: str, module: nn.Module, pre_hooks, post_hooks, 73 | stats): 74 | assert isinstance(module, nn.Module) and len(list(module.children())) == 0 75 | 76 | if name in stats: 77 | return 78 | 79 | module_stats = ModuleStats(name) 80 | stats[name] = module_stats 81 | 82 | post_hook = module.register_forward_hook( 83 | functools.partial(_forward_post_hook, module_stats)) 84 | post_hooks.append(post_hook) 85 | 86 | pre_hook = module.register_forward_pre_hook( 87 | functools.partial(_forward_pre_hook, module_stats)) 88 | pre_hooks.append(pre_hook) 89 | 90 | 91 | def _flatten(x): 92 | """Flattens the tree of tensors to flattened sequence of tensors""" 93 | if isinstance(x, torch.Tensor): 94 | return [x] 95 | if isinstance(x, Sequence): 96 | res = [] 97 | for xi in x: 98 | res += _flatten(xi) 99 | return res 100 | return [] 101 | 102 | 103 | def _forward_pre_hook(module_stats: ModuleStats, module: nn.Module, input): 104 | assert not module_stats.done 105 | module_stats.start_time = time.time() 106 | 107 | 108 | def _forward_post_hook(module_stats: ModuleStats, module: nn.Module, input, 109 | output): 110 | assert not module_stats.done 111 | 112 | module_stats.end_time = time.time() 113 | module_stats.duration = module_stats.end_time - module_stats.start_time 114 | 115 | inputs, outputs = _flatten(input), _flatten(output) 116 | module_stats.input_shape = inputs[0].size() 117 | module_stats.output_shape = outputs[0].size() 118 | 119 | parameter_quantity = 0 120 | # iterate through parameters and count num params 121 | for name, p in module.named_parameters(): 122 | parameter_quantity += (0 if p is None else torch.numel(p.data)) 123 | module_stats.parameter_quantity = parameter_quantity 124 | 125 | inference_memory = 1 126 | for oi in outputs: 127 | for s in oi.size(): 128 | inference_memory *= s 129 | # memory += parameters_number # exclude parameter memory 130 | inference_memory = inference_memory * 4 / (1024**2) # shown as MB unit 131 | module_stats.inference_memory = inference_memory 132 | module_stats.MAdd = compute_madd(module, inputs, outputs) 133 | module_stats.Flops = compute_flops(module, inputs, outputs) 134 | module_stats.Memory = compute_memory(module, inputs, outputs) 135 | 136 | return output 137 | 138 | 139 | def get_parent_node(root_node, stat_node_name): 140 | assert isinstance(root_node, StatNode) 141 | 142 | node = root_node 143 | names = stat_node_name.split('.') 144 | for i in range(len(names) - 1): 145 | node_name = '.'.join(names[0:i + 1]) 146 | child_index = node.find_child_index(node_name) 147 | assert child_index != -1 148 | node = node.children[child_index] 149 | return node 150 | 151 | 152 | def _convert_leaf_modules_to_stat_tree(leaf_modules): 153 | assert isinstance(leaf_modules, OrderedDict) 154 | 155 | create_index = 1 156 | root_node = 
StatNode(name='root', parent=None) 157 | for name, module_stats in leaf_modules.items(): 158 | names = name.split('.') 159 | for i in range(len(names)): 160 | create_index += 1 161 | stat_node_name = '.'.join(names[0:i + 1]) 162 | parent_node = get_parent_node(root_node, stat_node_name) 163 | node = StatNode(name=stat_node_name, parent=parent_node) 164 | parent_node.add_child(node) 165 | if i == len(names) - 1: # leaf module itself 166 | input_shape = module_stats.input_shape 167 | output_shape = module_stats.output_shape 168 | node.input_shape = input_shape 169 | node.output_shape = output_shape 170 | node.parameter_quantity = module_stats.parameter_quantity 171 | node.inference_memory = module_stats.inference_memory 172 | node.MAdd = module_stats.MAdd 173 | node.Flops = module_stats.Flops 174 | node.duration = module_stats.duration 175 | node.Memory = module_stats.Memory 176 | return StatTree(root_node) 177 | -------------------------------------------------------------------------------- /tools/torchstat/compute_flops.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | import math 5 | 6 | 7 | def compute_flops(module, inp, out): 8 | if isinstance(module, nn.Conv2d): 9 | return compute_Conv2d_flops(module, inp[0], out[0]) 10 | elif type(module).__name__ == 'ConvFunction': 11 | return compute_Conv2d_flops(module, inp[0], out[0]) 12 | elif type(module).__name__ == 'SplitKernelConvFunction': 13 | return compute_Conv2d_flops(module, inp[0], out[0]) 14 | elif isinstance(module, nn.ConvTranspose2d): 15 | return compute_ConvTranspose2d_flops(module, inp[0], out[0]) 16 | elif isinstance(module, nn.BatchNorm2d): 17 | return compute_BatchNorm2d_flops(module, inp[0], out[0]) 18 | elif isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)): 19 | return compute_Pool2d_flops(module, inp[0], out[0]) 20 | elif isinstance(module, (nn.AdaptiveAvgPool2d, nn.AdaptiveMaxPool2d)): 21 | return compute_adaptivepool_flops(module, inp[0], out[0]) 22 | elif isinstance(module, 23 | (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)): 24 | return compute_ReLU_flops(module, inp[0], out[0]) 25 | elif isinstance(module, nn.Upsample): 26 | return compute_Upsample_flops(module, inp[0], out[0]) 27 | elif isinstance(module, nn.Linear): 28 | return compute_Linear_flops(module, inp[0], out[0]) 29 | elif type(module).__name__ == 'MatMul': 30 | return compute_matmul_flops(module, inp, out) 31 | else: 32 | #print(f"[Flops]: {type(module).__name__} is not supported!") 33 | return 0 34 | 35 | 36 | 37 | def compute_matmul_flops(module, inp, out): 38 | x, y = inp 39 | batch_size = x.size(0) 40 | _, l, m = x.size() 41 | _, _, n = y.size() 42 | return batch_size * 2 * l * m * n 43 | 44 | 45 | def compute_Conv2d_flops(module, inp, out): 46 | # Can have multiple inputs, getting the first one 47 | # assert isinstance(module, nn.Conv2d) 48 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 49 | 50 | batch_size = inp.size()[0] 51 | in_c = inp.size()[1] 52 | k_h, k_w = module.kernel_size 53 | out_c, out_h, out_w = out.size()[1:] 54 | groups = module.groups 55 | 56 | filters_per_channel = out_c // groups 57 | conv_per_position_flops = k_h * k_w * in_c * filters_per_channel 58 | active_elements_count = batch_size * out_h * out_w 59 | 60 | total_conv_flops = conv_per_position_flops * active_elements_count 61 | 62 | bias_flops = 0 63 | if module.bias is not None: 64 | bias_flops = out_c * active_elements_count 65 |
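    # Grouped convolutions are already accounted for above, since
    # k_h * k_w * in_c * (out_c // groups) == k_h * k_w * (in_c // groups) * out_c
    # multiply-accumulates per output position; the bias adds one op per element.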
66 | total_flops = total_conv_flops + bias_flops 67 | return total_flops 68 | 69 | 70 | def compute_ConvTranspose2d_flops(module, inp, out): 71 | # Can have multiple inputs, getting the first one 72 | assert isinstance(module, nn.ConvTranspose2d) 73 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 74 | 75 | batch_size = inp.size()[0] 76 | in_h, in_w = inp.size()[2:] 77 | 78 | k_h, k_w = module.kernel_size 79 | in_c = module.in_channels 80 | out_c = module.out_channels 81 | groups = module.groups 82 | 83 | filters_per_channel = out_c // groups 84 | conv_per_position_flops = k_h * k_w * in_c * filters_per_channel 85 | active_elements_count = batch_size * in_h * in_w 86 | 87 | total_conv_flops = conv_per_position_flops * active_elements_count 88 | 89 | bias_flops = 0 90 | if module.bias is not None: 91 | out_h, out_w = out.size()[2:] 92 | bias_flops = out_c * batch_size * out_h * out_w 93 | 94 | total_flops = total_conv_flops + bias_flops 95 | 96 | return total_flops 97 | 98 | 99 | def compute_adaptivepool_flops(module, input, output): 100 | # credits: https://github.com/xternalz/SDPoint/blob/master/utils/flops.py 101 | batch_size = input.size(0) 102 | input_planes = input.size(1) 103 | input_height = input.size(2) 104 | input_width = input.size(3) 105 | 106 | flops = 0 107 | for i in range(output.size(2)): 108 | y_start = int(math.floor(float(i * input_height) / output.size(2))) 109 | y_end = int(math.ceil(float((i + 1) * input_height) / output.size(2))) 110 | for j in range(output.size(3)): 111 | x_start = int(math.floor(float(j * input_width) / output.size(3))) 112 | x_end = int( 113 | math.ceil(float((j + 1) * input_width) / output.size(3))) 114 | 115 | flops += batch_size * input_planes * (y_end - y_start + 1) * ( 116 | x_end - x_start + 1) 117 | return flops 118 | 119 | 120 | def compute_BatchNorm2d_flops(module, inp, out): 121 | assert isinstance(module, nn.BatchNorm2d) 122 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 123 | in_c, in_h, in_w = inp.size()[1:] 124 | batch_flops = np.prod(inp.shape) 125 | if module.affine: 126 | batch_flops *= 2 127 | return batch_flops 128 | 129 | 130 | def compute_ReLU_flops(module, inp, out): 131 | assert isinstance(module, 132 | (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)) 133 | batch_size = inp.size()[0] 134 | active_elements_count = batch_size 135 | 136 | for s in inp.size()[1:]: 137 | active_elements_count *= s 138 | 139 | return active_elements_count 140 | 141 | 142 | def compute_Pool2d_flops(module, input, out): 143 | batch_size = input.size(0) 144 | input_planes = input.size(1) 145 | input_height = input.size(2) 146 | input_width = input.size(3) 147 | kernel_size = ('int' in str(type(module.kernel_size))) and [ 148 | module.kernel_size, module.kernel_size 149 | ] or module.kernel_size 150 | kernel_ops = kernel_size[0] * kernel_size[1] 151 | stride = ('int' in str(type( 152 | module.stride))) and [module.stride, module.stride] or module.stride 153 | padding = ('int' in str(type(module.padding))) and [ 154 | module.padding, module.padding 155 | ] or module.padding 156 | 157 | # PyTorch stores 2-D pooling params as (height, width) pairs. 158 | output_width = math.floor((input_width + 2 * padding[1] - kernel_size[1]) / 159 | float(stride[1]) + 1) 160 | output_height = math.floor( 161 | (input_height + 2 * padding[0] - kernel_size[0]) / float(stride[0]) + 162 | 1) 163 | return batch_size * input_planes * output_width * output_height * kernel_ops 164 |
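# Note on conventions: most routines above count one multiply-accumulate per
# "FLOP" (e.g. compute_Conv2d_flops), while compute_matmul_flops counts
# multiplies and adds separately (2*l*m*n), so counts for different layer
# types are not strictly on the same scale.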
165 | 166 | def compute_Linear_flops(module, inp, out): 167 | assert isinstance(module, nn.Linear) 168 | assert len(inp.size()) == 2 and len(out.size()) == 2 169 | batch_size = inp.size()[0] 170 | return batch_size * inp.size()[1] * out.size()[1] 171 | 172 | 173 | def compute_Upsample_flops(module, inp, out): 174 | assert isinstance(module, nn.Upsample) 175 | output_size = out[0] 176 | batch_size = inp.size()[0] 177 | output_elements_count = batch_size 178 | for s in output_size.shape[1:]: 179 | output_elements_count *= s 180 | 181 | return output_elements_count 182 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # x-hrnet 2 | Official code for "X-HRNet: Towards Lightweight Human Pose Estimation with Spatially Unidimensional Self-Attention" 3 | 4 | ## Environment 5 | The code is developed using python 3.8 on Ubuntu 20.04. NVIDIA GPUs are needed. The code is developed and tested using 8 NVIDIA V100S GPU cards. Other platforms or GPU cards are not fully tested. 6 | ## Quick Start 7 | 8 | ### Requirements 9 | 10 | - Linux (Windows is not officially supported) 11 | - Python 3.8 12 | - PyTorch 1.8 13 | - CUDA 11.1 14 | - GCC 5+ 15 | - [mmcv](https://github.com/open-mmlab/mmcv) (Please install the latest version of mmcv-full) 16 | - Numpy 17 | - cv2 18 | - json_tricks 19 | - [xtcocotools](https://github.com/jin-s13/xtcocoapi) 20 | 21 | 22 | ### Installation 23 | 25 | 26 | a. Install mmcv. We recommend installing the pre-built mmcv-full as below. 27 | 28 | ```shell 29 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html 30 | ``` 31 | 32 | Please replace ``{cu_version}`` and ``{torch_version}`` in the url with your desired ones. For example, to install the latest ``mmcv-full`` with ``CUDA 11`` and ``PyTorch 1.8.0``, use the following command: 33 | 34 | ```shell 35 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html 36 | ``` 37 | 38 | If it compiles during installation, please check that the CUDA version and PyTorch version **exactly** match the versions in the mmcv-full installation command. For example, PyTorch 1.7.0 and 1.7.1 are treated differently. 39 | See [here](https://github.com/open-mmlab/mmcv#installation) for the MMCV versions compatible with different PyTorch and CUDA versions. 40 | 41 | Alternatively, you can install a pinned version of mmcv-full (compiled from source if no matching pre-built wheel exists) with the following command: 42 | 43 | ```shell 44 | pip install mmcv-full==1.3.9 45 | # alternative: pip install mmcv 46 | ``` 47 | **Important:** You need to run `pip uninstall mmcv` first if you have mmcv installed. If mmcv and mmcv-full are both installed, there will be a `ModuleNotFoundError`. 48 | 49 | b. Install build requirements 50 | 51 | ```shell 52 | pip install -r requirements.txt 53 | ``` 54 |
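Optionally, a quick smoke test confirms that mmcv is importable:

```shell
python -c 'import mmcv; print(mmcv.__version__)'
```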
55 | ### Prepare datasets 56 | 57 | It is recommended to symlink the dataset root to `$LITE_HRNET/data`. 58 | If your folder structure is different, you may need to change the corresponding paths in config files. 59 | 60 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. [HRNet-Human-Pose-Estimation](https://github.com/HRNet/HRNet-Human-Pose-Estimation) provides person detection results of COCO val2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-). 61 | Download and extract them under `$LITE_HRNET/data`, and make them look like this: 62 | 63 | ``` 64 | lite_hrnet 65 | ├── configs 66 | ├── models 67 | ├── tools 68 | `── data 69 | │── coco 70 | │-- annotations 71 | │ │-- person_keypoints_train2017.json 72 | │ |-- person_keypoints_val2017.json 73 | |-- person_detection_results 74 | | |-- COCO_val2017_detections_AP_H_56_person.json 75 | │-- train2017 76 | │ │-- 000000000009.jpg 77 | │ │-- 000000000025.jpg 78 | │ │-- 000000000030.jpg 79 | │ │-- ... 80 | `-- val2017 81 | │-- 000000000139.jpg 82 | │-- 000000000285.jpg 83 | │-- 000000000632.jpg 84 | │-- ... 85 | 86 | ``` 87 | 88 | ## Training and Testing 89 | All outputs (log files and checkpoints) will be saved to the working directory, 90 | which is specified by `work_dir` in the config file. 91 | 92 | By default, we evaluate the model on the validation set after each epoch; you can change the evaluation interval by modifying the `interval` argument in the training config: 93 | 94 | ```python 95 | evaluation = dict(interval=5) # This evaluates the model every 5 epochs. 96 | ``` 97 | 98 | According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you need to set the learning rate proportional to the batch size if you use a different number of GPUs or samples per GPU, e.g., lr=0.01 for 4 GPUs x 2 samples/gpu and lr=0.08 for 16 GPUs x 4 samples/gpu. 99 | 100 | ### Training 101 | 102 | ```shell 103 | # train with a single GPU 104 | python tools/train.py ${CONFIG_FILE} [optional arguments] 105 | 106 | # train with multiple GPUs 107 | ./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments] 108 | ``` 109 | 110 | Optional arguments are: 111 | 112 | - `--validate` (**strongly recommended**): Perform evaluation every k epochs (default value is 5) during the training. 113 | - `--work-dir ${WORK_DIR}`: Override the working directory specified in the config file. 114 | - `--resume-from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file. 115 | - `--gpus ${GPU_NUM}`: Number of gpus to use, which is only applicable to non-distributed training. 116 | - `--seed ${SEED}`: Seed id for random state in python, numpy and pytorch to generate random numbers. 117 | - `--deterministic`: If specified, it will set deterministic options for CUDNN backend. 118 | - `JOB_LAUNCHER`: Items for distributed job initialization launcher. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. Especially, if set to none, it will test in a non-distributed mode. 119 | - `LOCAL_RANK`: ID for local rank. If not specified, it will be set to 0. 120 | - `--autoscale-lr`: If specified, it will automatically scale lr with the number of gpus by [Linear Scaling Rule](https://arxiv.org/abs/1706.02677). 121 | 122 | Difference between `resume-from` and `load-from`: 123 | `resume-from` loads both the model weights and optimizer status, and the epoch is also inherited from the specified checkpoint. It is usually used for resuming a training process that was interrupted accidentally. 124 | `load-from` only loads the model weights and the training epoch starts from 0. It is usually used for finetuning. 125 | 126 | Examples: 127 | 128 | #### Training on COCO train2017 dataset 129 | ```shell 130 | ./tools/dist_train.sh configs/xhrnet/sxhrnet_18_coco_256x192.py 8 131 | ```
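For example, an interrupted 8-GPU run can be resumed by forwarding `--resume-from` (a hypothetical invocation; it assumes a checkpoint named `latest.pth` already exists under the configured work_dir):

```shell
./tools/dist_train.sh configs/xhrnet/sxhrnet_18_coco_256x192.py 8 \
    --resume-from work_dirs/xhrnt/sxhrnet_18_coco_256x192/latest.pth
```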
132 | 133 | ### Testing 134 | You can use the following commands to test a dataset. 135 | 136 | ```shell 137 | # single-gpu testing 138 | python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRIC}] \ 139 | [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \ 140 | [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}] 141 | 142 | # multiple-gpu testing 143 | ./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRIC}] \ 144 | [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \ 145 | [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}] 146 | ``` 147 | 148 | Optional arguments: 149 | 150 | - `RESULT_FILE`: Filename of the output results. If not specified, the results will not be saved to a file. 151 | - `EVAL_METRIC`: Items to be evaluated on the results. Allowed values depend on the dataset. 152 | - `NUM_PROC_PER_GPU`: Number of processes per GPU. If not specified, only one process will be assigned for a single gpu. 153 | - `--gpu_collect`: If specified, results will be collected using gpu communication. Otherwise, it will save the results on different gpus to `TMPDIR` and collect them by the rank 0 worker. 154 | - `TMPDIR`: Temporary directory used for collecting results from multiple workers, available when `--gpu_collect` is not specified. 155 | - `AVG_TYPE`: Items to average the test clips. If set to `prob`, it will apply softmax before averaging the clip scores. Otherwise, it will directly average the clip scores. 156 | - `JOB_LAUNCHER`: Items for distributed job initialization launcher. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. Especially, if set to none, it will test in a non-distributed mode. 157 | - `LOCAL_RANK`: ID for local rank. If not specified, it will be set to 0. 158 | 159 | Examples: 160 | #### Test SX-HRNet-18 on COCO with 8 GPUs, and evaluate the mAP. 161 | 162 | ```shell 163 | ./tools/dist_test.sh configs/xhrnet/sxhrnet_18_coco_256x192.py \ 164 | checkpoints/SOME_CHECKPOINT.pth 8 \ 165 | --eval mAP 166 | ``` 167 | 168 | ### Get the computational complexity 169 | You can use the following commands to compute the complexity of a model. 170 | ```shell 171 | python tools/summary_network.py ${CONFIG_FILE} --shape ${SHAPE} 172 | ``` 173 | 174 | Arguments: 175 | 176 | - `SHAPE`: Input size. 177 | 178 | Examples: 179 | 180 | #### Test the complexity of SX-HRNet-18 with 256x256 resolution input.
181 | 182 | ```shell 183 | python tools/summary_network.py configs/xhrnet/sxhrnet_18_coco_256x192.py --shape 256 256 184 | ``` 185 | 186 | ## Acknowledgement 187 | 188 | Thanks to: 189 | 190 | - [MMPose](https://github.com/open-mmlab/mmpose) 191 | - [HRNet](https://github.com/HRNet/deep-high-resolution-net.pytorch) 192 | - [Lite-HRNet](https://github.com/HRNet/Lite-HRNet) 193 | 194 | ## Citation 195 | 196 | If you use our code or models in your research, please cite with: 197 | ``` 198 | @inproceedings{xuan2022xhrnet, 199 | title={X-HRNet: Towards Lightweight Human Pose Estimation with Spatially Unidimensional Self-Attention}, 200 | author={Zhou, Yixuan and Wang, Xuanhan and Xu, Xing and Zhao, Lei and Song, Jingkuan}, 201 | booktitle={ICME}, 202 | year={2022} 203 | } 204 | ``` -------------------------------------------------------------------------------- /models/backbones/xhrnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, 5 | build_conv_layer, build_norm_layer, constant_init, 6 | normal_init) 7 | from torch.nn.modules.batchnorm import _BatchNorm 8 | import torch.utils.checkpoint as cp 9 | 10 | from mmpose.utils import get_root_logger 11 | from mmpose.models import BACKBONES 12 | from mmpose.models.backbones.resnet import BasicBlock, Bottleneck 13 | from mmpose.models.backbones.utils import load_checkpoint, channel_shuffle 14 | 15 | 16 | class HSUSA(nn.Module): 17 | 18 | def __init__(self, 19 | channels, 20 | ln_enable=True): 21 | super().__init__() 22 | 23 | self.conv = ConvModule( 24 | channels, 25 | channels, 26 | kernel_size=1, 27 | stride=1, 28 | padding=0, 29 | conv_cfg=dict(type='Conv2d'), 30 | act_cfg=None) 31 | self.mean_conv = ConvModule( 32 | channels, 33 | channels, 34 | groups=channels, 35 | kernel_size=1, 36 | stride=1, 37 | padding=0, 38 | conv_cfg=dict(type='Conv2d'), 39 | act_cfg=None) 40 | self.group_weight = nn.Parameter(torch.ones([1, 1, 96, 1]), requires_grad=True) 41 | 42 | self.ln_enable = ln_enable 43 | if ln_enable: 44 | self.ln = nn.LayerNorm(channels) 45 | self.sigmoid = nn.Sigmoid() 46 | 47 | def forward(self, x: torch.Tensor): 48 | 49 | h, w = x.shape[2:] 50 | xq = self.mean_conv((x * self.group_weight[:, :, :h]).mean(-2, keepdim=True)) # n,c,1,w 51 | xq = xq.transpose(2, 3).softmax(-2) # n,c,w,1 52 | xv = x # n,c,h,w 53 | 54 | atten = torch.matmul(xv, xq) # n,c,h,1 55 | atten = self.conv(atten) 56 | if self.ln_enable: 57 | atten = self.ln(atten.transpose(1, -1)).transpose(1, -1) 58 | atten = self.sigmoid(atten) 59 | 60 | return x * atten 61 | 62 | 63 | class WSUSA(nn.Module): 64 | 65 | def __init__(self, 66 | channels, 67 | ln_enable=True): 68 | super().__init__() 69 | 70 | self.conv = ConvModule( 71 | channels, 72 | channels, 73 | kernel_size=1, 74 | stride=1, 75 | padding=0, 76 | conv_cfg=dict(type='Conv2d'), 77 | act_cfg=None) 78 | self.mean_conv = ConvModule( 79 | channels, 80 | channels, 81 | groups=channels, 82 | kernel_size=1, 83 | stride=1, 84 | padding=0, 85 | conv_cfg=dict(type='Conv2d'), 86 | act_cfg=None) 87 | self.group_weight = nn.Parameter(torch.ones([1, 1, 1, 72]), requires_grad=True) 88 | self.ln_enable = ln_enable 89 | if ln_enable: 90 | self.ln = nn.LayerNorm(channels) 91 | self.sigmoid = nn.Sigmoid() 92 | 93 | def forward(self, x: torch.Tensor): 94 | 95 | h, w = x.shape[2:] 96 | xq = self.mean_conv((x * self.group_weight[:, :, :, :w]).mean(-1, 
keepdim=True)) # n,c,h,1 97 | xq = xq.softmax(-2) # n,c,h,1 98 | xv = x.transpose(2, 3) # n,c,w,h 99 | 100 | atten = torch.matmul(xv, xq).transpose(2, 3) # n,c,w,1 101 | atten = self.conv(atten) 102 | if self.ln_enable: 103 | atten = self.ln(atten.transpose(1, -1)).transpose(1, -1) 104 | atten = self.sigmoid(atten) 105 | 106 | return x * atten 107 | 108 | 109 | class HWSUSA(nn.Module): 110 | 111 | def __init__(self, 112 | channels, 113 | mode=None, 114 | ln_enable=False, 115 | hw_shuffle=False): 116 | super().__init__() 117 | assert mode in ['hw', 'wh', None] 118 | 119 | self.mode = mode 120 | self.hw_shuffle = hw_shuffle 121 | self.shuffle_groups = 4 122 | if mode == 'hw': 123 | h_channels = channels // 4 * 3 124 | w_channels = channels // 4 125 | elif mode == 'wh': 126 | h_channels = channels // 4 127 | w_channels = channels // 4 * 3 128 | else: 129 | h_channels = w_channels = channels // 2 130 | self.shuffle_groups = 2 131 | self.h_channels = h_channels 132 | self.w_channels = w_channels 133 | 134 | self.hconv = HSUSA(self.h_channels, ln_enable) 135 | self.wconv = WSUSA(self.w_channels, ln_enable) 136 | 137 | def forward(self, x): 138 | x1, x2 = x.split([self.h_channels, self.w_channels], dim=1) 139 | out = torch.cat([self.hconv(x1), self.wconv(x2)], dim=1) 140 | if self.hw_shuffle: 141 | out = channel_shuffle(out, self.shuffle_groups) 142 | 143 | return out 144 | 145 | 146 | class SXShuffleUnit(nn.Module): 147 | 148 | def __init__(self, 149 | in_channels, 150 | out_channels, 151 | stride=1, 152 | conv_cfg=None, 153 | norm_cfg=dict(type='BN'), 154 | act_cfg=None, 155 | with_cb=False, 156 | with_cp=False): 157 | super().__init__() 158 | self.stride = stride 159 | self.with_cb = with_cb 160 | self.with_cp = with_cp 161 | 162 | branch_features = out_channels // 2 163 | if self.stride == 1: 164 | assert in_channels == branch_features * 2, ( 165 | f'in_channels ({in_channels}) should equal to ' 166 | f'branch_features * 2 ({branch_features * 2}) ' 167 | 'when stride is 1') 168 | 169 | if in_channels != branch_features * 2: 170 | assert self.stride != 1, ( 171 | f'stride ({self.stride}) should not equal 1 when ' 172 | f'in_channels != branch_features * 2') 173 | 174 | self.branch2 = nn.Sequential( 175 | WSUSA(branch_features, ln_enable=True), 176 | ConvModule( 177 | branch_features, 178 | branch_features, 179 | kernel_size=3, 180 | stride=1, 181 | padding=1, 182 | groups=branch_features, 183 | conv_cfg=conv_cfg, 184 | norm_cfg=norm_cfg, 185 | act_cfg=None), 186 | HSUSA(branch_features, ln_enable=True)) 187 | 188 | def forward(self, x): 189 | 190 | def _inner_forward(x): 191 | x1, x2 = x.chunk(2, dim=1) 192 | out = torch.cat((x1, self.branch2(x2)), dim=1) 193 | 194 | out = channel_shuffle(out, 2) 195 | if self.with_cb: 196 | out = self.cb(out) 197 | 198 | return out 199 | 200 | if self.with_cp and x.requires_grad: 201 | out = cp.checkpoint(_inner_forward, x) 202 | else: 203 | out = _inner_forward(x) 204 | 205 | return out 206 | 207 | 208 | class PXShuffleUnit(nn.Module): 209 | 210 | def __init__(self, 211 | in_channels, 212 | out_channels, 213 | stride=1, 214 | conv_cfg=None, 215 | norm_cfg=dict(type='BN'), 216 | act_cfg=None, 217 | with_cb=False, 218 | with_cp=False): 219 | super().__init__() 220 | self.stride = stride 221 | self.with_cb = with_cb 222 | self.with_cp = with_cp 223 | 224 | branch_features = out_channels // 2 225 | if self.stride == 1: 226 | assert in_channels == branch_features * 2, ( 227 | f'in_channels ({in_channels}) should equal to ' 228 | f'branch_features * 2 
({branch_features * 2}) ' 229 | 'when stride is 1') 230 | 231 | if in_channels != branch_features * 2: 232 | assert self.stride != 1, ( 233 | f'stride ({self.stride}) should not equal 1 when ' 234 | f'in_channels != branch_features * 2') 235 | 236 | self.branch2 = nn.Sequential( 237 | HWSUSA( 238 | branch_features, 239 | ln_enable=True, 240 | hw_shuffle=True), 241 | ConvModule( 242 | branch_features, 243 | branch_features, 244 | kernel_size=3, 245 | stride=1, 246 | padding=1, 247 | groups=branch_features, 248 | conv_cfg=conv_cfg, 249 | norm_cfg=norm_cfg, 250 | act_cfg=None), 251 | HWSUSA( 252 | branch_features, 253 | ln_enable=True, 254 | hw_shuffle=True)) 255 | 256 | def forward(self, x): 257 | 258 | def _inner_forward(x): 259 | x1, x2 = x.chunk(2, dim=1) 260 | out = torch.cat((x1, self.branch2(x2)), dim=1) 261 | 262 | out = channel_shuffle(out, 2) 263 | if self.with_cb: 264 | out = self.cb(out) 265 | 266 | return out 267 | 268 | if self.with_cp and x.requires_grad: 269 | out = cp.checkpoint(_inner_forward, x) 270 | else: 271 | out = _inner_forward(x) 272 | 273 | return out 274 | 275 | 276 | class Stem(nn.Module): 277 | 278 | def __init__(self, 279 | in_channels, 280 | stem_channels, 281 | out_channels, 282 | expand_ratio, 283 | conv_cfg=None, 284 | norm_cfg=dict(type='BN'), 285 | with_cp=False): 286 | super().__init__() 287 | self.in_channels = in_channels 288 | self.out_channels = out_channels 289 | self.conv_cfg = conv_cfg 290 | self.norm_cfg = norm_cfg 291 | self.with_cp = with_cp 292 | 293 | self.conv1 = ConvModule( 294 | in_channels=in_channels, 295 | out_channels=stem_channels, 296 | kernel_size=3, 297 | stride=2, 298 | padding=1, 299 | conv_cfg=self.conv_cfg, 300 | norm_cfg=self.norm_cfg, 301 | act_cfg=dict(type='ReLU')) 302 | 303 | mid_channels = int(round(stem_channels * expand_ratio)) 304 | branch_channels = stem_channels // 2 305 | if stem_channels == self.out_channels: 306 | inc_channels = self.out_channels - branch_channels 307 | else: 308 | inc_channels = self.out_channels - stem_channels 309 | 310 | self.branch1 = nn.Sequential( 311 | ConvModule( 312 | branch_channels, 313 | branch_channels, 314 | kernel_size=3, 315 | stride=2, 316 | padding=1, 317 | groups=branch_channels, 318 | conv_cfg=conv_cfg, 319 | norm_cfg=norm_cfg, 320 | act_cfg=None), 321 | ConvModule( 322 | branch_channels, 323 | inc_channels, 324 | kernel_size=1, 325 | stride=1, 326 | padding=0, 327 | conv_cfg=conv_cfg, 328 | norm_cfg=norm_cfg, 329 | act_cfg=dict(type='ReLU')), 330 | ) 331 | 332 | self.branch2 = nn.Sequential( 333 | ConvModule( 334 | branch_channels, 335 | mid_channels, 336 | kernel_size=1, 337 | stride=1, 338 | padding=0, 339 | conv_cfg=conv_cfg, 340 | norm_cfg=norm_cfg, 341 | act_cfg=dict(type='ReLU')), 342 | ConvModule( 343 | mid_channels, 344 | mid_channels, 345 | kernel_size=3, 346 | stride=2, 347 | padding=1, 348 | groups=mid_channels, 349 | conv_cfg=conv_cfg, 350 | norm_cfg=norm_cfg, 351 | act_cfg=None), 352 | ConvModule( 353 | mid_channels, 354 | branch_channels 355 | if stem_channels == self.out_channels else stem_channels, 356 | kernel_size=1, 357 | stride=1, 358 | padding=0, 359 | conv_cfg=conv_cfg, 360 | norm_cfg=norm_cfg, 361 | act_cfg=dict(type='ReLU'))) 362 | 363 | def forward(self, x): 364 | 365 | def _inner_forward(x): 366 | x = self.conv1(x) 367 | x1, x2 = x.chunk(2, dim=1) 368 | 369 | out = torch.cat((self.branch1(x1), self.branch2(x2)), dim=1) 370 | out = channel_shuffle(out, 2) 371 | 372 | return out 373 | 374 | if self.with_cp and x.requires_grad: 375 | out = 


class Stem(nn.Module):
    """Stem network: a strided 3x3 conv followed by a ShuffleNetV2-style
    two-branch downsampling block (overall stride 4)."""

    def __init__(self,
                 in_channels,
                 stem_channels,
                 out_channels,
                 expand_ratio,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 with_cp=False):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.with_cp = with_cp

        self.conv1 = ConvModule(
            in_channels=in_channels,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=dict(type='ReLU'))

        mid_channels = int(round(stem_channels * expand_ratio))
        branch_channels = stem_channels // 2
        if stem_channels == self.out_channels:
            inc_channels = self.out_channels - branch_channels
        else:
            inc_channels = self.out_channels - stem_channels

        self.branch1 = nn.Sequential(
            ConvModule(
                branch_channels,
                branch_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                groups=branch_channels,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                branch_channels,
                inc_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=dict(type='ReLU')),
        )

        self.branch2 = nn.Sequential(
            ConvModule(
                branch_channels,
                mid_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=dict(type='ReLU')),
            ConvModule(
                mid_channels,
                mid_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                groups=mid_channels,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                mid_channels,
                branch_channels
                if stem_channels == self.out_channels else stem_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=dict(type='ReLU')))

    def forward(self, x):

        def _inner_forward(x):
            x = self.conv1(x)
            x1, x2 = x.chunk(2, dim=1)

            out = torch.cat((self.branch1(x1), self.branch2(x2)), dim=1)
            out = channel_shuffle(out, 2)

            return out

        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out


class IterativeHead(nn.Module):

    def __init__(self, in_channels, conv_cfg=None, norm_cfg=dict(type='BN')):
        super().__init__()
        projects = []
        num_branches = len(in_channels)
        self.in_channels = in_channels[::-1]

        for i in range(num_branches):
            if i != num_branches - 1:
                projects.append(
                    DepthwiseSeparableConvModule(
                        in_channels=self.in_channels[i],
                        out_channels=self.in_channels[i + 1],
                        kernel_size=3,
                        stride=1,
                        padding=1,
                        norm_cfg=norm_cfg,
                        act_cfg=dict(type='ReLU'),
                        dw_act_cfg=None,
                        pw_act_cfg=dict(type='ReLU')))
            else:
                projects.append(
                    DepthwiseSeparableConvModule(
                        in_channels=self.in_channels[i],
                        out_channels=self.in_channels[i],
                        kernel_size=3,
                        stride=1,
                        padding=1,
                        norm_cfg=norm_cfg,
                        act_cfg=dict(type='ReLU'),
                        dw_act_cfg=None,
                        pw_act_cfg=dict(type='ReLU')))
        self.projects = nn.ModuleList(projects)

    def forward(self, x):
        x = x[::-1]

        y = []
        last_x = None
        for i, s in enumerate(x):
            if last_x is not None:
                last_x = F.interpolate(
                    last_x,
                    size=s.size()[-2:],
                    mode='bilinear',
                    align_corners=True)
                s = s + last_x
            s = self.projects[i](s)
            y.append(s)
            last_x = s

        return y[::-1]
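
# Hedged sketch of IterativeHead's coarse-to-fine refinement: branches are
# processed from the lowest resolution upward, each result is bilinearly
# upsampled and added to the next finer branch before its own projection.
# The channel list and feature sizes below are illustrative assumptions:
#
#     >>> head = IterativeHead(in_channels=[40, 80, 160, 320])
#     >>> feats = [torch.rand(1, c, 64 // 2**i, 48 // 2**i)
#     ...          for i, c in enumerate([40, 80, 160, 320])]
#     >>> outs = head(feats)  # same order and spatial sizes as the inputs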


class ShuffleUnit(nn.Module):
    """InvertedResidual block for ShuffleNetV2 backbone.

    Args:
        in_channels (int): The input channels of the block.
        out_channels (int): The output channels of the block.
        stride (int): Stride of the 3x3 convolution layer. Default: 1
        conv_cfg (dict): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        super().__init__()
        self.stride = stride
        self.with_cp = with_cp

        branch_features = out_channels // 2
        if self.stride == 1:
            assert in_channels == branch_features * 2, (
                f'in_channels ({in_channels}) should equal '
                f'branch_features * 2 ({branch_features * 2}) '
                'when stride is 1')

        if in_channels != branch_features * 2:
            assert self.stride != 1, (
                f'stride ({self.stride}) should not equal 1 when '
                f'in_channels != branch_features * 2')

        if self.stride > 1:
            self.branch1 = nn.Sequential(
                ConvModule(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=self.stride,
                    padding=1,
                    groups=in_channels,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=None),
                ConvModule(
                    in_channels,
                    branch_features,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg),
            )

        self.branch2 = nn.Sequential(
            ConvModule(
                in_channels if (self.stride > 1) else branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            ConvModule(
                branch_features,
                branch_features,
                kernel_size=3,
                stride=self.stride,
                padding=1,
                groups=branch_features,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg))

    def forward(self, x):

        def _inner_forward(x):
            if self.stride > 1:
                out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
            else:
                x1, x2 = x.chunk(2, dim=1)
                out = torch.cat((x1, self.branch2(x2)), dim=1)

            out = channel_shuffle(out, 2)

            return out

        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out
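
# Hedged usage sketch for ShuffleUnit (sizes are illustrative assumptions).
# With stride > 1 both branches are applied and concatenated, halving the
# spatial size; with stride 1 half the channels pass through unchanged:
#
#     >>> down = ShuffleUnit(32, 64, stride=2)
#     >>> tuple(down(torch.rand(1, 32, 64, 48)).shape)
#     (1, 64, 32, 24)
#     >>> keep = ShuffleUnit(64, 64, stride=1)
#     >>> tuple(keep(torch.rand(1, 64, 32, 24)).shape)
#     (1, 64, 32, 24)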


class LiteHRModule(nn.Module):

    def __init__(
        self,
        num_branches,
        num_blocks,
        in_channels,
        module_type,
        multiscale_output=False,
        with_fuse=True,
        conv_cfg=None,
        norm_cfg=dict(type='BN'),
        with_cp=False,
    ):
        super().__init__()
        self._check_branches(num_branches, in_channels)

        self.in_channels = in_channels
        self.num_branches = num_branches

        self.multiscale_output = multiscale_output
        self.with_fuse = with_fuse
        self.norm_cfg = norm_cfg
        self.conv_cfg = conv_cfg
        self.with_cp = with_cp

        if module_type == 'naive':
            block = ShuffleUnit
        elif module_type == 'sequential':
            block = SXShuffleUnit
        elif module_type == 'parallel':
            block = PXShuffleUnit
        else:
            raise ValueError(f'Unsupported module_type: {module_type}')
        self.layers = self._make_branches(num_branches, num_blocks, block)
        if self.with_fuse:
            self.fuse_layers = self._make_fuse_layers()
            self.relu = nn.ReLU()

    def _check_branches(self, num_branches, in_channels):
        """Check input to avoid ValueError."""
        if num_branches != len(in_channels):
            error_msg = f'NUM_BRANCHES({num_branches}) ' \
                f'!= NUM_INCHANNELS({len(in_channels)})'
            raise ValueError(error_msg)

    def _make_one_branch(self, branch_index, num_blocks, block):
        """Make one branch."""
        layers = []
        for i in range(num_blocks):
            layers.append(
                block(
                    self.in_channels[branch_index],
                    self.in_channels[branch_index],
                    stride=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=dict(type='ReLU'),
                    with_cp=self.with_cp))

        return nn.Sequential(*layers)

    def _make_branches(self, num_branches, num_blocks, block):
        """Make branches."""
        branches = []

        for i in range(num_branches):
            branches.append(self._make_one_branch(i, num_blocks, block))

        return nn.ModuleList(branches)

    def _make_fuse_layers(self):
        """Make fuse layer."""
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        in_channels = self.in_channels
        fuse_layers = []
        num_out_branches = num_branches if self.multiscale_output else 1
        for i in range(num_out_branches):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            build_conv_layer(
                                self.conv_cfg,
                                in_channels[j],
                                in_channels[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            build_norm_layer(self.norm_cfg, in_channels[i])[1],
                            nn.Upsample(
                                scale_factor=2**(j - i), mode='nearest')))
                elif j == i:
                    fuse_layer.append(None)
                else:
                    conv_downsamples = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=in_channels[j],
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[i],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[i])[1]))
                        else:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=in_channels[j],
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[j],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    nn.ReLU(inplace=True)))

                    fuse_layer.append(nn.Sequential(*conv_downsamples))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)

    def forward(self, x):
        """Forward function."""
        if self.num_branches == 1:
            return [self.layers[0](x[0])]

        for i in range(self.num_branches):
            x[i] = self.layers[i](x[i])
        out = x

        if self.with_fuse:
            out_fuse = []
            for i in range(len(self.fuse_layers)):
                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
                # NOTE: branch 0 is accumulated twice (once when `y` is
                # initialised and again at j == 0); kept as-is to preserve
                # the original behaviour.
                for j in range(self.num_branches):
                    if i == j:
                        y += out[j]
                    else:
                        y += self.fuse_layers[i][j](out[j])
                out_fuse.append(self.relu(y))
            out = out_fuse
        elif not self.multiscale_output:
            out = [out[0]]
        return out
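
# Hedged usage sketch for LiteHRModule (channel and spatial sizes are
# illustrative assumptions). Each branch runs its blocks at its own
# resolution; the fuse layers then exchange information by 1x1 conv plus
# nearest upsampling (coarse -> fine) or strided depthwise downsampling
# (fine -> coarse):
#
#     >>> module = LiteHRModule(
#     ...     num_branches=2, num_blocks=2, in_channels=[40, 80],
#     ...     module_type='naive', multiscale_output=True)
#     >>> outs = module([torch.rand(1, 40, 64, 48), torch.rand(1, 80, 32, 24)])
#     >>> [tuple(o.shape) for o in outs]
#     [(1, 40, 64, 48), (1, 80, 32, 24)]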


@BACKBONES.register_module()
class XHRNet(nn.Module):
    """X-HRNet backbone.

    `High-Resolution Representations for Labeling Pixels and Regions
    <https://arxiv.org/abs/1904.04514>`_

    Args:
        extra (dict): detailed configuration for each stage of HRNet.
        in_channels (int): Number of input image channels. Default: 3.
        conv_cfg (dict): dictionary to construct and config conv layer.
        norm_cfg (dict): dictionary to construct and config norm layer.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
        zero_init_residual (bool): whether to use zero init for last norm
            layer in resblocks to let them behave as identity.

    Example (illustrative configuration; the values below follow the
    Lite-HRNet-style stage spec consumed in ``__init__``):
        >>> from models.backbones import XHRNet
        >>> import torch
        >>> extra = dict(
        >>>     stem=dict(
        >>>         stem_channels=32,
        >>>         out_channels=32,
        >>>         expand_ratio=1),
        >>>     num_stages=3,
        >>>     stages_spec=dict(
        >>>         num_modules=(2, 4, 2),
        >>>         num_branches=(2, 3, 4),
        >>>         num_blocks=(2, 2, 2),
        >>>         module_type=('sequential', 'sequential', 'sequential'),
        >>>         with_fuse=(True, True, True),
        >>>         num_channels=(
        >>>             (40, 80),
        >>>             (40, 80, 160),
        >>>             (40, 80, 160, 320),
        >>>         )),
        >>>     with_head=True)
        >>> self = XHRNet(extra, in_channels=3)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 256, 192)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 40, 64, 48)
    """

    def __init__(self,
                 extra,
                 in_channels=3,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 norm_eval=False,
                 with_cp=False,
                 zero_init_residual=False):
        super().__init__()
        self.extra = extra
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.zero_init_residual = zero_init_residual

        self.stem = Stem(
            in_channels,
            stem_channels=self.extra['stem']['stem_channels'],
            out_channels=self.extra['stem']['out_channels'],
            expand_ratio=self.extra['stem']['expand_ratio'],
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg)

        self.num_stages = self.extra['num_stages']
        self.stages_spec = self.extra['stages_spec']

        num_channels_last = [
            self.stem.out_channels,
        ]
        for i in range(self.num_stages):
            num_channels = self.stages_spec['num_channels'][i]
            num_channels = [num_channels[i] for i in range(len(num_channels))]
            setattr(
                self, 'transition{}'.format(i),
                self._make_transition_layer(num_channels_last, num_channels))

            stage, num_channels_last = self._make_stage(
                self.stages_spec, i, num_channels, multiscale_output=True)
            setattr(self, 'stage{}'.format(i), stage)

        self.with_head = self.extra['with_head']
        if self.with_head:
            self.head_layer = IterativeHead(
                in_channels=num_channels_last,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
            )

    def _make_transition_layer(self, num_channels_pre_layer,
                               num_channels_cur_layer):
        """Make transition layer."""
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            build_conv_layer(
                                self.conv_cfg,
                                num_channels_pre_layer[i],
                                num_channels_pre_layer[i],
                                kernel_size=3,
                                stride=1,
                                padding=1,
                                groups=num_channels_pre_layer[i],
                                bias=False),
                            build_norm_layer(self.norm_cfg,
                                             num_channels_pre_layer[i])[1],
                            build_conv_layer(
                                self.conv_cfg,
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            build_norm_layer(self.norm_cfg,
                                             num_channels_cur_layer[i])[1]))
                else:
                    transition_layers.append(None)
            else:
                conv_downsamples = []
                for j in range(i + 1 - num_branches_pre):
                    in_channels = num_channels_pre_layer[-1]
                    out_channels = num_channels_cur_layer[i] \
                        if j == i - num_branches_pre else in_channels
                    conv_downsamples.append(
                        nn.Sequential(
                            build_conv_layer(
                                self.conv_cfg,
                                in_channels,
                                in_channels,
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                groups=in_channels,
                                bias=False),
                            build_norm_layer(self.norm_cfg, in_channels)[1],
                            build_conv_layer(
                                self.conv_cfg,
                                in_channels,
                                out_channels,
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            build_norm_layer(self.norm_cfg, out_channels)[1]))
                transition_layers.append(nn.Sequential(*conv_downsamples))

        return nn.ModuleList(transition_layers)
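
    # Hedged illustration of the transition bookkeeping above: with
    # num_channels_pre_layer=[32] and num_channels_cur_layer=[40, 80],
    # branch 0 gets a depthwise 3x3 + 1x1 projection (32 -> 40), and one new
    # branch is created from the last pre-layer branch with a stride-2
    # depthwise downsample followed by a 1x1 projection (32 -> 80).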

    def _make_stage(self,
                    stages_spec,
                    stage_index,
                    in_channels,
                    multiscale_output=True):
        num_modules = stages_spec['num_modules'][stage_index]
        num_branches = stages_spec['num_branches'][stage_index]
        num_blocks = stages_spec['num_blocks'][stage_index]
        with_fuse = stages_spec['with_fuse'][stage_index]
        module_type = stages_spec['module_type'][stage_index]

        modules = []
        for i in range(num_modules):
            # multi_scale_output is only used in the last module
            if not multiscale_output and i == num_modules - 1:
                reset_multiscale_output = False
            else:
                reset_multiscale_output = True

            modules.append(
                LiteHRModule(
                    num_branches,
                    num_blocks,
                    in_channels,
                    module_type,
                    multiscale_output=reset_multiscale_output,
                    with_fuse=with_fuse,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    with_cp=self.with_cp))
            in_channels = modules[-1].in_channels

        return nn.Sequential(*modules), in_channels

    def init_weights(self, pretrained=None):
        """Initialize the weights in backbone.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    normal_init(m, std=0.001)
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, 1)

            if self.zero_init_residual:
                for m in self.modules():
                    if isinstance(m, Bottleneck):
                        constant_init(m.norm3, 0)
                    elif isinstance(m, BasicBlock):
                        constant_init(m.norm2, 0)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Forward function."""
        x = self.stem(x)

        y_list = [x]
        for i in range(self.num_stages):
            x_list = []
            transition = getattr(self, 'transition{}'.format(i))
            for j in range(self.stages_spec['num_branches'][i]):
                if transition[j]:
                    if j >= len(y_list):
                        x_list.append(transition[j](y_list[-1]))
                    else:
                        x_list.append(transition[j](y_list[j]))
                else:
                    x_list.append(y_list[j])
            y_list = getattr(self, 'stage{}'.format(i))(x_list)

        x = y_list
        if self.with_head:
            x = self.head_layer(x)

        return [x[0]]

    def train(self, mode=True):
        """Convert the model into training mode."""
        super().train(mode)
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
--------------------------------------------------------------------------------