├── requirements
│   ├── docs.txt
│   ├── build.txt
│   ├── optional.txt
│   ├── tests.txt
│   ├── readthedocs.txt
│   └── runtime.txt
├── tools
│   ├── torchstat
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── analyzer.cpython-37.pyc
│   │   │   ├── analyzer.cpython-38.pyc
│   │   │   ├── reporter.cpython-37.pyc
│   │   │   ├── reporter.cpython-38.pyc
│   │   │   ├── stat_tree.cpython-37.pyc
│   │   │   ├── stat_tree.cpython-38.pyc
│   │   │   ├── compute_madd.cpython-37.pyc
│   │   │   ├── compute_madd.cpython-38.pyc
│   │   │   ├── compute_flops.cpython-37.pyc
│   │   │   ├── compute_flops.cpython-38.pyc
│   │   │   ├── compute_memory.cpython-37.pyc
│   │   │   └── compute_memory.cpython-38.pyc
│   │   ├── README.md
│   │   ├── compute_memory.py
│   │   ├── reporter.py
│   │   ├── compute_madd.py
│   │   ├── stat_tree.py
│   │   ├── analyzer.py
│   │   └── compute_flops.py
│   ├── __pycache__
│   │   ├── torchstat_utils.cpython-37.pyc
│   │   └── torchstat_utils.cpython-38.pyc
│   ├── dist_train.sh
│   ├── dist_test.sh
│   ├── summary_network.py
│   ├── torchstat_utils.py
│   ├── test.py
│   └── train.py
├── models
│   ├── backbones
│   │   ├── __init__.py
│   │   ├── __pycache__
│   │   │   ├── xhrnet.cpython-38.pyc
│   │   │   ├── __init__.cpython-37.pyc
│   │   │   ├── __init__.cpython-38.pyc
│   │   │   ├── hwhrnet.cpython-37.pyc
│   │   │   ├── hwhrnet.cpython-38.pyc
│   │   │   ├── whthrnet.cpython-37.pyc
│   │   │   ├── whthrnet.cpython-38.pyc
│   │   │   ├── litehrnet.cpython-37.pyc
│   │   │   └── litehrnet.cpython-38.pyc
│   │   └── xhrnet.py
│   ├── __pycache__
│   │   ├── __init__.cpython-38.pyc
│   │   └── builder.cpython-38.pyc
│   ├── __init__.py
│   └── builder.py
├── requirements.txt
├── configs
│   └── xhrnet
│       ├── pxhrnet_30_coco_256x192.py
│       ├── pxhrnet_30_coco_384x288.py
│       ├── sxhrnet_18_coco_256x192.py
│       ├── sxhrnet_18_coco_384x288.py
│       ├── sxhrnet_30_coco_256x192.py
│       └── sxhrnet_30_coco_384x288.py
└── README.md

/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | recommonmark
2 | sphinx
3 | sphinx_markdown_tables
4 | sphinx_rtd_theme
--------------------------------------------------------------------------------
/requirements/build.txt:
--------------------------------------------------------------------------------
1 | # These must be installed before building mmpose
2 | numpy
3 | torch>=1.3
--------------------------------------------------------------------------------
/tools/torchstat/__init__.py:
--------------------------------------------------------------------------------
1 | from .analyzer import analyze
2 | 
3 | __all__ = [
4 |     'analyze',
5 | ]
--------------------------------------------------------------------------------
/models/backbones/__init__.py:
--------------------------------------------------------------------------------
1 | from .xhrnet import XHRNet
2 | 
3 | __all__ = [
4 |     'XHRNet',
5 | ]
6 | 
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/builder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/__pycache__/builder.cpython-38.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements/build.txt
2 | -r 
requirements/runtime.txt 3 | -r requirements/tests.txt 4 | -r requirements/optional.txt 5 | -------------------------------------------------------------------------------- /requirements/optional.txt: -------------------------------------------------------------------------------- 1 | albumentations>=0.3.2 2 | onnx 3 | onnxruntime 4 | poseval@git+https://github.com/svenkreiss/poseval.git 5 | smplx 6 | -------------------------------------------------------------------------------- /models/backbones/__pycache__/xhrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/xhrnet.cpython-38.pyc -------------------------------------------------------------------------------- /tools/__pycache__/torchstat_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/__pycache__/torchstat_utils.cpython-37.pyc -------------------------------------------------------------------------------- /tools/__pycache__/torchstat_utils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/__pycache__/torchstat_utils.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/hwhrnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/hwhrnet.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/hwhrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/hwhrnet.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/whthrnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/whthrnet.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/whthrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/whthrnet.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/analyzer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/analyzer.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/analyzer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/analyzer.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/reporter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/reporter.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/reporter.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/reporter.cpython-38.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/stat_tree.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/stat_tree.cpython-37.pyc -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/stat_tree.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/stat_tree.cpython-38.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/litehrnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/litehrnet.cpython-37.pyc -------------------------------------------------------------------------------- /models/backbones/__pycache__/litehrnet.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/models/backbones/__pycache__/litehrnet.cpython-38.pyc -------------------------------------------------------------------------------- /requirements/tests.txt: -------------------------------------------------------------------------------- 1 | coverage 2 | flake8 3 | interrogate 4 | isort==4.3.21 5 | pytest 6 | pytest-runner 7 | smplx 8 | xdoctest >= 0.10.0 9 | yapf 10 | -------------------------------------------------------------------------------- /tools/torchstat/__pycache__/compute_madd.cpython-37.pyc: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_madd.cpython-37.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_madd.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_madd.cpython-38.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_flops.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_flops.cpython-37.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_flops.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_flops.cpython-38.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_memory.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_memory.cpython-37.pyc
--------------------------------------------------------------------------------
/tools/torchstat/__pycache__/compute_memory.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cool-xuan/x-hrnet/HEAD/tools/torchstat/__pycache__/compute_memory.cpython-38.pyc
--------------------------------------------------------------------------------
/requirements/readthedocs.txt:
--------------------------------------------------------------------------------
1 | mmcv-full
2 | munkres
3 | poseval@git+https://github.com/svenkreiss/poseval.git
4 | scipy
5 | titlecase
6 | torch
7 | torchvision
8 | xtcocotools>=1.6
--------------------------------------------------------------------------------
/requirements/runtime.txt:
--------------------------------------------------------------------------------
1 | chumpy
2 | dataclasses; python_version == '3.6'
3 | json_tricks
4 | matplotlib
5 | munkres
6 | numpy
7 | opencv-python
8 | pillow
9 | scipy
10 | torchvision
11 | xtcocotools>=1.6
12 | future
13 | tensorboard
14 | mmpose
--------------------------------------------------------------------------------
/tools/torchstat/README.md:
--------------------------------------------------------------------------------
1 | # Credits
2 | 
3 | Code in this folder is almost as-is from the torchstat repository located at https://github.com/Swall0w/torchstat. 
4 | 5 | Additional merges are from: 6 | - https://github.com/kenshohara/torchstat 7 | - https://github.com/lyakaap/torchstat -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbones import * # noqa 2 | from .builder import (build_backbone, build_head, build_loss, build_neck, 3 | build_posenet) 4 | 5 | __all__ = [ 6 | 'build_backbone', 'build_head', 7 | 'build_loss', 'build_posenet', 'build_neck' 8 | ] 9 | -------------------------------------------------------------------------------- /tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29600} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /models/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import build_from_cfg 2 | from torch import nn 3 | 4 | from mmpose.models.builder import BACKBONES, HEADS, LOSSES, NECKS, POSENETS 5 | 6 | 7 | def build(cfg, registry, default_args=None): 8 | """Build a module. 9 | 10 | Args: 11 | cfg (dict, list[dict]): The config of modules, it is either a dict 12 | or a list of configs. 13 | registry (:obj:`Registry`): A registry the module belongs to. 14 | default_args (dict, optional): Default arguments to build the module. 15 | Defaults to None. 16 | 17 | Returns: 18 | nn.Module: A built nn module. 
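Example:
    A hypothetical pair of registered configs: ``build([cfg_a, cfg_b],
    BACKBONES)`` builds both via ``build_from_cfg`` and wraps them in
    ``nn.Sequential``; a single dict yields a single module.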
19 |     """
20 | 
21 |     if isinstance(cfg, list):
22 |         modules = [
23 |             build_from_cfg(cfg_, registry, default_args) for cfg_ in cfg
24 |         ]
25 |         return nn.Sequential(*modules)
26 | 
27 |     return build_from_cfg(cfg, registry, default_args)
28 | 
29 | 
30 | def build_backbone(cfg):
31 |     """Build backbone."""
32 |     return build(cfg, BACKBONES)
33 | 
34 | 
35 | def build_neck(cfg):
36 |     """Build neck."""
37 |     return build(cfg, NECKS)
38 | 
39 | 
40 | def build_head(cfg):
41 |     """Build head."""
42 |     return build(cfg, HEADS)
43 | 
44 | 
45 | def build_loss(cfg):
46 |     """Build loss."""
47 |     return build(cfg, LOSSES)
48 | 
49 | 
50 | def build_posenet(cfg):
51 |     """Build posenet."""
52 |     return build(cfg, POSENETS)
53 | 
--------------------------------------------------------------------------------
/tools/summary_network.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | # import tensorwatch as tw
3 | 
4 | from mmcv import Config
5 | from mmcv.cnn import get_model_complexity_info
6 | from torchstat_utils import model_stats
7 | 
8 | import sys
9 | sys.path.append('.')
10 | from models import build_posenet
11 | 
12 | 
13 | def parse_args():
14 |     parser = argparse.ArgumentParser(description='Summarize a network')
15 |     parser.add_argument('config', help='train config file path')
16 |     parser.add_argument(
17 |         '--shape',
18 |         type=int,
19 |         nargs='+',
20 |         default=[256, 192],
21 |         help='input image size')
22 |     parser.add_argument(
23 |         '--method',
24 |         type=str,
25 |         choices=['torchstat', 'mmcv'],
26 |         default='torchstat',
27 |     )
28 |     parser.add_argument('--out-file', type=str,
29 |                         help='Output file name')
30 |     args = parser.parse_args()
31 |     return args
32 | 
33 | 
34 | def main():
35 | 
36 |     args = parse_args()
37 | 
38 |     if len(args.shape) == 1:
39 |         input_shape = (1, 3, args.shape[0], args.shape[0])
40 |     elif len(args.shape) == 2:
41 |         input_shape = (1, 3, ) + tuple(args.shape)
42 |     else:
43 |         raise ValueError('invalid input shape')
44 | 
45 |     cfg = Config.fromfile(args.config)
46 |     model = build_posenet(cfg.model)
47 |     model.eval()
48 | 
49 |     if hasattr(model, 'forward_dummy'):
50 |         model.forward = model.forward_dummy
51 |     else:
52 |         raise NotImplementedError(
53 |             'FLOPs counter is currently not supported with {}'.
54 |             format(model.__class__.__name__))
55 | 
56 |     if args.method == 'torchstat':
57 |         df = model_stats(model, input_shape)
58 |         print(df)
59 |         if args.out_file:
60 |             df.to_html(args.out_file + '.html')
61 |             df.to_csv(args.out_file + '.csv')
62 |     elif args.method == 'mmcv':
63 |         flops, params = get_model_complexity_info(model, input_shape[1:], False)
64 |         split_line = '=' * 30
65 |         print('{0}\nInput shape: {1}\nFlops: {2}\nParams: {3}\n{0}'.format(
66 |             split_line, input_shape, flops, params))
67 | 
68 |     print('!!!Please be cautious if you use the results in papers. 
' 69 | 'You may need to check if all ops are supported and verify that the ' 70 | 'flops computation is correct.') 71 | 72 | 73 | if __name__ == '__main__': 74 | main() 75 | -------------------------------------------------------------------------------- /tools/torchstat/compute_memory.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | 5 | 6 | def compute_memory(module, inp, out): 7 | if isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU)): 8 | return compute_ReLU_memory(module, inp[0], out[0]) 9 | elif isinstance(module, nn.PReLU): 10 | return compute_PReLU_memory(module, inp[0], out[0]) 11 | elif isinstance(module, nn.Conv2d): 12 | return compute_Conv2d_memory(module, inp[0], out[0]) 13 | elif isinstance(module, nn.BatchNorm2d): 14 | return compute_BatchNorm2d_memory(module, inp[0], out[0]) 15 | elif isinstance(module, nn.Linear): 16 | return compute_Linear_memory(module, inp[0], out[0]) 17 | elif isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)): 18 | return compute_Pool2d_memory(module, inp[0], out[0]) 19 | else: 20 | #print(f"[Memory]: {type(module).__name__} is not supported!") 21 | return 0, 0 22 | pass 23 | 24 | 25 | def num_params(module): 26 | return sum(p.numel() for p in module.parameters() if p.requires_grad) 27 | 28 | 29 | def compute_ReLU_memory(module, inp, out): 30 | assert isinstance(module, (nn.ReLU, nn.ReLU6, nn.ELU, nn.LeakyReLU)) 31 | 32 | mread = inp.numel() 33 | mwrite = out.numel() 34 | 35 | return mread * inp.element_size(), mwrite * out.element_size() 36 | 37 | 38 | def compute_PReLU_memory(module, inp, out): 39 | assert isinstance(module, nn.PReLU) 40 | 41 | batch_size = inp.size()[0] 42 | mread = batch_size * (inp[0].numel() + num_params(module)) 43 | mwrite = out.numel() 44 | 45 | return mread * inp.element_size(), mwrite * out.element_size() 46 | 47 | 48 | def compute_Conv2d_memory(module, inp, out): 49 | # Can have multiple inputs, getting the first one 50 | assert isinstance(module, nn.Conv2d) 51 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 52 | 53 | batch_size = inp.size()[0] 54 | 55 | # This includes weights with bias if the module contains it. 56 | mread = batch_size * (inp[0].numel() + num_params(module)) 57 | mwrite = out.numel() 58 | 59 | return mread * inp.element_size(), mwrite * out.element_size() 60 | 61 | 62 | def compute_BatchNorm2d_memory(module, inp, out): 63 | assert isinstance(module, nn.BatchNorm2d) 64 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 65 | 66 | batch_size, in_c, in_h, in_w = inp.size() 67 | mread = batch_size * (inp[0].numel() + 2 * in_c) 68 | mwrite = out.numel() 69 | 70 | return mread * inp.element_size(), mwrite * out.element_size() 71 | 72 | 73 | def compute_Linear_memory(module, inp, out): 74 | assert isinstance(module, nn.Linear) 75 | assert len(inp.size()) == 2 and len(out.size()) == 2 76 | 77 | batch_size = inp.size()[0] 78 | 79 | # This includes weights with bias if the module contains it. 
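# Worked example (illustrative): for a float32 nn.Linear(1024, 1000) with bias on
# an (8, 1024) input, mread = 8 * (1024 + 1_025_000) elements, i.e. ~32.8 MB once
# scaled by element_size() in the return below.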
80 |     mread = batch_size * (inp[0].numel() + num_params(module))
81 |     mwrite = out.numel()
82 | 
83 |     return mread * inp.element_size(), mwrite * out.element_size()
84 | 
85 | 
86 | def compute_Pool2d_memory(module, inp, out):
87 |     assert isinstance(module, (nn.MaxPool2d, nn.AvgPool2d))
88 |     assert len(inp.size()) == 4 and len(inp.size()) == len(out.size())
89 | 
90 |     mread = inp.numel()
91 |     mwrite = out.numel()
92 | 
93 |     return mread * inp.element_size(), mwrite * out.element_size()
94 | 
--------------------------------------------------------------------------------
/tools/torchstat/reporter.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | 
3 | pd.set_option('display.width', 1000)
4 | pd.set_option('display.max_rows', 10000)
5 | pd.set_option('display.max_columns', 10000)
6 | 
7 | 
8 | def round_value(value, binary=False):
9 |     divisor = 1024. if binary else 1000.
10 | 
11 |     if value // divisor**4 > 0:
12 |         return str(round(value / divisor**4, 2)) + 'T'
13 |     elif value // divisor**3 > 0:
14 |         return str(round(value / divisor**3, 2)) + 'G'
15 |     elif value // divisor**2 > 0:
16 |         return str(round(value / divisor**2, 2)) + 'M'
17 |     elif value // divisor > 0:
18 |         return str(round(value / divisor, 2)) + 'K'
19 |     return str(value)
20 | 
21 | 
22 | def report_format(collected_nodes):
23 |     data = list()
24 |     for node in collected_nodes:
25 |         name = node.name
26 |         input_shape = ' '.join(
27 |             ['{:>3d}'] *
28 |             len(node.input_shape)).format(*[e for e in node.input_shape])
29 |         output_shape = ' '.join(
30 |             ['{:>3d}'] *
31 |             len(node.output_shape)).format(*[e for e in node.output_shape])
32 |         parameter_quantity = node.parameter_quantity
33 |         inference_memory = node.inference_memory
34 |         MAdd = node.MAdd
35 |         Flops = node.Flops
36 |         mread, mwrite = [i for i in node.Memory]
37 |         duration = node.duration
38 |         data.append([
39 |             name, input_shape, output_shape, parameter_quantity,
40 |             inference_memory, MAdd, duration, Flops, mread, mwrite
41 |         ])
42 |     df = pd.DataFrame(data)
43 |     df.columns = [
44 |         'module name', 'input shape', 'output shape', 'params', 'memory(MB)',
45 |         'MAdd', 'duration', 'Flops', 'MemRead(B)', 'MemWrite(B)'
46 |     ]
47 |     df['duration[%]'] = df['duration'] / (df['duration'].sum() + 1e-7)
48 |     df['MemR+W(B)'] = df['MemRead(B)'] + df['MemWrite(B)']
49 |     total_parameters_quantity = df['params'].sum()
50 |     total_memory = df['memory(MB)'].sum()
51 |     total_operation_quantity = df['MAdd'].sum()
52 |     total_flops = df['Flops'].sum()
53 |     total_duration = df['duration[%]'].sum()
54 |     total_mread = df['MemRead(B)'].sum()
55 |     total_mwrite = df['MemWrite(B)'].sum()
56 |     total_memrw = df['MemR+W(B)'].sum()
57 |     del df['duration']
58 | 
59 |     # Add Total row (use the accumulated totals, not the last node's values)
60 |     total_df = pd.Series([
61 |         total_parameters_quantity, total_memory, total_operation_quantity,
62 |         total_flops, total_duration, total_mread, total_mwrite, total_memrw
63 |     ],
64 |                          index=[
65 |                              'params', 'memory(MB)', 'MAdd', 'Flops',
66 |                              'duration[%]', 'MemRead(B)', 'MemWrite(B)',
67 |                              'MemR+W(B)'
68 |                          ],
69 |                          name='total')
70 |     df = df.append(total_df)
71 | 
72 |     df = df.fillna(' ')
73 |     df['memory(MB)'] = df['memory(MB)'].apply(lambda x: '{:.2f}'.format(x))
74 |     df['duration[%]'] = df['duration[%]'].apply(lambda x: '{:.2%}'.format(x))
75 |     df['MAdd'] = df['MAdd'].apply(lambda x: '{:,}'.format(x))
76 |     df['Flops'] = df['Flops'].apply(lambda x: '{:,}'.format(x))
77 | 
78 |     summary = str(df) + '\n'
79 |     summary += "=" * len(str(df).split('\n')[0])
80 |     summary += '\n'
81 |     summary += "Total params: 
{:,}\n".format(total_parameters_quantity) 82 | 83 | summary += "-" * len(str(df).split('\n')[0]) 84 | summary += '\n' 85 | summary += "Total memory: {:.2f}MB\n".format(total_memory) 86 | summary += "Total MAdd: {}MAdd\n".format( 87 | round_value(total_operation_quantity)) 88 | summary += "Total Flops: {}Flops\n".format(round_value(total_flops)) 89 | summary += "Total MemR+W: {}B\n".format(round_value(total_memrw, True)) 90 | return summary 91 | -------------------------------------------------------------------------------- /tools/torchstat_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from torchstat import analyze 5 | import pandas as pd 6 | import copy 7 | 8 | 9 | class LayerStats: 10 | 11 | def __init__(self, node) -> None: 12 | self.name = node.name 13 | self.input_shape = node.input_shape 14 | self.output_shape = node.output_shape 15 | self.parameters = node.parameter_quantity 16 | self.inference_memory = node.inference_memory 17 | self.MAdd = node.MAdd 18 | self.Flops = node.Flops 19 | self.mread, self.mwrite = node.Memory[0], node.Memory[1] 20 | self.duration = node.duration 21 | 22 | 23 | class ModelStats(LayerStats): 24 | 25 | def __init__(self, model, input_shape, clone_model=False) -> None: 26 | if clone_model: 27 | model = copy.deepcopy(model) 28 | collected_nodes = analyze(model, input_shape, 1) 29 | self.layer_stats = [] 30 | for node in collected_nodes: 31 | self.layer_stats.append(LayerStats(node)) 32 | 33 | self.name = 'Model' 34 | self.input_shape = input_shape 35 | self.output_shape = self.layer_stats[-1].output_shape 36 | self.parameters = sum((l.parameters for l in self.layer_stats)) 37 | self.inference_memory = sum( 38 | (l.inference_memory for l in self.layer_stats)) 39 | self.MAdd = sum((l.MAdd for l in self.layer_stats)) 40 | self.Flops = sum((l.Flops for l in self.layer_stats)) 41 | self.mread = sum((l.mread for l in self.layer_stats)) 42 | self.mwrite = sum((l.mwrite for l in self.layer_stats)) 43 | self.duration = sum((l.duration for l in self.layer_stats)) 44 | 45 | 46 | def model_stats(model, input_shape): 47 | ms = ModelStats(model, input_shape) 48 | return model_stats2df(ms) 49 | 50 | 51 | def _round_value(value, binary=False): 52 | divisor = 1024. if binary else 1000. 
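# e.g., _round_value(3_507_000) -> '3.51M'; _round_value(2048, binary=True) -> '2.0K'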
53 | 
54 |     if value // divisor**4 > 0:
55 |         return str(round(value / divisor**4, 2)) + 'T'
56 |     elif value // divisor**3 > 0:
57 |         return str(round(value / divisor**3, 2)) + 'G'
58 |     elif value // divisor**2 > 0:
59 |         return str(round(value / divisor**2, 2)) + 'M'
60 |     elif value // divisor > 0:
61 |         return str(round(value / divisor, 2)) + 'K'
62 |     return str(value)
63 | 
64 | 
65 | def model_stats2df(model_stats: ModelStats):
66 |     pd.set_option('display.width', 1000)
67 |     pd.set_option('display.max_rows', 10000)
68 |     pd.set_option('display.max_columns', 10000)
69 | 
70 |     df = pd.DataFrame([l.__dict__ for l in model_stats.layer_stats])
71 |     total_df = pd.Series(model_stats.__dict__, name='Total')
72 |     df = df.append(total_df[df.columns], ignore_index=True)
73 | 
74 |     df = df.fillna(' ')
75 |     # df['memory(MB)'] = df['memory(MB)'].apply(
76 |     #     lambda x: '{:.2f}'.format(x))
77 |     # df['duration[%]'] = df['duration[%]'].apply(lambda x: '{:.2%}'.format(x))
78 |     for c in [
79 |             'MAdd', 'Flops', 'parameters', 'inference_memory', 'mread',
80 |             'mwrite'
81 |     ]:
82 |         if c == 'Flops':
83 |             df[c] = df[c].apply(lambda x: _round_value(x, True))
84 |         elif c == 'parameters':
85 |             df[c] = df[c].apply(lambda x: _round_value(x))
86 |         else:
87 |             df[c] = df[c].apply(lambda x: '{:,}'.format(x))
88 | 
89 |     df.rename(
90 |         columns={
91 |             'name': 'module name',
92 |             'input_shape': 'input shape',
93 |             'output_shape': 'output shape',
94 |             'inference_memory': 'infer memory(MB)',
95 |             'mread': 'MemRead(B)',
96 |             'mwrite': 'MemWrite(B)'
97 |         },
98 |         inplace=True)
99 | 
100 |     #summary = "Total params: {:,}\n".format(total_parameters_quantity)
101 | 
102 |     #summary += "-" * len(str(df).split('\n')[0])
103 |     #summary += '\n'
104 |     #summary += "Total memory: {:.2f}MB\n".format(total_memory)
105 |     #summary += "Total MAdd: {}MAdd\n".format(_round_value(total_operation_quantity))
106 |     #summary += "Total Flops: {}Flops\n".format(_round_value(total_flops))
107 |     #summary += "Total MemR+W: {}B\n".format(_round_value(total_memrw, True))
108 |     return df
--------------------------------------------------------------------------------
/tools/test.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import os.path as osp
4 | 
5 | import mmcv
6 | import torch
7 | from mmcv import Config, DictAction
8 | from mmcv.cnn import fuse_conv_bn
9 | from mmcv.parallel import MMDataParallel, MMDistributedDataParallel
10 | from mmcv.runner import get_dist_info, init_dist, load_checkpoint
11 | 
12 | from mmpose.apis import multi_gpu_test, single_gpu_test
13 | from mmpose.core import wrap_fp16_model
14 | from mmpose.datasets import build_dataloader, build_dataset
15 | from models import build_posenet
16 | 
17 | 
18 | def parse_args():
19 |     parser = argparse.ArgumentParser(description='mmpose test model')
20 |     parser.add_argument('config', help='test config file path')
21 |     parser.add_argument('checkpoint', help='checkpoint file')
22 |     parser.add_argument('--out', help='output result file')
23 |     parser.add_argument(
24 |         '--fuse-conv-bn',
25 |         action='store_true',
26 |         help='Whether to fuse conv and bn, this will slightly increase '
27 |         'the inference speed')
28 |     parser.add_argument(
29 |         '--eval',
30 |         default=None,
31 |         nargs='+',
32 |         help='evaluation metric, which depends on the dataset,'
33 |         ' e.g., "mAP" for MSCOCO')
34 |     parser.add_argument(
35 |         '--gpu_collect',
36 |         action='store_true',
37 |         help='whether to use gpu to collect results')
38 |     parser.add_argument('--tmpdir', help='tmp 
dir for writing some results') 39 | parser.add_argument( 40 | '--cfg-options', 41 | nargs='+', 42 | action=DictAction, 43 | default={}, 44 | help='override some settings in the used config, the key-value pair ' 45 | 'in xxx=yyy format will be merged into config file. For example, ' 46 | "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'") 47 | parser.add_argument( 48 | '--launcher', 49 | choices=['none', 'pytorch', 'slurm', 'mpi'], 50 | default='none', 51 | help='job launcher') 52 | parser.add_argument('--local_rank', type=int, default=0) 53 | args = parser.parse_args() 54 | if 'LOCAL_RANK' not in os.environ: 55 | os.environ['LOCAL_RANK'] = str(args.local_rank) 56 | return args 57 | 58 | 59 | def merge_configs(cfg1, cfg2): 60 | # Merge cfg2 into cfg1 61 | # Overwrite cfg1 if repeated, ignore if value is None. 62 | cfg1 = {} if cfg1 is None else cfg1.copy() 63 | cfg2 = {} if cfg2 is None else cfg2 64 | for k, v in cfg2.items(): 65 | if v: 66 | cfg1[k] = v 67 | return cfg1 68 | 69 | 70 | def main(): 71 | args = parse_args() 72 | 73 | cfg = Config.fromfile(args.config) 74 | 75 | if args.cfg_options is not None: 76 | cfg.merge_from_dict(args.cfg_options) 77 | 78 | # set cudnn_benchmark 79 | if cfg.get('cudnn_benchmark', False): 80 | torch.backends.cudnn.benchmark = True 81 | cfg.model.pretrained = None 82 | cfg.data.test.test_mode = True 83 | 84 | # args.work_dir = osp.join('./work_dirs', 85 | # osp.splitext(osp.basename(args.config))[0]) 86 | args.work_dir = cfg.work_dir 87 | mmcv.mkdir_or_exist(osp.abspath(args.work_dir)) 88 | 89 | # init distributed env first, since logger depends on the dist info. 90 | if args.launcher == 'none': 91 | distributed = False 92 | else: 93 | distributed = True 94 | init_dist(args.launcher, **cfg.dist_params) 95 | 96 | # build the dataloader 97 | dataset = build_dataset(cfg.data.test, dict(test_mode=True)) 98 | dataloader_setting = dict( 99 | samples_per_gpu=1, 100 | workers_per_gpu=cfg.data.get('workers_per_gpu', 1), 101 | dist=distributed, 102 | shuffle=False, 103 | drop_last=False) 104 | dataloader_setting = dict(dataloader_setting, 105 | **cfg.data.get('test_dataloader', {})) 106 | data_loader = build_dataloader(dataset, **dataloader_setting) 107 | 108 | # build the model and load checkpoint 109 | model = build_posenet(cfg.model) 110 | fp16_cfg = cfg.get('fp16', None) 111 | if fp16_cfg is not None: 112 | wrap_fp16_model(model) 113 | load_checkpoint(model, args.checkpoint, map_location='cpu') 114 | 115 | if args.fuse_conv_bn: 116 | model = fuse_conv_bn(model) 117 | 118 | if not distributed: 119 | model = MMDataParallel(model, device_ids=[0]) 120 | outputs = single_gpu_test(model, data_loader) 121 | else: 122 | model = MMDistributedDataParallel( 123 | model.cuda(), 124 | device_ids=[torch.cuda.current_device()], 125 | broadcast_buffers=False) 126 | outputs = multi_gpu_test(model, data_loader, args.tmpdir, 127 | args.gpu_collect) 128 | 129 | rank, _ = get_dist_info() 130 | eval_config = cfg.get('evaluation', {}) 131 | eval_config = merge_configs(eval_config, dict(metric=args.eval)) 132 | 133 | if rank == 0: 134 | if args.out: 135 | print(f'\nwriting results to {args.out}') 136 | mmcv.dump(outputs, args.out) 137 | 138 | print(dataset.evaluate(outputs, args.work_dir, **eval_config)) 139 | 140 | 141 | if __name__ == '__main__': 142 | main() 143 | -------------------------------------------------------------------------------- /tools/torchstat/compute_madd.py: -------------------------------------------------------------------------------- 1 | """ 
2 | compute Multiply-Adds(MAdd) of each leaf module 3 | """ 4 | 5 | import torch.nn as nn 6 | 7 | 8 | def compute_Conv2d_madd(module, inp, out): 9 | assert isinstance(module, nn.Conv2d) 10 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 11 | 12 | in_c = inp.size()[1] 13 | k_h, k_w = module.kernel_size 14 | out_c, out_h, out_w = out.size()[1:] 15 | groups = module.groups 16 | 17 | # ops per output element 18 | kernel_mul = k_h * k_w * (in_c // groups) 19 | kernel_add = kernel_mul - 1 + (0 if module.bias is None else 1) 20 | 21 | kernel_mul_group = kernel_mul * out_h * out_w * (out_c // groups) 22 | kernel_add_group = kernel_add * out_h * out_w * (out_c // groups) 23 | 24 | total_mul = kernel_mul_group * groups 25 | total_add = kernel_add_group * groups 26 | 27 | return total_mul + total_add 28 | 29 | 30 | def compute_ConvTranspose2d_madd(module, inp, out): 31 | assert isinstance(module, nn.ConvTranspose2d) 32 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 33 | 34 | in_c, in_h, in_w = inp.size()[1:] 35 | k_h, k_w = module.kernel_size 36 | out_c, out_h, out_w = out.size()[1:] 37 | groups = module.groups 38 | 39 | kernel_mul = k_h * k_w * (in_c // groups) 40 | kernel_add = kernel_mul - 1 + (0 if module.bias is None else 1) 41 | 42 | kernel_mul_group = kernel_mul * in_h * in_w * (out_c // groups) 43 | kernel_add_group = kernel_add * in_h * in_w * (out_c // groups) 44 | 45 | total_mul = kernel_mul_group * groups 46 | total_add = kernel_add_group * groups 47 | 48 | return total_mul + total_add 49 | 50 | 51 | def compute_BatchNorm2d_madd(module, inp, out): 52 | assert isinstance(module, nn.BatchNorm2d) 53 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 54 | 55 | in_c, in_h, in_w = inp.size()[1:] 56 | 57 | # 1. sub mean 58 | # 2. div standard deviation 59 | # 3. mul alpha 60 | # 4. 
add beta 61 | return 4 * in_c * in_h * in_w 62 | 63 | 64 | def compute_MaxPool2d_madd(module, inp, out): 65 | assert isinstance(module, nn.MaxPool2d) 66 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 67 | 68 | if isinstance(module.kernel_size, (tuple, list)): 69 | k_h, k_w = module.kernel_size 70 | else: 71 | k_h, k_w = module.kernel_size, module.kernel_size 72 | out_c, out_h, out_w = out.size()[1:] 73 | 74 | return (k_h * k_w - 1) * out_h * out_w * out_c 75 | 76 | 77 | def compute_AvgPool2d_madd(module, inp, out): 78 | assert isinstance(module, nn.AvgPool2d) 79 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 80 | 81 | if isinstance(module.kernel_size, (tuple, list)): 82 | k_h, k_w = module.kernel_size 83 | else: 84 | k_h, k_w = module.kernel_size, module.kernel_size 85 | out_c, out_h, out_w = out.size()[1:] 86 | 87 | kernel_add = k_h * k_w - 1 88 | kernel_avg = 1 89 | 90 | return (kernel_add + kernel_avg) * (out_h * out_w) * out_c 91 | 92 | 93 | def compute_ReLU_madd(module, inp, out): 94 | assert isinstance(module, (nn.ReLU, nn.ReLU6)) 95 | 96 | count = 1 97 | for i in inp.size()[1:]: 98 | count *= i 99 | return count 100 | 101 | 102 | def compute_Softmax_madd(module, inp, out): 103 | assert isinstance(module, nn.Softmax) 104 | assert len(inp.size()) > 1 105 | 106 | count = 1 107 | for s in inp.size()[1:]: 108 | count *= s 109 | exp = count 110 | add = count - 1 111 | div = count 112 | return exp + add + div 113 | 114 | 115 | def compute_Linear_madd(module, inp, out): 116 | assert isinstance(module, nn.Linear) 117 | assert len(inp.size()) == 2 and len(out.size()) == 2 118 | 119 | num_in_features = inp.size()[1] 120 | num_out_features = out.size()[1] 121 | 122 | mul = num_in_features 123 | add = num_in_features - 1 124 | return num_out_features * (mul + add) 125 | 126 | 127 | def compute_Bilinear_madd(module, inp1, inp2, out): 128 | assert isinstance(module, nn.Bilinear) 129 | assert len(inp1.size()) == 2 and len(inp2.size()) == 2 and len( 130 | out.size()) == 2 131 | 132 | num_in_features_1 = inp1.size()[1] 133 | num_in_features_2 = inp2.size()[1] 134 | num_out_features = out.size()[1] 135 | 136 | mul = num_in_features_1 * num_in_features_2 + num_in_features_2 137 | add = num_in_features_1 * num_in_features_2 + num_in_features_2 - 1 138 | return num_out_features * (mul + add) 139 | 140 | 141 | def compute_madd(module, inp, out): 142 | if isinstance(module, nn.Conv2d): 143 | return compute_Conv2d_madd(module, inp[0], out[0]) 144 | elif isinstance(module, nn.ConvTranspose2d): 145 | return compute_ConvTranspose2d_madd(module, inp[0], out[0]) 146 | elif isinstance(module, nn.BatchNorm2d): 147 | return compute_BatchNorm2d_madd(module, inp[0], out[0]) 148 | elif isinstance(module, nn.MaxPool2d): 149 | return compute_MaxPool2d_madd(module, inp[0], out[0]) 150 | elif isinstance(module, nn.AvgPool2d): 151 | return compute_AvgPool2d_madd(module, inp[0], out[0]) 152 | elif isinstance(module, (nn.ReLU, nn.ReLU6)): 153 | return compute_ReLU_madd(module, inp[0], out[0]) 154 | elif isinstance(module, nn.Softmax): 155 | return compute_Softmax_madd(module, inp[0], out[0]) 156 | elif isinstance(module, nn.Linear): 157 | return compute_Linear_madd(module, inp[0], out[0]) 158 | elif isinstance(module, nn.Bilinear): 159 | return compute_Bilinear_madd(module, inp[0], inp[1], out) 160 | else: 161 | #print(f"[MAdd]: {type(module).__name__} is not supported!") 162 | return 0 163 | -------------------------------------------------------------------------------- 
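Since compute_Conv2d_madd above is the template for the other counters, here is a quick numeric sanity check: a minimal sketch, assuming PyTorch is installed and tools/torchstat is importable as the ``torchstat`` package; the layer and tensor shapes are illustrative, not taken from this repo's configs. Note that multiplies and adds are counted separately, so one multiply-accumulate contributes two MAdds.

import torch
import torch.nn as nn

from torchstat.compute_madd import compute_Conv2d_madd  # assumes tools/ is on sys.path

# 3x3 depthwise conv: groups == in_c == out_c == 16, no bias
dw = nn.Conv2d(16, 16, kernel_size=3, padding=1, groups=16, bias=False)
x = torch.randn(1, 16, 8, 8)
y = dw(x)

muls_per_elem = 3 * 3 * (16 // 16)                    # 9 multiplies per output element
madds_per_elem = muls_per_elem + (muls_per_elem - 1)  # 17 MAdds (no bias term)
expected = madds_per_elem * y.numel()                 # 17 * 1024 = 17408

assert compute_Conv2d_madd(dw, x, y) == expected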
/configs/xhrnet/pxhrnet_30_coco_256x192.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/pxhrnet_30_coco_256x192" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(3, 8, 3), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('parallel', 'parallel', 'parallel'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[192, 256], 85 | heatmap_size=[48, 64], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[192, 256], 103 | heatmap_size=[48, 64], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | 
type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=2), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/pxhrnet_30_coco_384x288.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/pxhrnet_30_coco_384x288" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(2, 4, 2), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('parallel', 'parallel', 'parallel'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 
80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[288, 384], 85 | heatmap_size=[72, 96], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[288, 384], 103 | heatmap_size=[72, 96], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=3), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | 
ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_18_coco_256x192.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_18_coco_256x192" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(2, 4, 2), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[192, 256], 85 | heatmap_size=[48, 64], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[192, 256], 103 | heatmap_size=[48, 64], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 
'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=2), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_18_coco_384x288.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_18_coco_384x288" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 
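# (assumption: down_ratio=4 below means the stem downsamples the input
# resolution 4x before stage 1, as in HRNet-style stems; see
# models/backbones/xhrnet.py)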
51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(2, 4, 2), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[288, 384], 85 | heatmap_size=[72, 96], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[288, 384], 103 | heatmap_size=[72, 96], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=3), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | 
type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_30_coco_256x192.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_30_coco_256x192" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(3, 8, 3), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[192, 256], 85 | heatmap_size=[48, 64], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[192, 256], 103 | heatmap_size=[48, 64], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | 
inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=2), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4, 167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /configs/xhrnet/sxhrnet_30_coco_384x288.py: -------------------------------------------------------------------------------- 1 | log_level = 'INFO' 2 | load_from = None 3 | resume_from = None 4 | dist_params = dict(backend='nccl') 5 | workflow = [('train', 1)] 6 | checkpoint_config = dict(interval=10) 7 | evaluation = dict(interval=10, metric='mAP') 8 | work_dir = "work_dirs/xhrnt/sxhrnet_30_coco_384x288" 9 | 10 | optimizer = dict( 11 | type='Adam', 12 | lr=2e-3, 13 | ) 14 | optimizer_config = dict(grad_clip=None) 15 | # learning policy 16 | lr_config = dict( 17 | policy='step', 18 | # warmup=None, 19 | warmup='linear', 20 | warmup_iters=500, 21 | warmup_ratio=0.001, 22 | step=[170, 200]) 23 | total_epochs = 210 24 | log_config = dict( 25 | interval=50, hooks=[ 26 | dict(type='TextLoggerHook'), 27 | dict(type='TensorboardLoggerHook') 28 | ]) 29 | 30 | channel_cfg = dict( 31 | num_output_channels=17, 32 | dataset_joints=17, 33 | dataset_channel=[ 34 | [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16], 35 | ], 36 | inference_channel=[ 37 | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16 38 | ]) 39 | 40 | # model settings 41 | model = dict( 42 | type='TopDown', 43 | pretrained=None, 44 | backbone=dict( 45 | type='XHRNet', 46 | in_channels=3, 47 | extra=dict( 48 | stem=dict( 49 | stem_channels=32, 50 | out_channels=32, 51 | expand_ratio=1, 52 | down_ratio=4), 53 | num_stages=3, 54 | stages_spec=dict( 55 | num_modules=(3, 8, 3), 56 | num_branches=(2, 3, 4), 57 | num_blocks=(2, 2, 2), 58 | module_type=('sequential', 'sequential', 'sequential'), 59 | with_fuse=(True, True, True), 60 | num_channels=( 61 | (40, 80), 62 | (40, 80, 160), 63 | (40, 80, 160, 320), 64 | )), 65 | with_head=True, 66 | )), 67 | keypoint_head=dict( 68 | type='TopdownHeatmapSimpleHead', 69 | in_channels=40, 70 | out_channels=channel_cfg['num_output_channels'], 71 | num_deconv_layers=0, 72 | extra=dict(final_conv_kernel=1, ), 73 | ), 74 | train_cfg=dict(), 75 | test_cfg=dict( 76 | flip_test=True, 77 | post_process=True, 78 | shift_heatmap=True, 79 | unbiased_decoding=False, 80 | modulate_kernel=11), 81 | loss_pose=dict(type='JointsMSELoss', use_target_weight=True)) 82 | 83 | data_cfg = dict( 84 | image_size=[288, 384], 85 | heatmap_size=[72, 96], 86 | num_output_channels=channel_cfg['num_output_channels'], 87 | num_joints=channel_cfg['dataset_joints'], 88 | dataset_channel=channel_cfg['dataset_channel'], 89 | inference_channel=channel_cfg['inference_channel'], 90 | soft_nms=False, 91 | nms_thr=1.0, 92 | oks_thr=0.9, 93 | vis_thr=0.2, 94 | bbox_thr=1.0, 95 | use_gt_bbox=True, 96 | image_thr=0.0, 97 | bbox_file='data/coco/person_detection_results/' 98 | 'COCO_val2017_detections_AP_H_56_person.json', 99 | ) 100 | 101 | val_data_cfg = dict( 102 | image_size=[288, 384], 103 | heatmap_size=[72, 96], 104 | num_output_channels=channel_cfg['num_output_channels'], 105 | num_joints=channel_cfg['dataset_joints'], 106 | dataset_channel=channel_cfg['dataset_channel'], 107 | inference_channel=channel_cfg['inference_channel'], 108 | soft_nms=False, 109 | nms_thr=1.0, 110 | oks_thr=0.9, 111 | vis_thr=0.2, 112 | bbox_thr=1.0, 113 | use_gt_bbox=True, 114 | image_thr=0.0, 115 | bbox_file='data/coco/person_detection_results/' 116 | 'COCO_val2017_detections_AP_H_56_person.json', 117 | ) 118 | 119 | train_pipeline = [ 120 | dict(type='LoadImageFromFile'), 121 | dict(type='TopDownRandomFlip', flip_prob=0.5), 122 | dict( 123 | type='TopDownHalfBodyTransform', 124 | num_joints_half_body=8, 125 | prob_half_body=0.3), 126 | dict( 127 | type='TopDownGetRandomScaleRotation', rot_factor=30, scale_factor=0.25), 128 | dict(type='TopDownAffine'), 129 | dict(type='ToTensor'), 130 | dict( 131 | type='NormalizeTensor', 132 | mean=[0.485, 0.456, 0.406], 133 | std=[0.229, 0.224, 0.225]), 134 | dict(type='TopDownGenerateTarget', sigma=3), 135 | dict( 136 | type='Collect', 137 | keys=['img', 'target', 'target_weight'], 138 | meta_keys=[ 139 | 'image_file', 'joints_3d', 'joints_3d_visible', 'center', 'scale', 140 | 'rotation', 'bbox_score', 'flip_pairs' 141 | ]), 142 | ] 143 | 144 | val_pipeline = [ 145 | dict(type='LoadImageFromFile'), 146 | dict(type='TopDownAffine'), 147 | dict(type='ToTensor'), 148 | dict( 149 | type='NormalizeTensor', 150 | mean=[0.485, 0.456, 0.406], 151 | std=[0.229, 0.224, 0.225]), 152 | dict( 153 | type='Collect', 154 | keys=[ 155 | 'img', 156 | ], 157 | meta_keys=[ 158 | 'image_file', 'center', 'scale', 'rotation', 'bbox_score', 159 | 'flip_pairs' 160 | ]), 161 | ] 162 | test_pipeline = val_pipeline 163 | data_root = 'data/coco' 164 | data = dict( 165 | samples_per_gpu=64, 166 | workers_per_gpu=4,
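    # Effective batch size = samples_per_gpu x num_gpus (64 x 8 = 512 with the
    # 8-GPU setup described in the README); per the Linear Scaling Rule cited
    # there, adjust lr in the optimizer above if this product changes.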
167 | train=dict( 168 | type='TopDownCocoDataset', 169 | ann_file=f'{data_root}/annotations/person_keypoints_train2017.json', 170 | img_prefix=f'{data_root}/train2017/', 171 | data_cfg=data_cfg, 172 | pipeline=train_pipeline), 173 | val=dict( 174 | type='TopDownCocoDataset', 175 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 176 | img_prefix=f'{data_root}/val2017/', 177 | data_cfg=val_data_cfg, 178 | pipeline=val_pipeline), 179 | test=dict( 180 | type='TopDownCocoDataset', 181 | ann_file=f'{data_root}/annotations/person_keypoints_val2017.json', 182 | img_prefix=f'{data_root}/val2017/', 183 | data_cfg=data_cfg, 184 | pipeline=val_pipeline), 185 | ) 186 | 187 | find_unused_parameters=False -------------------------------------------------------------------------------- /tools/torchstat/stat_tree.py: -------------------------------------------------------------------------------- 1 | import queue 2 | 3 | 4 | class StatTree(object): 5 | 6 | def __init__(self, root_node): 7 | assert isinstance(root_node, StatNode) 8 | 9 | self.root_node = root_node 10 | 11 | def get_same_level_max_node_depth(self, query_node): 12 | if query_node.name == self.root_node.name: 13 | return 0 14 | same_level_depth = max( 15 | [child.depth for child in query_node.parent.children]) 16 | return same_level_depth 17 | 18 | def update_stat_nodes_granularity(self): 19 | q = queue.Queue() 20 | q.put(self.root_node) 21 | while not q.empty(): 22 | node = q.get() 23 | node.granularity = self.get_same_level_max_node_depth(node) 24 | for child in node.children: 25 | q.put(child) 26 | 27 | def get_collected_stat_nodes(self, query_granularity): 28 | self.update_stat_nodes_granularity() 29 | 30 | collected_nodes = [] 31 | stack = list() 32 | stack.append(self.root_node) 33 | while len(stack) > 0: 34 | node = stack.pop() 35 | for child in reversed(node.children): 36 | stack.append(child) 37 | if node.depth == query_granularity: 38 | collected_nodes.append(node) 39 | if node.depth < query_granularity <= node.granularity: 40 | collected_nodes.append(node) 41 | return collected_nodes 42 | 43 | 44 | class StatNode(object): 45 | 46 | def __init__(self, name=str(), parent=None): 47 | self._name = name 48 | self._input_shape = None 49 | self._output_shape = None 50 | self._parameter_quantity = 0 51 | self._inference_memory = 0 52 | self._MAdd = 0 53 | self._Memory = (0, 0) 54 | self._Flops = 0 55 | self._duration = 0 56 | self._duration_percent = 0 57 | 58 | self._granularity = 1 59 | self._depth = 1 60 | self.parent = parent 61 | self.children = list() 62 | 63 | @property 64 | def name(self): 65 | return self._name 66 | 67 | @name.setter 68 | def name(self, name): 69 | self._name = name 70 | 71 | @property 72 | def granularity(self): 73 | return self._granularity 74 | 75 | @granularity.setter 76 | def granularity(self, g): 77 | self._granularity = g 78 | 79 | @property 80 | def depth(self): 81 | d = self._depth 82 | if len(self.children) > 0: 83 | d += max([child.depth for child in self.children]) 84 | return d 85 | 86 | @property 87 | def input_shape(self): 88 | if len(self.children) == 0: # leaf 89 | return self._input_shape 90 | else: 91 | return self.children[0].input_shape 92 | 93 | @input_shape.setter 94 | def input_shape(self, input_shape): 95 | assert isinstance(input_shape, (list, tuple)) 96 | self._input_shape = input_shape 97 | 98 | @property 99 | def output_shape(self): 100 | if len(self.children) == 0: # leaf 101 | return self._output_shape 102 | else: 103 | return self.children[-1].output_shape 104 | 
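    # For composite (non-leaf) nodes, shapes are delegated to the children
    # (first child's input shape, last child's output shape), while the
    # quantities below (params, memory, MAdd, Flops, duration) are summed
    # recursively over the subtree.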
105 | @output_shape.setter 106 | def output_shape(self, output_shape): 107 | assert isinstance(output_shape, (list, tuple)) 108 | self._output_shape = output_shape 109 | 110 | @property 111 | def parameter_quantity(self): 112 | # return self.parameters_quantity 113 | total_parameter_quantity = self._parameter_quantity 114 | for child in self.children: 115 | total_parameter_quantity += child.parameter_quantity 116 | return total_parameter_quantity 117 | 118 | @parameter_quantity.setter 119 | def parameter_quantity(self, parameter_quantity): 120 | assert parameter_quantity >= 0 121 | self._parameter_quantity = parameter_quantity 122 | 123 | @property 124 | def inference_memory(self): 125 | total_inference_memory = self._inference_memory 126 | for child in self.children: 127 | total_inference_memory += child.inference_memory 128 | return total_inference_memory 129 | 130 | @inference_memory.setter 131 | def inference_memory(self, inference_memory): 132 | self._inference_memory = inference_memory 133 | 134 | @property 135 | def MAdd(self): 136 | total_MAdd = self._MAdd 137 | for child in self.children: 138 | total_MAdd += child.MAdd 139 | return total_MAdd 140 | 141 | @MAdd.setter 142 | def MAdd(self, MAdd): 143 | self._MAdd = MAdd 144 | 145 | @property 146 | def Flops(self): 147 | total_Flops = self._Flops 148 | for child in self.children: 149 | total_Flops += child.Flops 150 | return total_Flops 151 | 152 | @Flops.setter 153 | def Flops(self, Flops): 154 | self._Flops = Flops 155 | 156 | @property 157 | def Memory(self): 158 | # copy to a list: the stored value is a tuple, which does not support
item assignment 159 | total_Memory = list(self._Memory) 160 | for child in self.children: 161 | total_Memory[0] += child.Memory[0] 162 | total_Memory[1] += child.Memory[1] 163 | return tuple(total_Memory) 164 | 165 | @Memory.setter 166 | def Memory(self, Memory): 167 | assert isinstance(Memory, (list, tuple)) 168 | self._Memory = Memory 169 | 170 | @property 171 | def duration(self): 172 | total_duration = self._duration 173 | for child in self.children: 174 | total_duration += child.duration 175 | return total_duration 176 | 177 | @duration.setter 178 | def duration(self, duration): 179 | self._duration = duration 180 | 181 | def find_child_index(self, child_name): 182 | assert isinstance(child_name, str) 183 | 184 | index = -1 185 | for i in range(len(self.children)): 186 | if child_name == self.children[i].name: 187 | index = i 188 | return index 189 | 190 | def add_child(self, node): 191 | assert isinstance(node, StatNode) 192 | 193 | if self.find_child_index(node.name) == -1: # not exist 194 | self.children.append(node) 195 |
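# Minimal usage sketch (illustrative only; this block is not used anywhere in
# the repo and the node names below are hypothetical):
if __name__ == '__main__':
    root = StatNode(name='root')
    conv = StatNode(name='root.conv', parent=root)
    root.add_child(conv)
    conv.parameter_quantity = 64  # leaf statistics set directly
    conv.Flops = 1000
    tree = StatTree(root)
    assert root.parameter_quantity == 64  # aggregated from the leaf
    print([n.name for n in tree.get_collected_stat_nodes(1)])  # ['root.conv']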
-------------------------------------------------------------------------------- /tools/train.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import copy 3 | import os 4 | import os.path as osp 5 | import time 6 | 7 | import mmcv 8 | import torch 9 | from mmcv import Config, DictAction 10 | from mmcv.runner import init_dist, set_random_seed 11 | from mmcv.utils import get_git_hash 12 | 13 | from mmpose import __version__ 14 | from mmpose.apis import train_model 15 | from mmpose.datasets import build_dataset 16 | from models import build_posenet 17 | from mmpose.utils import collect_env, get_root_logger 18 | 19 | 20 | def parse_args(): 21 | parser = argparse.ArgumentParser(description='Train a pose model') 22 | parser.add_argument('config', help='train config file path') 23 | parser.add_argument('--work-dir', help='the dir to save logs and models') 24 | parser.add_argument( 25 | '--resume-from', help='the checkpoint file to resume from') 26 | parser.add_argument( 27 | '--no-validate', 28 | action='store_true', 29 | help='whether not to evaluate the checkpoint during training') 30 | group_gpus = parser.add_mutually_exclusive_group() 31 | group_gpus.add_argument( 32 | '--gpus', 33 | type=int, 34 | help='number of gpus to use ' 35 | '(only applicable to non-distributed training)') 36 | group_gpus.add_argument( 37 | '--gpu-ids', 38 | type=int, 39 | nargs='+', 40 | help='ids of gpus to use ' 41 | '(only applicable to non-distributed training)') 42 | parser.add_argument('--seed', type=int, default=None, help='random seed') 43 | parser.add_argument( 44 | '--deterministic', 45 | action='store_true', 46 | help='whether to set deterministic options for CUDNN backend.') 47 | parser.add_argument( 48 | '--cfg-options', 49 | nargs='+', 50 | action=DictAction, 51 | default=None, 52 | help='override some settings in the used config, the key-value pair ' 53 | 'in xxx=yyy format will be merged into config file. For example, ' 54 | "'--cfg-options model.backbone.depth=18 model.backbone.with_cp=True'") 55 | parser.add_argument( 56 | '--launcher', 57 | choices=['none', 'pytorch', 'slurm', 'mpi'], 58 | default='none', 59 | help='job launcher') 60 | parser.add_argument('--local_rank', type=int, default=0) 61 | parser.add_argument( 62 | '--autoscale-lr', 63 | action='store_true', 64 | help='automatically scale lr with the number of gpus') 65 | args = parser.parse_args() 66 | if 'LOCAL_RANK' not in os.environ: 67 | os.environ['LOCAL_RANK'] = str(args.local_rank) 68 | 69 | return args 70 | 71 | 72 | def main(): 73 | args = parse_args() 74 | 75 | cfg = Config.fromfile(args.config) 76 | 77 | if args.cfg_options is not None: 78 | cfg.merge_from_dict(args.cfg_options) 79 | 80 | # set cudnn_benchmark 81 | if cfg.get('cudnn_benchmark', False): 82 | torch.backends.cudnn.benchmark = True 83 | 84 | # work_dir is determined in this priority: CLI > segment in file > filename 85 | if args.work_dir is not None: 86 | # update configs according to CLI args if args.work_dir is not None 87 | cfg.work_dir = args.work_dir 88 | elif cfg.get('work_dir', None) is None: 89 | # use config filename as default work_dir if cfg.work_dir is None 90 | cfg.work_dir = osp.join('./work_dirs', 91 | osp.splitext(osp.basename(args.config))[0]) 92 | if args.resume_from is not None: 93 | cfg.resume_from = args.resume_from 94 | if args.gpu_ids is not None: 95 | cfg.gpu_ids = args.gpu_ids 96 | else: 97 | cfg.gpu_ids = range(1) if args.gpus is None else range(args.gpus) 98 | 99 | if args.autoscale_lr: 100 | # apply the linear scaling rule (https://arxiv.org/abs/1706.02677) 101 | cfg.optimizer['lr'] = cfg.optimizer['lr'] * len(cfg.gpu_ids) / 8 102 | 103 | # init distributed env first, since logger depends on the dist info.
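    # When '--launcher pytorch' is used, this script is typically spawned once
    # per GPU (e.g. through torch.distributed.launch, which tools/dist_train.sh
    # is expected to wrap), and each process reads its LOCAL_RANK from the
    # environment populated in parse_args() above.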
104 | if args.launcher == 'none': 105 | distributed = False 106 | else: 107 | distributed = True 108 | init_dist(args.launcher, **cfg.dist_params) 109 | 110 | # create work_dir 111 | mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir)) 112 | # init the logger before other steps 113 | timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime()) 114 | log_file = osp.join(cfg.work_dir, f'{timestamp}.log') 115 | logger = get_root_logger(log_file=log_file, log_level=cfg.log_level) 116 | 117 | # init the meta dict to record some important information such as 118 | # environment info and seed, which will be logged 119 | meta = dict() 120 | # log env info 121 | env_info_dict = collect_env() 122 | env_info = '\n'.join([(f'{k}: {v}') for k, v in env_info_dict.items()]) 123 | dash_line = '-' * 60 + '\n' 124 | logger.info('Environment info:\n' + dash_line + env_info + '\n' + 125 | dash_line) 126 | meta['env_info'] = env_info 127 | 128 | # log some basic info 129 | logger.info(f'Distributed training: {distributed}') 130 | logger.info(f'Config:\n{cfg.pretty_text}') 131 | 132 | # set random seeds 133 | if args.seed is not None: 134 | logger.info(f'Set random seed to {args.seed}, ' 135 | f'deterministic: {args.deterministic}') 136 | set_random_seed(args.seed, deterministic=args.deterministic) 137 | cfg.seed = args.seed 138 | meta['seed'] = args.seed 139 | 140 | model = build_posenet(cfg.model) 141 | datasets = [build_dataset(cfg.data.train)] 142 | 143 | if len(cfg.workflow) == 2: 144 | val_dataset = copy.deepcopy(cfg.data.val) 145 | val_dataset.pipeline = cfg.data.train.pipeline 146 | datasets.append(build_dataset(val_dataset)) 147 | 148 | if cfg.checkpoint_config is not None: 149 | # save mmpose version and config file content in 150 | # checkpoints as meta data 151 | cfg.checkpoint_config.meta = dict( 152 | mmpose_version=__version__ + get_git_hash(digits=7), 153 | config=cfg.pretty_text, 154 | ) 155 | train_model( 156 | model, 157 | datasets, 158 | cfg, 159 | distributed=distributed, 160 | validate=(not args.no_validate), 161 | timestamp=timestamp, 162 | meta=meta) 163 | 164 | 165 | if __name__ == '__main__': 166 | main() 167 | -------------------------------------------------------------------------------- /tools/torchstat/analyzer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import OrderedDict 3 | from typing import Dict, Sequence 4 | import functools 5 | import itertools 6 | 7 | import numpy as np 8 | import torch 9 | import torch.nn as nn 10 | 11 | from .compute_madd import compute_madd 12 | from .compute_flops import compute_flops 13 | from .compute_memory import compute_memory 14 | from .stat_tree import StatTree, StatNode 15 | from .reporter import report_format 16 | 17 | 18 | class ModuleStats: 19 | 20 | def __init__(self, name) -> None: 21 | self.name = name 22 | self.start_time = 0.0 23 | self.end_time = 0.0 24 | self.inference_memory = 0 25 | self.input_shape: Sequence[int] = [] 26 | self.output_shape: Sequence[int] = [] 27 | self.MAdd = 0 28 | self.duration = 0.0 29 | self.Flops = 0 30 | self.Memory = 0, 0 31 | self.parameter_quantity = 0 32 | self.done = False 33 | 34 | 35 | def print_report(collected_nodes): 36 | report = report_format(collected_nodes) 37 | print(report) 38 | 39 | 40 | def analyze(model: nn.Module, input_size, query_granularity: int): 41 | assert isinstance(model, nn.Module) 42 | assert isinstance(input_size, (list, tuple)) 43 | 44 | pre_hooks, post_hooks = [], [] 45 | stats: OrderedDict[str,
ModuleStats] = OrderedDict() 46 | 47 | try: 48 | _for_leaf(model, _register_hooks, pre_hooks, post_hooks, stats) 49 | 50 | x = torch.rand(*input_size) # add module duration time 51 | x = x.to(next(model.parameters()).device) 52 | model.eval() 53 | model(x) 54 | 55 | stat_tree = _convert_leaf_modules_to_stat_tree(stats) 56 | 57 | return stat_tree.get_collected_stat_nodes(query_granularity) 58 | 59 | finally: 60 | for stat in stats.values(): 61 | stat.done = True 62 | for hook in itertools.chain(pre_hooks, post_hooks): 63 | hook.remove() 64 | 65 | 66 | def _for_leaf(model, fn, *args): 67 | for name, module in model.named_modules(): 68 | if len(list(module.children())) == 0: 69 | fn(name, module, *args) 70 | 71 | 72 | def _register_hooks(name: str, module: nn.Module, pre_hooks, post_hooks, 73 | stats): 74 | assert isinstance(module, nn.Module) and len(list(module.children())) == 0 75 | 76 | if name in stats: 77 | return 78 | 79 | module_stats = ModuleStats(name) 80 | stats[name] = module_stats 81 | 82 | post_hook = module.register_forward_hook( 83 | functools.partial(_forward_post_hook, module_stats)) 84 | post_hooks.append(post_hook) 85 | 86 | pre_hook = module.register_forward_pre_hook( 87 | functools.partial(_forward_pre_hook, module_stats)) 88 | pre_hooks.append(pre_hook) 89 | 90 | 91 | def _flatten(x): 92 | """Flattens the tree of tensors to flattened sequence of tensors""" 93 | if isinstance(x, torch.Tensor): 94 | return [x] 95 | if isinstance(x, Sequence): 96 | res = [] 97 | for xi in x: 98 | res += _flatten(xi) 99 | return res 100 | return [] 101 | 102 | 103 | def _forward_pre_hook(module_stats: ModuleStats, module: nn.Module, input): 104 | assert not module_stats.done 105 | module_stats.start_time = time.time() 106 | 107 | 108 | def _forward_post_hook(module_stats: ModuleStats, module: nn.Module, input, 109 | output): 110 | assert not module_stats.done 111 | 112 | module_stats.end_time = time.time() 113 | module_stats.duration = module_stats.end_time - module_stats.start_time 114 | 115 | inputs, outputs = _flatten(input), _flatten(output) 116 | module_stats.input_shape = inputs[0].size() 117 | module_stats.output_shape = outputs[0].size() 118 | 119 | parameter_quantity = 0 120 | # iterate through parameters and count num params 121 | for name, p in module.named_parameters(): 122 | parameter_quantity += (0 if p is None else torch.numel(p.data)) 123 | module_stats.parameter_quantity = parameter_quantity 124 | 125 | inference_memory = 1 126 | for oi in outputs: 127 | for s in oi.size(): 128 | inference_memory *= s 129 | # memory += parameters_number # exclude parameter memory 130 | inference_memory = inference_memory * 4 / (1024**2) # shown as MB unit 131 | module_stats.inference_memory = inference_memory 132 | module_stats.MAdd = compute_madd(module, inputs, outputs) 133 | module_stats.Flops = compute_flops(module, inputs, outputs) 134 | module_stats.Memory = compute_memory(module, inputs, outputs) 135 | 136 | return output 137 | 138 | 139 | def get_parent_node(root_node, stat_node_name): 140 | assert isinstance(root_node, StatNode) 141 | 142 | node = root_node 143 | names = stat_node_name.split('.') 144 | for i in range(len(names) - 1): 145 | node_name = '.'.join(names[0:i + 1]) 146 | child_index = node.find_child_index(node_name) 147 | assert child_index != -1 148 | node = node.children[child_index] 149 | return node 150 | 151 | 152 | def _convert_leaf_modules_to_stat_tree(leaf_modules): 153 | assert isinstance(leaf_modules, OrderedDict) 154 | 155 | create_index = 1 156 | root_node = 
StatNode(name='root', parent=None) 157 | for name, module_stats in leaf_modules.items(): 158 | names = name.split('.') 159 | for i in range(len(names)): 160 | create_index += 1 161 | stat_node_name = '.'.join(names[0:i + 1]) 162 | parent_node = get_parent_node(root_node, stat_node_name) 163 | node = StatNode(name=stat_node_name, parent=parent_node) 164 | parent_node.add_child(node) 165 | if i == len(names) - 1: # leaf module itself 166 | input_shape = module_stats.input_shape 167 | output_shape = module_stats.output_shape 168 | node.input_shape = input_shape 169 | node.output_shape = output_shape 170 | node.parameter_quantity = module_stats.parameter_quantity 171 | node.inference_memory = module_stats.inference_memory 172 | node.MAdd = module_stats.MAdd 173 | node.Flops = module_stats.Flops 174 | node.duration = module_stats.duration 175 | node.Memory = module_stats.Memory 176 | return StatTree(root_node) 177 | -------------------------------------------------------------------------------- /tools/torchstat/compute_flops.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import numpy as np 4 | import math 5 | 6 | 7 | def compute_flops(module, inp, out): 8 | if isinstance(module, nn.Conv2d): 9 | return compute_Conv2d_flops(module, inp[0], out[0]) 10 | elif type(module).__name__ == 'ConvFunction': 11 | return compute_Conv2d_flops(module, inp[0], out[0]) 12 | elif type(module).__name__ == 'SplitKernelConvFunction': 13 | return compute_Conv2d_flops(module, inp[0], out[0]) 14 | elif isinstance(module, nn.ConvTranspose2d): 15 | return compute_ConvTranspose2d_flops(module, inp[0], out[0]) 16 | elif isinstance(module, nn.BatchNorm2d): 17 | return compute_BatchNorm2d_flops(module, inp[0], out[0]) 18 | elif isinstance(module, (nn.AvgPool2d, nn.MaxPool2d)): 19 | return compute_Pool2d_flops(module, inp[0], out[0]) 20 | elif isinstance(module, (nn.AdaptiveAvgPool2d, nn.AdaptiveMaxPool2d)): 21 | return compute_adaptivepool_flops(module, inp[0], out[0]) 22 | elif isinstance(module, 23 | (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)): 24 | return compute_ReLU_flops(module, inp[0], out[0]) 25 | elif isinstance(module, nn.Upsample): 26 | return compute_Upsample_flops(module, inp[0], out[0]) 27 | elif isinstance(module, nn.Linear): 28 | return compute_Linear_flops(module, inp[0], out[0]) 29 | elif type(module).__name__ == 'MatMul': 30 | return compute_matmul_flops(module, inp, out) 31 | else: 32 | #print(f"[Flops]: {type(module).__name__} is not supported!") 33 | return 0 34 | 35 | 36 | 37 | def compute_matmul_flops(module, inp, out): 38 | x, y = inp 39 | batch_size = x.size(0) 40 | _, l, m = x.size() 41 | _, _, n = y.size() 42 | return batch_size * 2 * l * m * n 43 | 44 | 45 | def compute_Conv2d_flops(module, inp, out): 46 | # Can have multiple inputs, getting the first one 47 | # assert isinstance(module, nn.Conv2d) 48 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 49 | 50 | batch_size = inp.size()[0] 51 | in_c = inp.size()[1] 52 | k_h, k_w = module.kernel_size 53 | out_c, out_h, out_w = out.size()[1:] 54 | groups = module.groups 55 | 56 | filters_per_channel = out_c // groups 57 | conv_per_position_flops = k_h * k_w * in_c * filters_per_channel 58 | active_elements_count = batch_size * out_h * out_w 59 | 60 | total_conv_flops = conv_per_position_flops * active_elements_count 61 | 62 | bias_flops = 0 63 | if module.bias is not None: 64 | bias_flops = out_c * active_elements_count 65 |
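    # Grouped convolutions are already accounted for above, since
    # k_h * k_w * in_c * (out_c // groups) == k_h * k_w * (in_c // groups) * out_c
    # multiply-accumulates per output position; the bias adds one op per element.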
66 | total_flops = total_conv_flops + bias_flops 67 | return total_flops 68 | 69 | 70 | def compute_ConvTranspose2d_flops(module, inp, out): 71 | # Can have multiple inputs, getting the first one 72 | assert isinstance(module, nn.ConvTranspose2d) 73 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 74 | 75 | batch_size = inp.size()[0] 76 | in_h, in_w = inp.size()[2:] 77 | 78 | k_h, k_w = module.kernel_size 79 | in_c = module.in_channels 80 | out_c = module.out_channels 81 | groups = module.groups 82 | 83 | filters_per_channel = out_c // groups 84 | conv_per_position_flops = k_h * k_w * in_c * filters_per_channel 85 | active_elements_count = batch_size * in_h * in_w 86 | 87 | total_conv_flops = conv_per_position_flops * active_elements_count 88 | 89 | bias_flops = 0 90 | if module.bias is not None: 91 | out_h, out_w = out.size()[2:] 92 | bias_flops = out_c * batch_size * out_h * out_w 93 | 94 | total_flops = total_conv_flops + bias_flops 95 | 96 | return total_flops 97 | 98 | 99 | def compute_adaptivepool_flops(module, input, output): 100 | # credits: https://github.com/xternalz/SDPoint/blob/master/utils/flops.py 101 | batch_size = input.size(0) 102 | input_planes = input.size(1) 103 | input_height = input.size(2) 104 | input_width = input.size(3) 105 | 106 | flops = 0 107 | for i in range(output.size(2)): 108 | y_start = int(math.floor(float(i * input_height) / output.size(2))) 109 | y_end = int(math.ceil(float((i + 1) * input_height) / output.size(2))) 110 | for j in range(output.size(3)): 111 | x_start = int(math.floor(float(j * input_width) / output.size(3))) 112 | x_end = int( 113 | math.ceil(float((j + 1) * input_width) / output.size(3))) 114 | 115 | flops += batch_size * input_planes * (y_end - y_start + 1) * ( 116 | x_end - x_start + 1) 117 | return flops 118 | 119 | 120 | def compute_BatchNorm2d_flops(module, inp, out): 121 | assert isinstance(module, nn.BatchNorm2d) 122 | assert len(inp.size()) == 4 and len(inp.size()) == len(out.size()) 123 | in_c, in_h, in_w = inp.size()[1:] 124 | batch_flops = np.prod(inp.shape) 125 | if module.affine: 126 | batch_flops *= 2 127 | return batch_flops 128 | 129 | 130 | def compute_ReLU_flops(module, inp, out): 131 | assert isinstance(module, 132 | (nn.ReLU, nn.ReLU6, nn.PReLU, nn.ELU, nn.LeakyReLU)) 133 | batch_size = inp.size()[0] 134 | active_elements_count = batch_size 135 | 136 | for s in inp.size()[1:]: 137 | active_elements_count *= s 138 | 139 | return active_elements_count 140 | 141 | 142 | def compute_Pool2d_flops(module, input, out): 143 | batch_size = input.size(0) 144 | input_planes = input.size(1) 145 | input_height = input.size(2) 146 | input_width = input.size(3) 147 | kernel_size = ('int' in str(type(module.kernel_size))) and [ 148 | module.kernel_size, module.kernel_size 149 | ] or module.kernel_size 150 | kernel_ops = kernel_size[0] * kernel_size[1] 151 | stride = ('int' in str(type( 152 | module.stride))) and [module.stride, module.stride] or module.stride 153 | padding = ('int' in str(type(module.padding))) and [ 154 | module.padding, module.padding 155 | ] or module.padding 156 | 157 | # PyTorch stores 2-D pooling params as (height, width) pairs. 158 | output_width = math.floor((input_width + 2 * padding[1] - kernel_size[1]) / 159 | float(stride[1]) + 1) 160 | output_height = math.floor( 161 | (input_height + 2 * padding[0] - kernel_size[0]) / float(stride[0]) + 162 | 1) 163 | return batch_size * input_planes * output_width * output_height * kernel_ops 164 |
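# Note on conventions: most routines above count one multiply-accumulate per
# "FLOP" (e.g. compute_Conv2d_flops), while compute_matmul_flops counts
# multiplies and adds separately (2*l*m*n), so counts for different layer
# types are not strictly on the same scale.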
165 | 166 | def compute_Linear_flops(module, inp, out): 167 | assert isinstance(module, nn.Linear) 168 | assert len(inp.size()) == 2 and len(out.size()) == 2 169 | batch_size = inp.size()[0] 170 | return batch_size * inp.size()[1] * out.size()[1] 171 | 172 | 173 | def compute_Upsample_flops(module, inp, out): 174 | assert isinstance(module, nn.Upsample) 175 | output_size = out[0] 176 | batch_size = inp.size()[0] 177 | output_elements_count = batch_size 178 | for s in output_size.shape[1:]: 179 | output_elements_count *= s 180 | 181 | return output_elements_count 182 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # x-hrnet 2 | Official code for "X-HRNet: Towards Lightweight Human Pose Estimation with Spatially Unidimensional Self-Attention" 3 | 4 | ## Environment 5 | The code is developed using python 3.8 on Ubuntu 20.04. NVIDIA GPUs are needed. The code is developed and tested using 8 NVIDIA V100S GPU cards. Other platforms or GPU cards are not fully tested. 6 | ## Quick Start 7 | 8 | ### Requirements 9 | 10 | - Linux (Windows is not officially supported) 11 | - Python 3.8 12 | - PyTorch 1.8 13 | - CUDA 11.1 14 | - GCC 5+ 15 | - [mmcv](https://github.com/open-mmlab/mmcv) (Please install the latest version of mmcv-full) 16 | - Numpy 17 | - cv2 18 | - json_tricks 19 | - [xtcocotools](https://github.com/jin-s13/xtcocoapi) 20 | 21 | 22 | ### Installation 23 | 25 | 26 | a. Install mmcv. We recommend installing the pre-built mmcv-full as below. 27 | 28 | ```shell 29 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/{cu_version}/{torch_version}/index.html 30 | ``` 31 | 32 | Please replace ``{cu_version}`` and ``{torch_version}`` in the url with your desired ones. For example, to install the latest ``mmcv-full`` with ``CUDA 11`` and ``PyTorch 1.8.0``, use the following command: 33 | 34 | ```shell 35 | pip install mmcv-full -f https://download.openmmlab.com/mmcv/dist/cu111/torch1.8.0/index.html 36 | ``` 37 | 38 | If it compiles during installation, please check that the CUDA version and PyTorch version **exactly** match the versions in the mmcv-full installation command. For example, PyTorch 1.7.0 and 1.7.1 are treated differently. 39 | See [here](https://github.com/open-mmlab/mmcv#installation) for the MMCV versions compatible with different PyTorch and CUDA versions. 40 | 41 | Alternatively, you can install a pinned version of mmcv-full (compiled from source if no matching pre-built wheel exists) with the following command: 42 | 43 | ```shell 44 | pip install mmcv-full==1.3.9 45 | # alternative: pip install mmcv 46 | ``` 47 | **Important:** You need to run `pip uninstall mmcv` first if you have mmcv installed. If mmcv and mmcv-full are both installed, there will be a `ModuleNotFoundError`. 48 | 49 | b. Install build requirements 50 | 51 | ```shell 52 | pip install -r requirements.txt 53 | ``` 54 |
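Optionally, a quick smoke test confirms that mmcv is importable:

```shell
python -c 'import mmcv; print(mmcv.__version__)'
```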
55 | ### Prepare datasets 56 | 57 | It is recommended to symlink the dataset root to `$LITE_HRNET/data`. 58 | If your folder structure is different, you may need to change the corresponding paths in config files. 59 | 60 | **For COCO data**, please download from [COCO download](http://cocodataset.org/#download), 2017 Train/Val is needed for COCO keypoints training and validation. [HRNet-Human-Pose-Estimation](https://github.com/HRNet/HRNet-Human-Pose-Estimation) provides person detection results of COCO val2017 to reproduce our multi-person pose estimation results. Please download from [OneDrive](https://1drv.ms/f/s!AhIXJn_J-blWzzDXoz5BeFl8sWM-). 61 | Download and extract them under `$LITE_HRNET/data`, and make them look like this: 62 | 63 | ``` 64 | lite_hrnet 65 | ├── configs 66 | ├── models 67 | ├── tools 68 | `── data 69 | │── coco 70 | │-- annotations 71 | │ │-- person_keypoints_train2017.json 72 | │ |-- person_keypoints_val2017.json 73 | |-- person_detection_results 74 | | |-- COCO_val2017_detections_AP_H_56_person.json 75 | │-- train2017 76 | │ │-- 000000000009.jpg 77 | │ │-- 000000000025.jpg 78 | │ │-- 000000000030.jpg 79 | │ │-- ... 80 | `-- val2017 81 | │-- 000000000139.jpg 82 | │-- 000000000285.jpg 83 | │-- 000000000632.jpg 84 | │-- ... 85 | 86 | ``` 87 | 88 | ## Training and Testing 89 | All outputs (log files and checkpoints) will be saved to the working directory, 90 | which is specified by `work_dir` in the config file. 91 | 92 | By default, we evaluate the model on the validation set after each epoch; you can change the evaluation interval by modifying the `interval` argument in the training config: 93 | 94 | ```python 95 | evaluation = dict(interval=5) # This evaluates the model every 5 epochs. 96 | ``` 97 | 98 | According to the [Linear Scaling Rule](https://arxiv.org/abs/1706.02677), you need to set the learning rate proportional to the batch size if you use a different number of GPUs or samples per GPU, e.g., lr=0.01 for 4 GPUs x 2 samples/gpu and lr=0.08 for 16 GPUs x 4 samples/gpu. 99 | 100 | ### Training 101 | 102 | ```shell 103 | # train with a single GPU 104 | python tools/train.py ${CONFIG_FILE} [optional arguments] 105 | 106 | # train with multiple GPUs 107 | ./tools/dist_train.sh ${CONFIG_FILE} ${GPU_NUM} [optional arguments] 108 | ``` 109 | 110 | Optional arguments are: 111 | 112 | - `--validate` (**strongly recommended**): Perform evaluation every k epochs (default value is 5) during the training. 113 | - `--work-dir ${WORK_DIR}`: Override the working directory specified in the config file. 114 | - `--resume-from ${CHECKPOINT_FILE}`: Resume from a previous checkpoint file. 115 | - `--gpus ${GPU_NUM}`: Number of gpus to use, which is only applicable to non-distributed training. 116 | - `--seed ${SEED}`: Seed id for random state in python, numpy and pytorch to generate random numbers. 117 | - `--deterministic`: If specified, it will set deterministic options for CUDNN backend. 118 | - `JOB_LAUNCHER`: Items for distributed job initialization launcher. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. Especially, if set to none, it will test in a non-distributed mode. 119 | - `LOCAL_RANK`: ID for local rank. If not specified, it will be set to 0. 120 | - `--autoscale-lr`: If specified, it will automatically scale lr with the number of gpus by [Linear Scaling Rule](https://arxiv.org/abs/1706.02677). 121 | 122 | Difference between `resume-from` and `load-from`: 123 | `resume-from` loads both the model weights and optimizer status, and the epoch is also inherited from the specified checkpoint. It is usually used for resuming a training process that was interrupted accidentally. 124 | `load-from` only loads the model weights and the training epoch starts from 0. It is usually used for finetuning. 125 | 126 | Examples: 127 | 128 | #### Training on COCO train2017 dataset 129 | ```shell 130 | ./tools/dist_train.sh configs/xhrnet/sxhrnet_18_coco_256x192.py 8 131 | ```
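For example, an interrupted 8-GPU run can be resumed by forwarding `--resume-from` (a hypothetical invocation; it assumes a checkpoint named `latest.pth` already exists under the configured work_dir):

```shell
./tools/dist_train.sh configs/xhrnet/sxhrnet_18_coco_256x192.py 8 \
    --resume-from work_dirs/xhrnt/sxhrnet_18_coco_256x192/latest.pth
```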
132 | 133 | ### Testing 134 | You can use the following commands to test a dataset. 135 | 136 | ```shell 137 | # single-gpu testing 138 | python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [--eval ${EVAL_METRIC}] \ 139 | [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \ 140 | [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}] 141 | 142 | # multiple-gpu testing 143 | ./tools/dist_test.sh ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRIC}] \ 144 | [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \ 145 | [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}] 146 | ``` 147 | 148 | Optional arguments: 149 | 150 | - `RESULT_FILE`: Filename of the output results. If not specified, the results will not be saved to a file. 151 | - `EVAL_METRIC`: Items to be evaluated on the results. Allowed values depend on the dataset. 152 | - `NUM_PROC_PER_GPU`: Number of processes per GPU. If not specified, only one process will be assigned for a single gpu. 153 | - `--gpu_collect`: If specified, results will be collected using gpu communication. Otherwise, it will save the results on different gpus to `TMPDIR` and collect them by the rank 0 worker. 154 | - `TMPDIR`: Temporary directory used for collecting results from multiple workers, available when `--gpu_collect` is not specified. 155 | - `AVG_TYPE`: Items to average the test clips. If set to `prob`, it will apply softmax before averaging the clip scores. Otherwise, it will directly average the clip scores. 156 | - `JOB_LAUNCHER`: Items for distributed job initialization launcher. Allowed choices are `none`, `pytorch`, `slurm`, `mpi`. Especially, if set to none, it will test in a non-distributed mode. 157 | - `LOCAL_RANK`: ID for local rank. If not specified, it will be set to 0. 158 | 159 | Examples: 160 | #### Test SX-HRNet-18 on COCO with 8 GPUs, and evaluate the mAP. 161 | 162 | ```shell 163 | ./tools/dist_test.sh configs/xhrnet/sxhrnet_18_coco_256x192.py \ 164 | checkpoints/SOME_CHECKPOINT.pth 8 \ 165 | --eval mAP 166 | ``` 167 | 168 | ### Get the computational complexity 169 | You can use the following commands to compute the complexity of a model. 170 | ```shell 171 | python tools/summary_network.py ${CONFIG_FILE} --shape ${SHAPE} 172 | ``` 173 | 174 | Arguments: 175 | 176 | - `SHAPE`: Input size. 177 | 178 | Examples: 179 | 180 | #### Test the complexity of SX-HRNet-18 with 256x256 resolution input.
181 | 182 | ```shell 183 | python tools/summary_network.py configs/xhrnet/sxhrnet_18_coco_256x192.py --shape 256 256 184 | ``` 185 | 186 | ## Acknowledgement 187 | 188 | Thanks to: 189 | 190 | - [MMPose](https://github.com/open-mmlab/mmpose) 191 | - [HRNet](https://github.com/HRNet/deep-high-resolution-net.pytorch) 192 | - [Lite-HRNet](https://github.com/HRNet/Lite-HRNet) 193 | 194 | ## Citation 195 | 196 | If you use our code or models in your research, please cite with: 197 | ``` 198 | @inproceedings{xuan2022xhrnet, 199 | title={X-HRNet: Towards Lightweight Human Pose Estimation with Spatially Unidimensional Self-Attention}, 200 | author={Zhou, Yixuan and Wang, Xuanhan and Xu, Xing and Zhao, Lei and Song, Jingkuan}, 201 | booktitle={ICME}, 202 | year={2022} 203 | } 204 | ``` -------------------------------------------------------------------------------- /models/backbones/xhrnet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from mmcv.cnn import (ConvModule, DepthwiseSeparableConvModule, 5 | build_conv_layer, build_norm_layer, constant_init, 6 | normal_init) 7 | from torch.nn.modules.batchnorm import _BatchNorm 8 | import torch.utils.checkpoint as cp 9 | 10 | from mmpose.utils import get_root_logger 11 | from mmpose.models import BACKBONES 12 | from mmpose.models.backbones.resnet import BasicBlock, Bottleneck 13 | from mmpose.models.backbones.utils import load_checkpoint, channel_shuffle 14 | 15 | 16 | class HSUSA(nn.Module): 17 | 18 | def __init__(self, 19 | channels, 20 | ln_enable=True): 21 | super().__init__() 22 | 23 | self.conv = ConvModule( 24 | channels, 25 | channels, 26 | kernel_size=1, 27 | stride=1, 28 | padding=0, 29 | conv_cfg=dict(type='Conv2d'), 30 | act_cfg=None) 31 | self.mean_conv = ConvModule( 32 | channels, 33 | channels, 34 | groups=channels, 35 | kernel_size=1, 36 | stride=1, 37 | padding=0, 38 | conv_cfg=dict(type='Conv2d'), 39 | act_cfg=None) 40 | self.group_weight = nn.Parameter(torch.ones([1, 1, 96, 1]), requires_grad=True) 41 | 42 | self.ln_enable = ln_enable 43 | if ln_enable: 44 | self.ln = nn.LayerNorm(channels) 45 | self.sigmoid = nn.Sigmoid() 46 | 47 | def forward(self, x: torch.Tensor): 48 | 49 | h, w = x.shape[2:] 50 | xq = self.mean_conv((x * self.group_weight[:, :, :h]).mean(-2, keepdim=True)) # n,c,1,w 51 | xq = xq.transpose(2, 3).softmax(-2) # n,c,w,1 52 | xv = x # n,c,h,w 53 | 54 | atten = torch.matmul(xv, xq) # n,c,h,1 55 | atten = self.conv(atten) 56 | if self.ln_enable: 57 | atten = self.ln(atten.transpose(1, -1)).transpose(1, -1) 58 | atten = self.sigmoid(atten) 59 | 60 | return x * atten 61 | 62 | 63 | class WSUSA(nn.Module): 64 | 65 | def __init__(self, 66 | channels, 67 | ln_enable=True): 68 | super().__init__() 69 | 70 | self.conv = ConvModule( 71 | channels, 72 | channels, 73 | kernel_size=1, 74 | stride=1, 75 | padding=0, 76 | conv_cfg=dict(type='Conv2d'), 77 | act_cfg=None) 78 | self.mean_conv = ConvModule( 79 | channels, 80 | channels, 81 | groups=channels, 82 | kernel_size=1, 83 | stride=1, 84 | padding=0, 85 | conv_cfg=dict(type='Conv2d'), 86 | act_cfg=None) 87 | self.group_weight = nn.Parameter(torch.ones([1, 1, 1, 72]), requires_grad=True) 88 | self.ln_enable = ln_enable 89 | if ln_enable: 90 | self.ln = nn.LayerNorm(channels) 91 | self.sigmoid = nn.Sigmoid() 92 | 93 | def forward(self, x: torch.Tensor): 94 | 95 | h, w = x.shape[2:] 96 | xq = self.mean_conv((x * self.group_weight[:, :, :, :w]).mean(-1, 
keepdim=True)) # n,c,h,1 97 | xq = xq.softmax(-2) # n,c,h,1 98 | xv = x.transpose(2, 3) # n,c,w,h 99 | 100 | atten = torch.matmul(xv, xq).transpose(2, 3) # n,c,w,1 101 | atten = self.conv(atten) 102 | if self.ln_enable: 103 | atten = self.ln(atten.transpose(1, -1)).transpose(1, -1) 104 | atten = self.sigmoid(atten) 105 | 106 | return x * atten 107 | 108 | 109 | class HWSUSA(nn.Module): 110 | 111 | def __init__(self, 112 | channels, 113 | mode=None, 114 | ln_enable=False, 115 | hw_shuffle=False): 116 | super().__init__() 117 | assert mode in ['hw', 'wh', None] 118 | 119 | self.mode = mode 120 | self.hw_shuffle = hw_shuffle 121 | self.shuffle_groups = 4 122 | if mode == 'hw': 123 | h_channels = channels // 4 * 3 124 | w_channels = channels // 4 125 | elif mode == 'wh': 126 | h_channels = channels // 4 127 | w_channels = channels // 4 * 3 128 | else: 129 | h_channels = w_channels = channels // 2 130 | self.shuffle_groups = 2 131 | self.h_channels = h_channels 132 | self.w_channels = w_channels 133 | 134 | self.hconv = HSUSA(self.h_channels, ln_enable) 135 | self.wconv = WSUSA(self.w_channels, ln_enable) 136 | 137 | def forward(self, x): 138 | x1, x2 = x.split([self.h_channels, self.w_channels], dim=1) 139 | out = torch.cat([self.hconv(x1), self.wconv(x2)], dim=1) 140 | if self.hw_shuffle: 141 | out = channel_shuffle(out, self.shuffle_groups) 142 | 143 | return out 144 | 145 | 146 | class SXShuffleUnit(nn.Module): 147 | 148 | def __init__(self, 149 | in_channels, 150 | out_channels, 151 | stride=1, 152 | conv_cfg=None, 153 | norm_cfg=dict(type='BN'), 154 | act_cfg=None, 155 | with_cb=False, 156 | with_cp=False): 157 | super().__init__() 158 | self.stride = stride 159 | self.with_cb = with_cb 160 | self.with_cp = with_cp 161 | 162 | branch_features = out_channels // 2 163 | if self.stride == 1: 164 | assert in_channels == branch_features * 2, ( 165 | f'in_channels ({in_channels}) should equal to ' 166 | f'branch_features * 2 ({branch_features * 2}) ' 167 | 'when stride is 1') 168 | 169 | if in_channels != branch_features * 2: 170 | assert self.stride != 1, ( 171 | f'stride ({self.stride}) should not equal 1 when ' 172 | f'in_channels != branch_features * 2') 173 | 174 | self.branch2 = nn.Sequential( 175 | WSUSA(branch_features, ln_enable=True), 176 | ConvModule( 177 | branch_features, 178 | branch_features, 179 | kernel_size=3, 180 | stride=1, 181 | padding=1, 182 | groups=branch_features, 183 | conv_cfg=conv_cfg, 184 | norm_cfg=norm_cfg, 185 | act_cfg=None), 186 | HSUSA(branch_features, ln_enable=True)) 187 | 188 | def forward(self, x): 189 | 190 | def _inner_forward(x): 191 | x1, x2 = x.chunk(2, dim=1) 192 | out = torch.cat((x1, self.branch2(x2)), dim=1) 193 | 194 | out = channel_shuffle(out, 2) 195 | if self.with_cb: 196 | out = self.cb(out) 197 | 198 | return out 199 | 200 | if self.with_cp and x.requires_grad: 201 | out = cp.checkpoint(_inner_forward, x) 202 | else: 203 | out = _inner_forward(x) 204 | 205 | return out 206 | 207 | 208 | class PXShuffleUnit(nn.Module): 209 | 210 | def __init__(self, 211 | in_channels, 212 | out_channels, 213 | stride=1, 214 | conv_cfg=None, 215 | norm_cfg=dict(type='BN'), 216 | act_cfg=None, 217 | with_cb=False, 218 | with_cp=False): 219 | super().__init__() 220 | self.stride = stride 221 | self.with_cb = with_cb 222 | self.with_cp = with_cp 223 | 224 | branch_features = out_channels // 2 225 | if self.stride == 1: 226 | assert in_channels == branch_features * 2, ( 227 | f'in_channels ({in_channels}) should equal to ' 228 | f'branch_features * 2 
({branch_features * 2}) ' 229 | 'when stride is 1') 230 | 231 | if in_channels != branch_features * 2: 232 | assert self.stride != 1, ( 233 | f'stride ({self.stride}) should not equal 1 when ' 234 | f'in_channels != branch_features * 2') 235 | 236 | self.branch2 = nn.Sequential( 237 | HWSUSA( 238 | branch_features, 239 | ln_enable=True, 240 | hw_shuffle=True), 241 | ConvModule( 242 | branch_features, 243 | branch_features, 244 | kernel_size=3, 245 | stride=1, 246 | padding=1, 247 | groups=branch_features, 248 | conv_cfg=conv_cfg, 249 | norm_cfg=norm_cfg, 250 | act_cfg=None), 251 | HWSUSA( 252 | branch_features, 253 | ln_enable=True, 254 | hw_shuffle=True)) 255 | 256 | def forward(self, x): 257 | 258 | def _inner_forward(x): 259 | x1, x2 = x.chunk(2, dim=1) 260 | out = torch.cat((x1, self.branch2(x2)), dim=1) 261 | 262 | out = channel_shuffle(out, 2) 263 | if self.with_cb: 264 | out = self.cb(out) 265 | 266 | return out 267 | 268 | if self.with_cp and x.requires_grad: 269 | out = cp.checkpoint(_inner_forward, x) 270 | else: 271 | out = _inner_forward(x) 272 | 273 | return out 274 | 275 | 276 | class Stem(nn.Module): 277 | 278 | def __init__(self, 279 | in_channels, 280 | stem_channels, 281 | out_channels, 282 | expand_ratio, 283 | conv_cfg=None, 284 | norm_cfg=dict(type='BN'), 285 | with_cp=False): 286 | super().__init__() 287 | self.in_channels = in_channels 288 | self.out_channels = out_channels 289 | self.conv_cfg = conv_cfg 290 | self.norm_cfg = norm_cfg 291 | self.with_cp = with_cp 292 | 293 | self.conv1 = ConvModule( 294 | in_channels=in_channels, 295 | out_channels=stem_channels, 296 | kernel_size=3, 297 | stride=2, 298 | padding=1, 299 | conv_cfg=self.conv_cfg, 300 | norm_cfg=self.norm_cfg, 301 | act_cfg=dict(type='ReLU')) 302 | 303 | mid_channels = int(round(stem_channels * expand_ratio)) 304 | branch_channels = stem_channels // 2 305 | if stem_channels == self.out_channels: 306 | inc_channels = self.out_channels - branch_channels 307 | else: 308 | inc_channels = self.out_channels - stem_channels 309 | 310 | self.branch1 = nn.Sequential( 311 | ConvModule( 312 | branch_channels, 313 | branch_channels, 314 | kernel_size=3, 315 | stride=2, 316 | padding=1, 317 | groups=branch_channels, 318 | conv_cfg=conv_cfg, 319 | norm_cfg=norm_cfg, 320 | act_cfg=None), 321 | ConvModule( 322 | branch_channels, 323 | inc_channels, 324 | kernel_size=1, 325 | stride=1, 326 | padding=0, 327 | conv_cfg=conv_cfg, 328 | norm_cfg=norm_cfg, 329 | act_cfg=dict(type='ReLU')), 330 | ) 331 | 332 | self.branch2 = nn.Sequential( 333 | ConvModule( 334 | branch_channels, 335 | mid_channels, 336 | kernel_size=1, 337 | stride=1, 338 | padding=0, 339 | conv_cfg=conv_cfg, 340 | norm_cfg=norm_cfg, 341 | act_cfg=dict(type='ReLU')), 342 | ConvModule( 343 | mid_channels, 344 | mid_channels, 345 | kernel_size=3, 346 | stride=2, 347 | padding=1, 348 | groups=mid_channels, 349 | conv_cfg=conv_cfg, 350 | norm_cfg=norm_cfg, 351 | act_cfg=None), 352 | ConvModule( 353 | mid_channels, 354 | branch_channels 355 | if stem_channels == self.out_channels else stem_channels, 356 | kernel_size=1, 357 | stride=1, 358 | padding=0, 359 | conv_cfg=conv_cfg, 360 | norm_cfg=norm_cfg, 361 | act_cfg=dict(type='ReLU'))) 362 | 363 | def forward(self, x): 364 | 365 | def _inner_forward(x): 366 | x = self.conv1(x) 367 | x1, x2 = x.chunk(2, dim=1) 368 | 369 | out = torch.cat((self.branch1(x1), self.branch2(x2)), dim=1) 370 | out = channel_shuffle(out, 2) 371 | 372 | return out 373 | 374 | if self.with_cp and x.requires_grad: 375 | out = 


class Stem(nn.Module):
    """Stem network: a strided 3x3 conv followed by a ShuffleNetV2-style
    two-branch downsampling block (overall stride 4)."""

    def __init__(self,
                 in_channels,
                 stem_channels,
                 out_channels,
                 expand_ratio,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 with_cp=False):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.with_cp = with_cp

        self.conv1 = ConvModule(
            in_channels=in_channels,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=dict(type='ReLU'))

        mid_channels = int(round(stem_channels * expand_ratio))
        branch_channels = stem_channels // 2
        if stem_channels == self.out_channels:
            inc_channels = self.out_channels - branch_channels
        else:
            inc_channels = self.out_channels - stem_channels

        self.branch1 = nn.Sequential(
            ConvModule(
                branch_channels,
                branch_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                groups=branch_channels,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                branch_channels,
                inc_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=dict(type='ReLU')),
        )

        self.branch2 = nn.Sequential(
            ConvModule(
                branch_channels,
                mid_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=dict(type='ReLU')),
            ConvModule(
                mid_channels,
                mid_channels,
                kernel_size=3,
                stride=2,
                padding=1,
                groups=mid_channels,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                mid_channels,
                branch_channels
                if stem_channels == self.out_channels else stem_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=dict(type='ReLU')))

    def forward(self, x):

        def _inner_forward(x):
            x = self.conv1(x)
            x1, x2 = x.chunk(2, dim=1)

            out = torch.cat((self.branch1(x1), self.branch2(x2)), dim=1)
            out = channel_shuffle(out, 2)

            return out

        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out


class IterativeHead(nn.Module):

    def __init__(self, in_channels, conv_cfg=None, norm_cfg=dict(type='BN')):
        super().__init__()
        projects = []
        num_branches = len(in_channels)
        self.in_channels = in_channels[::-1]

        for i in range(num_branches):
            if i != num_branches - 1:
                projects.append(
                    DepthwiseSeparableConvModule(
                        in_channels=self.in_channels[i],
                        out_channels=self.in_channels[i + 1],
                        kernel_size=3,
                        stride=1,
                        padding=1,
                        norm_cfg=norm_cfg,
                        act_cfg=dict(type='ReLU'),
                        dw_act_cfg=None,
                        pw_act_cfg=dict(type='ReLU')))
            else:
                projects.append(
                    DepthwiseSeparableConvModule(
                        in_channels=self.in_channels[i],
                        out_channels=self.in_channels[i],
                        kernel_size=3,
                        stride=1,
                        padding=1,
                        norm_cfg=norm_cfg,
                        act_cfg=dict(type='ReLU'),
                        dw_act_cfg=None,
                        pw_act_cfg=dict(type='ReLU')))
        self.projects = nn.ModuleList(projects)

    def forward(self, x):
        x = x[::-1]

        y = []
        last_x = None
        for i, s in enumerate(x):
            if last_x is not None:
                last_x = F.interpolate(
                    last_x,
                    size=s.size()[-2:],
                    mode='bilinear',
                    align_corners=True)
                s = s + last_x
            s = self.projects[i](s)
            y.append(s)
            last_x = s

        return y[::-1]
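
# Hedged sketch of IterativeHead's coarse-to-fine refinement: branches are
# processed from the lowest resolution upward, each result is bilinearly
# upsampled and added to the next finer branch before its own projection.
# The channel list and feature sizes below are illustrative assumptions:
#
#     >>> head = IterativeHead(in_channels=[40, 80, 160, 320])
#     >>> feats = [torch.rand(1, c, 64 // 2**i, 48 // 2**i)
#     ...          for i, c in enumerate([40, 80, 160, 320])]
#     >>> outs = head(feats)  # same order and spatial sizes as the inputs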


class ShuffleUnit(nn.Module):
    """InvertedResidual block for ShuffleNetV2 backbone.

    Args:
        in_channels (int): The input channels of the block.
        out_channels (int): The output channels of the block.
        stride (int): Stride of the 3x3 convolution layer. Default: 1
        conv_cfg (dict): Config dict for convolution layer.
            Default: None, which means using conv2d.
        norm_cfg (dict): Config dict for normalization layer.
            Default: dict(type='BN').
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='ReLU').
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed. Default: False.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride=1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 act_cfg=dict(type='ReLU'),
                 with_cp=False):
        super().__init__()
        self.stride = stride
        self.with_cp = with_cp

        branch_features = out_channels // 2
        if self.stride == 1:
            assert in_channels == branch_features * 2, (
                f'in_channels ({in_channels}) should equal '
                f'branch_features * 2 ({branch_features * 2}) '
                'when stride is 1')

        if in_channels != branch_features * 2:
            assert self.stride != 1, (
                f'stride ({self.stride}) should not equal 1 when '
                f'in_channels != branch_features * 2')

        if self.stride > 1:
            self.branch1 = nn.Sequential(
                ConvModule(
                    in_channels,
                    in_channels,
                    kernel_size=3,
                    stride=self.stride,
                    padding=1,
                    groups=in_channels,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=None),
                ConvModule(
                    in_channels,
                    branch_features,
                    kernel_size=1,
                    stride=1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    act_cfg=act_cfg),
            )

        self.branch2 = nn.Sequential(
            ConvModule(
                in_channels if (self.stride > 1) else branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg),
            ConvModule(
                branch_features,
                branch_features,
                kernel_size=3,
                stride=self.stride,
                padding=1,
                groups=branch_features,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=None),
            ConvModule(
                branch_features,
                branch_features,
                kernel_size=1,
                stride=1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg))

    def forward(self, x):

        def _inner_forward(x):
            if self.stride > 1:
                out = torch.cat((self.branch1(x), self.branch2(x)), dim=1)
            else:
                x1, x2 = x.chunk(2, dim=1)
                out = torch.cat((x1, self.branch2(x2)), dim=1)

            out = channel_shuffle(out, 2)

            return out

        if self.with_cp and x.requires_grad:
            out = cp.checkpoint(_inner_forward, x)
        else:
            out = _inner_forward(x)

        return out
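
# Hedged usage sketch for ShuffleUnit (sizes are illustrative assumptions).
# With stride > 1 both branches are applied and concatenated, halving the
# spatial size; with stride 1 half the channels pass through unchanged:
#
#     >>> down = ShuffleUnit(32, 64, stride=2)
#     >>> tuple(down(torch.rand(1, 32, 64, 48)).shape)
#     (1, 64, 32, 24)
#     >>> keep = ShuffleUnit(64, 64, stride=1)
#     >>> tuple(keep(torch.rand(1, 64, 32, 24)).shape)
#     (1, 64, 32, 24)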


class LiteHRModule(nn.Module):

    def __init__(
        self,
        num_branches,
        num_blocks,
        in_channels,
        module_type,
        multiscale_output=False,
        with_fuse=True,
        conv_cfg=None,
        norm_cfg=dict(type='BN'),
        with_cp=False,
    ):
        super().__init__()
        self._check_branches(num_branches, in_channels)

        self.in_channels = in_channels
        self.num_branches = num_branches

        self.multiscale_output = multiscale_output
        self.with_fuse = with_fuse
        self.norm_cfg = norm_cfg
        self.conv_cfg = conv_cfg
        self.with_cp = with_cp

        if module_type == 'naive':
            block = ShuffleUnit
        elif module_type == 'sequential':
            block = SXShuffleUnit
        elif module_type == 'parallel':
            block = PXShuffleUnit
        else:
            raise ValueError(f'Unsupported module_type: {module_type}')
        self.layers = self._make_branches(num_branches, num_blocks, block)
        if self.with_fuse:
            self.fuse_layers = self._make_fuse_layers()
            self.relu = nn.ReLU()

    def _check_branches(self, num_branches, in_channels):
        """Check input to avoid ValueError."""
        if num_branches != len(in_channels):
            error_msg = f'NUM_BRANCHES({num_branches}) ' \
                f'!= NUM_INCHANNELS({len(in_channels)})'
            raise ValueError(error_msg)

    def _make_one_branch(self, branch_index, num_blocks, block):
        """Make one branch."""
        layers = []
        for i in range(num_blocks):
            layers.append(
                block(
                    self.in_channels[branch_index],
                    self.in_channels[branch_index],
                    stride=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    act_cfg=dict(type='ReLU'),
                    with_cp=self.with_cp))

        return nn.Sequential(*layers)

    def _make_branches(self, num_branches, num_blocks, block):
        """Make branches."""
        branches = []

        for i in range(num_branches):
            branches.append(self._make_one_branch(i, num_blocks, block))

        return nn.ModuleList(branches)

    def _make_fuse_layers(self):
        """Make fuse layer."""
        if self.num_branches == 1:
            return None

        num_branches = self.num_branches
        in_channels = self.in_channels
        fuse_layers = []
        num_out_branches = num_branches if self.multiscale_output else 1
        for i in range(num_out_branches):
            fuse_layer = []
            for j in range(num_branches):
                if j > i:
                    fuse_layer.append(
                        nn.Sequential(
                            build_conv_layer(
                                self.conv_cfg,
                                in_channels[j],
                                in_channels[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            build_norm_layer(self.norm_cfg, in_channels[i])[1],
                            nn.Upsample(
                                scale_factor=2**(j - i), mode='nearest')))
                elif j == i:
                    fuse_layer.append(None)
                else:
                    conv_downsamples = []
                    for k in range(i - j):
                        if k == i - j - 1:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=in_channels[j],
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[i],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[i])[1]))
                        else:
                            conv_downsamples.append(
                                nn.Sequential(
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[j],
                                        kernel_size=3,
                                        stride=2,
                                        padding=1,
                                        groups=in_channels[j],
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    build_conv_layer(
                                        self.conv_cfg,
                                        in_channels[j],
                                        in_channels[j],
                                        kernel_size=1,
                                        stride=1,
                                        padding=0,
                                        bias=False),
                                    build_norm_layer(self.norm_cfg,
                                                     in_channels[j])[1],
                                    nn.ReLU(inplace=True)))

                    fuse_layer.append(nn.Sequential(*conv_downsamples))
            fuse_layers.append(nn.ModuleList(fuse_layer))

        return nn.ModuleList(fuse_layers)

    def forward(self, x):
        """Forward function."""
        if self.num_branches == 1:
            return [self.layers[0](x[0])]

        for i in range(self.num_branches):
            x[i] = self.layers[i](x[i])
        out = x

        if self.with_fuse:
            out_fuse = []
            for i in range(len(self.fuse_layers)):
                y = out[0] if i == 0 else self.fuse_layers[i][0](out[0])
                # NOTE: branch 0 is accumulated twice (once when `y` is
                # initialised and again at j == 0); kept as-is to preserve
                # the original behaviour.
                for j in range(self.num_branches):
                    if i == j:
                        y += out[j]
                    else:
                        y += self.fuse_layers[i][j](out[j])
                out_fuse.append(self.relu(y))
            out = out_fuse
        elif not self.multiscale_output:
            out = [out[0]]
        return out
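
# Hedged usage sketch for LiteHRModule (channel and spatial sizes are
# illustrative assumptions). Each branch runs its blocks at its own
# resolution; the fuse layers then exchange information by 1x1 conv plus
# nearest upsampling (coarse -> fine) or strided depthwise downsampling
# (fine -> coarse):
#
#     >>> module = LiteHRModule(
#     ...     num_branches=2, num_blocks=2, in_channels=[40, 80],
#     ...     module_type='naive', multiscale_output=True)
#     >>> outs = module([torch.rand(1, 40, 64, 48), torch.rand(1, 80, 32, 24)])
#     >>> [tuple(o.shape) for o in outs]
#     [(1, 40, 64, 48), (1, 80, 32, 24)]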


@BACKBONES.register_module()
class XHRNet(nn.Module):
    """X-HRNet backbone.

    `High-Resolution Representations for Labeling Pixels and Regions
    <https://arxiv.org/abs/1904.04514>`_

    Args:
        extra (dict): detailed configuration for each stage of HRNet.
        in_channels (int): Number of input image channels. Default: 3.
        conv_cfg (dict): dictionary to construct and config conv layer.
        norm_cfg (dict): dictionary to construct and config norm layer.
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only. Default: False
        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
            memory while slowing down the training speed.
        zero_init_residual (bool): whether to use zero init for last norm
            layer in resblocks to let them behave as identity.

    Example (illustrative configuration; the values below follow the
    Lite-HRNet-style stage spec consumed in ``__init__``):
        >>> from models.backbones import XHRNet
        >>> import torch
        >>> extra = dict(
        >>>     stem=dict(
        >>>         stem_channels=32,
        >>>         out_channels=32,
        >>>         expand_ratio=1),
        >>>     num_stages=3,
        >>>     stages_spec=dict(
        >>>         num_modules=(2, 4, 2),
        >>>         num_branches=(2, 3, 4),
        >>>         num_blocks=(2, 2, 2),
        >>>         module_type=('sequential', 'sequential', 'sequential'),
        >>>         with_fuse=(True, True, True),
        >>>         num_channels=(
        >>>             (40, 80),
        >>>             (40, 80, 160),
        >>>             (40, 80, 160, 320),
        >>>         )),
        >>>     with_head=True)
        >>> self = XHRNet(extra, in_channels=3)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 256, 192)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        (1, 40, 64, 48)
    """

    def __init__(self,
                 extra,
                 in_channels=3,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN'),
                 norm_eval=False,
                 with_cp=False,
                 zero_init_residual=False):
        super().__init__()
        self.extra = extra
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.norm_eval = norm_eval
        self.with_cp = with_cp
        self.zero_init_residual = zero_init_residual

        self.stem = Stem(
            in_channels,
            stem_channels=self.extra['stem']['stem_channels'],
            out_channels=self.extra['stem']['out_channels'],
            expand_ratio=self.extra['stem']['expand_ratio'],
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg)

        self.num_stages = self.extra['num_stages']
        self.stages_spec = self.extra['stages_spec']

        num_channels_last = [
            self.stem.out_channels,
        ]
        for i in range(self.num_stages):
            num_channels = self.stages_spec['num_channels'][i]
            num_channels = [num_channels[i] for i in range(len(num_channels))]
            setattr(
                self, 'transition{}'.format(i),
                self._make_transition_layer(num_channels_last, num_channels))

            stage, num_channels_last = self._make_stage(
                self.stages_spec, i, num_channels, multiscale_output=True)
            setattr(self, 'stage{}'.format(i), stage)

        self.with_head = self.extra['with_head']
        if self.with_head:
            self.head_layer = IterativeHead(
                in_channels=num_channels_last,
                conv_cfg=self.conv_cfg,
                norm_cfg=self.norm_cfg,
            )

    def _make_transition_layer(self, num_channels_pre_layer,
                               num_channels_cur_layer):
        """Make transition layer."""
        num_branches_cur = len(num_channels_cur_layer)
        num_branches_pre = len(num_channels_pre_layer)

        transition_layers = []
        for i in range(num_branches_cur):
            if i < num_branches_pre:
                if num_channels_cur_layer[i] != num_channels_pre_layer[i]:
                    transition_layers.append(
                        nn.Sequential(
                            build_conv_layer(
                                self.conv_cfg,
                                num_channels_pre_layer[i],
                                num_channels_pre_layer[i],
                                kernel_size=3,
                                stride=1,
                                padding=1,
                                groups=num_channels_pre_layer[i],
                                bias=False),
                            build_norm_layer(self.norm_cfg,
                                             num_channels_pre_layer[i])[1],
                            build_conv_layer(
                                self.conv_cfg,
                                num_channels_pre_layer[i],
                                num_channels_cur_layer[i],
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            build_norm_layer(self.norm_cfg,
                                             num_channels_cur_layer[i])[1]))
                else:
                    transition_layers.append(None)
            else:
                conv_downsamples = []
                for j in range(i + 1 - num_branches_pre):
                    in_channels = num_channels_pre_layer[-1]
                    out_channels = num_channels_cur_layer[i] \
                        if j == i - num_branches_pre else in_channels
                    conv_downsamples.append(
                        nn.Sequential(
                            build_conv_layer(
                                self.conv_cfg,
                                in_channels,
                                in_channels,
                                kernel_size=3,
                                stride=2,
                                padding=1,
                                groups=in_channels,
                                bias=False),
                            build_norm_layer(self.norm_cfg, in_channels)[1],
                            build_conv_layer(
                                self.conv_cfg,
                                in_channels,
                                out_channels,
                                kernel_size=1,
                                stride=1,
                                padding=0,
                                bias=False),
                            build_norm_layer(self.norm_cfg, out_channels)[1]))
                transition_layers.append(nn.Sequential(*conv_downsamples))

        return nn.ModuleList(transition_layers)
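
    # Hedged illustration of the transition bookkeeping above: with
    # num_channels_pre_layer=[32] and num_channels_cur_layer=[40, 80],
    # branch 0 gets a depthwise 3x3 + 1x1 projection (32 -> 40), and one new
    # branch is created from the last pre-layer branch with a stride-2
    # depthwise downsample followed by a 1x1 projection (32 -> 80).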

    def _make_stage(self,
                    stages_spec,
                    stage_index,
                    in_channels,
                    multiscale_output=True):
        num_modules = stages_spec['num_modules'][stage_index]
        num_branches = stages_spec['num_branches'][stage_index]
        num_blocks = stages_spec['num_blocks'][stage_index]
        with_fuse = stages_spec['with_fuse'][stage_index]
        module_type = stages_spec['module_type'][stage_index]

        modules = []
        for i in range(num_modules):
            # multi_scale_output is only used in the last module
            if not multiscale_output and i == num_modules - 1:
                reset_multiscale_output = False
            else:
                reset_multiscale_output = True

            modules.append(
                LiteHRModule(
                    num_branches,
                    num_blocks,
                    in_channels,
                    module_type,
                    multiscale_output=reset_multiscale_output,
                    with_fuse=with_fuse,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    with_cp=self.with_cp))
            in_channels = modules[-1].in_channels

        return nn.Sequential(*modules), in_channels

    def init_weights(self, pretrained=None):
        """Initialize the weights in backbone.

        Args:
            pretrained (str, optional): Path to pre-trained weights.
                Defaults to None.
        """
        if isinstance(pretrained, str):
            logger = get_root_logger()
            load_checkpoint(self, pretrained, strict=False, logger=logger)
        elif pretrained is None:
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    normal_init(m, std=0.001)
                elif isinstance(m, (_BatchNorm, nn.GroupNorm)):
                    constant_init(m, 1)

            if self.zero_init_residual:
                for m in self.modules():
                    if isinstance(m, Bottleneck):
                        constant_init(m.norm3, 0)
                    elif isinstance(m, BasicBlock):
                        constant_init(m.norm2, 0)
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Forward function."""
        x = self.stem(x)

        y_list = [x]
        for i in range(self.num_stages):
            x_list = []
            transition = getattr(self, 'transition{}'.format(i))
            for j in range(self.stages_spec['num_branches'][i]):
                if transition[j]:
                    if j >= len(y_list):
                        x_list.append(transition[j](y_list[-1]))
                    else:
                        x_list.append(transition[j](y_list[j]))
                else:
                    x_list.append(y_list[j])
            y_list = getattr(self, 'stage{}'.format(i))(x_list)

        x = y_list
        if self.with_head:
            x = self.head_layer(x)

        return [x[0]]

    def train(self, mode=True):
        """Convert the model into training mode."""
        super().train(mode)
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()
--------------------------------------------------------------------------------