├── README.md ├── data └── jaws_dolly_zoom_mask │ ├── img_0001.png │ ├── img_0002.png │ ├── img_0003.png │ ├── img_0004.png │ ├── img_0005.png │ ├── img_0006.png │ ├── img_0007.png │ ├── img_0008.png │ ├── img_0009.png │ ├── img_0010.png │ ├── img_0011.png │ ├── img_0012.png │ └── saved_data.pk ├── jaws ├── configs │ ├── batch_jaws.yaml │ ├── compnode │ │ ├── cpu_debug.yaml │ │ └── light_1n_1g_28b.yaml │ ├── datamodule │ │ └── jaws_dollyzoom.yaml │ ├── demo_jaws.yaml │ ├── jaws │ │ └── batch.yaml │ ├── model │ │ ├── dnerf.yaml │ │ ├── dnerf_rendering.yaml │ │ └── nerf.yaml │ ├── render_dnerf.yaml │ ├── render_jaws.yaml │ ├── train_dnerf.yaml │ └── train_nerf.yaml ├── run.py └── src │ ├── datamodules │ ├── datasets │ │ ├── dnerf_dataset.py │ │ └── nerf_dataset.py │ └── nerf_datamodule.py │ ├── infer.py │ ├── jaws.py │ ├── models │ ├── base_model.py │ ├── callbacks │ │ ├── early_stopping.py │ │ └── grad_norm.py │ ├── dnerf_model.py │ ├── jaws_model.py │ ├── metrics │ │ ├── angular_loss.py │ │ ├── pose_loss.py │ │ └── vgg_loss.py │ ├── modules │ │ ├── camera_transform.py │ │ ├── dnerf │ │ │ ├── network.py │ │ │ └── renderer.py │ │ ├── feature │ │ │ ├── flow_estimator.py │ │ │ └── raft.py │ │ ├── nerf │ │ │ ├── network.py │ │ │ ├── network_ff.py │ │ │ └── renderer.py │ │ └── nerf_factory.py │ └── nerf_model.py │ ├── render.py │ └── train.py ├── lib └── LitePose │ ├── _init_paths.py │ ├── experiments │ ├── coco │ │ ├── ddrnet │ │ │ └── ddrnet23s.yaml │ │ ├── higher_hrnet │ │ │ ├── w32_512_adam_lr1e-3.yaml │ │ │ ├── w32_640_adam_lr1e-3.yaml │ │ │ └── w48_640_adam_lr1e-3.yaml │ │ └── mobilenet │ │ │ ├── mobile.yaml │ │ │ └── supermobile.yaml │ └── crowd_pose │ │ ├── ddrnet │ │ └── ddrnet23s.yaml │ │ ├── efficient_hrnet │ │ ├── H-1.yaml │ │ ├── H-2.yaml │ │ ├── H-3.yaml │ │ └── H-4.yaml │ │ ├── higher_hrnet │ │ ├── w16_512_adam_lr1e-3.yaml │ │ ├── w32_512_adam_lr1e-3.yaml │ │ ├── w32_512_adam_lr1e-3_coco.yaml │ │ ├── w32_512_adam_lr1e-3_syncbn.yaml │ │ ├── w32_640_adam_lr1e-3.yaml │ │ └── w48_640_adam_lr1e-3.yaml │ │ ├── mobilenet │ │ ├── mobile.yaml │ │ └── supermobile.yaml │ │ ├── resnet │ │ ├── resnet.yaml │ │ └── superresnet.yaml │ │ └── simplenet │ │ └── simplenet.yaml │ ├── lib │ ├── arch_manager.py │ ├── config │ │ ├── __init__.py │ │ ├── default.py │ │ └── models.py │ ├── core │ │ ├── group.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── trainer.py │ ├── dataset │ │ ├── COCODataset.py │ │ ├── COCOKeypoints.py │ │ ├── CrowdPoseDataset.py │ │ ├── CrowdPoseKeypoints.py │ │ ├── __init__.py │ │ ├── build.py │ │ ├── target_generators │ │ │ ├── __init__.py │ │ │ └── target_generators.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ └── transforms.py │ ├── fp16_utils │ │ ├── __init__.py │ │ ├── fp16_optimizer.py │ │ ├── fp16util.py │ │ └── loss_scaler.py │ ├── models │ │ ├── __init__.py │ │ ├── layers │ │ │ ├── efficient_blocks.py │ │ │ ├── layers.py │ │ │ └── super_layers.py │ │ ├── pose_efficient_hrnet.py │ │ ├── pose_higher_hrnet.py │ │ ├── pose_mobilenet.py │ │ ├── pose_resnet.py │ │ ├── pose_simplenet.py │ │ ├── pose_supermobilenet.py │ │ └── pose_superresnet.py │ └── utils │ │ ├── transforms.py │ │ ├── utils.py │ │ ├── vis.py │ │ └── zipreader.py │ └── mobile_configs │ ├── prune-L.json │ ├── prune-M.json │ ├── prune-S.json │ ├── search-L.json │ ├── search-M.json │ ├── search-S.json │ └── search-XS.json ├── requirements.txt ├── run_demo.py └── utils ├── camera_utils.py ├── coord_utils.py ├── data_utils.py ├── file_utils.py ├── flow_utils.py ├── image_utils.py ├── 
loss_utils.py ├── misc_utils.py └── nerf_utils.py
/README.md:
--------------------------------------------------------------------------------
1 | # JAWS: Just a Wild Shot for Cinematic Transfer in Neural Radiance Fields
2 | 
3 | By Xi Wang*, Robin Courant*, Jinglei Shi, Eric Marchand and Marc Christie
4 | 
5 | CVPR 2023
6 | 
7 | ### [Project Page](https://www.lix.polytechnique.fr/vista/projects/2023_cvpr_wang/) | [arXiv](https://arxiv.org/pdf/2303.15427.pdf) | [Paper + Supp](https://inria.hal.science/hal-04046701v1/file/main.pdf)
8 | 
9 | ## Installation
10 | 
11 | 1. Create the working environment:
12 | ```
13 | conda create --name jaws -y python=3.10
14 | conda activate jaws
15 | ```
16 | 
17 | 2. Install the dependencies (adapt them to your CUDA version):
18 | ```
19 | conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch
20 | pip install -r requirements.txt
21 | ```
22 | 
23 | 3. Use the correct torch-ngp version:
24 | ```
25 | mkdir ./lib
26 | git clone git@github.com:ashawkey/torch-ngp.git
27 | mv torch-ngp torch_ngp
28 | cd torch_ngp
29 | git checkout 3c14ad5d1a8a36f8d36604d1bbd91515fb4416fa
30 | ln -s lib/torch_ngp dir_to/torch_ngp
31 | ```
32 | 
33 | 4. Download the `LitePose` [checkpoints](https://drive.google.com/drive/folders/1Jlh-bmS85RDWuspZUG-ncWYA7F8iXsa_?usp=drive_link) and put them in `lib/LitePose/ckpt`
34 | 
35 | 5. Download the example dataset [flame_steak_frms_time](https://drive.google.com/file/d/15fO8J3G7k9X9cDb6LEorU60CdVnwMh1D/view?usp=drive_link) and put it in `./data`
36 | 
37 | ## Usage
38 | 
39 | Train NeRF:
40 | ```
41 | python jaws/run.py --config-name train_nerf data_dir=/path/to/dataset xp_name=xp_name datamodule=jaws_dollyzoom.yaml
42 | ```
43 | 
44 | Launch JAWS:
45 | ```
46 | python jaws/run.py --config-name batch_jaws data_dir=path/to/data/dir/flame_steak_frms_time/ xp_name=xp_name jaws.target_dir=data/jaws_dolly_zoom_mask datamodule=jaws_dollyzoom.yaml
47 | ```
48 | 
49 | Render images:
50 | ```
51 | python jaws/run.py --config-name render_jaws data_dir=path/to/data/dir/flame_steak_frms_time/ xp_name=xp_name jaws.target_dir=data/jaws_dolly_zoom_mask datamodule=jaws_dollyzoom.yaml render_target_dir=path/to/results/dir/final_res_n
52 | ```
53 | 
54 | ## Citation
55 | 
56 | ```
57 | @InProceedings{Wang_2023_CVPR,
58 |     author    = {Wang, Xi and Courant, Robin and Shi, Jinglei and Marchand, Eric and Christie, Marc},
59 |     title     = {JAWS: Just a Wild Shot for Cinematic Transfer in Neural Radiance Fields},
60 |     booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
61 |     year      = {2023},
62 | }
63 | ```
64 | 
--------------------------------------------------------------------------------
/data/jaws_dolly_zoom_mask/img_0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0001.png
--------------------------------------------------------------------------------
/data/jaws_dolly_zoom_mask/img_0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0002.png
--------------------------------------------------------------------------------
/data/jaws_dolly_zoom_mask/img_0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0003.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0004.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0005.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0006.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0007.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0008.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0009.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0010.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0011.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0012.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/saved_data.pk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/saved_data.pk 
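
The `data/jaws_dolly_zoom_mask` folder listed above is the example target clip that the README commands pass to JAWS through `jaws.target_dir`. As a quick sanity check before launching a run, the sketch below loads its contents; it is a hypothetical helper rather than part of the repository, and it assumes the `img_*.png` frames are ordinary 8-bit images and that `saved_data.pk` is a plain Python pickle (the repo ships `save_pickle`/`load_pickle` helpers in `utils/file_utils.py`, but the payload layout is not documented here).

```python
# Hypothetical inspection snippet -- not part of the repository.
import pickle
from pathlib import Path

import cv2  # OpenCV is already used throughout the project (e.g. base_model.py)

target_dir = Path("data/jaws_dolly_zoom_mask")

# Read the target frames in index order (img_0001.png ... img_0012.png).
frames = [
    cv2.cvtColor(cv2.imread(str(p)), cv2.COLOR_BGR2RGB)
    for p in sorted(target_dir.glob("img_*.png"))
]
print(f"{len(frames)} target frames, first frame shape: {frames[0].shape}")

# saved_data.pk is assumed to be a standard pickle; inspect it before
# relying on any particular structure.
with open(target_dir / "saved_data.pk", "rb") as f:
    saved = pickle.load(f)
print(type(saved))
```
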
-------------------------------------------------------------------------------- /jaws/configs/batch_jaws.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | - jaws: batch.yaml 7 | 8 | ######################################################### 9 | # Parameters 10 | 11 | # num of sanity checking step 12 | num_sanity_val_steps: 0 13 | # num of epochs 14 | num_epochs: 501 15 | # num of training epoch after validation 16 | check_val_every_n_epoch: 50 17 | # num rays sampled per image for each training step 18 | num_rays: 4096 19 | # num steps sampled per ray 20 | num_steps: 128 21 | # num steps up-sampled per ray 22 | upsample_steps: 0 23 | # batch size of rays at inference to avoid OOM 24 | max_ray_batch: 4096 25 | # num of checkpoints to keep 26 | num_checkpoints: 2 27 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 28 | run_type: jaws 29 | dynamic: false 30 | error_map: false 31 | saturation_loss: false 32 | floater_ratio: 0 33 | 34 | aabb: 1.0 35 | ######################################################### 36 | # Wandb 37 | # Name of the project is accessed by loggers 38 | project_name: jaws 39 | # Name of the run is accessed by loggers 40 | xp_name: ${xp_name} 41 | # Name of the group by loggers 42 | group_name: 3imgs_translation_z 43 | # Name of the job type 44 | job_type: null 45 | # Wether to synced the logs or not (WandB) 46 | log_offline: false 47 | # Metric to monitor to save models 48 | checkpoint_metric: train/loss 49 | 50 | ######################################################### 51 | # Hydra 52 | root: ${hydra:runtime.cwd} 53 | # Path to folder with data 54 | data_dir: ${data_dir} 55 | # Path to folder to save results 56 | result_dir: ${root}/results/${xp_name} 57 | 58 | 59 | # Pretty print config at the start of the run using Rich library 60 | print_config: True 61 | 62 | hydra: 63 | run: 64 | dir: ${hydra:runtime.cwd}/logs 65 | output_subdir: null 66 | sweep: 67 | dir: ${hydra:runtime.cwd}/logs 68 | subdir: ${hydra:runtime.cwd}/logs 69 | 70 | ######################################################### 71 | # System 72 | # Disable (or not) python warnings 73 | ignore_warnings: True 74 | device: cuda 75 | # Seed for random number generators 76 | seed: 1 -------------------------------------------------------------------------------- /jaws/configs/compnode/cpu_debug.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 4 2 | num_workers: 0 3 | num_gpus: 0 4 | num_nodes: 1 5 | accelerator: ddp -------------------------------------------------------------------------------- /jaws/configs/compnode/light_1n_1g_28b.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 28 2 | num_workers: 15 3 | num_gpus: 1 4 | num_nodes: 1 5 | accelerator: cuda -------------------------------------------------------------------------------- /jaws/configs/datamodule/jaws_dollyzoom.yaml: -------------------------------------------------------------------------------- 1 | # dataset mode supports (colmap blender) 2 | mode: blender 3 | independent_calibration: false 4 | # preload all data into GPU accelerate training but use more GPU memory 5 | preload: false 6 | # assume the scene is bounded in box[-bound bound]^3 if > 1 "will invoke adaptive ray marching." 
the fox dataset 7 | bound: 2 8 | # scale camera location into box[-bound bound]^3 9 | scale: 0.3 10 | # dt_gamma (>=0) for adaptive ray marching. set to 0 to disable" >0 to accelerate rendering (but usually with worse quality) 11 | dt_gamma: 0.05 12 | # minimum near distance for camera"25 13 | min_near: 0.0 14 | # threshold for density grid to be occupied 15 | density_thresh: 0.01 16 | # <0 uses no rand pose, =0 only uses rand pose, >0 sample one rand pose every $ known poses 17 | rand_pose: -1 18 | 19 | # use fully-fused MLP 20 | ff: False 21 | background_radius: -1 22 | background_perlin_noise: False 23 | 24 | # during INERF 25 | 26 | focal_resize_factor: 1.0 # if -1, dynamic search 27 | init_pose_dist_factor: 1.0 28 | 29 | init_search: saved # ground_truth, index 30 | init_cam_idx: 12 31 | 32 | blur_pred: false 33 | 34 | aabb: [-2, -2, -1, 2, 2, 0.8] 35 | # aabb: [-2, -2, -2, 2, 2, 1.3] 36 | 37 | anim_start_time: 0.0 # if negative, used saved 38 | auto_anim_time: false 39 | only_init_focal_search: false # all auto 40 | two_strokes: true 41 | 42 | alpha_losses: 0.3 43 | # first temporal second spatial 44 | alpha_two_strokes: 0.1 -------------------------------------------------------------------------------- /jaws/configs/demo_jaws.yaml: -------------------------------------------------------------------------------- 1 | # Config file used to override default config values for demo_jaws.py 2 | 3 | data_dir: ${hydra:runtime.cwd}/data/flame_steak 4 | xp_name: flame_steak 5 | model: nerf 6 | target_dir_video: ${hydra:runtime.cwd}/data/jaws_dolly_zoom 7 | datamodule: jaws_dollyzoom 8 | 9 | num_epochs: 101 10 | num_mixed_grad: 8000 11 | num_steps: 256 12 | lr: 0.015 13 | loss_type: pose_flow 14 | flow_loss_type: EE 15 | alpha_losses: 0.97 16 | alpha_two_strokes: 0.1 17 | guidance_type: guidance 18 | 19 | init_cam_idx_same: 14 20 | init_focal_search: False 21 | two_strokes: True 22 | 23 | diff_temporal: False 24 | diff_focal: True 25 | seed: 1 26 | 27 | ######################################################################################### 28 | defaults: 29 | - _self_ 30 | - override hydra/hydra_logging: disabled 31 | - override hydra/job_logging: disabled 32 | 33 | hydra: 34 | run: 35 | dir: ${hydra:runtime.cwd} 36 | output_subdir: null 37 | sweep: 38 | dir: ${hydra:runtime.cwd}/logs 39 | subdir: ${hydra:runtime.cwd}/logs -------------------------------------------------------------------------------- /jaws/configs/jaws/batch.yaml: -------------------------------------------------------------------------------- 1 | num_epochs: 201 2 | num_sample_rays: 2048 3 | num_sample_grad: 8000 4 | image_size: [224, 224] 5 | log_interval: 25 6 | blur_kernel: [3, 3] 7 | blur_sigma: 1 8 | learning_rate: 0.005 9 | target_dir: null 10 | pixel_loss: false 11 | pixel_loss_type: vgg # vgg, mse 12 | flow_loss: true 13 | flow_loss_type: EE # EE, AN 14 | pose_loss: true 15 | pose_loss_type: heatmap # euclidean 16 | grad_norm: true 17 | tag: null 18 | regularize_loss_range: 1.0 19 | regularize_loss_weight: 5 20 | guidance_map: true 21 | guidance_type: guidance # guidance, inerf_original, random 22 | # 23 | early_stop_num: 401 24 | early_stop_delta: 0 25 | 26 | # 27 | diff_temporal: false 28 | diff_focal: true 29 | allow_backward_t: false -------------------------------------------------------------------------------- /jaws/configs/model/dnerf.yaml: -------------------------------------------------------------------------------- 1 | # initial learning rate 2 | lr: 1e-2 3 | lr_net: 1e-3 4 | ckpt: "latest" 5 | # use 
amp mixed precision training 6 | fp16: True 7 | 8 | warp_encoding: frequency 9 | time_encoding: frequency 10 | sigma_encoding: tiledgrid 11 | direction_encoding: sphere_harmonics 12 | background_encoding: hashgrid 13 | n_warp_layers: 5 14 | n_sigma_layers: 2 15 | n_color_layers: 3 16 | n_background_layers: 2 17 | warp_hidden_dim: 128 18 | sigma_hidden_dim: 64 19 | color_hidden_dim: 64 20 | background_hidden_dim: 64 21 | geo_feat_dim: 15 22 | 23 | raft_checkpoint: ${root}/checkpoints/raft-things.pth 24 | encoder_checkpoint: ${root}/checkpoints/autoencoder-mse-unit-224-noskip-490-last.ckpt 25 | model_size: small 26 | encoder_num_levels: 16 -------------------------------------------------------------------------------- /jaws/configs/model/dnerf_rendering.yaml: -------------------------------------------------------------------------------- 1 | # initial learning rate 2 | lr: 1e-2 3 | lr_net: 1e-3 4 | ckpt: "latest" 5 | # use amp mixed precision training 6 | fp16: True 7 | 8 | warp_encoding: frequency 9 | time_encoding: frequency 10 | sigma_encoding: tiledgrid 11 | direction_encoding: sphere_harmonics 12 | background_encoding: hashgrid 13 | n_warp_layers: 7 14 | n_sigma_layers: 5 15 | n_color_layers: 5 16 | n_background_layers: 2 17 | warp_hidden_dim: 128 18 | sigma_hidden_dim: 128 19 | color_hidden_dim: 128 20 | background_hidden_dim: 128 21 | geo_feat_dim: 15 22 | 23 | raft_checkpoint: ${root}/checkpoints/raft-things.pth 24 | encoder_checkpoint: ${root}/checkpoints/autoencoder-mse-unit-224-noskip-490-last.ckpt 25 | model_size: small 26 | encoder_num_levels: 32 -------------------------------------------------------------------------------- /jaws/configs/model/nerf.yaml: -------------------------------------------------------------------------------- 1 | # initial learning rate 2 | lr: 1e-2 3 | ckpt: "latest" 4 | # use amp mixed precision training 5 | fp16: True 6 | 7 | sigma_encoding: hashgrid 8 | direction_encoding: sphere_harmonics 9 | background_encoding: hashgrid 10 | n_sigma_layers: 2 11 | n_color_layers: 3 12 | n_background_layers: 2 13 | sigma_hidden_dim: 64 14 | color_hidden_dim: 64 15 | background_hidden_dim: 64 16 | geo_feat_dim: 15 17 | 18 | raft_checkpoint: ${root}/checkpoints/raft-things.pth 19 | encoder_checkpoint: ${root}/checkpoints/autoencoder-mse-unit-224-noskip-490-last.ckpt 20 | model_size: small 21 | encoder_num_levels: 16 -------------------------------------------------------------------------------- /jaws/configs/render_dnerf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | # - datamodule: lego_dataset_small.yaml 7 | # - datamodule: 1per_dataset.yaml 8 | # - datamodule: firekeeper.yaml 9 | 10 | # num of sanity checking step 11 | num_sanity_val_steps: 0 12 | # num of epochs 13 | num_epochs: 1 14 | # num of training epoch after validation 15 | check_val_every_n_epoch: 50 16 | # num rays sampled per image for each training step 17 | num_rays: 8192 18 | # num steps sampled per ray 19 | num_steps: 800 20 | # num steps up-sampled per ray 21 | upsample_steps: 0 22 | # batch size of rays at inference to avoid OOM 23 | max_ray_batch: 4096 24 | # num of checkpoints to keep 25 | num_checkpoints: 2 26 | # if activate error map 27 | error_map: false 28 | # if using RGB+S for training loss 29 | saturation_loss: false 30 | floater_ratio: 0 31 | 32 | 33 | ################################################################### 34 | # 
Type of run to launch (current: train TODO: debug/eval/infer/...) 35 | run_type: val 36 | dynamic: True 37 | 38 | # Name of the project is accessed by loggers 39 | project_name: jaws 40 | # Name of the run is accessed by loggers 41 | xp_name: ${xp_name} 42 | # Wether to synced the logs or not (WandB) 43 | log_offline: false 44 | # Metric to monitor to save models 45 | checkpoint_metric: train/loss 46 | 47 | root: ${hydra:runtime.cwd} 48 | # Path to folder with data 49 | data_dir: ${data_dir} 50 | # Path to folder to save results 51 | result_dir: ${root}/results/${xp_name} 52 | 53 | # Seed for random number generators 54 | seed: 1 55 | # Pretty print config at the start of the run using Rich library 56 | print_config: True 57 | # Disable (or not) python warnings 58 | ignore_warnings: True 59 | 60 | device: cuda 61 | hydra: 62 | run: 63 | dir: ${hydra:runtime.cwd}/logs 64 | output_subdir: null 65 | sweep: 66 | dir: ${hydra:runtime.cwd}/logs 67 | subdir: ${hydra:runtime.cwd}/logs 68 | 69 | 70 | srv_target_dir: /home/xi/Work/jaws/misc/matrix 71 | img_size: [224,224] 72 | render_target_dir: none 73 | render_frame_num: 1 -------------------------------------------------------------------------------- /jaws/configs/render_jaws.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | - jaws: batch.yaml 7 | 8 | ######################################################### 9 | # Parameters 10 | 11 | # num of sanity checking step 12 | num_sanity_val_steps: 0 13 | # num of epochs 14 | num_epochs: 501 15 | # num of training epoch after validation 16 | check_val_every_n_epoch: 50 17 | # num rays sampled per image for each training step 18 | num_rays: 4096 19 | # num steps sampled per ray 20 | num_steps: 128 21 | # num steps up-sampled per ray 22 | upsample_steps: 0 23 | # batch size of rays at inference to avoid OOM 24 | max_ray_batch: 4096 25 | # num of checkpoints to keep 26 | num_checkpoints: 2 27 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 
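# Note: jaws/run.py dispatches on this value: "train" runs jaws/src/train.py,
# "jaws" runs jaws/src/jaws.py and "render" runs jaws/src/render.py.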
28 | run_type: render 29 | dynamic: false 30 | error_map: false 31 | saturation_loss: false 32 | floater_ratio: 0 33 | 34 | aabb: 1.0 35 | ######################################################### 36 | # Render 37 | render_target_dir: null 38 | render_frame_num: 100 39 | ######################################################### 40 | # Wandb 41 | # Name of the project is accessed by loggers 42 | project_name: jaws 43 | # Name of the run is accessed by loggers 44 | xp_name: ${xp_name} 45 | # Name of the group by loggers 46 | group_name: 3imgs_translation_z 47 | # Name of the job type 48 | job_type: null 49 | # Wether to synced the logs or not (WandB) 50 | log_offline: false 51 | # Metric to monitor to save models 52 | checkpoint_metric: train/loss 53 | 54 | ######################################################### 55 | # Hydra 56 | root: ${hydra:runtime.cwd} 57 | # Path to folder with data 58 | data_dir: ${data_dir} 59 | # Path to folder to save results 60 | result_dir: ${root}/results/${xp_name} 61 | 62 | 63 | # Pretty print config at the start of the run using Rich library 64 | print_config: True 65 | 66 | hydra: 67 | run: 68 | dir: ${hydra:runtime.cwd}/logs 69 | output_subdir: null 70 | sweep: 71 | dir: ${hydra:runtime.cwd}/logs 72 | subdir: ${hydra:runtime.cwd}/logs 73 | 74 | ######################################################### 75 | # System 76 | # Disable (or not) python warnings 77 | ignore_warnings: True 78 | device: cuda 79 | # Seed for random number generators 80 | seed: 1 -------------------------------------------------------------------------------- /jaws/configs/train_dnerf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: dnerf.yaml 5 | - datamodule: mutant.yaml 6 | 7 | # num of sanity checking step 8 | num_sanity_val_steps: 0 9 | # num of epochs 10 | num_epochs: 1401 11 | # num of training epoch after validation 12 | check_val_every_n_epoch: 25 13 | # num rays sampled per image for each training step 14 | num_rays: 2048 15 | # num steps sampled per ray 16 | num_steps: 512 17 | # num steps up-sampled per ray 18 | upsample_steps: 2 19 | # batch size of rays at inference to avoid OOM 20 | max_ray_batch: 4096 21 | # num of checkpoints to keep 22 | num_checkpoints: 2 23 | # if activate error map 24 | error_map: false 25 | # if using RGB+S for training loss 26 | saturation_loss: false 27 | # normally between 0.01 to 0.001, zero when not applied 28 | floater_ratio: 0.0005 29 | 30 | 31 | ################################################################### 32 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 
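# Note: with dynamic: true, jaws/src/models/modules/nerf_factory.py builds the
# time-conditioned DNeRFModel instead of the static NeRF model.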
33 | run_type: train 34 | dynamic: true 35 | 36 | # Name of the project is accessed by loggers 37 | project_name: jaws 38 | # Name of the run is accessed by loggers 39 | xp_name: ${xp_name} 40 | # Wether to synced the logs or not (WandB) 41 | log_offline: false 42 | # Metric to monitor to save models 43 | checkpoint_metric: train/loss 44 | 45 | root: ${hydra:runtime.cwd} 46 | # Path to folder with data 47 | data_dir: ${data_dir} 48 | # Path to folder to save results 49 | result_dir: ${root}/results/${xp_name} 50 | 51 | # Seed for random number generators 52 | seed: 1 53 | # Pretty print config at the start of the run using Rich library 54 | print_config: True 55 | # Disable (or not) python warnings 56 | ignore_warnings: True 57 | 58 | device: cuda 59 | hydra: 60 | run: 61 | dir: ${hydra:runtime.cwd}/logs 62 | output_subdir: null 63 | sweep: 64 | dir: ${hydra:runtime.cwd}/logs 65 | subdir: ${hydra:runtime.cwd}/logs 66 | 67 | 68 | 69 | srv_target_dir: /home/xi/Work/jaws/misc/kid_rocky 70 | img_size: [224,224] -------------------------------------------------------------------------------- /jaws/configs/train_nerf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | # - datamodule: lego_dataset_small.yaml 7 | # - datamodule: 1per_dataset.yaml 8 | # - datamodule: firekeeper.yaml 9 | 10 | # num of sanity checking step 11 | num_sanity_val_steps: 0 12 | # num of epochs 13 | num_epochs: 2048 14 | # num of training epoch after validation 15 | check_val_every_n_epoch: 15 16 | # num rays sampled per image for each training step 17 | num_rays: 8096 18 | # num steps sampled per ray 19 | num_steps: 512 20 | # num steps up-sampled per ray 21 | upsample_steps: 0 22 | # batch size of rays at infernce to avoid OOM 23 | max_ray_batch: 4096 24 | # num of checkpoints to keep 25 | num_checkpoints: 2 26 | # if activate error map 27 | error_map: false 28 | # if using RGB+S for training loss 29 | saturation_loss: false 30 | # normally between 0.01 to 0.001, zero when not applied 31 | floater_ratio: -1 32 | 33 | ################################################################### 34 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 
35 | run_type: train 36 | dynamic: false 37 | 38 | # Name of the project is accessed by loggers 39 | project_name: jaws 40 | # Name of the run is accessed by loggers 41 | xp_name: ${xp_name} 42 | # Wether to synced the logs or not (WandB) 43 | log_offline: false 44 | # Metric to monitor to save models 45 | checkpoint_metric: train/loss 46 | 47 | root: ${hydra:runtime.cwd} 48 | # Path to folder with data 49 | data_dir: ${data_dir} 50 | # Path to folder to save results 51 | result_dir: ${root}/results/${xp_name} 52 | 53 | # Seed for random number generators 54 | seed: 1 55 | # Pretty print config at the start of the run using Rich library 56 | print_config: True 57 | # Disable (or not) python warnings 58 | ignore_warnings: True 59 | 60 | device: cuda 61 | hydra: 62 | run: 63 | dir: ${hydra:runtime.cwd}/logs 64 | output_subdir: null 65 | sweep: 66 | dir: ${hydra:runtime.cwd}/logs 67 | subdir: ${hydra:runtime.cwd}/logs -------------------------------------------------------------------------------- /jaws/run.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import hydra 5 | from omegaconf import DictConfig 6 | 7 | from utils.nerf_utils import seed_everything 8 | 9 | 10 | @hydra.main( 11 | config_path="configs/", config_name="train_nerf.yaml", version_base="1.2" 12 | ) 13 | def main(config: DictConfig): 14 | sys.path.append(osp.join(config.root, "lib", "torch_ngp")) 15 | seed_everything(config.seed) 16 | 17 | if config.run_type == "train": 18 | from jaws.src.train import train 19 | 20 | train(config) 21 | 22 | if config.run_type == "jaws": 23 | from jaws.src.jaws import jaws 24 | 25 | jaws(config) 26 | 27 | if config.run_type == "render": 28 | from jaws.src.render import render 29 | 30 | render(config) 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /jaws/src/datamodules/nerf_datamodule.py: -------------------------------------------------------------------------------- 1 | from pytorch_lightning import LightningDataModule 2 | from torch.utils.data import DataLoader 3 | 4 | import jaws.src.datamodules.datasets.dnerf_dataset as dnerf 5 | import jaws.src.datamodules.datasets.nerf_dataset as nerf 6 | 7 | 8 | class NeRFDataModule(LightningDataModule): 9 | """Initialize train, val and test base data loader.""" 10 | 11 | def __init__( 12 | self, 13 | data_type: str, 14 | num_rays: int, 15 | path: str, 16 | mode: str, 17 | preload: bool, 18 | scale: float, 19 | bound: int, 20 | rand_pose: bool, 21 | ind_calib: bool = False, 22 | error_map: bool = False, 23 | aabb=None, 24 | ): 25 | super().__init__() 26 | self._num_rays = num_rays 27 | self._path = path 28 | self._mode = mode 29 | self._preload = preload 30 | self._scale = scale 31 | self._bound = bound 32 | self._aabb = aabb 33 | self._rand_pose = rand_pose 34 | self._ind_calib = ind_calib 35 | self._error_map = error_map 36 | 37 | if data_type == "dynamic": 38 | self.dataset = dnerf.DNeRFDataset 39 | else: 40 | self.dataset = nerf.NeRFDataset 41 | 42 | def train_dataloader(self) -> DataLoader: 43 | """Load train set loader.""" 44 | self.train_dataset = self.dataset( 45 | num_rays=self._num_rays, 46 | path=self._path, 47 | mode=self._mode, 48 | preload=self._preload, 49 | scale=self._scale, 50 | bound=self._bound, 51 | aabb=self._aabb, 52 | rand_pose=self._rand_pose, 53 | type="train", 54 | ind_calibration=self._ind_calib, 55 | error_map=self._error_map, 56 | ) 57 | return 
self.train_dataset.dataloader() 58 | 59 | def val_dataloader(self) -> DataLoader: 60 | """Load val set loader.""" 61 | return self.dataset( 62 | num_rays=self._num_rays, 63 | path=self._path, 64 | mode=self._mode, 65 | preload=self._preload, 66 | scale=self._scale, 67 | bound=self._bound, 68 | aabb=self._aabb, 69 | rand_pose=self._rand_pose, 70 | type="val", 71 | ind_calibration=self._ind_calib, 72 | error_map=self._error_map, 73 | ).dataloader() 74 | 75 | def test_dataloader(self) -> DataLoader: 76 | """Load test set loader.""" 77 | return self.dataset( 78 | num_rays=self._num_rays, 79 | path=self._path, 80 | mode=self._mode, 81 | preload=self._preload, 82 | scale=self._scale, 83 | bound=self._bound, 84 | aabb=self._aabb, 85 | rand_pose=self._rand_pose, 86 | type="test", 87 | ind_calibration=self._ind_calib, 88 | error_map=self._error_map, 89 | ).dataloader() 90 | -------------------------------------------------------------------------------- /jaws/src/infer.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from omegaconf import DictConfig 3 | import os 4 | import os.path as osp 5 | import sys 6 | 7 | import torch 8 | from torch import optim 9 | from pytorch_lightning import Trainer 10 | from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor 11 | from pytorch_lightning.loggers import WandbLogger 12 | 13 | from jaws.src.models.nerf_model import NeRFModel 14 | from jaws.src.datamodules.nerf_datamodule import NeRFDataModule 15 | from utils.file_utils import create_dir 16 | from jaws.src.models.modules.nerf_factory import create_nerf_model 17 | 18 | 19 | def infer(config: DictConfig): 20 | sys.path.append(osp.join(".", "lib", "torch_ngp")) 21 | model = create_nerf_model(config) 22 | 23 | # Initialize dataset 24 | data_module = NeRFDataModule( 25 | data_type="dynamic" if config.dynamic else "static", 26 | num_rays=config.num_rays, 27 | path=config.data_dir, 28 | mode=config.datamodule.mode, 29 | preload=config.datamodule.preload, 30 | scale=config.datamodule.scale, 31 | bound=config.datamodule.bound, 32 | rand_pose=config.datamodule.rand_pose, 33 | ind_calib=config.datamodule.independent_calibration, 34 | error_map=config.error_map, 35 | ) 36 | 37 | # Initialize trainer 38 | checkpoint_dir = osp.join(config.result_dir, "checkpoints") 39 | if not osp.exists(checkpoint_dir): 40 | create_dir(checkpoint_dir) 41 | if config.model.ckpt == "latest": 42 | checkpoint_list = sorted(os.listdir(checkpoint_dir)) 43 | if len(checkpoint_list) > 0: 44 | checkpoint_path = osp.join(checkpoint_dir, checkpoint_list[-1]) 45 | else: 46 | checkpoint_path = None 47 | elif config.model.ckpt == "scratch": 48 | checkpoint_path = None 49 | else: 50 | checkpoint_path = config.model.ckpt 51 | checkpoint = ModelCheckpoint( 52 | monitor=config.checkpoint_metric, 53 | mode="min", 54 | save_top_k=config.num_checkpoints, 55 | dirpath=checkpoint_dir, 56 | filename="{epoch}", 57 | save_on_train_epoch_end=True, 58 | ) 59 | timestamp = datetime.now().strftime("%m-%d_%H-%M") 60 | wandb_logger = WandbLogger( 61 | name="_".join([config.xp_name, "nerf", timestamp]), 62 | project=config.project_name, 63 | offline=config.log_offline, 64 | ) 65 | lr_monitor = LearningRateMonitor(logging_interval="epoch") 66 | callbacks = [lr_monitor, checkpoint] 67 | trainer = Trainer( 68 | gpus=config.compnode.num_gpus, 69 | num_nodes=config.compnode.num_nodes, 70 | accelerator=config.compnode.accelerator, 71 | max_epochs=config.num_epochs, 72 | 
callbacks=callbacks, 73 | logger=wandb_logger, 74 | check_val_every_n_epoch=config.check_val_every_n_epoch, 75 | log_every_n_steps=5, 76 | precision=16 if config.model.fp16 else 32, 77 | num_sanity_val_steps=config.num_sanity_val_steps, 78 | ) 79 | # Launch model training 80 | trainer.test(model, data_module, ckpt_path=checkpoint_path) 81 | -------------------------------------------------------------------------------- /jaws/src/models/base_model.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import socket 3 | import struct 4 | from datetime import datetime 5 | from typing import Any, Dict, Tuple, List, Callable 6 | import cv2 7 | import numpy as np 8 | from pytorch_lightning import LightningModule 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torchvision.transforms as T 13 | from tqdm import tqdm 14 | from scipy.spatial.transform import Rotation as R 15 | 16 | from utils.file_utils import create_dir, save_pickle, load_pickle 17 | from utils.nerf_utils import get_rays 18 | from utils.image_utils import ( 19 | save_torch_image, 20 | save_loss_marginal_image, 21 | save_heatmaps, 22 | save_heatmap, 23 | put_text_on_image, 24 | ) 25 | from utils.camera_utils import CameraPoseGenerator as cam_gen, pose_distance 26 | 27 | from kornia.color.hsv import rgb_to_hsv 28 | 29 | 30 | class BaseModel(LightningModule): 31 | def __init__( 32 | self, 33 | result_dir: str, 34 | optimizer: nn.Module, 35 | lr_scheduler: nn.Module, 36 | criterion: nn.Module, 37 | run_type: str, 38 | num_steps: int, 39 | upsample_steps: int, 40 | max_ray_batch: int, 41 | saturation_loss: bool, 42 | error_map: bool, 43 | floater_ratio: float, 44 | ): 45 | super().__init__() 46 | 47 | self._optimizer = optimizer 48 | self._lr_scheduler = lr_scheduler 49 | self._result_dir = result_dir 50 | self.criterion = criterion 51 | self._val_dir = osp.join(result_dir, "validation") 52 | self.benchmark_dir = osp.join(result_dir, "benchmark") 53 | create_dir(self._val_dir) 54 | if run_type == "infer": 55 | timestamp = datetime.now().strftime("%m-%d_%H-%M") 56 | self._test_dir = osp.join(result_dir, "test", f"test_{timestamp}") 57 | create_dir(self._test_dir) 58 | 59 | self._num_steps = num_steps 60 | self._upsample_steps = upsample_steps 61 | self._max_ray_batch = max_ray_batch 62 | self._saturation_loss = saturation_loss 63 | self._error_map = error_map 64 | self._floater_ratio = floater_ratio 65 | 66 | def _save_step(self, rgb_pred: torch.Tensor, batch_idx: int): 67 | """Save predicted RGB images.""" 68 | pred_path = osp.join( 69 | self._val_dir, f"{batch_idx:03}_{self.current_epoch+1:02}.png" 70 | ) 71 | img = cv2.cvtColor( 72 | (rgb_pred[0].detach().cpu().numpy() * 255).astype(np.uint8), 73 | cv2.COLOR_RGB2BGR, 74 | ) 75 | cv2.imwrite( 76 | pred_path, 77 | img, 78 | ) 79 | # use directly logger to log image 80 | self.logger.log_image(key="val_img", images=[img[:, :, ::-1]]) 81 | 82 | if self._error_map: 83 | emap = ( 84 | self.trainer.datamodule.train_dataset.error_map[0] 85 | .view(128, 128) 86 | .cpu() 87 | .numpy() 88 | ) 89 | emap = (emap - emap.min()) / (emap.max() - emap.min()) 90 | emap_path = osp.join( 91 | self._val_dir, 92 | f"{batch_idx:03}_{self.current_epoch+1:02}_emap.png", 93 | ) 94 | cv2.imwrite( 95 | emap_path, 96 | (emap * 255).astype(np.uint8), 97 | ) 98 | 99 | def _test_save_step( 100 | self, rgb_pred: torch.Tensor, batch_idx: int, pose: torch.Tensor 101 | ): 102 | """Save predicted RGB images in prediction""" 
103 | pred_path = osp.join(self._test_dir, f"test_{batch_idx:03}.png") 104 | img = cv2.cvtColor( 105 | (rgb_pred[0].detach().cpu().numpy() * 255).astype(np.uint8), 106 | cv2.COLOR_RGB2BGR, 107 | ) 108 | cv2.imwrite( 109 | pred_path, 110 | img, 111 | ) 112 | 113 | file_dir = osp.join(self._test_dir, "traj.txt") 114 | with open(file_dir, "a+") as output_file: 115 | output_file.write( 116 | " ".join( 117 | [ 118 | str(elem) 119 | for elem in pose.cpu().detach().numpy().flatten()[:-4] 120 | ] 121 | ) 122 | + "\n" 123 | ) 124 | 125 | def _log_step( 126 | self, 127 | mode: str, 128 | loss: torch.Tensor, 129 | ): 130 | """Log metrics at each epoch and each step for the training.""" 131 | on_step = True if mode == "train" else False 132 | self.log( 133 | f"{mode}/loss", 134 | loss, 135 | on_step=on_step, 136 | on_epoch=True, 137 | prog_bar=False, 138 | logger=True, 139 | sync_dist=True, 140 | ) 141 | 142 | def _eval_step_wo_gt(self): 143 | raise NotImplementedError() 144 | 145 | def _eval_step_w_gt(self): 146 | raise NotImplementedError() 147 | 148 | def training_step(self): 149 | raise NotImplementedError() 150 | 151 | def validation_step(self): 152 | raise NotImplementedError() 153 | 154 | def test_step(self): 155 | raise NotImplementedError() 156 | 157 | def configure_optimizers(self) -> Dict[str, Any]: 158 | """Define optimizers and LR schedulers.""" 159 | if self._optimizer is None: 160 | optimizer = optim.Adam( 161 | self.model.parameters(), lr=0.001, weight_decay=5e-4 162 | ) # naive adam 163 | else: 164 | optimizer = self._optimizer(self.model) 165 | 166 | if self._lr_scheduler is None: 167 | lr_scheduler = optim.lr_scheduler.LambdaLR( 168 | self.optimizer, lr_lambda=lambda epoch: 1 169 | ) # fake scheduler 170 | else: 171 | lr_scheduler = self._lr_scheduler(optimizer) 172 | 173 | return { 174 | "optimizer": optimizer, 175 | "lr_scheduler": lr_scheduler, 176 | "monitor": "train/loss", 177 | } 178 | -------------------------------------------------------------------------------- /jaws/src/models/callbacks/early_stopping.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class EarlyStopping(nn.Module): 5 | """Callback adapting stopping training if criteria are reached.""" 6 | 7 | def __init__(self, num_patience: int, min_delta: float): 8 | super().__init__() 9 | self.num_patience = num_patience 10 | self.min_delta = min_delta 11 | self.loss_buffer = None 12 | self.wait_count = 0 13 | 14 | def run_early_stopping_check(self, current_loss: float) -> bool: 15 | if self.wait_count == 0: 16 | self.loss_buffer = current_loss 17 | self.wait_count += 1 18 | return False 19 | 20 | if current_loss < self.loss_buffer - self.min_delta: 21 | self.loss_buffer = current_loss 22 | self.wait_count = 0 23 | return False 24 | 25 | if self.wait_count > self.num_patience: 26 | return True 27 | 28 | self.wait_count += 1 29 | return False 30 | -------------------------------------------------------------------------------- /jaws/src/models/callbacks/grad_norm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class GradNorm(nn.Module): 8 | """ 9 | Callback adapting loss weights during the training for MTL. 10 | Implementation of https://arxiv.org/pdf/1711.02257.pdf. 11 | Note that: `pl_module` must have `loss_weights` and `task_losses` 12 | attributes and `model._get_shared_layer` method. 
13 | 14 | Code adapted from: https://github.com/falkaer/artist-group-factors/ 15 | """ 16 | 17 | def __init__(self, num_tasks: int, alpha: float): 18 | super().__init__() 19 | self.num_tasks = num_tasks 20 | self.loss_weights = nn.Parameter(torch.ones(num_tasks, requires_grad=True)) 21 | self.alpha = alpha 22 | self._batch_index = 0 23 | 24 | def fit(self, task_losses: torch.Tensor, shared_parameters: nn.Parameter): 25 | """Fit the loss weights according to the gradnorm.""" 26 | # Zero the w_i(t) gradients to update the weights using gradnorm loss 27 | self.loss_weights.grad = 0.0 * self.loss_weights.grad 28 | W = list(shared_parameters) 29 | 30 | norms = [] 31 | for task_index, (w_i, L_i) in enumerate(zip(self.loss_weights, task_losses)): 32 | # Retain the graph until the last pass 33 | retain_graph = True if task_index != self.num_tasks - 1 else False 34 | # Gradient of L_i(t) w.r.t. W 35 | gLgW = torch.autograd.grad(L_i, W, retain_graph=retain_graph) 36 | # G^{(i)}_W(t) 37 | norms.append(torch.norm(w_i * gLgW[0])) 38 | norms = torch.stack(norms) 39 | 40 | # Set L(0) 41 | if self._batch_index == 0: 42 | self.initial_losses = task_losses.detach() 43 | 44 | # Compute the constant term without accumulating gradients 45 | # as it should stay constant during back-propagation 46 | with torch.no_grad(): 47 | # Loss ratios \curl{L}(t) 48 | loss_ratios = task_losses / self.initial_losses 49 | # Inverse training rate r(t) 50 | inverse_train_rates = loss_ratios / loss_ratios.mean() 51 | constant_term = norms.mean() * (inverse_train_rates**self.alpha) 52 | 53 | # Write out the gradnorm loss L_grad and set the weight gradients 54 | grad_norm_loss = (norms - constant_term).abs().sum() 55 | self.loss_weights.grad = torch.autograd.grad(grad_norm_loss, self.loss_weights)[ 56 | 0 57 | ] 58 | 59 | self._batch_index += 1 60 | 61 | def normalize_weights(self) -> torch.Tensor: 62 | """Renormalize the gradient weights.""" 63 | with torch.no_grad(): 64 | normalize_coeff = len(self.loss_weights) / self.loss_weights.sum() 65 | self.loss_weights.data = self.loss_weights.data * normalize_coeff 66 | 67 | def _get_loss_weights(self, mask_weights: List[torch.Tensor]) -> torch.Tensor: 68 | """Return the loss weights for the current batch.""" 69 | gradnorm_index, loss_weights = 0, [] 70 | for task_weight in mask_weights: 71 | if task_weight: 72 | weight = self.loss_weights[gradnorm_index] 73 | loss_weights.append(weight) 74 | gradnorm_index += 1 75 | else: 76 | loss_weights.append(torch.tensor(0, device=self.loss_weights.device)) 77 | loss_weights = torch.stack(loss_weights).clamp(min=0.05) 78 | 79 | return loss_weights 80 | -------------------------------------------------------------------------------- /jaws/src/models/metrics/angular_loss.py: -------------------------------------------------------------------------------- 1 | """Code adapted from: https://github.com/jadarve/optical-flow-filter.""" 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class AngularLoss(nn.Module): 8 | def forward(self, flow1: torch.Tensor, flow2: torch.Tensor): 9 | """Compute the angular error between two flow fields. 10 | 11 | :param flow1: first optical flow field. 12 | :param flow2: second optical flow field. 13 | :return: angular error field in degrees. 
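        Note: the per-pixel angular error is averaged and divided by 180,
        so the value actually returned is a scalar in [0, 1].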
14 | """ 15 | f1_x = flow1[..., 0] 16 | f1_y = flow1[..., 1] 17 | 18 | f2_x = flow2[..., 0] 19 | f2_y = flow2[..., 1] 20 | 21 | top = 1.0 + f1_x * f2_x + f1_y * f2_y 22 | bottom = torch.sqrt(1.0 + f1_x * f1_x + f1_y * f1_y) * torch.sqrt( 23 | 1.0 + f2_x * f2_x + f2_y * f2_y 24 | ) 25 | div = torch.clamp(top / bottom, min=-1, max=1) 26 | loss = torch.rad2deg(torch.arccos(div)).mean() / 180.0 27 | 28 | return loss 29 | -------------------------------------------------------------------------------- /jaws/src/models/metrics/vgg_loss.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch.nn.functional import mse_loss 5 | import torchvision 6 | from torchvision.models.vgg import VGG16_Weights 7 | 8 | 9 | class VGGLoss(torch.nn.Module): 10 | """ 11 | VGG perceptual loss: 12 | Paper: https://arxiv.org/pdf/1603.08155.pdf 13 | Code: https://gist.github.com/alper111/8233cdb0414b4cb5853f2f730ab95a49 14 | """ 15 | 16 | def __init__( 17 | self, 18 | resize: bool = False, 19 | feature_blocks: List[int] = [0, 1, 2, 3], 20 | style_blocks: List[int] = [], 21 | ): 22 | super(VGGLoss, self).__init__() 23 | 24 | # Initialize VGG blocks 25 | weights = VGG16_Weights.DEFAULT 26 | blocks = [ 27 | torchvision.models.vgg16(weights=weights).features[:4].eval(), 28 | torchvision.models.vgg16(weights=weights).features[4:9].eval(), 29 | torchvision.models.vgg16(weights=weights).features[9:16].eval(), 30 | torchvision.models.vgg16(weights=weights).features[16:23].eval(), 31 | ] 32 | self.feature_blocks = feature_blocks 33 | self.style_blocks = style_blocks 34 | 35 | # Freeze VGG's parameters 36 | for bl in blocks: 37 | for p in bl.parameters(): 38 | p.requires_grad = False 39 | self.blocks = torch.nn.ModuleList(blocks) 40 | 41 | # Initialize transformation parameters 42 | self.transform = torch.nn.functional.interpolate if resize else None 43 | mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1) 44 | self.register_buffer("mean", mean) 45 | std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1) 46 | self.register_buffer("std", std) 47 | 48 | def forward( 49 | self, 50 | x: torch.Tensor, 51 | y: torch.Tensor, 52 | ) -> torch.Tensor: 53 | # Order channels: [B, C, H, W] 54 | if x.shape[1] != 3: 55 | x = x.permute(0, 3, 1, 2) 56 | y = y.permute(0, 3, 1, 2) 57 | # Normalize in/outputs 58 | x = (x - self.mean) / self.std 59 | y = (y - self.mean) / self.std 60 | # Resize in/outputs 61 | if self.transform: 62 | x = self.transform(x, mode="bilinear", size=(224, 224), align_corners=False) 63 | y = self.transform(y, mode="bilinear", size=(224, 224), align_corners=False) 64 | 65 | # Evaluate loss value 66 | loss = 0 67 | for i, block in enumerate(self.blocks): 68 | x = block(x) 69 | y = block(y) 70 | # Compute feature loss 71 | if i in self.feature_blocks: 72 | loss += mse_loss(x, y) 73 | # Compute style loss 74 | if i in self.style_blocks: 75 | act_x = x.reshape(x.shape[0], x.shape[1], -1) 76 | act_y = y.reshape(y.shape[0], y.shape[1], -1) 77 | gram_x = act_x @ act_x.permute(0, 2, 1) / act_x.numel() 78 | gram_y = act_y @ act_y.permute(0, 2, 1) / act_x.numel() 79 | loss += torch.norm(gram_x - gram_y) 80 | 81 | return loss 82 | -------------------------------------------------------------------------------- /jaws/src/models/modules/feature/flow_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | import torch 5 | import 
torch.nn as nn 6 | 7 | from jaws.src.models.modules.feature.raft import make_raft_estimator 8 | from utils.flow_utils import FlowUtils 9 | 10 | 11 | def g_sigmoid(x, q, b): 12 | return 1.0 / (1.0 + torch.exp(b * q) * torch.exp(-b * x)) 13 | 14 | 15 | def f_inverse(x): 16 | return 1 - (1 / (torch.max(x, torch.ones_like(x)))) 17 | 18 | 19 | class FlowEstimator(nn.Module): 20 | """Optical flow estimator. 21 | 22 | :param raft_pretrained_path: path to the pretrained raft model. 23 | """ 24 | 25 | def __init__(self, raft_pretrained_path: str): 26 | super(FlowEstimator, self).__init__() 27 | self.flow_estimator = make_raft_estimator(freeze=True) 28 | self._flow_utils = FlowUtils() 29 | 30 | def _flow_polar(self, flow: torch.Tensor, step_module: float = 0) -> torch.Tensor: 31 | """Normalize flow by inversing polar modules (H, W, C).""" 32 | polar_flow = self._flow_utils.xy_to_polar(flow) 33 | 34 | scaled_polar_flow = torch.zeros_like(polar_flow) 35 | scaled_polar_flow[:, :, 0] = polar_flow[:, :, 0] 36 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 37 | return scaled_polar_flow 38 | 39 | def _unit_normalize_flow(self, flow: torch.Tensor) -> torch.Tensor: 40 | """Normalize flow by unitarize their polar modules (H, W, C).""" 41 | polar_flow = self._flow_utils.xy_to_polar(flow) 42 | 43 | scaled_polar_flow = torch.zeros_like(polar_flow) 44 | scaled_polar_flow[:, :, 0] = torch.ones_like(polar_flow[:, :, 0]) 45 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 46 | 47 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 48 | 49 | return scaled_flow 50 | 51 | def _step_normalize_flow( 52 | self, flow: torch.Tensor, step_module: float = 0 53 | ) -> torch.Tensor: 54 | """ 55 | Normalize flow by unitarize their polar modules (H, W, C) greater than 56 | a threshold (`step_module`), otherwise, zero. 57 | """ 58 | polar_flow = self._flow_utils.xy_to_polar(flow) 59 | 60 | scaled_polar_flow = torch.zeros_like(polar_flow) 61 | scaled_polar_flow[:, :, 0] = 1 * (polar_flow[:, :, 0] > step_module) 62 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 63 | 64 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 65 | 66 | return scaled_flow 67 | 68 | def _sigmoid_normalize_flow( 69 | self, flow: torch.Tensor, step_module: float = 0 70 | ) -> torch.Tensor: 71 | """ 72 | Normalize flow by aplying a sigmoid on their polar modules (H, W, C). 
73 | """ 74 | polar_flow = self._flow_utils.xy_to_polar(flow) 75 | 76 | scaled_polar_flow = torch.zeros_like(polar_flow) 77 | scaled_polar_flow[:, :, 0] = g_sigmoid( 78 | polar_flow[:, :, 0], q=torch.tensor(20), b=torch.tensor(0.1) 79 | ) 80 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 81 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 82 | 83 | return scaled_flow 84 | 85 | def _inverse_normalize_flow( 86 | self, flow: torch.Tensor, step_module: float = 0 87 | ) -> torch.Tensor: 88 | """Normalize flow by inversing polar modules (H, W, C).""" 89 | polar_flow = self._flow_utils.xy_to_polar(flow) 90 | 91 | scaled_polar_flow = torch.zeros_like(polar_flow) 92 | scaled_polar_flow[:, :, 0] = f_inverse(polar_flow[:, :, 0]) 93 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 94 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 95 | 96 | return scaled_flow 97 | 98 | def _inverse_normalize_flow_polar( 99 | self, flow: torch.Tensor, step_module: float = 0 100 | ) -> torch.Tensor: 101 | """Normalize flow by inversing polar modules (H, W, C).""" 102 | polar_flow = self._flow_utils.xy_to_polar(flow) 103 | 104 | scaled_polar_flow = torch.zeros_like(polar_flow) 105 | scaled_polar_flow[:, :, 0] = f_inverse(polar_flow[:, :, 0]) 106 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 107 | 108 | return scaled_polar_flow 109 | 110 | def _estimate_flow(self, frames: torch.Tensor) -> torch.Tensor: 111 | """Estimate frows from RGB frames.""" 112 | flows = self.flow_estimator([frames[:-1], frames[1:]]).permute([0, 2, 3, 1]) 113 | return flows 114 | 115 | def compute_flow( 116 | self, frames: List[np.array], ftype: str = "EE" 117 | ) -> Tuple[torch.Tensor, torch.Tensor]: 118 | """ 119 | Extract flow style features from a list of frames. 120 | WARNING: For inference only, please don't forget `.eval()` and 121 | `torch.no_grad()`. 122 | Types: including: 123 | 124 | EE: EndPoint flow -> XY 125 | NEE: normalised Endpoint -> Norm xy flow 126 | AN: Angular -> XY 127 | 128 | :param frames: list of raw RGB frames 0-255 range (T, C, H, W). 129 | :return: encoded flow style vectors (B, C_f, T_f, W_f, H_f). 
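        Note: a tuple is actually returned: the (possibly normalized) flows
        of shape (T-1, H, W, 2), plus the raw flows with an extra leading
        batch dimension.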
130 | 131 | """ 132 | # Estimate flows, output shape: (T, H, W, C) 133 | flows = self._estimate_flow(frames) 134 | 135 | # Normalize flow chunks, output shape: (T, H, W, C) 136 | if ftype == "NEE": 137 | normalized_flows = torch.stack( 138 | [self._inverse_normalize_flow(f) for f in flows] 139 | ) 140 | return normalized_flows, flows.unsqueeze(0) 141 | 142 | if ftype == "EE" or ftype == "AN": 143 | return flows, flows.unsqueeze(0) # Ck, chk_size, H, W, C 144 | -------------------------------------------------------------------------------- /jaws/src/models/modules/feature/raft.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from pytorch_lightning import LightningModule 4 | import torch 5 | 6 | from torchvision.models.optical_flow import raft_small, Raft_Small_Weights 7 | import torchvision.transforms.functional as F 8 | 9 | 10 | class RAFT_tv(LightningModule): 11 | def __init__(self): 12 | super(RAFT_tv, self).__init__() 13 | self.model = raft_small(weights=Raft_Small_Weights.DEFAULT, progress=False) 14 | 15 | def forward(self, x: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: 16 | x1, x2 = x 17 | B, C, H, W = x1.shape 18 | x1 = 2 * (x1 / 255.0) - 1.0 19 | x2 = 2 * (x2 / 255.0) - 1.0 20 | x1, x2 = self.preprocess(x1, x2) 21 | flow_raw = self.model(x1, x2, num_flow_updates=12)[-1] 22 | flow_resized = torch.nn.functional.interpolate(flow_raw, size=[H, W]) 23 | return flow_resized 24 | 25 | def preprocess(self, img1_batch, img2_batch): 26 | transforms = Raft_Small_Weights.DEFAULT.transforms() 27 | img1_batch = F.resize(img1_batch, size=[224, 224]) 28 | img2_batch = F.resize(img2_batch, size=[224, 224]) 29 | return transforms(img1_batch, img2_batch) 30 | 31 | def postprocess(self, flow, img_size): 32 | flow_resized = F.resize(flow, size=img_size) 33 | return flow_resized 34 | 35 | 36 | def make_raft_estimator(freeze: bool): 37 | model = RAFT_tv().eval() 38 | 39 | if freeze: 40 | for p in model.model.parameters(): 41 | p.requires_grad = False 42 | 43 | return model 44 | -------------------------------------------------------------------------------- /jaws/src/models/modules/nerf/network_ff.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Tuple, List 2 | 3 | import torch 4 | 5 | from jaws.src.models.modules.nerf.renderer import NeRFRenderer 6 | from lib.torch_ngp.encoding import get_encoder 7 | from lib.torch_ngp.activation import trunc_exp 8 | from lib.torch_ngp.ffmlp import FFMLP 9 | 10 | 11 | class NeRFNetwork(NeRFRenderer): 12 | def __init__( 13 | self, 14 | sigma_encoding: str, 15 | direction_encoding: str, 16 | n_sigma_layers: int, 17 | n_color_layers: int, 18 | sigma_hidden_dim: int, 19 | color_hidden_dim: int, 20 | geo_feat_dim: int, 21 | bound: int, 22 | aabb: List, 23 | encoder_num_levels: int, 24 | **kwargs 25 | ): 26 | super().__init__( 27 | bound, aabb, background_radius=0, background_perlin_noise=False, **kwargs 28 | ) 29 | 30 | # Density network 31 | self._n_sigma_layers = n_sigma_layers 32 | self._sigma_hidden_dim = sigma_hidden_dim 33 | self._geo_feat_dim = geo_feat_dim 34 | self.sigma_encoder, self._sigma_in_dim = get_encoder( 35 | sigma_encoding, 36 | desired_resolution=2048 * bound, 37 | num_levels=encoder_num_levels, 38 | ) 39 | self.sigma_net = FFMLP( 40 | input_dim=self._sigma_in_dim, 41 | output_dim=1 + self._geo_feat_dim, 42 | hidden_dim=self._sigma_hidden_dim, 43 | num_layers=self._n_sigma_layers, 44 | ) 45 | 46 | # Color 
network 47 | self._n_color_layers = n_color_layers 48 | self._color_hidden_dim = color_hidden_dim 49 | self.color_encoder, self._color_in_dim = get_encoder( 50 | direction_encoding, 51 | desired_resolution=2048, 52 | num_levels=encoder_num_levels, 53 | ) 54 | self._color_in_dim += self._geo_feat_dim + 1 55 | self.color_net = FFMLP( 56 | input_dim=self._color_in_dim, 57 | output_dim=3, 58 | hidden_dim=self._color_hidden_dim, 59 | num_layers=self._n_color_layers, 60 | ) 61 | 62 | def forward( 63 | self, x: torch.Tensor, d: torch.Tensor 64 | ) -> Tuple[torch.Tensor, torch.Tensor]: 65 | """ 66 | :param x: [N, 3], in [-bound, bound] 67 | :param d: [N, 3], normalized in [-1, 1] 68 | """ 69 | # Sigma 70 | x = self.sigma_encoder(x, bound=self.bound) 71 | h = self.sigma_net(x) 72 | sigma = trunc_exp(h[..., 0]) 73 | geo_feat = h[..., 1:] 74 | 75 | # Color 76 | d = self.color_encoder(d) 77 | p = torch.zeros_like(geo_feat[..., :1]) # manual input padding 78 | h = torch.cat([d, geo_feat, p], dim=-1) 79 | h = self.color_net(h) 80 | 81 | # Sigmoid activation for rgb 82 | rgb = torch.sigmoid(h) 83 | 84 | return sigma, rgb 85 | 86 | def density(self, _x: torch.Tensor) -> Dict[str, torch.Tensor]: 87 | """ 88 | :param _x: [N, 3], in [-bound, bound] 89 | """ 90 | x = self.sigma_encoder(_x, bound=self.bound) 91 | h = self.sigma_net(x) 92 | 93 | sigma = trunc_exp(h[..., 0]) 94 | geo_feat = h[..., 1:] 95 | 96 | if torch.isnan(sigma).any(): 97 | assert False 98 | return { 99 | "sigma": sigma, 100 | "geo_feat": geo_feat, 101 | } 102 | 103 | def color( 104 | self, 105 | x: torch.Tensor, 106 | d: torch.Tensor, 107 | mask: torch.Tensor = None, 108 | geo_feat: torch.Tensor = None, 109 | **kwargs 110 | ) -> torch.Tensor: 111 | """ 112 | Allow masked inference. 113 | 114 | :param x: [N, 3] in [-bound, bound] 115 | :param mask: [N,], bool, indicates where rgb needs to be computed.
116 | """ 117 | if mask is not None: 118 | # [N, 3] 119 | rgbs = torch.zeros(mask.shape[0], 3, dtype=x.dtype, device=x.device) 120 | # Empty mask 121 | if not mask.any(): 122 | return rgbs 123 | x = x[mask] 124 | d = d[mask] 125 | geo_feat = geo_feat[mask] 126 | 127 | d = self.color_encoder(d) 128 | 129 | p = torch.zeros_like(geo_feat[..., :1]) # manual input padding 130 | h = torch.cat([d, geo_feat, p], dim=-1) 131 | h = self.color_net(h) 132 | 133 | # Sigmoid activation for rgb 134 | h = torch.sigmoid(h) 135 | 136 | if mask is not None: 137 | rgbs[mask] = h.to(rgbs.dtype) 138 | else: 139 | rgbs = h 140 | 141 | return rgbs 142 | 143 | def get_params(self, lr: float) -> Dict[str, Any]: 144 | params = [ 145 | {"params": self.sigma_encoder.parameters(), "lr": lr}, 146 | {"params": self.sigma_net.parameters(), "lr": lr}, 147 | {"params": self.color_encoder.parameters(), "lr": lr}, 148 | {"params": self.color_net.parameters(), "lr": lr}, 149 | ] 150 | return params 151 | -------------------------------------------------------------------------------- /jaws/src/models/modules/nerf_factory.py: -------------------------------------------------------------------------------- 1 | from pytorch_lightning import LightningModule 2 | import torch 3 | from torch import optim 4 | 5 | 6 | def create_nerf_model(config) -> LightningModule: 7 | criterion = torch.nn.MSELoss(reduction="none") 8 | 9 | ff = config.datamodule.ff 10 | background_radius = 0 if ff else config.datamodule.background_radius 11 | background_encoding = None if ff else config.model.background_encoding 12 | n_background_layers = None if ff else config.model.n_background_layers 13 | background_hidden_dim = None if ff else config.model.background_hidden_dim 14 | background_perlin_noise = None if ff else config.datamodule.background_perlin_noise 15 | 16 | if config.dynamic: 17 | from jaws.src.models.dnerf_model import DNeRFModel 18 | 19 | # Initialize model 20 | optimizer = lambda model: torch.optim.Adam( 21 | model.get_params(config.model.lr, config.model.lr_net), 22 | betas=(0.9, 0.99), 23 | eps=1e-14, 24 | ) 25 | lr_scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR( 26 | optimizer, 27 | lambda iter: 0.1 ** min(iter / (config.num_epochs * 100), 1), 28 | ) 29 | 30 | model = DNeRFModel( 31 | result_dir=config.result_dir, 32 | optimizer=optimizer, 33 | lr_scheduler=lr_scheduler, 34 | criterion=criterion, 35 | bound=config.datamodule.bound, 36 | aabb=config.datamodule.aabb, 37 | run_type=config.run_type, 38 | min_near=config.datamodule.min_near, 39 | density_thresh=config.datamodule.density_thresh, 40 | num_steps=config.num_steps, 41 | upsample_steps=config.upsample_steps, 42 | max_ray_batch=config.max_ray_batch, 43 | background_radius=background_radius, 44 | time_encoding=config.model.time_encoding, 45 | warp_encoding=config.model.warp_encoding, 46 | sigma_encoding=config.model.sigma_encoding, 47 | direction_encoding=config.model.direction_encoding, 48 | background_encoding=background_encoding, 49 | background_perlin_noise=background_perlin_noise, 50 | n_warp_layers=config.model.n_warp_layers, 51 | n_sigma_layers=config.model.n_sigma_layers, 52 | n_color_layers=config.model.n_color_layers, 53 | n_background_layers=n_background_layers, 54 | warp_hidden_dim=config.model.warp_hidden_dim, 55 | sigma_hidden_dim=config.model.sigma_hidden_dim, 56 | color_hidden_dim=config.model.color_hidden_dim, 57 | background_hidden_dim=background_hidden_dim, 58 | geo_feat_dim=config.model.geo_feat_dim, 59 | 
encoder_num_levels=config.model.encoder_num_levels, 60 | saturation_loss=config.saturation_loss, 61 | error_map=config.error_map, 62 | floater_ratio=config.floater_ratio, 63 | ) 64 | else: 65 | from jaws.src.models.nerf_model import NeRFModel 66 | 67 | # Initialize model 68 | optimizer = lambda model: torch.optim.Adam( 69 | model.get_params(config.model.lr), betas=(0.9, 0.99), eps=1e-14 70 | ) 71 | lr_scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR( 72 | optimizer, 73 | lambda iter: 0.1 ** min(iter / (config.num_epochs * 100), 1), 74 | ) 75 | 76 | model = NeRFModel( 77 | result_dir=config.result_dir, 78 | optimizer=optimizer, 79 | lr_scheduler=lr_scheduler, 80 | criterion=criterion, 81 | bound=config.datamodule.bound, 82 | aabb=config.datamodule.aabb, 83 | run_type=config.run_type, 84 | min_near=config.datamodule.min_near, 85 | density_thresh=config.datamodule.density_thresh, 86 | num_steps=config.num_steps, 87 | upsample_steps=config.upsample_steps, 88 | max_ray_batch=config.max_ray_batch, 89 | fully_fuse=ff, 90 | background_radius=background_radius, 91 | sigma_encoding=config.model.sigma_encoding, 92 | direction_encoding=config.model.direction_encoding, 93 | background_encoding=background_encoding, 94 | background_perlin_noise=background_perlin_noise, 95 | n_sigma_layers=config.model.n_sigma_layers, 96 | n_color_layers=config.model.n_color_layers, 97 | n_background_layers=n_background_layers, 98 | sigma_hidden_dim=config.model.sigma_hidden_dim, 99 | color_hidden_dim=config.model.color_hidden_dim, 100 | background_hidden_dim=background_hidden_dim, 101 | geo_feat_dim=config.model.geo_feat_dim, 102 | encoder_num_levels=config.model.encoder_num_levels, 103 | saturation_loss=config.saturation_loss, # [TODO:] 104 | error_map=config.error_map, 105 | floater_ratio=config.floater_ratio, 106 | ) 107 | if config.run_type != "train": 108 | model.training = False 109 | return model 110 | -------------------------------------------------------------------------------- /jaws/src/render.py: -------------------------------------------------------------------------------- 1 | from omegaconf import DictConfig 2 | import os 3 | import os.path as osp 4 | import sys 5 | 6 | import torch 7 | import numpy as np 8 | 9 | from utils.file_utils import create_dir, load_pickle, save_pickle 10 | from jaws.src.models.modules.nerf_factory import create_nerf_model 11 | from utils.camera_utils import PoseInterpolator 12 | from utils.image_utils import save_gif, save_torch_image, save_poses_kitti 13 | from tqdm import tqdm 14 | 15 | 16 | def render(config: DictConfig): 17 | sys.path.append(osp.join(".", "lib", "torch_ngp")) 18 | model = create_nerf_model(config) 19 | 20 | # Initialize trainer 21 | checkpoint_dir = osp.join(config.result_dir, "checkpoints") 22 | if not osp.exists(checkpoint_dir): 23 | create_dir(checkpoint_dir) 24 | if config.model.ckpt == "latest": 25 | checkpoint_list = sorted(os.listdir(checkpoint_dir)) 26 | if len(checkpoint_list) > 0: 27 | checkpoint_path = osp.join(checkpoint_dir, checkpoint_list[-1]) 28 | else: 29 | checkpoint_path = None 30 | else: 31 | checkpoint_path = config.model.ckpt 32 | 33 | checkpoint = torch.load(checkpoint_path) 34 | model.load_state_dict(checkpoint["state_dict"]) 35 | device = "cuda" if config.compnode.num_gpus > 0 else "cpu" 36 | model.to(device) 37 | 38 | # intrinsic parameters - constant during interpolating 39 | params_path = osp.join(config.render_target_dir, "params.pkl") 40 | params = load_pickle(params_path) 41 | 42 | # parameters can be interpolated 43 
| focals_path = osp.join(config.render_target_dir, "focals.pkl") 44 | times_path = osp.join(config.render_target_dir, "times.pkl") 45 | poses_path = osp.join(config.render_target_dir, "poses.pkl") 46 | focals = load_pickle(focals_path) 47 | times = load_pickle(times_path) 48 | poses = load_pickle(poses_path) 49 | 50 | # interpolation here before rendering: 51 | ( 52 | focals, 53 | times, 54 | poses, 55 | ) = PoseInterpolator.inpterpolate_render_sequence_from_keyframes_cubic( 56 | focals=focals, 57 | times=times, 58 | poses=poses, 59 | frm_num=config.render_frame_num, 60 | ) 61 | 62 | res_factor = 720.0 / params[0]["H"] 63 | frames = [] 64 | H = int(params[0]["H"] * res_factor) # 224 -> control image 65 | W = int(params[0]["W"] * res_factor) 66 | intrinsics = params[0]["intrinsics"] 67 | scale_factor = H / (intrinsics[3] * 2) 68 | intrinsics = intrinsics * scale_factor 69 | intrinsics[2] = W / 2 70 | intrinsics[3] = H / 2 71 | 72 | # high resolution 73 | for camera_index in tqdm(range(len(poses))): 74 | focal = focals[camera_index] 75 | time = torch.tensor([[times[camera_index]]]).to(device) 76 | current_intrinsics = np.copy(intrinsics) 77 | current_intrinsics[:2] = intrinsics[:2] * focal 78 | pose = poses[camera_index].to(device) 79 | if config.dynamic: 80 | frames.append( 81 | ( 82 | model.render(pose, time, current_intrinsics, H, W).cpu() 83 | * 255.0 84 | ) 85 | .numpy() 86 | .astype(np.uint8) 87 | ) 88 | else: 89 | frames.append( 90 | (model.render(pose, current_intrinsics, H, W).cpu() * 255.0) 91 | .numpy() 92 | .astype(np.uint8) 93 | ) 94 | save_gif( 95 | frames, osp.join(config.render_target_dir, "interpolated.gif"), 25 96 | ) 97 | save_pickle( 98 | poses, osp.join(config.render_target_dir, "interpolated_poses.pkl") 99 | ) 100 | save_pickle( 101 | focals, osp.join(config.render_target_dir, "interpolated_focals.pkl") 102 | ) 103 | save_poses_kitti( 104 | poses, 105 | config.render_target_dir, 106 | "interpolated_poses_kitti.csv", 107 | ) 108 | 109 | # save_pickle(frames, config.render_target_dir, "frames.pkl") 110 | # save fig here too 111 | -------------------------------------------------------------------------------- /jaws/src/train.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from omegaconf import DictConfig 3 | import os 4 | import os.path as osp 5 | import sys 6 | 7 | from pytorch_lightning import Trainer 8 | from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor 9 | from pytorch_lightning.loggers import WandbLogger 10 | 11 | from jaws.src.datamodules.nerf_datamodule import NeRFDataModule 12 | from utils.file_utils import create_dir 13 | 14 | from jaws.src.models.modules.nerf_factory import create_nerf_model 15 | 16 | 17 | def train(config: DictConfig): 18 | sys.path.append(osp.join(".", "lib", "torch_ngp")) 19 | model = create_nerf_model(config) 20 | 21 | # Initialize dataset 22 | data_module = NeRFDataModule( 23 | data_type="dynamic" if config.dynamic else "static", 24 | num_rays=config.num_rays, 25 | path=config.data_dir, 26 | mode=config.datamodule.mode, 27 | preload=config.datamodule.preload, 28 | scale=config.datamodule.scale, 29 | bound=config.datamodule.bound, 30 | rand_pose=config.datamodule.rand_pose, 31 | ind_calib=config.datamodule.independent_calibration, 32 | error_map=config.error_map, 33 | ) 34 | 35 | # Initialize trainer 36 | checkpoint_dir = osp.join(config.result_dir, "checkpoints") 37 | if not osp.exists(checkpoint_dir): 38 | create_dir(checkpoint_dir) 39 | 
if config.model.ckpt == "latest": 40 | checkpoint_list = sorted(os.listdir(checkpoint_dir)) 41 | if len(checkpoint_list) > 0: 42 | checkpoint_path = osp.join(checkpoint_dir, checkpoint_list[-1]) 43 | else: 44 | checkpoint_path = None 45 | elif config.model.ckpt == "scratch": 46 | checkpoint_path = None 47 | else: 48 | checkpoint_path = config.model.ckpt 49 | checkpoint = ModelCheckpoint( 50 | monitor=config.checkpoint_metric, 51 | mode="min", 52 | save_top_k=config.num_checkpoints, 53 | dirpath=checkpoint_dir, 54 | filename="{epoch}", 55 | save_on_train_epoch_end=True, 56 | ) 57 | timestamp = datetime.now().strftime("%m-%d_%H-%M") 58 | wandb_logger = WandbLogger( 59 | name="_".join([config.xp_name, "nerf", timestamp]), 60 | project=config.project_name, 61 | offline=config.log_offline, 62 | ) 63 | lr_monitor = LearningRateMonitor(logging_interval="epoch") 64 | callbacks = [lr_monitor, checkpoint] 65 | trainer = Trainer( 66 | gpus=config.compnode.num_gpus, 67 | num_nodes=config.compnode.num_nodes, 68 | accelerator=config.compnode.accelerator, 69 | max_epochs=config.num_epochs, 70 | callbacks=callbacks, 71 | logger=wandb_logger, 72 | check_val_every_n_epoch=config.check_val_every_n_epoch, 73 | log_every_n_steps=5, 74 | precision=16 if config.model.fp16 else 32, 75 | num_sanity_val_steps=config.num_sanity_val_steps, 76 | ) 77 | 78 | # Launch model training 79 | trainer.fit(model, data_module, ckpt_path=checkpoint_path) 80 | -------------------------------------------------------------------------------- /lib/LitePose/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | 20 | this_dir = osp.dirname(__file__) 21 | 22 | lib_path = osp.join(this_dir, "lib") 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/ddrnet/ddrnet23s.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '../data/coco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | PRETRAINED_LAYERS: ['*'] 47 | INIT_WEIGHTS: True 48 | NAME: pose_ddrnet 49 | NUM_JOINTS: 17 50 | PRETRAINED: '' 51 | TAG_PER_JOINT: True 52 | TEST: 53 | FLIP_TEST: True 54 | IMAGES_PER_GPU: 1 55 | MODEL_FILE: '' 56 | SCALE_FACTOR: [1] 57 | DETECTION_THRESHOLD: 0.1 58 | WITH_HEATMAPS: (True, True) 59 | WITH_AE: (True, False) 60 | PROJECT2IMAGE: True 61 | NMS_KERNEL: 5 62 | NMS_PADDING: 2 63 | TRAIN: 64 | BEGIN_EPOCH: 0 65 | CHECKPOINT: '' 66 | END_EPOCH: 180 67 | GAMMA1: 0.99 68 | GAMMA2: 0.0 69 | IMAGES_PER_GPU: 16 70 | LR: 0.002 71 | LR_FACTOR: 0.1 72 | LR_STEP: [250, 330] 73 | MOMENTUM: 0.9 74 | NESTEROV: False 75 | OPTIMIZER: adam 76 | RESUME: False 77 | SHUFFLE: True 78 | WD: 0.0001 79 | WORKERS: 4 80 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/higher_hrnet/w32_512_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '../data/coco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, 
False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 17 100 | PRETRAINED: '../data/models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 6 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 4 130 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/higher_hrnet/w32_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: coco_kpt 17 | DATASET_TEST: coco 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 17 29 | ROOT: 'data/coco' 30 | TEST: val2017 31 | TRAIN: train2017 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | 
BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 32 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 12 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/higher_hrnet/w48_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: coco_kpt 17 | DATASET_TEST: coco 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 17 29 | ROOT: 'data/coco' 30 | TEST: val2017 31 | TRAIN: train2017 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 48 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: 
True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/mobilenet/mobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 256 18 | OUTPUT_SIZE: [64, 128] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '/dataset/coco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_mobilenet 59 | NUM_JOINTS: 17 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 0.004 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 0.0001 89 | WORKERS: 4 90 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/mobilenet/supermobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 448 18 | OUTPUT_SIZE: [112, 224] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '/dataset/mscoco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | 
WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_supermobilenet 59 | NUM_JOINTS: 17 60 | PRETRAINED: './pretrain/search_pretrain.pth.tar' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 2400 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 4e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [10000,18000] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 0.0001 89 | WORKERS: 4 90 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/ddrnet/ddrnet23s.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '../data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | PRETRAINED_LAYERS: ['*'] 47 | INIT_WEIGHTS: True 48 | NAME: pose_ddrnet 49 | NUM_JOINTS: 14 50 | PRETRAINED: '' 51 | TAG_PER_JOINT: True 52 | TEST: 53 | FLIP_TEST: True 54 | IMAGES_PER_GPU: 1 55 | MODEL_FILE: '' 56 | SCALE_FACTOR: [1] 57 | DETECTION_THRESHOLD: 0.1 58 | WITH_HEATMAPS: (True, True) 59 | WITH_AE: (True, False) 60 | PROJECT2IMAGE: True 61 | NMS_KERNEL: 5 62 | NMS_PADDING: 2 63 | TRAIN: 64 | BEGIN_EPOCH: 0 65 | CHECKPOINT: '' 66 | END_EPOCH: 500 67 | GAMMA1: 0.99 68 | GAMMA2: 0.0 69 | IMAGES_PER_GPU: 16 70 | LR: 1e-3 71 | LR_FACTOR: 0.1 72 | LR_STEP: [350, 480] 73 | MOMENTUM: 0.9 74 | NESTEROV: False 75 | OPTIMIZER: adam 76 | RESUME: False 77 | SHUFFLE: True 78 | WD: 1e-4 79 | WORKERS: 4 80 | 81 | 82 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-1.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-1 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | 
SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 480 18 | OUTPUT_SIZE: [120, 240] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 3 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -1 99 | WIDTH_MULT: 0.909 100 | DEPTH_MULT: 0.833 101 | NAME: pose_efficient_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 6 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-2.yaml: -------------------------------------------------------------------------------- 1 | 2 | AUTO_RESUME: True 3 | DATA_DIR: '' 4 | GPUS: (0,) 5 | LOG_DIR: log 6 | OUTPUT_DIR: output_H-2 7 | PRINT_FREQ: 100 8 | CUDNN: 9 | BENCHMARK: True 10 | DETERMINISTIC: True 11 | ENABLED: True 12 | DATASET: 13 | SIGMA: 2 14 | DATASET: coco_kpt 15 | DATASET_TEST: coco 16 | DATA_FORMAT: jpg 17 | FLIP: 0.5 18 | INPUT_SIZE: 448 19 | OUTPUT_SIZE: [112, 224] 20 | MAX_NUM_PEOPLE: 30 21 | MAX_ROTATION: 30 22 | MAX_SCALE: 1.5 23 | SCALE_TYPE: 'short' 24 | MAX_TRANSLATE: 40 25 | MIN_SCALE: 0.75 26 | NUM_JOINTS: 14 27 | ROOT: 'data/coco' #Dataset Root Folder 28 | TEST: val2017 29 | TRAIN: train2017 30 | DEBUG: 31 | DEBUG: True 32 | SAVE_BATCH_IMAGES_GT: False 33 | SAVE_BATCH_IMAGES_PRED: False 34 | SAVE_HEATMAPS_GT: True 35 | SAVE_HEATMAPS_PRED: True 36 | SAVE_TAGMAPS_PRED: True 37 | LOSS: 38 | NUM_STAGES: 2 39 | AE_LOSS_TYPE: exp 40 
| WITH_AE_LOSS: [True, False] 41 | PUSH_LOSS_FACTOR: [0.001, 0.001] 42 | PULL_LOSS_FACTOR: [0.001, 0.001] 43 | WITH_HEATMAPS_LOSS: [True, True] 44 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 45 | MODEL: 46 | EXTRA: 47 | FINAL_CONV_KERNEL: 1 48 | PRETRAINED_LAYERS: ['*'] 49 | STEM_INPLANES: 64 50 | STAGE2: 51 | NUM_MODULES: 1 52 | NUM_BRANCHES: 2 53 | BLOCK: BASIC 54 | NUM_BLOCKS: 55 | - 2 56 | - 2 57 | NUM_CHANNELS: 58 | - 32 59 | - 64 60 | FUSE_METHOD: SUM 61 | STAGE3: 62 | NUM_MODULES: 2 63 | NUM_BRANCHES: 3 64 | BLOCK: BASIC 65 | NUM_BLOCKS: 66 | - 2 67 | - 2 68 | - 2 69 | NUM_CHANNELS: 70 | - 32 71 | - 64 72 | - 128 73 | FUSE_METHOD: SUM 74 | STAGE4: 75 | NUM_MODULES: 3 76 | NUM_BRANCHES: 4 77 | BLOCK: BASIC 78 | NUM_BLOCKS: 79 | - 2 80 | - 2 81 | - 2 82 | - 2 83 | NUM_CHANNELS: 84 | - 32 85 | - 64 86 | - 128 87 | - 256 88 | FUSE_METHOD: SUM 89 | DECONV: 90 | NUM_DECONVS: 1 91 | NUM_CHANNELS: 92 | - 32 93 | KERNEL_SIZE: 94 | - 4 95 | NUM_BASIC_BLOCKS: 2 96 | CAT_OUTPUT: 97 | - True 98 | INIT_WEIGHTS: True 99 | SCALE_FACTOR: -2 100 | WIDTH_MULT: 0.826 101 | DEPTH_MULT: 0.694 102 | NAME: pose_efficient_hrnet 103 | NUM_JOINTS: 17 104 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 105 | TAG_PER_JOINT: True 106 | TEST: 107 | FLIP_TEST: True 108 | IMAGES_PER_GPU: 1 109 | MODEL_FILE: '' 110 | SCALE_FACTOR: [1] 111 | DETECTION_THRESHOLD: 0.1 112 | WITH_HEATMAPS: (True, True) 113 | WITH_AE: (True, False) 114 | PROJECT2IMAGE: True 115 | NMS_KERNEL: 5 116 | NMS_PADDING: 2 117 | TRAIN: 118 | BEGIN_EPOCH: 0 119 | CHECKPOINT: '' 120 | END_EPOCH: 300 121 | GAMMA1: 0.99 122 | GAMMA2: 0.0 123 | IMAGES_PER_GPU: 8 124 | LR: 0.001 125 | LR_FACTOR: 0.1 126 | LR_STEP: [200, 260] 127 | MOMENTUM: 0.9 128 | NESTEROV: False 129 | OPTIMIZER: adam 130 | RESUME: False 131 | SHUFFLE: True 132 | WD: 0.0001 133 | WORKERS: 8 134 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-3 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 416 18 | OUTPUT_SIZE: [104, 208] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/pose/data/crowd_pose' #Dataset Root Folder 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 
70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -3 99 | WIDTH_MULT: 0.751 100 | DEPTH_MULT: 0.578 101 | NAME: pose_efficient_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: '' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.004 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-4.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 384 18 | OUTPUT_SIZE: [96, 192] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 2 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -4 99 | WIDTH_MULT: 0.684 100 | DEPTH_MULT: 0.483 101 | NAME: pose_efficient_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | 
TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 48 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w16_512_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '../data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 36 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 0 55 | - 4 56 | NUM_CHANNELS: 57 | - 18 58 | - 36 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 0 66 | - 0 67 | - 4 68 | NUM_CHANNELS: 69 | - 18 70 | - 36 71 | - 72 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 0 79 | - 0 80 | - 0 81 | - 4 82 | NUM_CHANNELS: 83 | - 18 84 | - 36 85 | - 72 86 | - 144 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 0 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: '../models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 24 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 4 130 | -------------------------------------------------------------------------------- 
/lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/pose/data/crowd_pose' 27 | TEST: search 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: '../models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 12 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 4 130 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_coco.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 
'data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: 48 | - 'conv1' 49 | - 'bn1' 50 | - 'conv2' 51 | - 'bn2' 52 | - 'layer1' 53 | - 'transition1' 54 | - 'stage2' 55 | - 'transition2' 56 | - 'stage3' 57 | - 'transition3' 58 | - 'stage4' 59 | STEM_INPLANES: 64 60 | STAGE2: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 2 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | NUM_CHANNELS: 68 | - 32 69 | - 64 70 | FUSE_METHOD: SUM 71 | STAGE3: 72 | NUM_MODULES: 4 73 | NUM_BRANCHES: 3 74 | BLOCK: BASIC 75 | NUM_BLOCKS: 76 | - 4 77 | - 4 78 | - 4 79 | NUM_CHANNELS: 80 | - 32 81 | - 64 82 | - 128 83 | FUSE_METHOD: SUM 84 | STAGE4: 85 | NUM_MODULES: 3 86 | NUM_BRANCHES: 4 87 | BLOCK: BASIC 88 | NUM_BLOCKS: 89 | - 4 90 | - 4 91 | - 4 92 | - 4 93 | NUM_CHANNELS: 94 | - 32 95 | - 64 96 | - 128 97 | - 256 98 | FUSE_METHOD: SUM 99 | DECONV: 100 | NUM_DECONVS: 1 101 | NUM_CHANNELS: 102 | - 32 103 | KERNEL_SIZE: 104 | - 4 105 | NUM_BASIC_BLOCKS: 4 106 | CAT_OUTPUT: 107 | - True 108 | INIT_WEIGHTS: True 109 | NAME: pose_higher_hrnet 110 | NUM_JOINTS: 14 111 | PRETRAINED: 'models/pytorch/pose_coco/pose_higher_hrnet_w32_512.pth' 112 | TAG_PER_JOINT: True 113 | TEST: 114 | FLIP_TEST: True 115 | IMAGES_PER_GPU: 1 116 | MODEL_FILE: '' 117 | SCALE_FACTOR: [1] 118 | DETECTION_THRESHOLD: 0.1 119 | WITH_HEATMAPS: (True, True) 120 | WITH_AE: (True, False) 121 | PROJECT2IMAGE: True 122 | NMS_KERNEL: 5 123 | NMS_PADDING: 2 124 | TRAIN: 125 | BEGIN_EPOCH: 0 126 | CHECKPOINT: '' 127 | END_EPOCH: 300 128 | GAMMA1: 0.99 129 | GAMMA2: 0.0 130 | IMAGES_PER_GPU: 12 131 | LR: 0.001 132 | LR_FACTOR: 0.1 133 | LR_STEP: [200, 260] 134 | MOMENTUM: 0.9 135 | NESTEROV: False 136 | OPTIMIZER: adam 137 | RESUME: False 138 | SHUFFLE: True 139 | WD: 0.0001 140 | WORKERS: 4 141 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_syncbn.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 'data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | 
FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | SYNC_BN: True 103 | TEST: 104 | FLIP_TEST: True 105 | IMAGES_PER_GPU: 1 106 | MODEL_FILE: '' 107 | SCALE_FACTOR: [1] 108 | DETECTION_THRESHOLD: 0.1 109 | WITH_HEATMAPS: (True, True) 110 | WITH_AE: (True, False) 111 | PROJECT2IMAGE: True 112 | NMS_KERNEL: 5 113 | NMS_PADDING: 2 114 | TRAIN: 115 | BEGIN_EPOCH: 0 116 | CHECKPOINT: '' 117 | END_EPOCH: 300 118 | GAMMA1: 0.99 119 | GAMMA2: 0.0 120 | IMAGES_PER_GPU: 12 121 | LR: 0.001 122 | LR_FACTOR: 0.1 123 | LR_STEP: [200, 260] 124 | MOMENTUM: 0.9 125 | NESTEROV: False 126 | OPTIMIZER: adam 127 | RESUME: False 128 | SHUFFLE: True 129 | WD: 0.0001 130 | WORKERS: 4 131 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: crowd_pose_kpt 17 | DATASET_TEST: crowd_pose 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 14 29 | ROOT: 'data/crowd_pose' 30 | TEST: test 31 | TRAIN: trainval 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | 
DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 32 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 12 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w48_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: crowd_pose_kpt 17 | DATASET_TEST: crowd_pose 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 14 29 | ROOT: 'data/crowd_pose' 30 | TEST: test 31 | TRAIN: trainval 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 48 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 
0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/mobilenet/mobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 30 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 256 18 | OUTPUT_SIZE: [64, 128] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_mobilenet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 4e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/mobilenet/supermobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 50 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 
0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_supermobilenet 59 | NUM_JOINTS: 14 60 | PRETRAINED: 'pretrained_models/crowdpose-pretrain.pth.tar' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 2400 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 4e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [36000, 40000] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/resnet/resnet.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 30 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 256 18 | OUTPUT_SIZE: [64, 128] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 16 51 | - 24 52 | - 24 53 | NUM_DECONV_KERNELS: 54 | - 3 55 | - 3 56 | - 3 57 | INIT_WEIGHTS: True 58 | NAME: pose_resnet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 2e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/resnet/superresnet.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 30 7 | CUDNN: 8 | 
BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: search 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 3 55 | - 3 56 | - 3 57 | INIT_WEIGHTS: True 58 | NAME: pose_superresnet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 2400 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 2e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [36000, 40000] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/simplenet/simplenet.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '../data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_simplenet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 
70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 1e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /lib/LitePose/lib/arch_manager.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | 5 | def rand(c): 6 | return random.randint(0, c - 1) 7 | 8 | def _make_divisible(v, divisor, min_value=None): 9 | if min_value is None: 10 | min_value = divisor 11 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 12 | # Make sure that round down does not go down by more than 10%. 13 | if new_v < 0.9 * v: 14 | new_v += divisor 15 | return new_v 16 | 17 | 18 | class ArchManager: 19 | def __init__(self, cfg): 20 | self.cfg = cfg 21 | self.expansion = [6] 22 | self.kernel_size = [7] 23 | self.input_channel = 24 24 | self.width_mult = [1.0, 0.75, 0.5, 0.25] 25 | self.deconv_setting = cfg.MODEL.EXTRA.NUM_DECONV_FILTERS 26 | self.is_search = False 27 | self.search_arch = None 28 | self.arch_setting = [ 29 | # c, n, s 30 | [32, 4, 2], 31 | [64, 6, 2], 32 | [96, 8, 2], 33 | [160, 8, 1] 34 | ] 35 | 36 | def rand_kernel_size(self): 37 | l = len(self.kernel_size) 38 | return self.kernel_size[rand(l)] 39 | 40 | def rand_expansion(self): 41 | l = len(self.expansion) 42 | return self.expansion[rand(l)] 43 | 44 | def rand_channel(self, c): 45 | l = len(self.width_mult) 46 | new_c = c * self.width_mult[rand(l)] 47 | return _make_divisible(new_c, 8) 48 | 49 | def random_sample(self): 50 | if self.is_search == True: 51 | return self.search_arch 52 | cfg_arch = {} 53 | cfg_arch['img_size'] = 256 + 64 * rand(5) 54 | cfg_arch['input_channel'] = self.rand_channel(self.input_channel) 55 | cfg_arch['deconv_setting'] = [] 56 | for i in range(len(self.deconv_setting)): 57 | cfg_arch['deconv_setting'].append(self.rand_channel(self.deconv_setting[i])) 58 | cfg_arch['backbone_setting'] = [] 59 | for i in range(len(self.arch_setting)): 60 | stage = {} 61 | c, n, s = self.arch_setting[i] 62 | stage['num_blocks'] = n 63 | stage['stride'] = s 64 | stage['channel'] = self.rand_channel(c) 65 | stage['block_setting'] = [] 66 | for j in range(stage['num_blocks']): 67 | stage['block_setting'].append([6, 7]) 68 | cfg_arch['backbone_setting'].append(stage) 69 | return cfg_arch 70 | 71 | def fixed_sample(self, reso=256, ratio=0.5): 72 | cfg_arch = {} 73 | cfg_arch['img_size'] = reso 74 | cfg_arch['input_channel'] = _make_divisible(self.input_channel * ratio, 8) 75 | cfg_arch['deconv_setting'] = [] 76 | for i in range(len(self.deconv_setting)): 77 | cfg_arch['deconv_setting'].append(_make_divisible(self.deconv_setting[i] * ratio, 8)) 78 | cfg_arch['backbone_setting'] = [] 79 | for i in range(len(self.arch_setting)): 80 | stage = {} 81 | c, n, s = self.arch_setting[i] 82 | stage['num_blocks'] = n 83 | stage['stride'] = s 84 | stage['channel'] = _make_divisible(c * ratio, 8) 85 | stage['block_setting'] = [] 86 | for j in range(stage['num_blocks']): 87 | stage['block_setting'].append([6, 7]) 88 | cfg_arch['backbone_setting'].append(stage) 89 | return cfg_arch 90 | 91 | -------------------------------------------------------------------------------- 
/lib/LitePose/lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .default import check_config 10 | from .default import update_config_dict 11 | -------------------------------------------------------------------------------- /lib/LitePose/lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_multi_resoluton_net related params 15 | POSE_HIGHER_RESOLUTION_NET = CN() 16 | POSE_HIGHER_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 17 | POSE_HIGHER_RESOLUTION_NET.STEM_INPLANES = 64 18 | POSE_HIGHER_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 19 | 20 | POSE_HIGHER_RESOLUTION_NET.STAGE1 = CN() 21 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_MODULES = 1 22 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BRANCHES = 1 23 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BLOCKS = [4] 24 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_CHANNELS = [64] 25 | POSE_HIGHER_RESOLUTION_NET.STAGE1.BLOCK = 'BOTTLENECK' 26 | POSE_HIGHER_RESOLUTION_NET.STAGE1.FUSE_METHOD = 'SUM' 27 | 28 | POSE_HIGHER_RESOLUTION_NET.STAGE2 = CN() 29 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 30 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 31 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 32 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [24, 48] 33 | POSE_HIGHER_RESOLUTION_NET.STAGE2.BLOCK = 'BOTTLENECK' 34 | POSE_HIGHER_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 35 | 36 | POSE_HIGHER_RESOLUTION_NET.STAGE3 = CN() 37 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 38 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 39 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 40 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [24, 48, 92] 41 | POSE_HIGHER_RESOLUTION_NET.STAGE3.BLOCK = 'BOTTLENECK' 42 | POSE_HIGHER_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 43 | 44 | POSE_HIGHER_RESOLUTION_NET.STAGE4 = CN() 45 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 46 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 47 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 48 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [24, 48, 92, 192] 49 | POSE_HIGHER_RESOLUTION_NET.STAGE4.BLOCK = 'BOTTLENECK' 50 | POSE_HIGHER_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 51 | 52 | POSE_HIGHER_RESOLUTION_NET.DECONV = CN() 53 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_DCONVS = 2 54 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_CHANNELS = [32, 32] 55 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_BASIC_BLOCKS = 4 56 | POSE_HIGHER_RESOLUTION_NET.DECONV.KERNEL_SIZE = [2, 2] 57 | POSE_HIGHER_RESOLUTION_NET.DECONV.CAT_OUTPUT = [True, True] 58 | 59 | 60 | MODEL_EXTRAS = { 61 | 'pose_multi_resolution_net_v16': 
POSE_HIGHER_RESOLUTION_NET, 62 | } 63 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/COCOKeypoints.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import logging 13 | 14 | import numpy as np 15 | 16 | import pycocotools 17 | from .COCODataset import CocoDataset 18 | from .target_generators import HeatmapGenerator 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | class CocoKeypoints(CocoDataset): 25 | def __init__(self, 26 | cfg, 27 | dataset_name, 28 | remove_images_without_annotations, 29 | heatmap_generator, 30 | joints_generator, 31 | transforms=None): 32 | super().__init__(cfg.DATASET.ROOT, 33 | dataset_name, 34 | cfg.DATASET.DATA_FORMAT) 35 | 36 | if cfg.DATASET.WITH_CENTER: 37 | assert cfg.DATASET.NUM_JOINTS == 18, 'Number of joint with center for COCO is 18' 38 | else: 39 | assert cfg.DATASET.NUM_JOINTS == 17, 'Number of joint for COCO is 17' 40 | 41 | self.num_scales = self._init_check(heatmap_generator, joints_generator) 42 | 43 | self.num_joints = cfg.DATASET.NUM_JOINTS 44 | self.with_center = cfg.DATASET.WITH_CENTER 45 | self.num_joints_without_center = self.num_joints - 1 \ 46 | if self.with_center else self.num_joints 47 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA 48 | self.base_sigma = cfg.DATASET.BASE_SIGMA 49 | self.base_size = cfg.DATASET.BASE_SIZE 50 | self.int_sigma = cfg.DATASET.INT_SIGMA 51 | 52 | if remove_images_without_annotations: 53 | self.ids = [ 54 | img_id 55 | for img_id in self.ids 56 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 57 | ] 58 | 59 | self.transforms = transforms 60 | self.heatmap_generator = heatmap_generator 61 | self.joints_generator = joints_generator 62 | 63 | def __getitem__(self, idx): 64 | img, anno = super().__getitem__(idx) 65 | 66 | mask = self.get_mask(anno, idx) 67 | 68 | anno = [ 69 | obj for obj in anno 70 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 71 | ] 72 | 73 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint 74 | joints = self.get_joints(anno) 75 | 76 | mask_list = [mask.copy() for _ in range(self.num_scales)] 77 | joints_list = [joints.copy() for _ in range(self.num_scales)] 78 | target_list = list() 79 | 80 | if self.transforms: 81 | img, mask_list, joints_list = self.transforms( 82 | img, mask_list, joints_list 83 | ) 84 | 85 | for scale_id in range(self.num_scales): 86 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id]) 87 | joints_t = self.joints_generator[scale_id](joints_list[scale_id]) 88 | 89 | target_list.append(target_t.astype(np.float32)) 90 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32) 91 | joints_list[scale_id] = joints_t.astype(np.int32) 92 | 93 | return img, target_list, mask_list, joints_list 94 | 95 | def get_joints(self, anno): 96 | num_people = len(anno) 97 | 98 | if self.scale_aware_sigma: 99 | joints = np.zeros((num_people, self.num_joints, 4)) 100 | else: 101 | joints = np.zeros((num_people, 
self.num_joints, 3)) 102 | 103 | for i, obj in enumerate(anno): 104 | joints[i, :self.num_joints_without_center, :3] = \ 105 | np.array(obj['keypoints']).reshape([-1, 3]) 106 | if self.with_center: 107 | joints_sum = np.sum(joints[i, :-1, :2], axis=0) 108 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0]) 109 | if num_vis_joints > 0: 110 | joints[i, -1, :2] = joints_sum / num_vis_joints 111 | joints[i, -1, 2] = 1 112 | if self.scale_aware_sigma: 113 | # get person box 114 | box = obj['bbox'] 115 | size = max(box[2], box[3]) 116 | sigma = size / self.base_size * self.base_sigma 117 | if self.int_sigma: 118 | sigma = int(np.round(sigma + 0.5)) 119 | assert sigma > 0, sigma 120 | joints[i, :, 3] = sigma 121 | 122 | return joints 123 | 124 | def get_mask(self, anno, idx): 125 | coco = self.coco 126 | img_info = coco.loadImgs(self.ids[idx])[0] 127 | 128 | m = np.zeros((img_info['height'], img_info['width'])) 129 | 130 | for obj in anno: 131 | if obj['iscrowd']: 132 | rle = pycocotools.mask.frPyObjects( 133 | obj['segmentation'], img_info['height'], img_info['width']) 134 | m += pycocotools.mask.decode(rle) 135 | elif obj['num_keypoints'] == 0: 136 | rles = pycocotools.mask.frPyObjects( 137 | obj['segmentation'], img_info['height'], img_info['width']) 138 | for rle in rles: 139 | m += pycocotools.mask.decode(rle) 140 | 141 | return m < 0.5 142 | 143 | def _init_check(self, heatmap_generator, joints_generator): 144 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple' 145 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple' 146 | assert len(heatmap_generator) == len(joints_generator), \ 147 | 'heatmap_generator and joints_generator should have same length,'\ 148 | 'got {} vs {}.'.format( 149 | len(heatmap_generator), len(joints_generator) 150 | ) 151 | return len(heatmap_generator) 152 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/CrowdPoseKeypoints.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bowen Cheng (bcheng9@illinois.edu) and Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | 13 | import numpy as np 14 | 15 | import crowdposetools 16 | from .CrowdPoseDataset import CrowdPoseDataset 17 | from .target_generators import HeatmapGenerator 18 | 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class CrowdPoseKeypoints(CrowdPoseDataset): 24 | def __init__(self, 25 | cfg, 26 | dataset_name, 27 | remove_images_without_annotations, 28 | heatmap_generator, 29 | joints_generator, 30 | transforms=None): 31 | super().__init__(cfg.DATASET.ROOT, 32 | dataset_name, 33 | cfg.DATASET.DATA_FORMAT) 34 | 35 | if cfg.DATASET.WITH_CENTER: 36 | assert cfg.DATASET.NUM_JOINTS == 15, 'Number of joint with center for CrowdPose is 15' 37 | else: 38 | assert cfg.DATASET.NUM_JOINTS == 14, 'Number of joint for CrowdPose is 14' 39 | 40 | self.num_scales = self._init_check(heatmap_generator, joints_generator) 41 | 42 | self.num_joints = cfg.DATASET.NUM_JOINTS 43 | self.with_center = cfg.DATASET.WITH_CENTER 44 | self.num_joints_without_center = self.num_joints - 1 \ 45 | if self.with_center else self.num_joints 46 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA 47 | self.base_sigma = cfg.DATASET.BASE_SIGMA 48 | self.base_size = cfg.DATASET.BASE_SIZE 49 | self.int_sigma = cfg.DATASET.INT_SIGMA 50 | 51 | if remove_images_without_annotations: 52 | self.ids = [ 53 | img_id 54 | for img_id in self.ids 55 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 56 | ] 57 | 58 | self.transforms = transforms 59 | self.heatmap_generator = heatmap_generator 60 | self.joints_generator = joints_generator 61 | 62 | def __getitem__(self, idx): 63 | img, anno = super().__getitem__(idx) 64 | 65 | mask = self.get_mask(anno, idx) 66 | 67 | anno = [ 68 | obj for obj in anno 69 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 70 | ] 71 | 72 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint 73 | joints = self.get_joints(anno) 74 | 75 | mask_list = [mask.copy() for _ in range(self.num_scales)] 76 | joints_list = [joints.copy() for _ in range(self.num_scales)] 77 | target_list = list() 78 | 79 | if self.transforms: 80 | img, mask_list, joints_list = self.transforms( 81 | img, mask_list, joints_list 82 | ) 83 | 84 | for scale_id in range(self.num_scales): 85 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id]) 86 | joints_t = self.joints_generator[scale_id](joints_list[scale_id]) 87 | 88 | target_list.append(target_t.astype(np.float32)) 89 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32) 90 | joints_list[scale_id] = joints_t.astype(np.int32) 91 | 92 | return img, target_list, mask_list, joints_list 93 | 94 | def get_joints(self, anno): 95 | num_people = len(anno) 96 | 97 | if self.scale_aware_sigma: 98 | joints = np.zeros((num_people, self.num_joints, 4)) 99 | else: 100 | joints = np.zeros((num_people, self.num_joints, 3)) 101 | 102 | for i, obj in enumerate(anno): 103 | joints[i, :self.num_joints_without_center, :3] = \ 104 | np.array(obj['keypoints']).reshape([-1, 3]) 105 | if self.with_center: 106 | joints_sum = np.sum(joints[i, :-1, :2], axis=0) 107 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0]) 108 | if num_vis_joints > 0: 109 | joints[i, -1, :2] = joints_sum / 
num_vis_joints 110 | joints[i, -1, 2] = 1 111 | if self.scale_aware_sigma: 112 | # get person box 113 | box = obj['bbox'] 114 | size = max(box[2], box[3]) 115 | sigma = size / self.base_size * self.base_sigma 116 | if self.int_sigma: 117 | sigma = int(np.round(sigma + 0.5)) 118 | assert sigma > 0, sigma 119 | joints[i, :, 3] = sigma 120 | 121 | return joints 122 | 123 | def get_mask(self, anno, idx): 124 | coco = self.coco 125 | img_info = coco.loadImgs(self.ids[idx])[0] 126 | 127 | m = np.zeros((img_info['height'], img_info['width'])) 128 | 129 | return m < 0.5 130 | 131 | def _init_check(self, heatmap_generator, joints_generator): 132 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple' 133 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple' 134 | assert len(heatmap_generator) == len(joints_generator), \ 135 | 'heatmap_generator and joints_generator should have same length,'\ 136 | 'got {} vs {}.'.format( 137 | len(heatmap_generator), len(joints_generator) 138 | ) 139 | return len(heatmap_generator) 140 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .COCOKeypoints import CocoKeypoints as coco 8 | # from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose 9 | from .build import make_dataloader 10 | from .build import make_test_dataloader 11 | from .build import make_train_dataloader 12 | 13 | # dataset dependent configuration for visualization 14 | coco_part_labels = [ 15 | 'nose', 'eye_l', 'eye_r', 'ear_l', 'ear_r', 16 | 'sho_l', 'sho_r', 'elb_l', 'elb_r', 'wri_l', 'wri_r', 17 | 'hip_l', 'hip_r', 'kne_l', 'kne_r', 'ank_l', 'ank_r' 18 | ] 19 | coco_part_idx = { 20 | b: a for a, b in enumerate(coco_part_labels) 21 | } 22 | coco_part_orders = [ 23 | ('nose', 'eye_l'), ('eye_l', 'eye_r'), ('eye_r', 'nose'), 24 | ('eye_l', 'ear_l'), ('eye_r', 'ear_r'), ('ear_l', 'sho_l'), 25 | ('ear_r', 'sho_r'), ('sho_l', 'sho_r'), ('sho_l', 'hip_l'), 26 | ('sho_r', 'hip_r'), ('hip_l', 'hip_r'), ('sho_l', 'elb_l'), 27 | ('elb_l', 'wri_l'), ('sho_r', 'elb_r'), ('elb_r', 'wri_r'), 28 | ('hip_l', 'kne_l'), ('kne_l', 'ank_l'), ('hip_r', 'kne_r'), 29 | ('kne_r', 'ank_r') 30 | ] 31 | 32 | crowd_pose_part_labels = [ 33 | 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 34 | 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 35 | 'left_knee', 'right_knee', 'left_ankle', 'right_ankle', 36 | 'head', 'neck' 37 | ] 38 | crowd_pose_part_idx = { 39 | b: a for a, b in enumerate(crowd_pose_part_labels) 40 | } 41 | crowd_pose_part_orders = [ 42 | ('head', 'neck'), ('neck', 'left_shoulder'), ('neck', 'right_shoulder'), 43 | ('left_shoulder', 'right_shoulder'), ('left_shoulder', 'left_hip'), 44 | ('right_shoulder', 'right_hip'), ('left_hip', 'right_hip'), ('left_shoulder', 'left_elbow'), 45 | ('left_elbow', 'left_wrist'), ('right_shoulder', 'right_elbow'), ('right_elbow', 'right_wrist'), 46 | ('left_hip', 'left_knee'), ('left_knee', 'left_ankle'), ('right_hip', 'right_knee'), 47 | ('right_knee', 'right_ankle') 48 | ] 49 | 50 | VIS_CONFIG = { 51 | 'COCO': { 52 | 
'part_labels': coco_part_labels, 53 | 'part_idx': coco_part_idx, 54 | 'part_orders': coco_part_orders 55 | }, 56 | 'CROWDPOSE': { 57 | 'part_labels': crowd_pose_part_labels, 58 | 'part_idx': crowd_pose_part_idx, 59 | 'part_orders': crowd_pose_part_orders 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/build.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import torch.utils.data 13 | 14 | from .COCODataset import CocoDataset as coco 15 | from .COCOKeypoints import CocoKeypoints as coco_kpt 16 | # # from .CrowdPoseDataset import CrowdPoseDataset as crowd_pose 17 | # from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose_kpt 18 | from .transforms import build_transforms 19 | from .target_generators import HeatmapGenerator 20 | from .target_generators import ScaleAwareHeatmapGenerator 21 | from .target_generators import JointsGenerator 22 | # from .random_resolution_dataloader import RandomResolutionDataLoader 23 | 24 | 25 | def build_dataset(cfg, is_train): 26 | transforms = build_transforms(cfg, is_train) 27 | 28 | if cfg.DATASET.SCALE_AWARE_SIGMA: 29 | _HeatmapGenerator = ScaleAwareHeatmapGenerator 30 | else: 31 | _HeatmapGenerator = HeatmapGenerator 32 | 33 | heatmap_generator = [ 34 | _HeatmapGenerator( 35 | output_size, cfg.DATASET.NUM_JOINTS, cfg.DATASET.SIGMA 36 | ) for output_size in cfg.DATASET.OUTPUT_SIZE 37 | ] 38 | joints_generator = [ 39 | JointsGenerator( 40 | cfg.DATASET.MAX_NUM_PEOPLE, 41 | cfg.DATASET.NUM_JOINTS, 42 | output_size, 43 | cfg.MODEL.TAG_PER_JOINT 44 | ) for output_size in cfg.DATASET.OUTPUT_SIZE 45 | ] 46 | 47 | dataset_name = cfg.DATASET.TRAIN if is_train else cfg.DATASET.TEST 48 | 49 | dataset = eval(cfg.DATASET.DATASET)( 50 | cfg, 51 | dataset_name, 52 | is_train, 53 | heatmap_generator, 54 | joints_generator, 55 | transforms 56 | ) 57 | 58 | return dataset 59 | 60 | 61 | def make_dataloader(cfg, is_train=True, distributed=False): 62 | if is_train: 63 | images_per_gpu = cfg.TRAIN.IMAGES_PER_GPU 64 | shuffle = True 65 | else: 66 | images_per_gpu = cfg.TEST.IMAGES_PER_GPU 67 | shuffle = False 68 | images_per_batch = images_per_gpu * len(cfg.GPUS) 69 | 70 | dataset = build_dataset(cfg, is_train) 71 | 72 | if is_train and distributed: 73 | train_sampler = torch.utils.data.distributed.DistributedSampler( 74 | dataset 75 | ) 76 | shuffle = False 77 | else: 78 | train_sampler = None 79 | 80 | data_loader = torch.utils.data.DataLoader( 81 | # data_loader = RandomResolutionDataLoader( 82 | dataset, 83 | batch_size=images_per_batch, 84 | shuffle=shuffle, 85 | num_workers=cfg.WORKERS, 86 | pin_memory=cfg.PIN_MEMORY, 87 | sampler=train_sampler 88 | ) 89 | 90 | return data_loader 91 | 92 | def make_train_dataloader(cfg): 93 | is_train = True 94 | images_per_gpu = cfg.TRAIN.IMAGES_PER_GPU 95 | images_per_batch = images_per_gpu * len(cfg.GPUS) 96 | dataset = build_dataset(cfg, is_train) 97 | data_loader = torch.utils.data.DataLoader( 98 | dataset, 99 | batch_size=images_per_batch, 100 
| shuffle=False, 101 | num_workers=cfg.WORKERS, 102 | pin_memory=False 103 | ) 104 | return data_loader, dataset 105 | 106 | 107 | def make_test_dataloader(cfg): 108 | transforms = None 109 | dataset = eval(cfg.DATASET.DATASET_TEST)( 110 | cfg.DATASET.ROOT, 111 | cfg.DATASET.TEST, 112 | cfg.DATASET.DATA_FORMAT, 113 | transforms 114 | ) 115 | 116 | data_loader = torch.utils.data.DataLoader( 117 | dataset, 118 | batch_size=1, 119 | shuffle=False, 120 | num_workers=0, 121 | pin_memory=False 122 | ) 123 | 124 | return data_loader, dataset 125 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/target_generators/__init__.py: -------------------------------------------------------------------------------- 1 | from .target_generators import HeatmapGenerator 2 | from .target_generators import ScaleAwareHeatmapGenerator 3 | from .target_generators import JointsGenerator 4 | 5 | __all__ = ['HeatmapGenerator', 'ScaleAwareHeatmapGenerator', 'JointsGenerator'] 6 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/target_generators/target_generators.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | 14 | 15 | class HeatmapGenerator(): 16 | def __init__(self, output_res, num_joints, sigma=-1): 17 | self.output_res = output_res 18 | self.num_joints = num_joints 19 | if sigma < 0: 20 | sigma = self.output_res/64 21 | self.sigma = sigma 22 | size = 6*sigma + 3 23 | x = np.arange(0, size, 1, float) 24 | y = x[:, np.newaxis] 25 | x0, y0 = 3*sigma + 1, 3*sigma + 1 26 | self.g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 27 | 28 | def __call__(self, joints): 29 | hms = np.zeros((self.num_joints, self.output_res, self.output_res), 30 | dtype=np.float32) 31 | sigma = self.sigma 32 | for p in joints: 33 | for idx, pt in enumerate(p): 34 | if pt[2] > 0: 35 | x, y = int(pt[0]), int(pt[1]) 36 | if x < 0 or y < 0 or \ 37 | x >= self.output_res or y >= self.output_res: 38 | continue 39 | 40 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1)) 41 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2)) 42 | 43 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0] 44 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1] 45 | 46 | cc, dd = max(0, ul[0]), min(br[0], self.output_res) 47 | aa, bb = max(0, ul[1]), min(br[1], self.output_res) 48 | hms[idx, aa:bb, cc:dd] = np.maximum( 49 | hms[idx, aa:bb, cc:dd], self.g[a:b, c:d]) 50 | return hms 51 | 52 | 53 | class ScaleAwareHeatmapGenerator(): 54 | def __init__(self, output_res, num_joints): 55 | self.output_res = output_res 56 | self.num_joints = num_joints 57 | 58 | def get_gaussian_kernel(self, sigma): 59 | size = 6*sigma + 3 60 | x = np.arange(0, size, 1, float) 61 | y = x[:, np.newaxis] 62 | x0, y0 = 3*sigma + 1, 3*sigma + 1 63 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 64 | return g 65 | 66 | def __call__(self, joints): 67 | hms = 
np.zeros((self.num_joints, self.output_res, self.output_res), 68 | dtype=np.float32) 69 | for p in joints: 70 | sigma = p[0, 3] 71 | g = self.get_gaussian_kernel(sigma) 72 | for idx, pt in enumerate(p): 73 | if pt[2] > 0: 74 | x, y = int(pt[0]), int(pt[1]) 75 | if x < 0 or y < 0 or \ 76 | x >= self.output_res or y >= self.output_res: 77 | continue 78 | 79 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1)) 80 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2)) 81 | 82 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0] 83 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1] 84 | 85 | cc, dd = max(0, ul[0]), min(br[0], self.output_res) 86 | aa, bb = max(0, ul[1]), min(br[1], self.output_res) 87 | hms[idx, aa:bb, cc:dd] = np.maximum( 88 | hms[idx, aa:bb, cc:dd], g[a:b, c:d]) 89 | return hms 90 | 91 | 92 | class JointsGenerator(): 93 | def __init__(self, max_num_people, num_joints, output_res, tag_per_joint): 94 | self.max_num_people = max_num_people 95 | self.num_joints = num_joints 96 | self.output_res = output_res 97 | self.tag_per_joint = tag_per_joint 98 | 99 | def __call__(self, joints): 100 | visible_nodes = np.zeros((self.max_num_people, self.num_joints, 2)) 101 | output_res = self.output_res 102 | for i in range(len(joints)): 103 | tot = 0 104 | for idx, pt in enumerate(joints[i]): 105 | x, y = int(pt[0]), int(pt[1]) 106 | if pt[2] > 0 and x >= 0 and y >= 0 \ 107 | and x < self.output_res and y < self.output_res: 108 | if self.tag_per_joint: 109 | visible_nodes[i][tot] = \ 110 | (idx * output_res**2 + y * output_res + x, 1) 111 | else: 112 | visible_nodes[i][tot] = \ 113 | (y * output_res + x, 1) 114 | tot += 1 115 | return visible_nodes 116 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import Compose 2 | from .transforms import RandomAffineTransform 3 | from .transforms import ToTensor 4 | from .transforms import Normalize 5 | from .transforms import RandomHorizontalFlip 6 | 7 | from .build import build_transforms 8 | from .build import FLIP_CONFIG 9 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/transforms/build.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | from . import transforms as T 13 | 14 | 15 | FLIP_CONFIG = { 16 | 'COCO': [ 17 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 18 | ], 19 | 'COCO_WITH_CENTER': [ 20 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 17 21 | ], 22 | 'CROWDPOSE': [ 23 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13 24 | ], 25 | 'CROWDPOSE_WITH_CENTER': [ 26 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13, 14 27 | ] 28 | } 29 | 30 | 31 | def build_transforms(cfg, is_train=True): 32 | assert is_train is True, 'Please only use build_transforms for training.' 
33 | assert isinstance(cfg.DATASET.OUTPUT_SIZE, (list, tuple)), 'DATASET.OUTPUT_SIZE should be list or tuple' 34 | if is_train: 35 | max_rotation = cfg.DATASET.MAX_ROTATION 36 | min_scale = cfg.DATASET.MIN_SCALE 37 | max_scale = cfg.DATASET.MAX_SCALE 38 | max_translate = cfg.DATASET.MAX_TRANSLATE 39 | input_size = cfg.DATASET.INPUT_SIZE 40 | output_size = cfg.DATASET.OUTPUT_SIZE 41 | flip = cfg.DATASET.FLIP 42 | scale_type = cfg.DATASET.SCALE_TYPE 43 | else: 44 | scale_type = cfg.DATASET.SCALE_TYPE 45 | max_rotation = 0 46 | min_scale = 1 47 | max_scale = 1 48 | max_translate = 0 49 | input_size = 512 50 | output_size = [128] 51 | flip = 0 52 | 53 | # coco_flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 54 | # if cfg.DATASET.WITH_CENTER: 55 | # coco_flip_index.append(17) 56 | if 'coco' in cfg.DATASET.DATASET: 57 | dataset_name = 'COCO' 58 | elif 'crowd_pose' in cfg.DATASET.DATASET: 59 | dataset_name = 'CROWDPOSE' 60 | else: 61 | raise ValueError('Please implement flip_index for new dataset: %s.' % cfg.DATASET.DATASET) 62 | if cfg.DATASET.WITH_CENTER: 63 | coco_flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER'] 64 | else: 65 | coco_flip_index = FLIP_CONFIG[dataset_name] 66 | 67 | transforms = T.Compose( 68 | [ 69 | T.RandomAffineTransform( 70 | input_size, 71 | output_size, 72 | max_rotation, 73 | min_scale, 74 | max_scale, 75 | scale_type, 76 | max_translate, 77 | scale_aware_sigma=cfg.DATASET.SCALE_AWARE_SIGMA 78 | ), 79 | T.RandomHorizontalFlip(coco_flip_index, output_size, flip), 80 | T.ToTensor(), 81 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 82 | ] 83 | ) 84 | 85 | return transforms 86 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import random 13 | 14 | import cv2 15 | import numpy as np 16 | import torch 17 | import torchvision 18 | from torchvision.transforms import functional as F 19 | 20 | 21 | class Compose(object): 22 | def __init__(self, transforms): 23 | self.transforms = transforms 24 | 25 | def __call__(self, image, mask, joints): 26 | for t in self.transforms: 27 | image, mask, joints = t(image, mask, joints) 28 | return image, mask, joints 29 | 30 | def __repr__(self): 31 | format_string = self.__class__.__name__ + "(" 32 | for t in self.transforms: 33 | format_string += "\n" 34 | format_string += " {0}".format(t) 35 | format_string += "\n)" 36 | return format_string 37 | 38 | 39 | class ToTensor(object): 40 | def __call__(self, image, mask, joints): 41 | return F.to_tensor(image), mask, joints 42 | 43 | 44 | class Normalize(object): 45 | def __init__(self, mean, std): 46 | self.mean = mean 47 | self.std = std 48 | 49 | def __call__(self, image, mask, joints): 50 | image = F.normalize(image, mean=self.mean, std=self.std) 51 | return image, mask, joints 52 | 53 | 54 | class RandomHorizontalFlip(object): 55 | def __init__(self, flip_index, output_size, prob=0.5): 56 | self.flip_index = flip_index 57 | self.prob = prob 58 | self.output_size = output_size if isinstance(output_size, list) \ 59 | else [output_size] 60 | 61 | def __call__(self, image, mask, joints): 62 | assert isinstance(mask, list) 63 | assert isinstance(joints, list) 64 | assert len(mask) == len(joints) 65 | assert len(mask) == len(self.output_size) 66 | 67 | if random.random() < self.prob: 68 | image = image[:, ::-1] - np.zeros_like(image) 69 | for i, _output_size in enumerate(self.output_size): 70 | mask[i] = mask[i][:, ::-1] - np.zeros_like(mask[i]) 71 | joints[i] = joints[i][:, self.flip_index] 72 | joints[i][:, :, 0] = _output_size - joints[i][:, :, 0] - 1 73 | 74 | return image, mask, joints 75 | 76 | 77 | class RandomAffineTransform(object): 78 | def __init__(self, 79 | input_size, 80 | output_size, 81 | max_rotation, 82 | min_scale, 83 | max_scale, 84 | scale_type, 85 | max_translate, 86 | scale_aware_sigma=False): 87 | self.input_size = input_size 88 | self.output_size = output_size if isinstance(output_size, list) \ 89 | else [output_size] 90 | 91 | self.max_rotation = max_rotation 92 | self.min_scale = min_scale 93 | self.max_scale = max_scale 94 | self.scale_type = scale_type 95 | self.max_translate = max_translate 96 | self.scale_aware_sigma = scale_aware_sigma 97 | 98 | def _get_affine_matrix(self, center, scale, res, rot=0): 99 | # Generate transformation matrix 100 | h = 200 * scale 101 | t = np.zeros((3, 3)) 102 | t[0, 0] = float(res[1]) / h 103 | t[1, 1] = float(res[0]) / h 104 | t[0, 2] = res[1] * (-float(center[0]) / h + .5) 105 | t[1, 2] = res[0] * (-float(center[1]) / h + .5) 106 | t[2, 2] = 1 107 | if not rot == 0: 108 | rot = -rot # To match direction of rotation from cropping 109 | rot_mat = np.zeros((3, 3)) 110 | rot_rad = rot * np.pi / 180 111 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 112 | rot_mat[0, :2] = [cs, -sn] 113 | rot_mat[1, :2] = [sn, cs] 114 | rot_mat[2, 2] = 1 115 | # Need to rotate around center 116 | t_mat = np.eye(3) 117 | t_mat[0, 2] = -res[1]/2 118 | t_mat[1, 2] = -res[0]/2 119 
| t_inv = t_mat.copy() 120 | t_inv[:2, 2] *= -1 121 | t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))) 122 | return t 123 | 124 | def _affine_joints(self, joints, mat): 125 | joints = np.array(joints) 126 | shape = joints.shape 127 | joints = joints.reshape(-1, 2) 128 | return np.dot(np.concatenate( 129 | (joints, joints[:, 0:1]*0+1), axis=1), mat.T).reshape(shape) 130 | 131 | def __call__(self, image, mask, joints): 132 | assert isinstance(mask, list) 133 | assert isinstance(joints, list) 134 | assert len(mask) == len(joints) 135 | assert len(mask) == len(self.output_size) 136 | 137 | height, width = image.shape[:2] 138 | 139 | center = np.array((width/2, height/2)) 140 | if self.scale_type == 'long': 141 | scale = max(height, width)/200 142 | elif self.scale_type == 'short': 143 | scale = min(height, width)/200 144 | else: 145 | raise ValueError('Unkonw scale type: {}'.format(self.scale_type)) 146 | aug_scale = np.random.random() * (self.max_scale - self.min_scale) \ 147 | + self.min_scale 148 | scale *= aug_scale 149 | aug_rot = (np.random.random() * 2 - 1) * self.max_rotation 150 | 151 | if self.max_translate > 0: 152 | dx = np.random.randint( 153 | -self.max_translate*scale, self.max_translate*scale) 154 | dy = np.random.randint( 155 | -self.max_translate*scale, self.max_translate*scale) 156 | center[0] += dx 157 | center[1] += dy 158 | 159 | for i, _output_size in enumerate(self.output_size): 160 | mat_output = self._get_affine_matrix( 161 | center, scale, (_output_size, _output_size), aug_rot 162 | )[:2] 163 | mask[i] = cv2.warpAffine( 164 | (mask[i]*255).astype(np.uint8), mat_output, 165 | (_output_size, _output_size) 166 | ) / 255 167 | mask[i] = (mask[i] > 0.5).astype(np.float32) 168 | 169 | joints[i][:, :, 0:2] = self._affine_joints( 170 | joints[i][:, :, 0:2], mat_output 171 | ) 172 | if self.scale_aware_sigma: 173 | joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale 174 | 175 | mat_input = self._get_affine_matrix( 176 | center, scale, (self.input_size, self.input_size), aug_rot 177 | )[:2] 178 | image = cv2.warpAffine( 179 | image, mat_input, (self.input_size, self.input_size) 180 | ) 181 | 182 | return image, mask, joints 183 | -------------------------------------------------------------------------------- /lib/LitePose/lib/fp16_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/lib/LitePose/lib/fp16_utils/__init__.py -------------------------------------------------------------------------------- /lib/LitePose/lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.pose_higher_hrnet 12 | import models.pose_mobilenet 13 | import models.pose_simplenet 14 | import models.pose_supermobilenet 15 | import models.pose_resnet 16 | import models.pose_superresnet 17 | import models.pose_efficient_hrnet 18 | -------------------------------------------------------------------------------- /lib/LitePose/lib/models/layers/efficient_blocks.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | def conv(in_channels, out_channels, kernel_size=3, padding=1, bn=True, dilation=1, stride=1, relu=True, bias=True): 8 | modules = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)] 9 | if bn: 10 | modules.append(nn.BatchNorm2d(out_channels)) 11 | if relu: 12 | modules.append(nn.ReLU(inplace=True)) 13 | return nn.Sequential(*modules) 14 | 15 | def conv_bn(inp, oup, stride, use_batch_norm=True): 16 | #ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 17 | 18 | if use_batch_norm: 19 | return nn.Sequential( 20 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 21 | nn.BatchNorm2d(oup), 22 | nn.ReLU(inplace=True), 23 | ) 24 | else: 25 | return nn.Sequential( 26 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 27 | nn.ReLU(inplace=True), 28 | ) 29 | 30 | def conv1(in_channels,out_channels,stride): 31 | return nn.Sequential( 32 | nn.Conv2d(in_channels,out_channels,3,stride,1,bias=False), 33 | nn.BatchNorm2d(out_channels), 34 | nn.ReLU6(inplace=True), 35 | ) 36 | 37 | def conv_pw(in_channels, out_channels): 38 | return nn.Sequential( 39 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(out_channels), 41 | nn.ReLU6(inplace=True), 42 | ) 43 | 44 | def conv_dw_no_bn(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1): 45 | return nn.Sequential( 46 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False), 47 | nn.ELU(inplace=True), 48 | 49 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 50 | nn.ELU(inplace=True), 51 | ) 52 | 53 | def conv_bn_act(in_, out_, kernel_size, 54 | stride=1, groups=1, bias=True, 55 | eps=1e-3, momentum=0.01): 56 | return nn.Sequential( 57 | SamePadConv2d(in_, out_, kernel_size, stride, groups=groups, bias=bias), 58 | nn.BatchNorm2d(out_, eps, momentum), 59 | Swish() 60 | ) 61 | 62 | 63 | class SamePadConv2d(nn.Conv2d): 64 | """ 65 | Conv with TF padding='same' 66 | https://github.com/pytorch/pytorch/issues/3867#issuecomment-349279036 67 | """ 68 | 69 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True, padding_mode="zeros"): 70 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias, padding_mode) 71 | 72 | def get_pad_odd(self, in_, weight, stride, dilation): 73 | effective_filter_size_rows = (weight - 1) * dilation + 1 74 | out_rows = (in_ + stride - 1) // stride 75 | padding_needed = max(0, (out_rows - 1) * stride + effective_filter_size_rows - in_) 76 | padding_rows = max(0, (out_rows - 1) * stride + (weight - 1) * dilation + 1 - in_) 77 | rows_odd = (padding_rows % 2 != 0) 78 | return 
padding_rows, rows_odd 79 | 80 | def forward(self, x): 81 | padding_rows, rows_odd = self.get_pad_odd(x.shape[2], self.weight.shape[2], self.stride[0], self.dilation[0]) 82 | padding_cols, cols_odd = self.get_pad_odd(x.shape[3], self.weight.shape[3], self.stride[1], self.dilation[1]) 83 | 84 | if rows_odd or cols_odd: 85 | x = F.pad(x, [0, int(cols_odd), 0, int(rows_odd)]) 86 | 87 | return F.conv2d(x, self.weight, self.bias, self.stride, 88 | padding=(padding_rows // 2, padding_cols // 2), 89 | dilation=self.dilation, groups=self.groups) 90 | 91 | 92 | class Swish(nn.Module): 93 | def forward(self, x): 94 | return x * torch.sigmoid(x) 95 | 96 | 97 | class Flatten(nn.Module): 98 | def forward(self, x): 99 | return x.view(x.shape[0], -1) 100 | 101 | 102 | class SEModule(nn.Module): 103 | def __init__(self, in_, squeeze_ch): 104 | super().__init__() 105 | self.se = nn.Sequential( 106 | nn.AdaptiveAvgPool2d(1), 107 | nn.Conv2d(in_, squeeze_ch, kernel_size=1, stride=1, padding=0, bias=True), 108 | Swish(), 109 | nn.Conv2d(squeeze_ch, in_, kernel_size=1, stride=1, padding=0, bias=True), 110 | ) 111 | 112 | def forward(self, x): 113 | return x * torch.sigmoid(self.se(x)) 114 | 115 | 116 | class DropConnect(nn.Module): 117 | def __init__(self, ratio): 118 | super().__init__() 119 | self.ratio = 1.0 - ratio 120 | 121 | def forward(self, x): 122 | if not self.training: 123 | return x 124 | 125 | random_tensor = self.ratio 126 | random_tensor += torch.rand([x.shape[0], 1, 1, 1], dtype=torch.float, device=x.device) 127 | random_tensor.requires_grad_(False) 128 | return x / self.ratio * random_tensor.floor() -------------------------------------------------------------------------------- /lib/LitePose/lib/models/layers/layers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | def _make_divisible(v, divisor, min_value=None): 9 | if min_value is None: 10 | min_value = divisor 11 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 12 | # Make sure that round down does not go down by more than 10%. 
13 | if new_v < 0.9 * v: 14 | new_v += divisor 15 | return new_v 16 | 17 | 18 | class convbnrelu(nn.Sequential): 19 | def __init__(self, inp, oup, ker=3, stride=1, groups=1): 20 | super(convbnrelu, self).__init__( 21 | nn.Conv2d(inp, oup, ker, stride, ker // 2, groups=groups, bias=False), 22 | nn.BatchNorm2d(oup), 23 | nn.ReLU6(inplace=True) 24 | ) 25 | 26 | class Bottleneck(nn.Module): 27 | 28 | def __init__(self, inp, oup, s=1, k=3, r=4): 29 | super(Bottleneck, self).__init__() 30 | mid_dim = oup // r 31 | if inp == oup and s == 1: 32 | self.residual = True 33 | else: 34 | self.residual = False 35 | self.conv1 = nn.Conv2d(inp, mid_dim, kernel_size=1, bias=False) 36 | self.bn1 = nn.BatchNorm2d(mid_dim) 37 | self.conv2 = nn.Conv2d(mid_dim, mid_dim, kernel_size=k, stride=s, padding=k//2, bias=False) 38 | self.bn2 = nn.BatchNorm2d(mid_dim) 39 | self.conv3 = nn.Conv2d(mid_dim, oup, kernel_size=1, bias=False) 40 | self.bn3 = nn.BatchNorm2d(oup) 41 | self.relu = nn.ReLU(inplace=True) 42 | 43 | def forward(self, x): 44 | residual = x 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | out = self.relu(out) 51 | out = self.conv3(out) 52 | out = self.bn3(out) 53 | if self.residual == True: 54 | out += residual 55 | out = self.relu(out) 56 | return out 57 | 58 | class UpConv(nn.Module): 59 | def __init__(self, inp, oup, k=3): 60 | super(UpConv, self).__init__() 61 | self.conv = nn.Conv2d(inp, oup, k, 1, k // 2, bias=False) 62 | def forward(self, x): 63 | x = F.interpolate(x, scale_factor=2) 64 | x = self.conv(x) 65 | return x 66 | 67 | class FusedMBConv(nn.Module): 68 | 69 | def __init__(self, inp, oup, s=1, k=3, r=4): 70 | super(FusedMBConv, self).__init__() 71 | feature_dim = _make_divisible(round(inp * r), 8) 72 | self.inv = nn.Sequential( 73 | nn.Conv2d(inp, feature_dim, k, s, k // 2, bias=False), 74 | nn.BatchNorm2d(feature_dim), 75 | nn.ReLU6(inplace = True) 76 | ) 77 | self.point_conv = nn.Sequential( 78 | nn.Conv2d(feature_dim, oup, 1, 1, 0, bias=False), 79 | nn.BatchNorm2d(oup) 80 | ) 81 | self.use_residual_connection = s == 1 and inp == oup 82 | 83 | def forward(self, x): 84 | out = self.inv(x) 85 | out = self.point_conv(out) 86 | if self.use_residual_connection: 87 | out += x 88 | return out 89 | 90 | class InvBottleneck(nn.Module): 91 | 92 | def __init__(self, inplanes, planes, stride=1, ker=3, exp=6): 93 | super(InvBottleneck, self).__init__() 94 | feature_dim = _make_divisible(round(inplanes * exp), 8) 95 | self.inv = nn.Sequential( 96 | nn.Conv2d(inplanes, feature_dim, 1, 1, 0, bias=False), 97 | nn.BatchNorm2d(feature_dim), 98 | nn.ReLU6(inplace = True) 99 | ) 100 | self.depth_conv = nn.Sequential( 101 | nn.Conv2d(feature_dim, feature_dim, ker, stride, ker // 2, groups=feature_dim, bias=False), 102 | nn.BatchNorm2d(feature_dim), 103 | nn.ReLU6(inplace = True) 104 | ) 105 | self.point_conv = nn.Sequential( 106 | nn.Conv2d(feature_dim, planes, 1, 1, 0, bias=False), 107 | nn.BatchNorm2d(planes) 108 | ) 109 | self.stride = stride 110 | self.use_residual_connection = stride == 1 and inplanes == planes 111 | 112 | def forward(self, x): 113 | out = self.inv(x) 114 | out = self.depth_conv(out) 115 | out = self.point_conv(out) 116 | if self.use_residual_connection: 117 | out += x 118 | return out 119 | 120 | class SepConv2d(nn.Module): 121 | def __init__(self, inp, oup, ker=3, stride=1): 122 | super(SepConv2d, self).__init__() 123 | conv = [ 124 | nn.Conv2d(inp, inp, ker, stride, ker // 2, groups=inp, bias=False), 
125 | nn.BatchNorm2d(inp), 126 | nn.ReLU(inplace=True), 127 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 128 | ] 129 | self.conv = nn.Sequential(*conv) 130 | 131 | def forward(self, x): 132 | output = self.conv(x) 133 | return output -------------------------------------------------------------------------------- /lib/LitePose/lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import random 6 | import time 7 | from models.layers.layers import Bottleneck, convbnrelu, UpConv, FusedMBConv 8 | 9 | def rand(c): 10 | return random.randint(0, c - 1) 11 | 12 | def _make_divisible(v, divisor, min_value=None): 13 | if min_value is None: 14 | min_value = divisor 15 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 16 | # Make sure that round down does not go down by more than 10%. 17 | if new_v < 0.9 * v: 18 | new_v += divisor 19 | return new_v 20 | 21 | class LitePose(nn.Module): 22 | def __init__(self, cfg, width_mult=1.0, round_nearest=8): 23 | super(LitePose, self).__init__() 24 | input_channel = 16 25 | backbone_setting = [ 26 | # r, k, c, n, s 27 | [4, 7, 16, 4, 2], 28 | [4, 7, 32, 6, 2], 29 | [4, 5, 48, 8, 2], 30 | [4, 3, 80, 8, 1], 31 | ] 32 | # building first layer 33 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 34 | self.first = nn.Sequential( 35 | convbnrelu(3, 32, ker=7, stride=2), 36 | convbnrelu(32, input_channel, ker=7, stride=1) 37 | ) 38 | self.channel = [input_channel] 39 | self.stage = [] 40 | for id_stage in range(len(backbone_setting)): 41 | r, k, c, n, s = backbone_setting[id_stage] 42 | c = _make_divisible(c * width_mult, round_nearest) 43 | layer = [] 44 | for id_block in range(n): 45 | stride = s if id_block == 0 else 1 46 | layer.append(FusedMBConv(input_channel, c, s=stride, k=k, r=r)) 47 | input_channel = c 48 | layer = nn.Sequential(*layer) 49 | self.stage.append(layer) 50 | self.channel.append(c) 51 | self.stage = nn.ModuleList(self.stage) 52 | extra = cfg.MODEL.EXTRA 53 | self.filters = extra['NUM_DECONV_FILTERS'] 54 | self.inplanes = self.channel[-1] 55 | self.deconv_refined, self.deconv_raw, self.deconv_bnrelu = self._make_deconv_layers( 56 | extra.NUM_DECONV_LAYERS, 57 | self.filters, 58 | extra.NUM_DECONV_KERNELS, 59 | ) 60 | self.final_refined, self.final_raw, self.final_channel = self._make_final_layers(cfg, self.filters) 61 | self.num_deconv_layers = extra.NUM_DECONV_LAYERS 62 | self.loss_config = cfg.LOSS 63 | 64 | def _get_deconv_cfg(self, deconv_kernel): 65 | if deconv_kernel == 4: 66 | padding = 1 67 | output_padding = 0 68 | elif deconv_kernel == 3: 69 | padding = 1 70 | output_padding = 1 71 | elif deconv_kernel == 2: 72 | padding = 0 73 | output_padding = 0 74 | 75 | return deconv_kernel, padding, output_padding 76 | 77 | def _make_final_layers(self, cfg, num_filters): 78 | dim_tag = cfg.MODEL.NUM_JOINTS if cfg.MODEL.TAG_PER_JOINT else 1 79 | extra = cfg.MODEL.EXTRA 80 | final_raw= [] 81 | final_refined = [] 82 | final_channel = [] 83 | for i in range(1, extra.NUM_DECONV_LAYERS): 84 | # input_channels = num_filters[i] + self.channel[-i-3] 85 | oup_joint = cfg.MODEL.NUM_JOINTS if cfg.LOSS.WITH_HEATMAPS_LOSS[i-1] else 0 86 | oup_tag = dim_tag if cfg.LOSS.WITH_AE_LOSS[i-1] else 0 87 | final_refined.append(nn.Conv2d(num_filters[i], oup_joint + oup_tag, 3, 1, 1, bias=True)) 88 | final_raw.append(nn.Conv2d(self.channel[-i-3], oup_joint + oup_tag, 3, 1, 1, 
bias=True)) 89 | final_channel.append(oup_joint + oup_tag) 90 | 91 | return nn.ModuleList(final_refined), nn.ModuleList(final_raw), final_channel 92 | 93 | def _make_deconv_layers(self, num_layers, num_filters, num_kernels): 94 | deconv_refined = [] 95 | deconv_raw = [] 96 | deconv_bnrelu = [] 97 | for i in range(num_layers): 98 | kernel, padding, output_padding = \ 99 | self._get_deconv_cfg(num_kernels[i]) 100 | planes = num_filters[i] 101 | # inplanes = self.inplanes + self.channel[-i-2] 102 | layers = [] 103 | deconv_refined.append(UpConv(self.inplanes, planes, k=kernel)) 104 | deconv_raw.append(UpConv(self.channel[-i-2], planes, k=kernel)) 105 | layers.append(nn.BatchNorm2d(planes)) 106 | layers.append(nn.ReLU(inplace=True)) 107 | self.inplanes = planes 108 | deconv_bnrelu.append(nn.Sequential(*layers)) 109 | 110 | return nn.ModuleList(deconv_refined), nn.ModuleList(deconv_raw), nn.ModuleList(deconv_bnrelu) 111 | 112 | def forward(self, x): 113 | x = self.first(x) 114 | x_list = [x] 115 | for i in range(len(self.stage)): 116 | tmp = self.stage[i](x_list[-1]) 117 | x_list.append(tmp) 118 | final_outputs = [] 119 | input_refined = x_list[-1] 120 | input_raw = x_list[-2] 121 | for i in range(self.num_deconv_layers): 122 | next_input_refined = self.deconv_refined[i](input_refined) 123 | next_input_raw = self.deconv_raw[i](input_raw) 124 | input_refined= self.deconv_bnrelu[i](next_input_refined + next_input_raw) 125 | input_raw = x_list[-i-3] 126 | if i > 0: 127 | final_refined = self.final_refined[i-1](input_refined) 128 | final_raw = self.final_raw[i-1](input_raw) 129 | final_outputs.append(final_refined + final_raw) 130 | 131 | return final_outputs 132 | 133 | def get_pose_net(cfg, is_train=False, cfg_arch=None): 134 | model = LitePose(cfg) 135 | if is_train and cfg.MODEL.INIT_WEIGHTS: 136 | print(cfg.MODEL.PRETRAINED) 137 | if os.path.isfile(cfg.MODEL.PRETRAINED): 138 | print("load pre-train model") 139 | need_init_state_dict = {} 140 | state_dict = torch.load(cfg.MODEL.PRETRAINED, map_location=torch.device('cpu')) 141 | for key, value in state_dict.items(): 142 | if 'deconv' in key: 143 | continue 144 | if 'final' in key: 145 | continue 146 | need_init_state_dict[key] = value 147 | try: 148 | model.load_state_dict(need_init_state_dict, strict=False) 149 | except: 150 | print("Error load!") 151 | return model -------------------------------------------------------------------------------- /lib/LitePose/lib/models/pose_simplenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import random 6 | from models.layers.layers import InvBottleneck, convbnrelu, SepConv2d 7 | 8 | 9 | def rand(c): 10 | return random.randint(0, c - 1) 11 | 12 | 13 | def _make_divisible(v, divisor, min_value=None): 14 | if min_value is None: 15 | min_value = divisor 16 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 17 | # Make sure that round down does not go down by more than 10%. 
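# (i.e. if rounding to the nearest multiple of `divisor` lost more than 10% of the requested width, bump up by one divisor step)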
18 | if new_v < 0.9 * v: 19 | new_v += divisor 20 | return new_v 21 | 22 | 23 | class LitePose(nn.Module): 24 | def __init__(self, cfg, width_mult=1.0, round_nearest=8, cfg_arch=None): 25 | super(LitePose, self).__init__() 26 | # input_channel = 24 27 | # inverted_residual_setting = [ 28 | # # t, c, n, s 29 | # [6, 32, 6, 2], 30 | # [6, 64, 8, 2], 31 | # [6, 96, 10, 2], 32 | # [6, 160, 10, 1], 33 | # ] 34 | backbone_setting = cfg_arch["backbone_setting"] 35 | input_channel = cfg_arch["input_channel"] 36 | # building first layer 37 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 38 | self.first = nn.Sequential( 39 | convbnrelu(3, 32, ker=3, stride=2), 40 | convbnrelu(32, 32, ker=3, stride=1, groups=32), 41 | nn.Conv2d(32, input_channel, 1, 1, 0, bias=False), 42 | nn.BatchNorm2d(input_channel), 43 | ) 44 | self.channel = [input_channel] 45 | # building inverted residual blocks 46 | self.stage = [] 47 | for id_stage in range(len(backbone_setting)): 48 | n = backbone_setting[id_stage]["num_blocks"] 49 | s = backbone_setting[id_stage]["stride"] 50 | c = backbone_setting[id_stage]["channel"] 51 | c = _make_divisible(c * width_mult, round_nearest) 52 | block_setting = backbone_setting[id_stage]["block_setting"] 53 | layer = [] 54 | for id_block in range(n): 55 | t, k = block_setting[id_block] 56 | stride = s if id_block == 0 else 1 57 | layer.append(InvBottleneck(input_channel, c, stride, ker=k, exp=t)) 58 | input_channel = c 59 | layer = nn.Sequential(*layer) 60 | self.stage.append(layer) 61 | self.channel.append(c) 62 | self.stage = nn.ModuleList(self.stage) 63 | extra = cfg.MODEL.EXTRA 64 | self.filters = cfg_arch["deconv_setting"] 65 | self.inplanes = self.channel[-1] 66 | self.deconv_refined, self.deconv_bnrelu = self._make_deconv_layers( 67 | extra.NUM_DECONV_LAYERS, 68 | self.filters, 69 | extra.NUM_DECONV_KERNELS, 70 | ) 71 | self.final_refined, self.final_channel = self._make_final_layers( 72 | cfg, self.filters 73 | ) 74 | self.num_deconv_layers = extra.NUM_DECONV_LAYERS 75 | self.loss_config = cfg.LOSS 76 | 77 | def _get_deconv_cfg(self, deconv_kernel): 78 | if deconv_kernel == 4: 79 | padding = 1 80 | output_padding = 0 81 | elif deconv_kernel == 3: 82 | padding = 1 83 | output_padding = 1 84 | elif deconv_kernel == 2: 85 | padding = 0 86 | output_padding = 0 87 | 88 | return deconv_kernel, padding, output_padding 89 | 90 | def _make_final_layers(self, cfg, num_filters): 91 | dim_tag = cfg.MODEL.NUM_JOINTS if cfg.MODEL.TAG_PER_JOINT else 1 92 | extra = cfg.MODEL.EXTRA 93 | final_refined = [] 94 | final_channel = [] 95 | for i in range(1, extra.NUM_DECONV_LAYERS): 96 | # input_channels = num_filters[i] + self.channel[-i-3] 97 | oup_joint = ( 98 | cfg.MODEL.NUM_JOINTS if cfg.LOSS.WITH_HEATMAPS_LOSS[i - 1] else 0 99 | ) 100 | oup_tag = dim_tag if cfg.LOSS.WITH_AE_LOSS[i - 1] else 0 101 | final_refined.append(SepConv2d(num_filters[i], oup_joint + oup_tag, ker=5)) 102 | final_channel.append(oup_joint + oup_tag) 103 | 104 | return nn.ModuleList(final_refined), final_channel 105 | 106 | def _make_deconv_layers(self, num_layers, num_filters, num_kernels): 107 | deconv_refined = [] 108 | deconv_bnrelu = [] 109 | for i in range(num_layers): 110 | kernel, padding, output_padding = self._get_deconv_cfg(num_kernels[i]) 111 | planes = num_filters[i] 112 | layers = [] 113 | deconv_refined.append( 114 | nn.ConvTranspose2d( 115 | in_channels=self.inplanes, 116 | out_channels=planes, 117 | kernel_size=kernel, 118 | stride=2, 119 | padding=padding, 120 | 
output_padding=output_padding, 121 | bias=False, 122 | ) 123 | ) 124 | layers.append(nn.BatchNorm2d(planes)) 125 | layers.append(nn.ReLU(inplace=True)) 126 | self.inplanes = planes 127 | deconv_bnrelu.append(nn.Sequential(*layers)) 128 | 129 | return nn.ModuleList(deconv_refined), nn.ModuleList(deconv_bnrelu) 130 | 131 | def forward(self, x): 132 | x = self.first(x) 133 | x_list = [x] 134 | for i in range(len(self.stage)): 135 | tmp = self.stage[i](x_list[-1]) 136 | x_list.append(tmp) 137 | final_outputs = [] 138 | input_refined = x_list[-1] 139 | for i in range(self.num_deconv_layers): 140 | next_input_refined = self.deconv_refined[i](input_refined) 141 | input_refined = self.deconv_bnrelu[i](next_input_refined) 142 | if i > 0: 143 | final_refined = self.final_refined[i - 1](input_refined) 144 | final_outputs.append(final_refined) 145 | 146 | return final_outputs 147 | 148 | 149 | def get_pose_net(cfg, is_train=False, cfg_arch=None): 150 | model = LitePose(cfg, cfg_arch=cfg_arch) 151 | if is_train and cfg.MODEL.INIT_WEIGHTS: 152 | print(cfg.MODEL.PRETRAINED) 153 | if os.path.isfile(cfg.MODEL.PRETRAINED): 154 | print("load pre-train model") 155 | need_init_state_dict = {} 156 | state_dict = torch.load(cfg.MODEL.PRETRAINED) 157 | for key, value in state_dict.items(): 158 | if "deconv" in key: 159 | continue 160 | if "final" in key: 161 | continue 162 | need_init_state_dict[key] = value 163 | model.load_state_dict(need_init_state_dict, strict=False) 164 | return model 165 | -------------------------------------------------------------------------------- /lib/LitePose/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | from ptflops import get_model_complexity_info 17 | 18 | import torch 19 | import torch.optim as optim 20 | import torch.nn as nn 21 | 22 | 23 | 24 | def setup_logger(final_output_dir, rank, phase): 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank) 27 | final_log_file = os.path.join(final_output_dir, log_file) 28 | head = '%(asctime)-15s %(message)s' 29 | # logging.basicConfig(format=head) 30 | logging.basicConfig(filename=str(final_log_file), 31 | format=head) 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | console = logging.StreamHandler() 35 | logging.getLogger('').addHandler(console) 36 | 37 | return logger, time_str 38 | 39 | 40 | def create_logger(cfg, cfg_name, phase='train'): 41 | root_output_dir = Path(cfg.OUTPUT_DIR) 42 | # set up logger 43 | if not root_output_dir.exists() and cfg.RANK == 0: 44 | print('=> creating {}'.format(root_output_dir)) 45 | root_output_dir.mkdir() 46 | else: 47 | while not root_output_dir.exists(): 48 | print('=> wait for {} created'.format(root_output_dir)) 49 | time.sleep(30) 50 | 51 | dataset = cfg.DATASET.DATASET 52 | dataset = dataset.replace(':', '_') 53 | model = cfg.MODEL.NAME 54 | cfg_name = os.path.basename(cfg_name).split('.')[0] 55 | 56 | final_output_dir = root_output_dir / dataset / model / cfg_name 57 | 58 | if cfg.RANK == 0: 59 | print('=> creating {}'.format(final_output_dir)) 60 | final_output_dir.mkdir(parents=True, exist_ok=True) 61 | else: 62 | while not final_output_dir.exists(): 63 | print('=> wait for {} created'.format(final_output_dir)) 64 | time.sleep(5) 65 | 66 | logger, time_str = setup_logger(final_output_dir, cfg.RANK, phase) 67 | 68 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 69 | (cfg_name + '_' + time_str) 70 | 71 | print('=> creating {}'.format(tensorboard_log_dir)) 72 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 73 | 74 | return logger, str(final_output_dir), str(tensorboard_log_dir) 75 | 76 | 77 | def get_optimizer(cfg, model): 78 | optimizer = None 79 | if cfg.TRAIN.OPTIMIZER == 'sgd': 80 | optimizer = optim.SGD( 81 | model.parameters(), 82 | lr=cfg.TRAIN.LR, 83 | momentum=cfg.TRAIN.MOMENTUM, 84 | weight_decay=cfg.TRAIN.WD, 85 | nesterov=cfg.TRAIN.NESTEROV 86 | ) 87 | elif cfg.TRAIN.OPTIMIZER == 'adam': 88 | optimizer = optim.Adam( 89 | model.parameters(), 90 | lr=cfg.TRAIN.LR 91 | ) 92 | 93 | return optimizer 94 | 95 | 96 | def save_checkpoint(states, is_best, output_dir, 97 | filename='checkpoint.pth.tar'): 98 | torch.save(states, os.path.join(output_dir, filename)) 99 | 100 | if is_best and 'state_dict' in states: 101 | torch.save( 102 | states['best_state_dict'], 103 | os.path.join(output_dir, 'model_best.pth.tar') 104 | ) 105 | 106 | 107 | def get_model_summary(input_size, model, *input_tensors, item_length=26, verbose=False): 108 | """ 109 | :param model: 110 | :param input_tensors: 111 | :param item_length: 112 | :return: 113 | """ 114 | 115 | summary = [] 116 | 117 | ModuleDetails = namedtuple( 118 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 119 | hooks = [] 120 | 
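# NOTE: `summary`, `hooks` and `layer_instances` are left empty in this version; the parameter and multiply-add totals reported below come from ptflops.get_model_complexity_info.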
layer_instances = {} 121 | 122 | model.eval() 123 | 124 | space_len = item_length 125 | 126 | details = '' 127 | if verbose: 128 | details = "Model Summary" + \ 129 | os.linesep + \ 130 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 131 | ' ' * (space_len - len("Name")), 132 | ' ' * (space_len - len("Input Size")), 133 | ' ' * (space_len - len("Output Size")), 134 | ' ' * (space_len - len("Parameters")), 135 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 136 | + os.linesep + '-' * space_len * 5 + os.linesep 137 | 138 | params_sum = 0 139 | flops_sum = 0 140 | for layer in summary: 141 | params_sum += layer.num_parameters 142 | if layer.multiply_adds != "Not Available": 143 | flops_sum += layer.multiply_adds 144 | if verbose: 145 | details += "{}{}{}{}{}{}{}{}{}{}".format( 146 | layer.name, 147 | ' ' * (space_len - len(layer.name)), 148 | layer.input_size, 149 | ' ' * (space_len - len(str(layer.input_size))), 150 | layer.output_size, 151 | ' ' * (space_len - len(str(layer.output_size))), 152 | layer.num_parameters, 153 | ' ' * (space_len - len(str(layer.num_parameters))), 154 | layer.multiply_adds, 155 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 156 | + os.linesep + '-' * space_len * 5 + os.linesep 157 | 158 | macs, params = get_model_complexity_info(model, (3,input_size,input_size), print_per_layer_stat=False, as_strings=True, verbose=True) 159 | 160 | details += os.linesep + "Total Parameters: {}".format(params) + os.linesep + '-' * space_len * 5 + os.linesep 161 | details += "Total Multiply Adds: {}".format(macs) + os.linesep + '-' * space_len * 5 + os.linesep 162 | details += "Number of Layers" + os.linesep 163 | for layer in layer_instances: 164 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 165 | 166 | return details 167 | 168 | 169 | class AverageMeter(object): 170 | """Computes and stores the average and current value""" 171 | def __init__(self): 172 | self.reset() 173 | 174 | def reset(self): 175 | self.val = 0 176 | self.avg = 0 177 | self.sum = 0 178 | self.count = 0 179 | 180 | def update(self, val, n=1): 181 | self.val = val 182 | self.sum += val * n 183 | self.count += n 184 | self.avg = self.sum / self.count if self.count != 0 else 0 185 | -------------------------------------------------------------------------------- /lib/LitePose/lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.find('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 1:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.find('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in range(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/prune-L.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 24, "deconv_setting": [64, 48, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 64, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 96, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 160, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/prune-M.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 24, "deconv_setting": [48, 40, 24], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 24, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 72, "block_setting": [[6, 7], [6,
7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 120, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/prune-S.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 16, "deconv_setting": [32, 24, 16], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 16, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 80, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-L.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 24, "deconv_setting": [64, 40, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 24, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 64, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 96, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 160, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-M.json: -------------------------------------------------------------------------------- 1 | {"img_size": 448, "input_channel": 16, "deconv_setting": [64, 40, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 24, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 72, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 120, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-S.json: -------------------------------------------------------------------------------- 1 | {"img_size": 448, "input_channel": 16, "deconv_setting": [32, 24, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 16, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 120, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 
7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-XS.json: -------------------------------------------------------------------------------- 1 | {"img_size": 256, "input_channel": 16, "deconv_setting": [16, 24, 24], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 16, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 80, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hydra-core 2 | pytorch_lightning 3 | wandb 4 | imageio 5 | yacs 6 | pycocotools 7 | json_tricks 8 | ptflops 9 | kornia 10 | # git+https://github.com/pvigier/perlin-numpy -------------------------------------------------------------------------------- /run_demo.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | 4 | import hydra 5 | from omegaconf import DictConfig 6 | 7 | 8 | def get_losses(loss_type): 9 | if loss_type == "pixel": 10 | return ["true", "false", "false"] 11 | if loss_type == "pose": 12 | return ["false", "true", "false"] 13 | if loss_type == "flow": 14 | return ["false", "false", "true"] 15 | if loss_type == "pose_flow": 16 | return ["false", "true", "true"] 17 | 18 | 19 | def run_demo( 20 | data_dir: str, 21 | model: str, 22 | num_steps: int, 23 | seed: int, 24 | xpname: str, 25 | datamodule: str, 26 | alpha_losses: float, 27 | alpha_two_strokes: float, 28 | init_idx: int, 29 | init_focal_search: bool, 30 | two_strokes: float, 31 | flow_loss_type: str, 32 | guidance_type: str, 33 | learning_rate: float, 34 | num_epochs: int, 35 | num_mixed_grad: int, 36 | target_dir_name: str, 37 | loss_type: str, 38 | ): 39 | """ 40 | Run dolly zoom demo with the given parameters. 41 | 42 | :param data_dir: path to the data directory. 43 | :param model: model name. 44 | :param num_steps: number of steps sampled per ray. 45 | :param seed: random seed. 46 | :param xpname: experiment name. 47 | :param datamodule: datamodule name. 48 | :param alpha_losses: weight of the losses. 49 | :param alpha_two_strokes: weight of the two strokes (spatial and temporal). 50 | :param init_idx: index of the initial camera. 51 | :param init_focal_search: whether to search for the initial focal length. 52 | :param two_strokes: whether to uncouple spatial and temporal optimization. 53 | :param flow_loss_type: type of the flow metric (end-to-end, angular). 54 | :param guidance_type: type of the guidance (guidance map, inerf, random). 55 | :param learning_rate: learning rate. 56 | :param num_epochs: number of epochs. 57 | :param num_mixed_grad: number of guidance points. 58 | :param target_dir_name: name of the target directory. 59 | :param loss_type: type of the loss (pixel, pose, flow, pose_flow). 
60 | """ 61 | losses = get_losses(loss_type) 62 | command = ( 63 | f"python {os.path.dirname(os.path.abspath(__file__))}/jaws/run.py \ 64 | --config-name batch_jaws \ 65 | run_type=jaws \ 66 | data_dir={data_dir} \ 67 | dynamic=true \ 68 | group_name=demo \ 69 | model={model} \ 70 | num_steps={num_steps} \ 71 | seed={seed} \ 72 | xp_name={xpname} \ 73 | datamodule={datamodule} \ 74 | datamodule.alpha_losses={alpha_losses} \ 75 | datamodule.alpha_two_strokes={alpha_two_strokes} \ 76 | datamodule.blur_pred=false \ 77 | datamodule.init_cam_idx={init_idx} \ 78 | datamodule.only_init_focal_search={init_focal_search} \ 79 | datamodule.two_strokes={two_strokes} \ 80 | jaws.diff_focal=true \ 81 | jaws.diff_temporal=false \ 82 | jaws.flow_loss={losses[2]} \ 83 | jaws.flow_loss_type={flow_loss_type} \ 84 | jaws.grad_norm=true \ 85 | jaws.guidance_type={guidance_type} \ 86 | jaws.learning_rate={learning_rate} \ 87 | jaws.num_epochs={num_epochs} \ 88 | jaws.num_sample_grad={num_mixed_grad} \ 89 | jaws.pixel_loss={losses[0]} \ 90 | jaws.pose_loss={losses[1]} \ 91 | jaws.target_dir={target_dir_name}", 92 | ) 93 | subprocess.call(command, shell=True) 94 | 95 | 96 | @hydra.main( 97 | config_path="./jaws/configs", 98 | config_name="demo_jaws.yaml", 99 | version_base="1.2", 100 | ) 101 | def main(config: DictConfig): 102 | run_demo( 103 | data_dir=config.data_dir, 104 | xpname=config.xp_name, 105 | model=config.model, 106 | target_dir_name=config.target_dir_video, 107 | datamodule=config.datamodule, 108 | num_epochs=config.num_epochs, 109 | num_mixed_grad=config.num_mixed_grad, 110 | num_steps=config.num_steps, 111 | learning_rate=config.lr, 112 | seed=config.seed, 113 | loss_type=config.loss_type, 114 | init_idx=config.init_cam_idx_same, 115 | init_focal_search=config.init_focal_search, 116 | two_strokes=config.two_strokes, 117 | flow_loss_type=config.flow_loss_type, 118 | alpha_losses=config.alpha_losses, 119 | alpha_two_strokes=config.alpha_two_strokes, 120 | guidance_type=config.guidance_type, 121 | ) 122 | 123 | 124 | if __name__ == "__main__": 125 | main() 126 | -------------------------------------------------------------------------------- /utils/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import trimesh 4 | 5 | 6 | def nerf_matrix_to_ngp(pose, scale=0.33): 7 | # for the fox dataset, 0.33 scales camera radius to ~ 2 8 | new_pose = np.array( 9 | [ 10 | [pose[1, 0], -pose[1, 1], -pose[1, 2], pose[1, 3] * scale], 11 | [pose[2, 0], -pose[2, 1], -pose[2, 2], pose[2, 3] * scale], 12 | [pose[0, 0], -pose[0, 1], -pose[0, 2], pose[0, 3] * scale], 13 | [0, 0, 0, 1], 14 | ], 15 | dtype=np.float32, 16 | ) 17 | return new_pose 18 | 19 | 20 | def visualize_poses(poses, size=0.1): 21 | # poses: [B, 4, 4] 22 | 23 | axes = trimesh.creation.axis(axis_length=4) 24 | sphere = trimesh.creation.icosphere(radius=1) 25 | objects = [axes, sphere] 26 | 27 | for pose in poses: 28 | # a camera is visualized with 8 line segments. 
29 | pos = pose[:3, 3] 30 | a = pos + size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 31 | b = pos - size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 32 | c = pos - size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 33 | d = pos + size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 34 | 35 | segs = np.array( 36 | [ 37 | [pos, a], 38 | [pos, b], 39 | [pos, c], 40 | [pos, d], 41 | [a, b], 42 | [b, c], 43 | [c, d], 44 | [d, a], 45 | ] 46 | ) 47 | segs = trimesh.load_path(segs) 48 | objects.append(segs) 49 | 50 | trimesh.Scene(objects).show() 51 | 52 | 53 | def rand_poses( 54 | size, 55 | radius=1, 56 | theta_range=[np.pi / 3, 2 * np.pi / 3], 57 | phi_range=[0, 2 * np.pi], 58 | ): 59 | """generate random poses from an orbit camera 60 | Args: 61 | size: batch size of generated poses. 62 | radius: camera radius 63 | theta_range: [min, max], should be in [0, \pi] 64 | phi_range: [min, max], should be in [0, 2\pi] 65 | Return: 66 | poses: [size, 4, 4] 67 | """ 68 | 69 | def normalize(vectors): 70 | return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10) 71 | 72 | thetas = ( 73 | torch.rand(size) * (theta_range[1] - theta_range[0]) + theta_range[0] 74 | ) 75 | phis = torch.rand(size) * (phi_range[1] - phi_range[0]) + phi_range[0] 76 | 77 | centers = torch.stack( 78 | [ 79 | radius * torch.sin(thetas) * torch.sin(phis), 80 | radius * torch.cos(thetas), 81 | radius * torch.sin(thetas) * torch.cos(phis), 82 | ], 83 | dim=-1, 84 | ) # [B, 3] 85 | 86 | # lookat 87 | forward_vector = -normalize(centers) 88 | up_vector = ( 89 | torch.FloatTensor([0, -1, 0]).unsqueeze(0).repeat(size, 1) 90 | ) # confused at the coordinate system... 91 | right_vector = normalize(torch.cross(forward_vector, up_vector, dim=-1)) 92 | up_vector = normalize(torch.cross(right_vector, forward_vector, dim=-1)) 93 | 94 | poses = torch.eye(4, dtype=torch.float).unsqueeze(0).repeat(size, 1, 1) 95 | poses[:, :3, :3] = torch.stack( 96 | (right_vector, up_vector, forward_vector), dim=-1 97 | ) 98 | poses[:, :3, 3] = centers 99 | 100 | return poses 101 | -------------------------------------------------------------------------------- /utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import pickle 4 | import subprocess 5 | from typing import Any 6 | 7 | import pandas as pd 8 | import torch 9 | 10 | 11 | def create_dir(dir_name: str): 12 | """Create a directory if it does not exist yet.""" 13 | if not osp.exists(dir_name): 14 | os.makedirs(dir_name) 15 | 16 | 17 | def move_files(source_path: str, destpath: str): 18 | """Move files from `source_path` to `dest_path`.""" 19 | subprocess.call(["mv", source_path, destpath]) 20 | 21 | 22 | def load_pickle(pickle_path: str) -> Any: 23 | """Load a pickle file.""" 24 | with open(pickle_path, "rb") as f: 25 | data = pickle.load(f) 26 | return data 27 | 28 | 29 | def save_pickle(data: Any, pickle_path: str): 30 | """Save data in a pickle file.""" 31 | with open(pickle_path, "wb") as f: 32 | pickle.dump(data, f, protocol=4) 33 | 34 | 35 | def load_txt(txt_path: str): 36 | """Load a txt file.""" 37 | with open(txt_path, "r") as f: 38 | data = f.read() 39 | return data 40 | 41 | 42 | def save_txt(data: str, txt_path: str): 43 | """Save data in a txt file.""" 44 | with open(txt_path, "w") as f: 45 | f.write(data) 46 | 47 | 48 | def load_pth(pth_path: str) -> Any: 49 | """Load a pth (PyTorch) file.""" 50 | data = torch.load(pth_path) 51 | return data 52 | 
53 | 54 | def save_pth(data: Any, pth_path: str): 55 | """Save data in a pth (PyTorch) file.""" 56 | torch.save(data, pth_path) 57 | 58 | 59 | def load_csv(csv_path: str, header: Any = None) -> pd.DataFrame: 60 | """Load a csv file.""" 61 | try: 62 | data = pd.read_csv(csv_path, header=header) 63 | except pd.errors.EmptyDataError: 64 | data = pd.DataFrame() 65 | return data 66 | 67 | 68 | def save_csv(data: Any, csv_path: str): 69 | """Save data in a csv file.""" 70 | pd.DataFrame(data).to_csv(csv_path, header=False, index=False) 71 | -------------------------------------------------------------------------------- /utils/misc_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Dict 2 | 3 | import numpy as np 4 | from omegaconf import DictConfig, OmegaConf 5 | from pytorch_lightning.utilities import rank_zero_only 6 | import rich.tree 7 | import rich.syntax 8 | import cv2 9 | import os.path as osp 10 | import imageio 11 | import trimesh 12 | import torch 13 | 14 | from utils.file_utils import save_pickle 15 | 16 | 17 | @rank_zero_only 18 | def print_config( 19 | config: DictConfig, 20 | fields: Sequence[str] = ( 21 | "compnode", 22 | "model", 23 | "datamodule", 24 | "jaws", 25 | "xp_name", 26 | "seed", 27 | ), 28 | resolve: bool = True, 29 | ) -> None: 30 | """ 31 | Adapted from: https://github.com/ashleve/lightning-hydra-template. 32 | Prints content of DictConfig using Rich library and its tree structure. 33 | 34 | :param config: configuration composed by Hydra. 35 | :param fields: determines which main fields from config will be printed and 36 | in what order. 37 | :param resolve: whether to resolve reference fields of DictConfig. 38 | """ 39 | style = "dim" 40 | tree = rich.tree.Tree("CONFIG", style=style, guide_style=style) 41 | 42 | for field in fields: 43 | branch = tree.add(field, style=style, guide_style=style) 44 | 45 | config_section = config.get(field) 46 | branch_content = str(config_section) 47 | if isinstance(config_section, DictConfig): 48 | branch_content = OmegaConf.to_yaml(config_section, resolve=resolve) 49 | 50 | branch.add(rich.syntax.Syntax(branch_content, "yaml")) 51 | 52 | rich.print(tree) 53 | 54 | with open("config_tree.log", "w") as fp: 55 | rich.print(tree, file=fp) 56 | 57 | 58 | def divide(a: np.array, b: np.array) -> np.array: 59 | """Perform array element-wise division, 0 when dividing by 0.""" 60 | res = np.divide( 61 | a, 62 | b, 63 | out=np.zeros_like(a, dtype=np.float64), 64 | where=(b != 0), 65 | ) 66 | return res 67 | 68 | 69 | def save_video( 70 | chunk, filename, over_write=False, is_resize=False, size=(224, 224) 71 | ): 72 | # for idx, chunk in enumerate(lchunks): 73 | if osp.exists(filename) and not over_write: 74 | return 75 | out = cv2.VideoWriter( 76 | filename + ".mp4", cv2.VideoWriter_fourcc(*"mp4v"), 5, (224, 224) 77 | ) 78 | for frm in chunk: 79 | # print("np.shape frm: ", np.shape(frm)) 80 | if np.shape(frm) != (size[0], size[1], 3): 81 | frm = cv2.resize(frm, size) 82 | out.write(frm) 83 | out.release() 84 | 85 | 86 | def save_nerf_img(_frm, filename): 87 | # to8b 88 | frm = (_frm.detach().cpu().numpy() * 255).astype(np.uint8) 89 | imageio.imwrite(filename, frm) 90 | 91 | 92 | def save_gif(imgs, filename, fps=5, is_resize=False, size=(224, 224)): 93 | final_imgs = [] 94 | for _frm in imgs: 95 | # to8b 96 | frm = (_frm.detach().cpu().numpy() * 255).astype(np.uint8) 97 | if np.shape(frm) != (size[0], size[1], 3) and is_resize: 98 | final_imgs.append(cv2.resize(frm, 
size)) 99 | else: 100 | final_imgs.append(frm) 101 | imageio.mimwrite(filename, final_imgs, fps=fps) 102 | 103 | 104 | def save_traj(poses: torch.Tensor, filename: str, saving_format="pkl"): 105 | if saving_format == "pkl": 106 | dict_save = {} 107 | for idx, pose in enumerate(poses): 108 | dict_save[idx] = pose.cpu().numpy() 109 | save_pickle(dict_save, filename + "." + saving_format) 110 | 111 | 112 | def draw_axis(img, R, t, K): 113 | # unit is mm 114 | rotV, _ = cv2.Rodrigues(R) 115 | points = np.float32( 116 | [[100, 0, 0], [0, 100, 0], [0, 0, 100], [0, 0, 0]] 117 | ).reshape(-1, 3) 118 | axisPoints, _ = cv2.projectPoints(points, rotV, t, K, (0, 0, 0, 0)) 119 | img = cv2.line( 120 | img, 121 | tuple(axisPoints[3].ravel()), 122 | tuple(axisPoints[0].ravel()), 123 | (255, 0, 0), 124 | 3, 125 | ) 126 | img = cv2.line( 127 | img, 128 | tuple(axisPoints[3].ravel()), 129 | tuple(axisPoints[1].ravel()), 130 | (0, 255, 0), 131 | 3, 132 | ) 133 | img = cv2.line( 134 | img, 135 | tuple(axisPoints[3].ravel()), 136 | tuple(axisPoints[2].ravel()), 137 | (0, 0, 255), 138 | 3, 139 | ) 140 | return img 141 | 142 | 143 | def render_cam_pose(pose, intrinsics, render_pose, img): 144 | inv_render_pose = np.linalg.inv(render_pose) 145 | pose_2_render = pose @ inv_render_pose 146 | t = pose_2_render[:3, 3] 147 | R = pose_2_render[:3, :3] 148 | 149 | K = np.zeros((3, 3)) 150 | K[0, 0] = intrinsics[0] # fx 151 | K[1, 1] = intrinsics[1] # fy 152 | K[0, 2] = intrinsics[2] # cx 153 | K[1, 2] = intrinsics[3] # cy 154 | K[2, 2] = 1 155 | 156 | draw_axis(img, R, t, K) 157 | 158 | # uvz = K@pose_2_render 159 | # x = uvz[0]/uvz[2] 160 | # y = uvz[1]/uvz[2] 161 | 162 | 163 | def visualize_poses(poses, file_name, size=0.1): 164 | # poses: [B, 4, 4] 165 | 166 | axes = trimesh.creation.axis(axis_length=4) 167 | # sphere = trimesh.creation.icosphere(radius=1) 168 | objects = [axes] 169 | 170 | for pose in poses: 171 | # a camera is visualized with 8 line segments. 172 | pos = pose[:3, 3] 173 | a = pos + size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 174 | b = pos - size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 175 | c = pos - size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 176 | d = pos + size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 177 | 178 | segs = np.array( 179 | [ 180 | [pos, a], 181 | [pos, b], 182 | [pos, c], 183 | [pos, d], 184 | [a, b], 185 | [b, c], 186 | [c, d], 187 | [d, a], 188 | ] 189 | ) 190 | segs = trimesh.load_path(segs) 191 | objects.append(segs) 192 | scene = trimesh.Scene(objects) 193 | png = scene.save_image(resolution=[800, 800], visible=True) 194 | with open(file_name, "wb") as f: 195 | f.write(png) 196 | f.close() 197 | 198 | 199 | def cfg2dict(cfg: DictConfig) -> Dict: 200 | """ 201 | Recursively convert OmegaConf to vanilla dict 202 | :param cfg: 203 | :return: 204 | """ 205 | cfg_dict = {} 206 | for k, v in cfg.items(): 207 | if type(v) == DictConfig: 208 | cfg_dict[k] = cfg2dict(v) 209 | else: 210 | cfg_dict[k] = v 211 | return cfg_dict 212 | --------------------------------------------------------------------------------