├── README.md ├── data └── jaws_dolly_zoom_mask │ ├── img_0001.png │ ├── img_0002.png │ ├── img_0003.png │ ├── img_0004.png │ ├── img_0005.png │ ├── img_0006.png │ ├── img_0007.png │ ├── img_0008.png │ ├── img_0009.png │ ├── img_0010.png │ ├── img_0011.png │ ├── img_0012.png │ └── saved_data.pk ├── jaws ├── configs │ ├── batch_jaws.yaml │ ├── compnode │ │ ├── cpu_debug.yaml │ │ └── light_1n_1g_28b.yaml │ ├── datamodule │ │ └── jaws_dollyzoom.yaml │ ├── demo_jaws.yaml │ ├── jaws │ │ └── batch.yaml │ ├── model │ │ ├── dnerf.yaml │ │ ├── dnerf_rendering.yaml │ │ └── nerf.yaml │ ├── render_dnerf.yaml │ ├── render_jaws.yaml │ ├── train_dnerf.yaml │ └── train_nerf.yaml ├── run.py └── src │ ├── datamodules │ ├── datasets │ │ ├── dnerf_dataset.py │ │ └── nerf_dataset.py │ └── nerf_datamodule.py │ ├── infer.py │ ├── jaws.py │ ├── models │ ├── base_model.py │ ├── callbacks │ │ ├── early_stopping.py │ │ └── grad_norm.py │ ├── dnerf_model.py │ ├── jaws_model.py │ ├── metrics │ │ ├── angular_loss.py │ │ ├── pose_loss.py │ │ └── vgg_loss.py │ ├── modules │ │ ├── camera_transform.py │ │ ├── dnerf │ │ │ ├── network.py │ │ │ └── renderer.py │ │ ├── feature │ │ │ ├── flow_estimator.py │ │ │ └── raft.py │ │ ├── nerf │ │ │ ├── network.py │ │ │ ├── network_ff.py │ │ │ └── renderer.py │ │ └── nerf_factory.py │ └── nerf_model.py │ ├── render.py │ └── train.py ├── lib └── LitePose │ ├── _init_paths.py │ ├── experiments │ ├── coco │ │ ├── ddrnet │ │ │ └── ddrnet23s.yaml │ │ ├── higher_hrnet │ │ │ ├── w32_512_adam_lr1e-3.yaml │ │ │ ├── w32_640_adam_lr1e-3.yaml │ │ │ └── w48_640_adam_lr1e-3.yaml │ │ └── mobilenet │ │ │ ├── mobile.yaml │ │ │ └── supermobile.yaml │ └── crowd_pose │ │ ├── ddrnet │ │ └── ddrnet23s.yaml │ │ ├── efficient_hrnet │ │ ├── H-1.yaml │ │ ├── H-2.yaml │ │ ├── H-3.yaml │ │ └── H-4.yaml │ │ ├── higher_hrnet │ │ ├── w16_512_adam_lr1e-3.yaml │ │ ├── w32_512_adam_lr1e-3.yaml │ │ ├── w32_512_adam_lr1e-3_coco.yaml │ │ ├── w32_512_adam_lr1e-3_syncbn.yaml │ │ ├── w32_640_adam_lr1e-3.yaml │ │ └── w48_640_adam_lr1e-3.yaml │ │ ├── mobilenet │ │ ├── mobile.yaml │ │ └── supermobile.yaml │ │ ├── resnet │ │ ├── resnet.yaml │ │ └── superresnet.yaml │ │ └── simplenet │ │ └── simplenet.yaml │ ├── lib │ ├── arch_manager.py │ ├── config │ │ ├── __init__.py │ │ ├── default.py │ │ └── models.py │ ├── core │ │ ├── group.py │ │ ├── inference.py │ │ ├── loss.py │ │ └── trainer.py │ ├── dataset │ │ ├── COCODataset.py │ │ ├── COCOKeypoints.py │ │ ├── CrowdPoseDataset.py │ │ ├── CrowdPoseKeypoints.py │ │ ├── __init__.py │ │ ├── build.py │ │ ├── target_generators │ │ │ ├── __init__.py │ │ │ └── target_generators.py │ │ └── transforms │ │ │ ├── __init__.py │ │ │ ├── build.py │ │ │ └── transforms.py │ ├── fp16_utils │ │ ├── __init__.py │ │ ├── fp16_optimizer.py │ │ ├── fp16util.py │ │ └── loss_scaler.py │ ├── models │ │ ├── __init__.py │ │ ├── layers │ │ │ ├── efficient_blocks.py │ │ │ ├── layers.py │ │ │ └── super_layers.py │ │ ├── pose_efficient_hrnet.py │ │ ├── pose_higher_hrnet.py │ │ ├── pose_mobilenet.py │ │ ├── pose_resnet.py │ │ ├── pose_simplenet.py │ │ ├── pose_supermobilenet.py │ │ └── pose_superresnet.py │ └── utils │ │ ├── transforms.py │ │ ├── utils.py │ │ ├── vis.py │ │ └── zipreader.py │ └── mobile_configs │ ├── prune-L.json │ ├── prune-M.json │ ├── prune-S.json │ ├── search-L.json │ ├── search-M.json │ ├── search-S.json │ └── search-XS.json ├── requirements.txt ├── run_demo.py └── utils ├── camera_utils.py ├── coord_utils.py ├── data_utils.py ├── file_utils.py ├── flow_utils.py ├── image_utils.py ├── 
loss_utils.py ├── misc_utils.py └── nerf_utils.py
/README.md:
--------------------------------------------------------------------------------
1 | # JAWS: Just a Wild Shot for Cinematic Transfer in Neural Radiance Fields
2 | 
3 | By Xi Wang*, Robin Courant*, Jinglei Shi, Eric Marchand and Marc Christie
4 | 
5 | CVPR 2023
6 | 
7 | ### [Project Page](https://www.lix.polytechnique.fr/vista/projects/2023_cvpr_wang/) | [arXiv](https://arxiv.org/pdf/2303.15427.pdf) | [Paper + Supp](https://inria.hal.science/hal-04046701v1/file/main.pdf)
8 | 
9 | ## Installation
10 | 
11 | 1. Create the working environment:
12 | ```
13 | conda create --name jaws -y python=3.10
14 | conda activate jaws
15 | ```
16 | 
17 | 2. Install the dependencies (adapt them to your CUDA version):
18 | ```
19 | conda install pytorch==1.12.0 torchvision==0.13.0 torchaudio==0.12.0 cudatoolkit=11.3 -c pytorch
20 | pip install -r requirements.txt
21 | ```
22 | 
23 | 3. Use the correct torch-ngp version:
24 | ```
25 | mkdir ./lib
26 | git clone git@github.com:ashawkey/torch-ngp.git
27 | mv torch-ngp torch_ngp
28 | cd torch_ngp
29 | git checkout 3c14ad5d1a8a36f8d36604d1bbd91515fb4416fa
30 | ln -s lib/torch_ngp dir_to/torch_ngp
31 | ```
32 | 
33 | 4. Download the `LitePose` [checkpoints](https://drive.google.com/drive/folders/1Jlh-bmS85RDWuspZUG-ncWYA7F8iXsa_?usp=drive_link) and put them in `lib/LitePose/ckpt`
34 | 
35 | 5. Download the example dataset [flame_steak_frms_time](https://drive.google.com/file/d/15fO8J3G7k9X9cDb6LEorU60CdVnwMh1D/view?usp=drive_link) and put it in `./data`
36 | 
37 | ## Usage
38 | 
39 | Train NeRF:
40 | ```
41 | python jaws/run.py --config-name train_nerf data_dir=/path/to/dataset xp_name=xp_name datamodule=jaws_dollyzoom.yaml
42 | ```
43 | 
44 | Launch JAWS:
45 | ```
46 | python jaws/run.py --config-name batch_jaws data_dir=path/to/data/dir/flame_steak_frms_time/ xp_name=xp_name jaws.target_dir=data/jaws_dolly_zoom_mask datamodule=jaws_dollyzoom.yaml
47 | ```
48 | 
49 | Render images:
50 | ```
51 | python jaws/run.py --config-name render_jaws data_dir=path/to/data/dir/flame_steak_frms_time/ xp_name=xp_name jaws.target_dir=data/jaws_dolly_zoom_mask datamodule=jaws_dollyzoom.yaml render_target_dir=path/to/results/dir/final_res_n
52 | ```
53 | 
54 | ## Citation
55 | 
56 | ```
57 | @InProceedings{Wang_2023_CVPR,
58 |     author    = {Wang, Xi and Courant, Robin and Shi, Jinglei and Marchand, Eric and Christie, Marc},
59 |     title     = {JAWS: Just a Wild Shot for Cinematic Transfer in Neural Radiance Fields},
60 |     booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
61 |     year      = {2023},
62 | }
63 | ```
64 | 
--------------------------------------------------------------------------------
/data/jaws_dolly_zoom_mask/img_0001.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0001.png
--------------------------------------------------------------------------------
/data/jaws_dolly_zoom_mask/img_0002.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0002.png
--------------------------------------------------------------------------------
/data/jaws_dolly_zoom_mask/img_0003.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0003.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0004.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0004.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0005.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0005.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0006.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0006.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0007.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0007.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0008.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0008.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0009.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0009.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0010.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0010.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0011.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0011.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/img_0012.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/img_0012.png -------------------------------------------------------------------------------- /data/jaws_dolly_zoom_mask/saved_data.pk: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/data/jaws_dolly_zoom_mask/saved_data.pk 
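
The `data/jaws_dolly_zoom_mask` folder listed above is the example target clip that the README commands pass to JAWS through `jaws.target_dir`. As a quick sanity check before launching a run, the sketch below loads its contents; it is a hypothetical helper rather than part of the repository, and it assumes the `img_*.png` frames are ordinary 8-bit images and that `saved_data.pk` is a plain Python pickle (the repo ships `save_pickle`/`load_pickle` helpers in `utils/file_utils.py`, but the payload layout is not documented here).

```python
# Hypothetical inspection snippet -- not part of the repository.
import pickle
from pathlib import Path

import cv2  # OpenCV is already used throughout the project (e.g. base_model.py)

target_dir = Path("data/jaws_dolly_zoom_mask")

# Read the target frames in index order (img_0001.png ... img_0012.png).
frames = [
    cv2.cvtColor(cv2.imread(str(p)), cv2.COLOR_BGR2RGB)
    for p in sorted(target_dir.glob("img_*.png"))
]
print(f"{len(frames)} target frames, first frame shape: {frames[0].shape}")

# saved_data.pk is assumed to be a standard pickle; inspect it before
# relying on any particular structure.
with open(target_dir / "saved_data.pk", "rb") as f:
    saved = pickle.load(f)
print(type(saved))
```
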
-------------------------------------------------------------------------------- /jaws/configs/batch_jaws.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | - jaws: batch.yaml 7 | 8 | ######################################################### 9 | # Parameters 10 | 11 | # num of sanity checking step 12 | num_sanity_val_steps: 0 13 | # num of epochs 14 | num_epochs: 501 15 | # num of training epoch after validation 16 | check_val_every_n_epoch: 50 17 | # num rays sampled per image for each training step 18 | num_rays: 4096 19 | # num steps sampled per ray 20 | num_steps: 128 21 | # num steps up-sampled per ray 22 | upsample_steps: 0 23 | # batch size of rays at inference to avoid OOM 24 | max_ray_batch: 4096 25 | # num of checkpoints to keep 26 | num_checkpoints: 2 27 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 28 | run_type: jaws 29 | dynamic: false 30 | error_map: false 31 | saturation_loss: false 32 | floater_ratio: 0 33 | 34 | aabb: 1.0 35 | ######################################################### 36 | # Wandb 37 | # Name of the project is accessed by loggers 38 | project_name: jaws 39 | # Name of the run is accessed by loggers 40 | xp_name: ${xp_name} 41 | # Name of the group by loggers 42 | group_name: 3imgs_translation_z 43 | # Name of the job type 44 | job_type: null 45 | # Wether to synced the logs or not (WandB) 46 | log_offline: false 47 | # Metric to monitor to save models 48 | checkpoint_metric: train/loss 49 | 50 | ######################################################### 51 | # Hydra 52 | root: ${hydra:runtime.cwd} 53 | # Path to folder with data 54 | data_dir: ${data_dir} 55 | # Path to folder to save results 56 | result_dir: ${root}/results/${xp_name} 57 | 58 | 59 | # Pretty print config at the start of the run using Rich library 60 | print_config: True 61 | 62 | hydra: 63 | run: 64 | dir: ${hydra:runtime.cwd}/logs 65 | output_subdir: null 66 | sweep: 67 | dir: ${hydra:runtime.cwd}/logs 68 | subdir: ${hydra:runtime.cwd}/logs 69 | 70 | ######################################################### 71 | # System 72 | # Disable (or not) python warnings 73 | ignore_warnings: True 74 | device: cuda 75 | # Seed for random number generators 76 | seed: 1 -------------------------------------------------------------------------------- /jaws/configs/compnode/cpu_debug.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 4 2 | num_workers: 0 3 | num_gpus: 0 4 | num_nodes: 1 5 | accelerator: ddp -------------------------------------------------------------------------------- /jaws/configs/compnode/light_1n_1g_28b.yaml: -------------------------------------------------------------------------------- 1 | batch_size: 28 2 | num_workers: 15 3 | num_gpus: 1 4 | num_nodes: 1 5 | accelerator: cuda -------------------------------------------------------------------------------- /jaws/configs/datamodule/jaws_dollyzoom.yaml: -------------------------------------------------------------------------------- 1 | # dataset mode supports (colmap blender) 2 | mode: blender 3 | independent_calibration: false 4 | # preload all data into GPU accelerate training but use more GPU memory 5 | preload: false 6 | # assume the scene is bounded in box[-bound bound]^3 if > 1 "will invoke adaptive ray marching." 
the fox dataset 7 | bound: 2 8 | # scale camera location into box[-bound bound]^3 9 | scale: 0.3 10 | # dt_gamma (>=0) for adaptive ray marching. set to 0 to disable" >0 to accelerate rendering (but usually with worse quality) 11 | dt_gamma: 0.05 12 | # minimum near distance for camera"25 13 | min_near: 0.0 14 | # threshold for density grid to be occupied 15 | density_thresh: 0.01 16 | # <0 uses no rand pose, =0 only uses rand pose, >0 sample one rand pose every $ known poses 17 | rand_pose: -1 18 | 19 | # use fully-fused MLP 20 | ff: False 21 | background_radius: -1 22 | background_perlin_noise: False 23 | 24 | # during INERF 25 | 26 | focal_resize_factor: 1.0 # if -1, dynamic search 27 | init_pose_dist_factor: 1.0 28 | 29 | init_search: saved # ground_truth, index 30 | init_cam_idx: 12 31 | 32 | blur_pred: false 33 | 34 | aabb: [-2, -2, -1, 2, 2, 0.8] 35 | # aabb: [-2, -2, -2, 2, 2, 1.3] 36 | 37 | anim_start_time: 0.0 # if negative, used saved 38 | auto_anim_time: false 39 | only_init_focal_search: false # all auto 40 | two_strokes: true 41 | 42 | alpha_losses: 0.3 43 | # first temporal second spatial 44 | alpha_two_strokes: 0.1 -------------------------------------------------------------------------------- /jaws/configs/demo_jaws.yaml: -------------------------------------------------------------------------------- 1 | # Config file used to override default config values for demo_jaws.py 2 | 3 | data_dir: ${hydra:runtime.cwd}/data/flame_steak 4 | xp_name: flame_steak 5 | model: nerf 6 | target_dir_video: ${hydra:runtime.cwd}/data/jaws_dolly_zoom 7 | datamodule: jaws_dollyzoom 8 | 9 | num_epochs: 101 10 | num_mixed_grad: 8000 11 | num_steps: 256 12 | lr: 0.015 13 | loss_type: pose_flow 14 | flow_loss_type: EE 15 | alpha_losses: 0.97 16 | alpha_two_strokes: 0.1 17 | guidance_type: guidance 18 | 19 | init_cam_idx_same: 14 20 | init_focal_search: False 21 | two_strokes: True 22 | 23 | diff_temporal: False 24 | diff_focal: True 25 | seed: 1 26 | 27 | ######################################################################################### 28 | defaults: 29 | - _self_ 30 | - override hydra/hydra_logging: disabled 31 | - override hydra/job_logging: disabled 32 | 33 | hydra: 34 | run: 35 | dir: ${hydra:runtime.cwd} 36 | output_subdir: null 37 | sweep: 38 | dir: ${hydra:runtime.cwd}/logs 39 | subdir: ${hydra:runtime.cwd}/logs -------------------------------------------------------------------------------- /jaws/configs/jaws/batch.yaml: -------------------------------------------------------------------------------- 1 | num_epochs: 201 2 | num_sample_rays: 2048 3 | num_sample_grad: 8000 4 | image_size: [224, 224] 5 | log_interval: 25 6 | blur_kernel: [3, 3] 7 | blur_sigma: 1 8 | learning_rate: 0.005 9 | target_dir: null 10 | pixel_loss: false 11 | pixel_loss_type: vgg # vgg, mse 12 | flow_loss: true 13 | flow_loss_type: EE # EE, AN 14 | pose_loss: true 15 | pose_loss_type: heatmap # euclidean 16 | grad_norm: true 17 | tag: null 18 | regularize_loss_range: 1.0 19 | regularize_loss_weight: 5 20 | guidance_map: true 21 | guidance_type: guidance # guidance, inerf_original, random 22 | # 23 | early_stop_num: 401 24 | early_stop_delta: 0 25 | 26 | # 27 | diff_temporal: false 28 | diff_focal: true 29 | allow_backward_t: false -------------------------------------------------------------------------------- /jaws/configs/model/dnerf.yaml: -------------------------------------------------------------------------------- 1 | # initial learning rate 2 | lr: 1e-2 3 | lr_net: 1e-3 4 | ckpt: "latest" 5 | # use 
amp mixed precision training 6 | fp16: True 7 | 8 | warp_encoding: frequency 9 | time_encoding: frequency 10 | sigma_encoding: tiledgrid 11 | direction_encoding: sphere_harmonics 12 | background_encoding: hashgrid 13 | n_warp_layers: 5 14 | n_sigma_layers: 2 15 | n_color_layers: 3 16 | n_background_layers: 2 17 | warp_hidden_dim: 128 18 | sigma_hidden_dim: 64 19 | color_hidden_dim: 64 20 | background_hidden_dim: 64 21 | geo_feat_dim: 15 22 | 23 | raft_checkpoint: ${root}/checkpoints/raft-things.pth 24 | encoder_checkpoint: ${root}/checkpoints/autoencoder-mse-unit-224-noskip-490-last.ckpt 25 | model_size: small 26 | encoder_num_levels: 16 -------------------------------------------------------------------------------- /jaws/configs/model/dnerf_rendering.yaml: -------------------------------------------------------------------------------- 1 | # initial learning rate 2 | lr: 1e-2 3 | lr_net: 1e-3 4 | ckpt: "latest" 5 | # use amp mixed precision training 6 | fp16: True 7 | 8 | warp_encoding: frequency 9 | time_encoding: frequency 10 | sigma_encoding: tiledgrid 11 | direction_encoding: sphere_harmonics 12 | background_encoding: hashgrid 13 | n_warp_layers: 7 14 | n_sigma_layers: 5 15 | n_color_layers: 5 16 | n_background_layers: 2 17 | warp_hidden_dim: 128 18 | sigma_hidden_dim: 128 19 | color_hidden_dim: 128 20 | background_hidden_dim: 128 21 | geo_feat_dim: 15 22 | 23 | raft_checkpoint: ${root}/checkpoints/raft-things.pth 24 | encoder_checkpoint: ${root}/checkpoints/autoencoder-mse-unit-224-noskip-490-last.ckpt 25 | model_size: small 26 | encoder_num_levels: 32 -------------------------------------------------------------------------------- /jaws/configs/model/nerf.yaml: -------------------------------------------------------------------------------- 1 | # initial learning rate 2 | lr: 1e-2 3 | ckpt: "latest" 4 | # use amp mixed precision training 5 | fp16: True 6 | 7 | sigma_encoding: hashgrid 8 | direction_encoding: sphere_harmonics 9 | background_encoding: hashgrid 10 | n_sigma_layers: 2 11 | n_color_layers: 3 12 | n_background_layers: 2 13 | sigma_hidden_dim: 64 14 | color_hidden_dim: 64 15 | background_hidden_dim: 64 16 | geo_feat_dim: 15 17 | 18 | raft_checkpoint: ${root}/checkpoints/raft-things.pth 19 | encoder_checkpoint: ${root}/checkpoints/autoencoder-mse-unit-224-noskip-490-last.ckpt 20 | model_size: small 21 | encoder_num_levels: 16 -------------------------------------------------------------------------------- /jaws/configs/render_dnerf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | # - datamodule: lego_dataset_small.yaml 7 | # - datamodule: 1per_dataset.yaml 8 | # - datamodule: firekeeper.yaml 9 | 10 | # num of sanity checking step 11 | num_sanity_val_steps: 0 12 | # num of epochs 13 | num_epochs: 1 14 | # num of training epoch after validation 15 | check_val_every_n_epoch: 50 16 | # num rays sampled per image for each training step 17 | num_rays: 8192 18 | # num steps sampled per ray 19 | num_steps: 800 20 | # num steps up-sampled per ray 21 | upsample_steps: 0 22 | # batch size of rays at inference to avoid OOM 23 | max_ray_batch: 4096 24 | # num of checkpoints to keep 25 | num_checkpoints: 2 26 | # if activate error map 27 | error_map: false 28 | # if using RGB+S for training loss 29 | saturation_loss: false 30 | floater_ratio: 0 31 | 32 | 33 | ################################################################### 34 | # 
Type of run to launch (current: train TODO: debug/eval/infer/...) 35 | run_type: val 36 | dynamic: True 37 | 38 | # Name of the project is accessed by loggers 39 | project_name: jaws 40 | # Name of the run is accessed by loggers 41 | xp_name: ${xp_name} 42 | # Wether to synced the logs or not (WandB) 43 | log_offline: false 44 | # Metric to monitor to save models 45 | checkpoint_metric: train/loss 46 | 47 | root: ${hydra:runtime.cwd} 48 | # Path to folder with data 49 | data_dir: ${data_dir} 50 | # Path to folder to save results 51 | result_dir: ${root}/results/${xp_name} 52 | 53 | # Seed for random number generators 54 | seed: 1 55 | # Pretty print config at the start of the run using Rich library 56 | print_config: True 57 | # Disable (or not) python warnings 58 | ignore_warnings: True 59 | 60 | device: cuda 61 | hydra: 62 | run: 63 | dir: ${hydra:runtime.cwd}/logs 64 | output_subdir: null 65 | sweep: 66 | dir: ${hydra:runtime.cwd}/logs 67 | subdir: ${hydra:runtime.cwd}/logs 68 | 69 | 70 | srv_target_dir: /home/xi/Work/jaws/misc/matrix 71 | img_size: [224,224] 72 | render_target_dir: none 73 | render_frame_num: 1 -------------------------------------------------------------------------------- /jaws/configs/render_jaws.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | - jaws: batch.yaml 7 | 8 | ######################################################### 9 | # Parameters 10 | 11 | # num of sanity checking step 12 | num_sanity_val_steps: 0 13 | # num of epochs 14 | num_epochs: 501 15 | # num of training epoch after validation 16 | check_val_every_n_epoch: 50 17 | # num rays sampled per image for each training step 18 | num_rays: 4096 19 | # num steps sampled per ray 20 | num_steps: 128 21 | # num steps up-sampled per ray 22 | upsample_steps: 0 23 | # batch size of rays at inference to avoid OOM 24 | max_ray_batch: 4096 25 | # num of checkpoints to keep 26 | num_checkpoints: 2 27 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 
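# Note: jaws/run.py dispatches on this value: "train" runs jaws/src/train.py,
# "jaws" runs jaws/src/jaws.py and "render" runs jaws/src/render.py.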
28 | run_type: render 29 | dynamic: false 30 | error_map: false 31 | saturation_loss: false 32 | floater_ratio: 0 33 | 34 | aabb: 1.0 35 | ######################################################### 36 | # Render 37 | render_target_dir: null 38 | render_frame_num: 100 39 | ######################################################### 40 | # Wandb 41 | # Name of the project is accessed by loggers 42 | project_name: jaws 43 | # Name of the run is accessed by loggers 44 | xp_name: ${xp_name} 45 | # Name of the group by loggers 46 | group_name: 3imgs_translation_z 47 | # Name of the job type 48 | job_type: null 49 | # Wether to synced the logs or not (WandB) 50 | log_offline: false 51 | # Metric to monitor to save models 52 | checkpoint_metric: train/loss 53 | 54 | ######################################################### 55 | # Hydra 56 | root: ${hydra:runtime.cwd} 57 | # Path to folder with data 58 | data_dir: ${data_dir} 59 | # Path to folder to save results 60 | result_dir: ${root}/results/${xp_name} 61 | 62 | 63 | # Pretty print config at the start of the run using Rich library 64 | print_config: True 65 | 66 | hydra: 67 | run: 68 | dir: ${hydra:runtime.cwd}/logs 69 | output_subdir: null 70 | sweep: 71 | dir: ${hydra:runtime.cwd}/logs 72 | subdir: ${hydra:runtime.cwd}/logs 73 | 74 | ######################################################### 75 | # System 76 | # Disable (or not) python warnings 77 | ignore_warnings: True 78 | device: cuda 79 | # Seed for random number generators 80 | seed: 1 -------------------------------------------------------------------------------- /jaws/configs/train_dnerf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: dnerf.yaml 5 | - datamodule: mutant.yaml 6 | 7 | # num of sanity checking step 8 | num_sanity_val_steps: 0 9 | # num of epochs 10 | num_epochs: 1401 11 | # num of training epoch after validation 12 | check_val_every_n_epoch: 25 13 | # num rays sampled per image for each training step 14 | num_rays: 2048 15 | # num steps sampled per ray 16 | num_steps: 512 17 | # num steps up-sampled per ray 18 | upsample_steps: 2 19 | # batch size of rays at inference to avoid OOM 20 | max_ray_batch: 4096 21 | # num of checkpoints to keep 22 | num_checkpoints: 2 23 | # if activate error map 24 | error_map: false 25 | # if using RGB+S for training loss 26 | saturation_loss: false 27 | # normally between 0.01 to 0.001, zero when not applied 28 | floater_ratio: 0.0005 29 | 30 | 31 | ################################################################### 32 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 
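# Note: with dynamic: true, jaws/src/models/modules/nerf_factory.py builds the
# time-conditioned DNeRFModel instead of the static NeRF model.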
33 | run_type: train 34 | dynamic: true 35 | 36 | # Name of the project is accessed by loggers 37 | project_name: jaws 38 | # Name of the run is accessed by loggers 39 | xp_name: ${xp_name} 40 | # Wether to synced the logs or not (WandB) 41 | log_offline: false 42 | # Metric to monitor to save models 43 | checkpoint_metric: train/loss 44 | 45 | root: ${hydra:runtime.cwd} 46 | # Path to folder with data 47 | data_dir: ${data_dir} 48 | # Path to folder to save results 49 | result_dir: ${root}/results/${xp_name} 50 | 51 | # Seed for random number generators 52 | seed: 1 53 | # Pretty print config at the start of the run using Rich library 54 | print_config: True 55 | # Disable (or not) python warnings 56 | ignore_warnings: True 57 | 58 | device: cuda 59 | hydra: 60 | run: 61 | dir: ${hydra:runtime.cwd}/logs 62 | output_subdir: null 63 | sweep: 64 | dir: ${hydra:runtime.cwd}/logs 65 | subdir: ${hydra:runtime.cwd}/logs 66 | 67 | 68 | 69 | srv_target_dir: /home/xi/Work/jaws/misc/kid_rocky 70 | img_size: [224,224] -------------------------------------------------------------------------------- /jaws/configs/train_nerf.yaml: -------------------------------------------------------------------------------- 1 | defaults: 2 | - _self_ 3 | - compnode: light_1n_1g_28b.yaml 4 | - model: nerf.yaml 5 | - datamodule: lego.yaml 6 | # - datamodule: lego_dataset_small.yaml 7 | # - datamodule: 1per_dataset.yaml 8 | # - datamodule: firekeeper.yaml 9 | 10 | # num of sanity checking step 11 | num_sanity_val_steps: 0 12 | # num of epochs 13 | num_epochs: 2048 14 | # num of training epoch after validation 15 | check_val_every_n_epoch: 15 16 | # num rays sampled per image for each training step 17 | num_rays: 8096 18 | # num steps sampled per ray 19 | num_steps: 512 20 | # num steps up-sampled per ray 21 | upsample_steps: 0 22 | # batch size of rays at infernce to avoid OOM 23 | max_ray_batch: 4096 24 | # num of checkpoints to keep 25 | num_checkpoints: 2 26 | # if activate error map 27 | error_map: false 28 | # if using RGB+S for training loss 29 | saturation_loss: false 30 | # normally between 0.01 to 0.001, zero when not applied 31 | floater_ratio: -1 32 | 33 | ################################################################### 34 | # Type of run to launch (current: train TODO: debug/eval/infer/...) 
35 | run_type: train 36 | dynamic: false 37 | 38 | # Name of the project is accessed by loggers 39 | project_name: jaws 40 | # Name of the run is accessed by loggers 41 | xp_name: ${xp_name} 42 | # Wether to synced the logs or not (WandB) 43 | log_offline: false 44 | # Metric to monitor to save models 45 | checkpoint_metric: train/loss 46 | 47 | root: ${hydra:runtime.cwd} 48 | # Path to folder with data 49 | data_dir: ${data_dir} 50 | # Path to folder to save results 51 | result_dir: ${root}/results/${xp_name} 52 | 53 | # Seed for random number generators 54 | seed: 1 55 | # Pretty print config at the start of the run using Rich library 56 | print_config: True 57 | # Disable (or not) python warnings 58 | ignore_warnings: True 59 | 60 | device: cuda 61 | hydra: 62 | run: 63 | dir: ${hydra:runtime.cwd}/logs 64 | output_subdir: null 65 | sweep: 66 | dir: ${hydra:runtime.cwd}/logs 67 | subdir: ${hydra:runtime.cwd}/logs -------------------------------------------------------------------------------- /jaws/run.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import sys 3 | 4 | import hydra 5 | from omegaconf import DictConfig 6 | 7 | from utils.nerf_utils import seed_everything 8 | 9 | 10 | @hydra.main( 11 | config_path="configs/", config_name="train_nerf.yaml", version_base="1.2" 12 | ) 13 | def main(config: DictConfig): 14 | sys.path.append(osp.join(config.root, "lib", "torch_ngp")) 15 | seed_everything(config.seed) 16 | 17 | if config.run_type == "train": 18 | from jaws.src.train import train 19 | 20 | train(config) 21 | 22 | if config.run_type == "jaws": 23 | from jaws.src.jaws import jaws 24 | 25 | jaws(config) 26 | 27 | if config.run_type == "render": 28 | from jaws.src.render import render 29 | 30 | render(config) 31 | 32 | 33 | if __name__ == "__main__": 34 | main() 35 | -------------------------------------------------------------------------------- /jaws/src/datamodules/nerf_datamodule.py: -------------------------------------------------------------------------------- 1 | from pytorch_lightning import LightningDataModule 2 | from torch.utils.data import DataLoader 3 | 4 | import jaws.src.datamodules.datasets.dnerf_dataset as dnerf 5 | import jaws.src.datamodules.datasets.nerf_dataset as nerf 6 | 7 | 8 | class NeRFDataModule(LightningDataModule): 9 | """Initialize train, val and test base data loader.""" 10 | 11 | def __init__( 12 | self, 13 | data_type: str, 14 | num_rays: int, 15 | path: str, 16 | mode: str, 17 | preload: bool, 18 | scale: float, 19 | bound: int, 20 | rand_pose: bool, 21 | ind_calib: bool = False, 22 | error_map: bool = False, 23 | aabb=None, 24 | ): 25 | super().__init__() 26 | self._num_rays = num_rays 27 | self._path = path 28 | self._mode = mode 29 | self._preload = preload 30 | self._scale = scale 31 | self._bound = bound 32 | self._aabb = aabb 33 | self._rand_pose = rand_pose 34 | self._ind_calib = ind_calib 35 | self._error_map = error_map 36 | 37 | if data_type == "dynamic": 38 | self.dataset = dnerf.DNeRFDataset 39 | else: 40 | self.dataset = nerf.NeRFDataset 41 | 42 | def train_dataloader(self) -> DataLoader: 43 | """Load train set loader.""" 44 | self.train_dataset = self.dataset( 45 | num_rays=self._num_rays, 46 | path=self._path, 47 | mode=self._mode, 48 | preload=self._preload, 49 | scale=self._scale, 50 | bound=self._bound, 51 | aabb=self._aabb, 52 | rand_pose=self._rand_pose, 53 | type="train", 54 | ind_calibration=self._ind_calib, 55 | error_map=self._error_map, 56 | ) 57 | return 
self.train_dataset.dataloader() 58 | 59 | def val_dataloader(self) -> DataLoader: 60 | """Load val set loader.""" 61 | return self.dataset( 62 | num_rays=self._num_rays, 63 | path=self._path, 64 | mode=self._mode, 65 | preload=self._preload, 66 | scale=self._scale, 67 | bound=self._bound, 68 | aabb=self._aabb, 69 | rand_pose=self._rand_pose, 70 | type="val", 71 | ind_calibration=self._ind_calib, 72 | error_map=self._error_map, 73 | ).dataloader() 74 | 75 | def test_dataloader(self) -> DataLoader: 76 | """Load test set loader.""" 77 | return self.dataset( 78 | num_rays=self._num_rays, 79 | path=self._path, 80 | mode=self._mode, 81 | preload=self._preload, 82 | scale=self._scale, 83 | bound=self._bound, 84 | aabb=self._aabb, 85 | rand_pose=self._rand_pose, 86 | type="test", 87 | ind_calibration=self._ind_calib, 88 | error_map=self._error_map, 89 | ).dataloader() 90 | -------------------------------------------------------------------------------- /jaws/src/infer.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from omegaconf import DictConfig 3 | import os 4 | import os.path as osp 5 | import sys 6 | 7 | import torch 8 | from torch import optim 9 | from pytorch_lightning import Trainer 10 | from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor 11 | from pytorch_lightning.loggers import WandbLogger 12 | 13 | from jaws.src.models.nerf_model import NeRFModel 14 | from jaws.src.datamodules.nerf_datamodule import NeRFDataModule 15 | from utils.file_utils import create_dir 16 | from jaws.src.models.modules.nerf_factory import create_nerf_model 17 | 18 | 19 | def infer(config: DictConfig): 20 | sys.path.append(osp.join(".", "lib", "torch_ngp")) 21 | model = create_nerf_model(config) 22 | 23 | # Initialize dataset 24 | data_module = NeRFDataModule( 25 | data_type="dynamic" if config.dynamic else "static", 26 | num_rays=config.num_rays, 27 | path=config.data_dir, 28 | mode=config.datamodule.mode, 29 | preload=config.datamodule.preload, 30 | scale=config.datamodule.scale, 31 | bound=config.datamodule.bound, 32 | rand_pose=config.datamodule.rand_pose, 33 | ind_calib=config.datamodule.independent_calibration, 34 | error_map=config.error_map, 35 | ) 36 | 37 | # Initialize trainer 38 | checkpoint_dir = osp.join(config.result_dir, "checkpoints") 39 | if not osp.exists(checkpoint_dir): 40 | create_dir(checkpoint_dir) 41 | if config.model.ckpt == "latest": 42 | checkpoint_list = sorted(os.listdir(checkpoint_dir)) 43 | if len(checkpoint_list) > 0: 44 | checkpoint_path = osp.join(checkpoint_dir, checkpoint_list[-1]) 45 | else: 46 | checkpoint_path = None 47 | elif config.model.ckpt == "scratch": 48 | checkpoint_path = None 49 | else: 50 | checkpoint_path = config.model.ckpt 51 | checkpoint = ModelCheckpoint( 52 | monitor=config.checkpoint_metric, 53 | mode="min", 54 | save_top_k=config.num_checkpoints, 55 | dirpath=checkpoint_dir, 56 | filename="{epoch}", 57 | save_on_train_epoch_end=True, 58 | ) 59 | timestamp = datetime.now().strftime("%m-%d_%H-%M") 60 | wandb_logger = WandbLogger( 61 | name="_".join([config.xp_name, "nerf", timestamp]), 62 | project=config.project_name, 63 | offline=config.log_offline, 64 | ) 65 | lr_monitor = LearningRateMonitor(logging_interval="epoch") 66 | callbacks = [lr_monitor, checkpoint] 67 | trainer = Trainer( 68 | gpus=config.compnode.num_gpus, 69 | num_nodes=config.compnode.num_nodes, 70 | accelerator=config.compnode.accelerator, 71 | max_epochs=config.num_epochs, 72 | 
callbacks=callbacks, 73 | logger=wandb_logger, 74 | check_val_every_n_epoch=config.check_val_every_n_epoch, 75 | log_every_n_steps=5, 76 | precision=16 if config.model.fp16 else 32, 77 | num_sanity_val_steps=config.num_sanity_val_steps, 78 | ) 79 | # Launch model training 80 | trainer.test(model, data_module, ckpt_path=checkpoint_path) 81 | -------------------------------------------------------------------------------- /jaws/src/models/base_model.py: -------------------------------------------------------------------------------- 1 | import os.path as osp 2 | import socket 3 | import struct 4 | from datetime import datetime 5 | from typing import Any, Dict, Tuple, List, Callable 6 | import cv2 7 | import numpy as np 8 | from pytorch_lightning import LightningModule 9 | import torch 10 | import torch.nn as nn 11 | import torch.optim as optim 12 | import torchvision.transforms as T 13 | from tqdm import tqdm 14 | from scipy.spatial.transform import Rotation as R 15 | 16 | from utils.file_utils import create_dir, save_pickle, load_pickle 17 | from utils.nerf_utils import get_rays 18 | from utils.image_utils import ( 19 | save_torch_image, 20 | save_loss_marginal_image, 21 | save_heatmaps, 22 | save_heatmap, 23 | put_text_on_image, 24 | ) 25 | from utils.camera_utils import CameraPoseGenerator as cam_gen, pose_distance 26 | 27 | from kornia.color.hsv import rgb_to_hsv 28 | 29 | 30 | class BaseModel(LightningModule): 31 | def __init__( 32 | self, 33 | result_dir: str, 34 | optimizer: nn.Module, 35 | lr_scheduler: nn.Module, 36 | criterion: nn.Module, 37 | run_type: str, 38 | num_steps: int, 39 | upsample_steps: int, 40 | max_ray_batch: int, 41 | saturation_loss: bool, 42 | error_map: bool, 43 | floater_ratio: float, 44 | ): 45 | super().__init__() 46 | 47 | self._optimizer = optimizer 48 | self._lr_scheduler = lr_scheduler 49 | self._result_dir = result_dir 50 | self.criterion = criterion 51 | self._val_dir = osp.join(result_dir, "validation") 52 | self.benchmark_dir = osp.join(result_dir, "benchmark") 53 | create_dir(self._val_dir) 54 | if run_type == "infer": 55 | timestamp = datetime.now().strftime("%m-%d_%H-%M") 56 | self._test_dir = osp.join(result_dir, "test", f"test_{timestamp}") 57 | create_dir(self._test_dir) 58 | 59 | self._num_steps = num_steps 60 | self._upsample_steps = upsample_steps 61 | self._max_ray_batch = max_ray_batch 62 | self._saturation_loss = saturation_loss 63 | self._error_map = error_map 64 | self._floater_ratio = floater_ratio 65 | 66 | def _save_step(self, rgb_pred: torch.Tensor, batch_idx: int): 67 | """Save predicted RGB images.""" 68 | pred_path = osp.join( 69 | self._val_dir, f"{batch_idx:03}_{self.current_epoch+1:02}.png" 70 | ) 71 | img = cv2.cvtColor( 72 | (rgb_pred[0].detach().cpu().numpy() * 255).astype(np.uint8), 73 | cv2.COLOR_RGB2BGR, 74 | ) 75 | cv2.imwrite( 76 | pred_path, 77 | img, 78 | ) 79 | # use directly logger to log image 80 | self.logger.log_image(key="val_img", images=[img[:, :, ::-1]]) 81 | 82 | if self._error_map: 83 | emap = ( 84 | self.trainer.datamodule.train_dataset.error_map[0] 85 | .view(128, 128) 86 | .cpu() 87 | .numpy() 88 | ) 89 | emap = (emap - emap.min()) / (emap.max() - emap.min()) 90 | emap_path = osp.join( 91 | self._val_dir, 92 | f"{batch_idx:03}_{self.current_epoch+1:02}_emap.png", 93 | ) 94 | cv2.imwrite( 95 | emap_path, 96 | (emap * 255).astype(np.uint8), 97 | ) 98 | 99 | def _test_save_step( 100 | self, rgb_pred: torch.Tensor, batch_idx: int, pose: torch.Tensor 101 | ): 102 | """Save predicted RGB images in prediction""" 
103 | pred_path = osp.join(self._test_dir, f"test_{batch_idx:03}.png") 104 | img = cv2.cvtColor( 105 | (rgb_pred[0].detach().cpu().numpy() * 255).astype(np.uint8), 106 | cv2.COLOR_RGB2BGR, 107 | ) 108 | cv2.imwrite( 109 | pred_path, 110 | img, 111 | ) 112 | 113 | file_dir = osp.join(self._test_dir, "traj.txt") 114 | with open(file_dir, "a+") as output_file: 115 | output_file.write( 116 | " ".join( 117 | [ 118 | str(elem) 119 | for elem in pose.cpu().detach().numpy().flatten()[:-4] 120 | ] 121 | ) 122 | + "\n" 123 | ) 124 | 125 | def _log_step( 126 | self, 127 | mode: str, 128 | loss: torch.Tensor, 129 | ): 130 | """Log metrics at each epoch and each step for the training.""" 131 | on_step = True if mode == "train" else False 132 | self.log( 133 | f"{mode}/loss", 134 | loss, 135 | on_step=on_step, 136 | on_epoch=True, 137 | prog_bar=False, 138 | logger=True, 139 | sync_dist=True, 140 | ) 141 | 142 | def _eval_step_wo_gt(self): 143 | raise NotImplementedError() 144 | 145 | def _eval_step_w_gt(self): 146 | raise NotImplementedError() 147 | 148 | def training_step(self): 149 | raise NotImplementedError() 150 | 151 | def validation_step(self): 152 | raise NotImplementedError() 153 | 154 | def test_step(self): 155 | raise NotImplementedError() 156 | 157 | def configure_optimizers(self) -> Dict[str, Any]: 158 | """Define optimizers and LR schedulers.""" 159 | if self._optimizer is None: 160 | optimizer = optim.Adam( 161 | self.model.parameters(), lr=0.001, weight_decay=5e-4 162 | ) # naive adam 163 | else: 164 | optimizer = self._optimizer(self.model) 165 | 166 | if self._lr_scheduler is None: 167 | lr_scheduler = optim.lr_scheduler.LambdaLR( 168 | self.optimizer, lr_lambda=lambda epoch: 1 169 | ) # fake scheduler 170 | else: 171 | lr_scheduler = self._lr_scheduler(optimizer) 172 | 173 | return { 174 | "optimizer": optimizer, 175 | "lr_scheduler": lr_scheduler, 176 | "monitor": "train/loss", 177 | } 178 | -------------------------------------------------------------------------------- /jaws/src/models/callbacks/early_stopping.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | 3 | 4 | class EarlyStopping(nn.Module): 5 | """Callback adapting stopping training if criteria are reached.""" 6 | 7 | def __init__(self, num_patience: int, min_delta: float): 8 | super().__init__() 9 | self.num_patience = num_patience 10 | self.min_delta = min_delta 11 | self.loss_buffer = None 12 | self.wait_count = 0 13 | 14 | def run_early_stopping_check(self, current_loss: float) -> bool: 15 | if self.wait_count == 0: 16 | self.loss_buffer = current_loss 17 | self.wait_count += 1 18 | return False 19 | 20 | if current_loss < self.loss_buffer - self.min_delta: 21 | self.loss_buffer = current_loss 22 | self.wait_count = 0 23 | return False 24 | 25 | if self.wait_count > self.num_patience: 26 | return True 27 | 28 | self.wait_count += 1 29 | return False 30 | -------------------------------------------------------------------------------- /jaws/src/models/callbacks/grad_norm.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class GradNorm(nn.Module): 8 | """ 9 | Callback adapting loss weights during the training for MTL. 10 | Implementation of https://arxiv.org/pdf/1711.02257.pdf. 11 | Note that: `pl_module` must have `loss_weights` and `task_losses` 12 | attributes and `model._get_shared_layer` method. 
13 | 14 | Code adapted from: https://github.com/falkaer/artist-group-factors/ 15 | """ 16 | 17 | def __init__(self, num_tasks: int, alpha: float): 18 | super().__init__() 19 | self.num_tasks = num_tasks 20 | self.loss_weights = nn.Parameter(torch.ones(num_tasks, requires_grad=True)) 21 | self.alpha = alpha 22 | self._batch_index = 0 23 | 24 | def fit(self, task_losses: torch.Tensor, shared_parameters: nn.Parameter): 25 | """Fit the loss weights according to the gradnorm.""" 26 | # Zero the w_i(t) gradients to update the weights using gradnorm loss 27 | self.loss_weights.grad = 0.0 * self.loss_weights.grad 28 | W = list(shared_parameters) 29 | 30 | norms = [] 31 | for task_index, (w_i, L_i) in enumerate(zip(self.loss_weights, task_losses)): 32 | # Retain the graph until the last pass 33 | retain_graph = True if task_index != self.num_tasks - 1 else False 34 | # Gradient of L_i(t) w.r.t. W 35 | gLgW = torch.autograd.grad(L_i, W, retain_graph=retain_graph) 36 | # G^{(i)}_W(t) 37 | norms.append(torch.norm(w_i * gLgW[0])) 38 | norms = torch.stack(norms) 39 | 40 | # Set L(0) 41 | if self._batch_index == 0: 42 | self.initial_losses = task_losses.detach() 43 | 44 | # Compute the constant term without accumulating gradients 45 | # as it should stay constant during back-propagation 46 | with torch.no_grad(): 47 | # Loss ratios \curl{L}(t) 48 | loss_ratios = task_losses / self.initial_losses 49 | # Inverse training rate r(t) 50 | inverse_train_rates = loss_ratios / loss_ratios.mean() 51 | constant_term = norms.mean() * (inverse_train_rates**self.alpha) 52 | 53 | # Write out the gradnorm loss L_grad and set the weight gradients 54 | grad_norm_loss = (norms - constant_term).abs().sum() 55 | self.loss_weights.grad = torch.autograd.grad(grad_norm_loss, self.loss_weights)[ 56 | 0 57 | ] 58 | 59 | self._batch_index += 1 60 | 61 | def normalize_weights(self) -> torch.Tensor: 62 | """Renormalize the gradient weights.""" 63 | with torch.no_grad(): 64 | normalize_coeff = len(self.loss_weights) / self.loss_weights.sum() 65 | self.loss_weights.data = self.loss_weights.data * normalize_coeff 66 | 67 | def _get_loss_weights(self, mask_weights: List[torch.Tensor]) -> torch.Tensor: 68 | """Return the loss weights for the current batch.""" 69 | gradnorm_index, loss_weights = 0, [] 70 | for task_weight in mask_weights: 71 | if task_weight: 72 | weight = self.loss_weights[gradnorm_index] 73 | loss_weights.append(weight) 74 | gradnorm_index += 1 75 | else: 76 | loss_weights.append(torch.tensor(0, device=self.loss_weights.device)) 77 | loss_weights = torch.stack(loss_weights).clamp(min=0.05) 78 | 79 | return loss_weights 80 | -------------------------------------------------------------------------------- /jaws/src/models/metrics/angular_loss.py: -------------------------------------------------------------------------------- 1 | """Code adapted from: https://github.com/jadarve/optical-flow-filter.""" 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | 7 | class AngularLoss(nn.Module): 8 | def forward(self, flow1: torch.Tensor, flow2: torch.Tensor): 9 | """Compute the angular error between two flow fields. 10 | 11 | :param flow1: first optical flow field. 12 | :param flow2: second optical flow field. 13 | :return: angular error field in degrees. 
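        Note: the per-pixel angular error is averaged and divided by 180,
        so the value actually returned is a scalar in [0, 1].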
14 | """ 15 | f1_x = flow1[..., 0] 16 | f1_y = flow1[..., 1] 17 | 18 | f2_x = flow2[..., 0] 19 | f2_y = flow2[..., 1] 20 | 21 | top = 1.0 + f1_x * f2_x + f1_y * f2_y 22 | bottom = torch.sqrt(1.0 + f1_x * f1_x + f1_y * f1_y) * torch.sqrt( 23 | 1.0 + f2_x * f2_x + f2_y * f2_y 24 | ) 25 | div = torch.clamp(top / bottom, min=-1, max=1) 26 | loss = torch.rad2deg(torch.arccos(div)).mean() / 180.0 27 | 28 | return loss 29 | -------------------------------------------------------------------------------- /jaws/src/models/metrics/vgg_loss.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch.nn.functional import mse_loss 5 | import torchvision 6 | from torchvision.models.vgg import VGG16_Weights 7 | 8 | 9 | class VGGLoss(torch.nn.Module): 10 | """ 11 | VGG perceptual loss: 12 | Paper: https://arxiv.org/pdf/1603.08155.pdf 13 | Code: https://gist.github.com/alper111/8233cdb0414b4cb5853f2f730ab95a49 14 | """ 15 | 16 | def __init__( 17 | self, 18 | resize: bool = False, 19 | feature_blocks: List[int] = [0, 1, 2, 3], 20 | style_blocks: List[int] = [], 21 | ): 22 | super(VGGLoss, self).__init__() 23 | 24 | # Initialize VGG blocks 25 | weights = VGG16_Weights.DEFAULT 26 | blocks = [ 27 | torchvision.models.vgg16(weights=weights).features[:4].eval(), 28 | torchvision.models.vgg16(weights=weights).features[4:9].eval(), 29 | torchvision.models.vgg16(weights=weights).features[9:16].eval(), 30 | torchvision.models.vgg16(weights=weights).features[16:23].eval(), 31 | ] 32 | self.feature_blocks = feature_blocks 33 | self.style_blocks = style_blocks 34 | 35 | # Freeze VGG's parameters 36 | for bl in blocks: 37 | for p in bl.parameters(): 38 | p.requires_grad = False 39 | self.blocks = torch.nn.ModuleList(blocks) 40 | 41 | # Initialize transformation parameters 42 | self.transform = torch.nn.functional.interpolate if resize else None 43 | mean = torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1) 44 | self.register_buffer("mean", mean) 45 | std = torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1) 46 | self.register_buffer("std", std) 47 | 48 | def forward( 49 | self, 50 | x: torch.Tensor, 51 | y: torch.Tensor, 52 | ) -> torch.Tensor: 53 | # Order channels: [B, C, H, W] 54 | if x.shape[1] != 3: 55 | x = x.permute(0, 3, 1, 2) 56 | y = y.permute(0, 3, 1, 2) 57 | # Normalize in/outputs 58 | x = (x - self.mean) / self.std 59 | y = (y - self.mean) / self.std 60 | # Resize in/outputs 61 | if self.transform: 62 | x = self.transform(x, mode="bilinear", size=(224, 224), align_corners=False) 63 | y = self.transform(y, mode="bilinear", size=(224, 224), align_corners=False) 64 | 65 | # Evaluate loss value 66 | loss = 0 67 | for i, block in enumerate(self.blocks): 68 | x = block(x) 69 | y = block(y) 70 | # Compute feature loss 71 | if i in self.feature_blocks: 72 | loss += mse_loss(x, y) 73 | # Compute style loss 74 | if i in self.style_blocks: 75 | act_x = x.reshape(x.shape[0], x.shape[1], -1) 76 | act_y = y.reshape(y.shape[0], y.shape[1], -1) 77 | gram_x = act_x @ act_x.permute(0, 2, 1) / act_x.numel() 78 | gram_y = act_y @ act_y.permute(0, 2, 1) / act_x.numel() 79 | loss += torch.norm(gram_x - gram_y) 80 | 81 | return loss 82 | -------------------------------------------------------------------------------- /jaws/src/models/modules/feature/flow_estimator.py: -------------------------------------------------------------------------------- 1 | from typing import List, Tuple 2 | 3 | import numpy as np 4 | import torch 5 | import 
torch.nn as nn 6 | 7 | from jaws.src.models.modules.feature.raft import make_raft_estimator 8 | from utils.flow_utils import FlowUtils 9 | 10 | 11 | def g_sigmoid(x, q, b): 12 | return 1.0 / (1.0 + torch.exp(b * q) * torch.exp(-b * x)) 13 | 14 | 15 | def f_inverse(x): 16 | return 1 - (1 / (torch.max(x, torch.ones_like(x)))) 17 | 18 | 19 | class FlowEstimator(nn.Module): 20 | """Optical flow estimator. 21 | 22 | :param raft_pretrained_path: path to the pretrained raft model. 23 | """ 24 | 25 | def __init__(self, raft_pretrained_path: str): 26 | super(FlowEstimator, self).__init__() 27 | self.flow_estimator = make_raft_estimator(freeze=True) 28 | self._flow_utils = FlowUtils() 29 | 30 | def _flow_polar(self, flow: torch.Tensor, step_module: float = 0) -> torch.Tensor: 31 | """Normalize flow by inversing polar modules (H, W, C).""" 32 | polar_flow = self._flow_utils.xy_to_polar(flow) 33 | 34 | scaled_polar_flow = torch.zeros_like(polar_flow) 35 | scaled_polar_flow[:, :, 0] = polar_flow[:, :, 0] 36 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 37 | return scaled_polar_flow 38 | 39 | def _unit_normalize_flow(self, flow: torch.Tensor) -> torch.Tensor: 40 | """Normalize flow by unitarize their polar modules (H, W, C).""" 41 | polar_flow = self._flow_utils.xy_to_polar(flow) 42 | 43 | scaled_polar_flow = torch.zeros_like(polar_flow) 44 | scaled_polar_flow[:, :, 0] = torch.ones_like(polar_flow[:, :, 0]) 45 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 46 | 47 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 48 | 49 | return scaled_flow 50 | 51 | def _step_normalize_flow( 52 | self, flow: torch.Tensor, step_module: float = 0 53 | ) -> torch.Tensor: 54 | """ 55 | Normalize flow by unitarize their polar modules (H, W, C) greater than 56 | a threshold (`step_module`), otherwise, zero. 57 | """ 58 | polar_flow = self._flow_utils.xy_to_polar(flow) 59 | 60 | scaled_polar_flow = torch.zeros_like(polar_flow) 61 | scaled_polar_flow[:, :, 0] = 1 * (polar_flow[:, :, 0] > step_module) 62 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 63 | 64 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 65 | 66 | return scaled_flow 67 | 68 | def _sigmoid_normalize_flow( 69 | self, flow: torch.Tensor, step_module: float = 0 70 | ) -> torch.Tensor: 71 | """ 72 | Normalize flow by aplying a sigmoid on their polar modules (H, W, C). 
73 | """ 74 | polar_flow = self._flow_utils.xy_to_polar(flow) 75 | 76 | scaled_polar_flow = torch.zeros_like(polar_flow) 77 | scaled_polar_flow[:, :, 0] = g_sigmoid( 78 | polar_flow[:, :, 0], q=torch.tensor(20), b=torch.tensor(0.1) 79 | ) 80 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 81 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 82 | 83 | return scaled_flow 84 | 85 | def _inverse_normalize_flow( 86 | self, flow: torch.Tensor, step_module: float = 0 87 | ) -> torch.Tensor: 88 | """Normalize flow by inversing polar modules (H, W, C).""" 89 | polar_flow = self._flow_utils.xy_to_polar(flow) 90 | 91 | scaled_polar_flow = torch.zeros_like(polar_flow) 92 | scaled_polar_flow[:, :, 0] = f_inverse(polar_flow[:, :, 0]) 93 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 94 | scaled_flow = self._flow_utils.polar_to_xy(scaled_polar_flow) 95 | 96 | return scaled_flow 97 | 98 | def _inverse_normalize_flow_polar( 99 | self, flow: torch.Tensor, step_module: float = 0 100 | ) -> torch.Tensor: 101 | """Normalize flow by inversing polar modules (H, W, C).""" 102 | polar_flow = self._flow_utils.xy_to_polar(flow) 103 | 104 | scaled_polar_flow = torch.zeros_like(polar_flow) 105 | scaled_polar_flow[:, :, 0] = f_inverse(polar_flow[:, :, 0]) 106 | scaled_polar_flow[:, :, 1] = polar_flow[:, :, 1] 107 | 108 | return scaled_polar_flow 109 | 110 | def _estimate_flow(self, frames: torch.Tensor) -> torch.Tensor: 111 | """Estimate frows from RGB frames.""" 112 | flows = self.flow_estimator([frames[:-1], frames[1:]]).permute([0, 2, 3, 1]) 113 | return flows 114 | 115 | def compute_flow( 116 | self, frames: List[np.array], ftype: str = "EE" 117 | ) -> Tuple[torch.Tensor, torch.Tensor]: 118 | """ 119 | Extract flow style features from a list of frames. 120 | WARNING: For inference only, please don't forget `.eval()` and 121 | `torch.no_grad()`. 122 | Types: including: 123 | 124 | EE: EndPoint flow -> XY 125 | NEE: normalised Endpoint -> Norm xy flow 126 | AN: Angular -> XY 127 | 128 | :param frames: list of raw RGB frames 0-255 range (T, C, H, W). 129 | :return: encoded flow style vectors (B, C_f, T_f, W_f, H_f). 
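        Note: a tuple is actually returned: the (possibly normalized) flows
        of shape (T-1, H, W, 2), plus the raw flows with an extra leading
        batch dimension.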
130 | 131 | """ 132 | # Estimate flows, output shape: (T, H, W, C) 133 | flows = self._estimate_flow(frames) 134 | 135 | # Normalize flow chunks, output shape: (T, H, W, C) 136 | if ftype == "NEE": 137 | normalized_flows = torch.stack( 138 | [self._inverse_normalize_flow(f) for f in flows] 139 | ) 140 | return normalized_flows, flows.unsqueeze(0) 141 | 142 | if ftype == "EE" or ftype == "AN": 143 | return flows, flows.unsqueeze(0) # Ck, chk_size, H, W, C 144 | -------------------------------------------------------------------------------- /jaws/src/models/modules/feature/raft.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | from pytorch_lightning import LightningModule 4 | import torch 5 | 6 | from torchvision.models.optical_flow import raft_small, Raft_Small_Weights 7 | import torchvision.transforms.functional as F 8 | 9 | 10 | class RAFT_tv(LightningModule): 11 | def __init__(self): 12 | super(RAFT_tv, self).__init__() 13 | self.model = raft_small(weights=Raft_Small_Weights.DEFAULT, progress=False) 14 | 15 | def forward(self, x: Tuple[torch.Tensor, torch.Tensor]) -> torch.Tensor: 16 | x1, x2 = x 17 | B, C, H, W = x1.shape 18 | x1 = 2 * (x1 / 255.0) - 1.0 19 | x2 = 2 * (x2 / 255.0) - 1.0 20 | x1, x2 = self.preprocess(x1, x2) 21 | flow_raw = self.model(x1, x2, num_flow_updates=12)[-1] 22 | flow_resized = torch.nn.functional.interpolate(flow_raw, size=[H, W]) 23 | return flow_resized 24 | 25 | def preprocess(self, img1_batch, img2_batch): 26 | transforms = Raft_Small_Weights.DEFAULT.transforms() 27 | img1_batch = F.resize(img1_batch, size=[224, 224]) 28 | img2_batch = F.resize(img2_batch, size=[224, 224]) 29 | return transforms(img1_batch, img2_batch) 30 | 31 | def postprocess(self, flow, img_size): 32 | flow_resized = F.resize(flow, size=img_size) 33 | return flow_resized 34 | 35 | 36 | def make_raft_estimator(freeze: bool): 37 | model = RAFT_tv().eval() 38 | 39 | if freeze: 40 | for p in model.model.parameters(): 41 | p.requires_grad = False 42 | 43 | return model 44 | -------------------------------------------------------------------------------- /jaws/src/models/modules/nerf/network_ff.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Tuple, List 2 | 3 | import torch 4 | 5 | from jaws.src.models.modules.nerf.renderer import NeRFRenderer 6 | from lib.torch_ngp.encoding import get_encoder 7 | from lib.torch_ngp.activation import trunc_exp 8 | from lib.torch_ngp.ffmlp import FFMLP 9 | 10 | 11 | class NeRFNetwork(NeRFRenderer): 12 | def __init__( 13 | self, 14 | sigma_encoding: str, 15 | direction_encoding: str, 16 | n_sigma_layers: int, 17 | n_color_layers: int, 18 | sigma_hidden_dim: int, 19 | color_hidden_dim: int, 20 | geo_feat_dim: int, 21 | bound: int, 22 | aabb: List, 23 | encoder_num_levels: int, 24 | **kwargs 25 | ): 26 | super().__init__( 27 | bound, aabb, background_radius=0, background_perlin_noise=False, **kwargs 28 | ) 29 | 30 | # Density network 31 | self._n_sigma_layers = n_sigma_layers 32 | self._sigma_hidden_dim = sigma_hidden_dim 33 | self._geo_feat_dim = geo_feat_dim 34 | self.sigma_encoder, self._sigma_in_dim = get_encoder( 35 | sigma_encoding, 36 | desired_resolution=2048 * bound, 37 | num_levels=encoder_num_levels, 38 | ) 39 | self.sigma_net = FFMLP( 40 | input_dim=self._sigma_in_dim, 41 | output_dim=1 + self._geo_feat_dim, 42 | hidden_dim=self._sigma_hidden_dim, 43 | num_layers=self._n_sigma_layers, 44 | ) 45 | 46 | # Color 
network 47 | self._n_color_layers = n_color_layers 48 | self._color_hidden_dim = color_hidden_dim 49 | self.color_encoder, self._color_in_dim = get_encoder( 50 | direction_encoding, 51 | desired_resolution=2048, 52 | num_levels=encoder_num_levels, 53 | ) 54 | self._color_in_dim += self._geo_feat_dim + 1 55 | self.color_net = FFMLP( 56 | input_dim=self._color_in_dim, 57 | output_dim=3, 58 | hidden_dim=self._color_hidden_dim, 59 | num_layers=self._n_color_layers, 60 | ) 61 | 62 | def forward( 63 | self, x: torch.Tensor, d: torch.Tensor 64 | ) -> Tuple[torch.Tensor, torch.Tensor]: 65 | """ 66 | :param x: [N, 3], in [-bound, bound] 67 | :param d: [N, 3], normalized in [-1, 1] 68 | """ 69 | # Sigma 70 | x = self.sigma_encoder(x, bound=self.bound) 71 | h = self.sigma_net(x) 72 | sigma = trunc_exp(h[..., 0]) 73 | geo_feat = h[..., 1:] 74 | 75 | # Color 76 | d = self.color_encoder(d) 77 | p = torch.zeros_like(geo_feat[..., :1]) # manual input padding 78 | h = torch.cat([d, geo_feat, p], dim=-1) 79 | h = self.color_net(h) 80 | 81 | # Sigmoid activation for rgb 82 | rgb = torch.sigmoid(h) 83 | 84 | return sigma, rgb 85 | 86 | def density(self, _x: torch.Tensor) -> Dict[str, torch.Tensor]: 87 | """ 88 | :param _x: [N, 3], in [-bound, bound] 89 | """ 90 | x = self.sigma_encoder(_x, bound=self.bound) 91 | h = self.sigma_net(x) 92 | 93 | sigma = trunc_exp(h[..., 0]) 94 | geo_feat = h[..., 1:] 95 | 96 | if torch.isnan(sigma).any(): 97 | assert False 98 | return { 99 | "sigma": sigma, 100 | "geo_feat": geo_feat, 101 | } 102 | 103 | def color( 104 | self, 105 | x: torch.Tensor, 106 | d: torch.Tensor, 107 | mask: torch.Tensor = None, 108 | geo_feat: torch.Tensor = None, 109 | **kwargs 110 | ) -> torch.Tensor: 111 | """ 112 | Allow masked inference. 113 | 114 | :param x: [N, 3] in [-bound, bound] 115 | :param mask: [N,], bool, indicates where rgb needs to be computed.
116 | """ 117 | if mask is not None: 118 | # [N, 3] 119 | rgbs = torch.zeros(mask.shape[0], 3, dtype=x.dtype, device=x.device) 120 | # Empty mask 121 | if not mask.any(): 122 | return rgbs 123 | x = x[mask] 124 | d = d[mask] 125 | geo_feat = geo_feat[mask] 126 | 127 | d = self.color_encoder(d) 128 | 129 | p = torch.zeros_like(geo_feat[..., :1]) # manual input padding 130 | h = torch.cat([d, geo_feat, p], dim=-1) 131 | h = self.color_net(h) 132 | 133 | # Sigmoid activation for rgb 134 | h = torch.sigmoid(h) 135 | 136 | if mask is not None: 137 | rgbs[mask] = h.to(rgbs.dtype) 138 | else: 139 | rgbs = h 140 | 141 | return rgbs 142 | 143 | def get_params(self, lr: float) -> Dict[str, Any]: 144 | params = [ 145 | {"params": self.sigma_encoder.parameters(), "lr": lr}, 146 | {"params": self.sigma_net.parameters(), "lr": lr}, 147 | {"params": self.color_encoder.parameters(), "lr": lr}, 148 | {"params": self.color_net.parameters(), "lr": lr}, 149 | ] 150 | return params 151 | -------------------------------------------------------------------------------- /jaws/src/models/modules/nerf_factory.py: -------------------------------------------------------------------------------- 1 | from pytorch_lightning import LightningModule 2 | import torch 3 | from torch import optim 4 | 5 | 6 | def create_nerf_model(config) -> LightningModule: 7 | criterion = torch.nn.MSELoss(reduction="none") 8 | 9 | ff = config.datamodule.ff 10 | background_radius = 0 if ff else config.datamodule.background_radius 11 | background_encoding = None if ff else config.model.background_encoding 12 | n_background_layers = None if ff else config.model.n_background_layers 13 | background_hidden_dim = None if ff else config.model.background_hidden_dim 14 | background_perlin_noise = None if ff else config.datamodule.background_perlin_noise 15 | 16 | if config.dynamic: 17 | from jaws.src.models.dnerf_model import DNeRFModel 18 | 19 | # Initialize model 20 | optimizer = lambda model: torch.optim.Adam( 21 | model.get_params(config.model.lr, config.model.lr_net), 22 | betas=(0.9, 0.99), 23 | eps=1e-14, 24 | ) 25 | lr_scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR( 26 | optimizer, 27 | lambda iter: 0.1 ** min(iter / (config.num_epochs * 100), 1), 28 | ) 29 | 30 | model = DNeRFModel( 31 | result_dir=config.result_dir, 32 | optimizer=optimizer, 33 | lr_scheduler=lr_scheduler, 34 | criterion=criterion, 35 | bound=config.datamodule.bound, 36 | aabb=config.datamodule.aabb, 37 | run_type=config.run_type, 38 | min_near=config.datamodule.min_near, 39 | density_thresh=config.datamodule.density_thresh, 40 | num_steps=config.num_steps, 41 | upsample_steps=config.upsample_steps, 42 | max_ray_batch=config.max_ray_batch, 43 | background_radius=background_radius, 44 | time_encoding=config.model.time_encoding, 45 | warp_encoding=config.model.warp_encoding, 46 | sigma_encoding=config.model.sigma_encoding, 47 | direction_encoding=config.model.direction_encoding, 48 | background_encoding=background_encoding, 49 | background_perlin_noise=background_perlin_noise, 50 | n_warp_layers=config.model.n_warp_layers, 51 | n_sigma_layers=config.model.n_sigma_layers, 52 | n_color_layers=config.model.n_color_layers, 53 | n_background_layers=n_background_layers, 54 | warp_hidden_dim=config.model.warp_hidden_dim, 55 | sigma_hidden_dim=config.model.sigma_hidden_dim, 56 | color_hidden_dim=config.model.color_hidden_dim, 57 | background_hidden_dim=background_hidden_dim, 58 | geo_feat_dim=config.model.geo_feat_dim, 59 | 
encoder_num_levels=config.model.encoder_num_levels, 60 | saturation_loss=config.saturation_loss, 61 | error_map=config.error_map, 62 | floater_ratio=config.floater_ratio, 63 | ) 64 | else: 65 | from jaws.src.models.nerf_model import NeRFModel 66 | 67 | # Initialize model 68 | optimizer = lambda model: torch.optim.Adam( 69 | model.get_params(config.model.lr), betas=(0.9, 0.99), eps=1e-14 70 | ) 71 | lr_scheduler = lambda optimizer: optim.lr_scheduler.LambdaLR( 72 | optimizer, 73 | lambda iter: 0.1 ** min(iter / (config.num_epochs * 100), 1), 74 | ) 75 | 76 | model = NeRFModel( 77 | result_dir=config.result_dir, 78 | optimizer=optimizer, 79 | lr_scheduler=lr_scheduler, 80 | criterion=criterion, 81 | bound=config.datamodule.bound, 82 | aabb=config.datamodule.aabb, 83 | run_type=config.run_type, 84 | min_near=config.datamodule.min_near, 85 | density_thresh=config.datamodule.density_thresh, 86 | num_steps=config.num_steps, 87 | upsample_steps=config.upsample_steps, 88 | max_ray_batch=config.max_ray_batch, 89 | fully_fuse=ff, 90 | background_radius=background_radius, 91 | sigma_encoding=config.model.sigma_encoding, 92 | direction_encoding=config.model.direction_encoding, 93 | background_encoding=background_encoding, 94 | background_perlin_noise=background_perlin_noise, 95 | n_sigma_layers=config.model.n_sigma_layers, 96 | n_color_layers=config.model.n_color_layers, 97 | n_background_layers=n_background_layers, 98 | sigma_hidden_dim=config.model.sigma_hidden_dim, 99 | color_hidden_dim=config.model.color_hidden_dim, 100 | background_hidden_dim=background_hidden_dim, 101 | geo_feat_dim=config.model.geo_feat_dim, 102 | encoder_num_levels=config.model.encoder_num_levels, 103 | saturation_loss=config.saturation_loss, # [TODO:] 104 | error_map=config.error_map, 105 | floater_ratio=config.floater_ratio, 106 | ) 107 | if config.run_type != "train": 108 | model.training = False 109 | return model 110 | -------------------------------------------------------------------------------- /jaws/src/render.py: -------------------------------------------------------------------------------- 1 | from omegaconf import DictConfig 2 | import os 3 | import os.path as osp 4 | import sys 5 | 6 | import torch 7 | import numpy as np 8 | 9 | from utils.file_utils import create_dir, load_pickle, save_pickle 10 | from jaws.src.models.modules.nerf_factory import create_nerf_model 11 | from utils.camera_utils import PoseInterpolator 12 | from utils.image_utils import save_gif, save_torch_image, save_poses_kitti 13 | from tqdm import tqdm 14 | 15 | 16 | def render(config: DictConfig): 17 | sys.path.append(osp.join(".", "lib", "torch_ngp")) 18 | model = create_nerf_model(config) 19 | 20 | # Initialize trainer 21 | checkpoint_dir = osp.join(config.result_dir, "checkpoints") 22 | if not osp.exists(checkpoint_dir): 23 | create_dir(checkpoint_dir) 24 | if config.model.ckpt == "latest": 25 | checkpoint_list = sorted(os.listdir(checkpoint_dir)) 26 | if len(checkpoint_list) > 0: 27 | checkpoint_path = osp.join(checkpoint_dir, checkpoint_list[-1]) 28 | else: 29 | checkpoint_path = None 30 | else: 31 | checkpoint_path = config.model.ckpt 32 | 33 | checkpoint = torch.load(checkpoint_path) 34 | model.load_state_dict(checkpoint["state_dict"]) 35 | device = "cuda" if config.compnode.num_gpus > 0 else "cpu" 36 | model.to(device) 37 | 38 | # intrinsic parameters - constant during interpolating 39 | params_path = osp.join(config.render_target_dir, "params.pkl") 40 | params = load_pickle(params_path) 41 | 42 | # parameters can be interpolated 43 
| focals_path = osp.join(config.render_target_dir, "focals.pkl") 44 | times_path = osp.join(config.render_target_dir, "times.pkl") 45 | poses_path = osp.join(config.render_target_dir, "poses.pkl") 46 | focals = load_pickle(focals_path) 47 | times = load_pickle(times_path) 48 | poses = load_pickle(poses_path) 49 | 50 | # interpolation here before rendering: 51 | ( 52 | focals, 53 | times, 54 | poses, 55 | ) = PoseInterpolator.inpterpolate_render_sequence_from_keyframes_cubic( 56 | focals=focals, 57 | times=times, 58 | poses=poses, 59 | frm_num=config.render_frame_num, 60 | ) 61 | 62 | res_factor = 720.0 / params[0]["H"] 63 | frames = [] 64 | H = int(params[0]["H"] * res_factor) # 224 -> control image 65 | W = int(params[0]["W"] * res_factor) 66 | intrinsics = params[0]["intrinsics"] 67 | scale_factor = H / (intrinsics[3] * 2) 68 | intrinsics = intrinsics * scale_factor 69 | intrinsics[2] = W / 2 70 | intrinsics[3] = H / 2 71 | 72 | # high resolution 73 | for camera_index in tqdm(range(len(poses))): 74 | focal = focals[camera_index] 75 | time = torch.tensor([[times[camera_index]]]).to(device) 76 | current_intrinsics = np.copy(intrinsics) 77 | current_intrinsics[:2] = intrinsics[:2] * focal 78 | pose = poses[camera_index].to(device) 79 | if config.dynamic: 80 | frames.append( 81 | ( 82 | model.render(pose, time, current_intrinsics, H, W).cpu() 83 | * 255.0 84 | ) 85 | .numpy() 86 | .astype(np.uint8) 87 | ) 88 | else: 89 | frames.append( 90 | (model.render(pose, current_intrinsics, H, W).cpu() * 255.0) 91 | .numpy() 92 | .astype(np.uint8) 93 | ) 94 | save_gif( 95 | frames, osp.join(config.render_target_dir, "interpolated.gif"), 25 96 | ) 97 | save_pickle( 98 | poses, osp.join(config.render_target_dir, "interpolated_poses.pkl") 99 | ) 100 | save_pickle( 101 | focals, osp.join(config.render_target_dir, "interpolated_focals.pkl") 102 | ) 103 | save_poses_kitti( 104 | poses, 105 | config.render_target_dir, 106 | "interpolated_poses_kitti.csv", 107 | ) 108 | 109 | # save_pickle(frames, config.render_target_dir, "frames.pkl") 110 | # save fig here too 111 | -------------------------------------------------------------------------------- /jaws/src/train.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from omegaconf import DictConfig 3 | import os 4 | import os.path as osp 5 | import sys 6 | 7 | from pytorch_lightning import Trainer 8 | from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor 9 | from pytorch_lightning.loggers import WandbLogger 10 | 11 | from jaws.src.datamodules.nerf_datamodule import NeRFDataModule 12 | from utils.file_utils import create_dir 13 | 14 | from jaws.src.models.modules.nerf_factory import create_nerf_model 15 | 16 | 17 | def train(config: DictConfig): 18 | sys.path.append(osp.join(".", "lib", "torch_ngp")) 19 | model = create_nerf_model(config) 20 | 21 | # Initialize dataset 22 | data_module = NeRFDataModule( 23 | data_type="dynamic" if config.dynamic else "static", 24 | num_rays=config.num_rays, 25 | path=config.data_dir, 26 | mode=config.datamodule.mode, 27 | preload=config.datamodule.preload, 28 | scale=config.datamodule.scale, 29 | bound=config.datamodule.bound, 30 | rand_pose=config.datamodule.rand_pose, 31 | ind_calib=config.datamodule.independent_calibration, 32 | error_map=config.error_map, 33 | ) 34 | 35 | # Initialize trainer 36 | checkpoint_dir = osp.join(config.result_dir, "checkpoints") 37 | if not osp.exists(checkpoint_dir): 38 | create_dir(checkpoint_dir) 39 | 
if config.model.ckpt == "latest": 40 | checkpoint_list = sorted(os.listdir(checkpoint_dir)) 41 | if len(checkpoint_list) > 0: 42 | checkpoint_path = osp.join(checkpoint_dir, checkpoint_list[-1]) 43 | else: 44 | checkpoint_path = None 45 | elif config.model.ckpt == "scratch": 46 | checkpoint_path = None 47 | else: 48 | checkpoint_path = config.model.ckpt 49 | checkpoint = ModelCheckpoint( 50 | monitor=config.checkpoint_metric, 51 | mode="min", 52 | save_top_k=config.num_checkpoints, 53 | dirpath=checkpoint_dir, 54 | filename="{epoch}", 55 | save_on_train_epoch_end=True, 56 | ) 57 | timestamp = datetime.now().strftime("%m-%d_%H-%M") 58 | wandb_logger = WandbLogger( 59 | name="_".join([config.xp_name, "nerf", timestamp]), 60 | project=config.project_name, 61 | offline=config.log_offline, 62 | ) 63 | lr_monitor = LearningRateMonitor(logging_interval="epoch") 64 | callbacks = [lr_monitor, checkpoint] 65 | trainer = Trainer( 66 | gpus=config.compnode.num_gpus, 67 | num_nodes=config.compnode.num_nodes, 68 | accelerator=config.compnode.accelerator, 69 | max_epochs=config.num_epochs, 70 | callbacks=callbacks, 71 | logger=wandb_logger, 72 | check_val_every_n_epoch=config.check_val_every_n_epoch, 73 | log_every_n_steps=5, 74 | precision=16 if config.model.fp16 else 32, 75 | num_sanity_val_steps=config.num_sanity_val_steps, 76 | ) 77 | 78 | # Launch model training 79 | trainer.fit(model, data_module, ckpt_path=checkpoint_path) 80 | -------------------------------------------------------------------------------- /lib/LitePose/_init_paths.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os.path as osp 12 | import sys 13 | 14 | 15 | def add_path(path): 16 | if path not in sys.path: 17 | sys.path.insert(0, path) 18 | 19 | 20 | this_dir = osp.dirname(__file__) 21 | 22 | lib_path = osp.join(this_dir, "lib") 23 | add_path(lib_path) 24 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/ddrnet/ddrnet23s.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '../data/coco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | PRETRAINED_LAYERS: ['*'] 47 | INIT_WEIGHTS: True 48 | NAME: pose_ddrnet 49 | NUM_JOINTS: 17 50 | PRETRAINED: '' 51 | TAG_PER_JOINT: True 52 | TEST: 53 | FLIP_TEST: True 54 | IMAGES_PER_GPU: 1 55 | MODEL_FILE: '' 56 | SCALE_FACTOR: [1] 57 | DETECTION_THRESHOLD: 0.1 58 | WITH_HEATMAPS: (True, True) 59 | WITH_AE: (True, False) 60 | PROJECT2IMAGE: True 61 | NMS_KERNEL: 5 62 | NMS_PADDING: 2 63 | TRAIN: 64 | BEGIN_EPOCH: 0 65 | CHECKPOINT: '' 66 | END_EPOCH: 180 67 | GAMMA1: 0.99 68 | GAMMA2: 0.0 69 | IMAGES_PER_GPU: 16 70 | LR: 0.002 71 | LR_FACTOR: 0.1 72 | LR_STEP: [250, 330] 73 | MOMENTUM: 0.9 74 | NESTEROV: False 75 | OPTIMIZER: adam 76 | RESUME: False 77 | SHUFFLE: True 78 | WD: 0.0001 79 | WORKERS: 4 80 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/higher_hrnet/w32_512_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '../data/coco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, 
False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 17 100 | PRETRAINED: '../data/models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 6 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 4 130 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/higher_hrnet/w32_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: coco_kpt 17 | DATASET_TEST: coco 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 17 29 | ROOT: 'data/coco' 30 | TEST: val2017 31 | TRAIN: train2017 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | 
BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 32 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 12 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/higher_hrnet/w48_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: coco_kpt 17 | DATASET_TEST: coco 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 17 29 | ROOT: 'data/coco' 30 | TEST: val2017 31 | TRAIN: train2017 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 48 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: 
True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/mobilenet/mobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 256 18 | OUTPUT_SIZE: [64, 128] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '/dataset/coco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_mobilenet 59 | NUM_JOINTS: 17 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 0.004 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 0.0001 89 | WORKERS: 4 90 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/coco/mobilenet/supermobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 448 18 | OUTPUT_SIZE: [112, 224] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: '/dataset/mscoco' 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | 
WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_supermobilenet 59 | NUM_JOINTS: 17 60 | PRETRAINED: './pretrain/search_pretrain.pth.tar' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 2400 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 4e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [10000,18000] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 0.0001 89 | WORKERS: 4 90 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/ddrnet/ddrnet23s.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '../data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | PRETRAINED_LAYERS: ['*'] 47 | INIT_WEIGHTS: True 48 | NAME: pose_ddrnet 49 | NUM_JOINTS: 14 50 | PRETRAINED: '' 51 | TAG_PER_JOINT: True 52 | TEST: 53 | FLIP_TEST: True 54 | IMAGES_PER_GPU: 1 55 | MODEL_FILE: '' 56 | SCALE_FACTOR: [1] 57 | DETECTION_THRESHOLD: 0.1 58 | WITH_HEATMAPS: (True, True) 59 | WITH_AE: (True, False) 60 | PROJECT2IMAGE: True 61 | NMS_KERNEL: 5 62 | NMS_PADDING: 2 63 | TRAIN: 64 | BEGIN_EPOCH: 0 65 | CHECKPOINT: '' 66 | END_EPOCH: 500 67 | GAMMA1: 0.99 68 | GAMMA2: 0.0 69 | IMAGES_PER_GPU: 16 70 | LR: 1e-3 71 | LR_FACTOR: 0.1 72 | LR_STEP: [350, 480] 73 | MOMENTUM: 0.9 74 | NESTEROV: False 75 | OPTIMIZER: adam 76 | RESUME: False 77 | SHUFFLE: True 78 | WD: 1e-4 79 | WORKERS: 4 80 | 81 | 82 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-1.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-1 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | 
SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 480 18 | OUTPUT_SIZE: [120, 240] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 17 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 3 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -1 99 | WIDTH_MULT: 0.909 100 | DEPTH_MULT: 0.833 101 | NAME: pose_efficient_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 6 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-2.yaml: -------------------------------------------------------------------------------- 1 | 2 | AUTO_RESUME: True 3 | DATA_DIR: '' 4 | GPUS: (0,) 5 | LOG_DIR: log 6 | OUTPUT_DIR: output_H-2 7 | PRINT_FREQ: 100 8 | CUDNN: 9 | BENCHMARK: True 10 | DETERMINISTIC: True 11 | ENABLED: True 12 | DATASET: 13 | SIGMA: 2 14 | DATASET: coco_kpt 15 | DATASET_TEST: coco 16 | DATA_FORMAT: jpg 17 | FLIP: 0.5 18 | INPUT_SIZE: 448 19 | OUTPUT_SIZE: [112, 224] 20 | MAX_NUM_PEOPLE: 30 21 | MAX_ROTATION: 30 22 | MAX_SCALE: 1.5 23 | SCALE_TYPE: 'short' 24 | MAX_TRANSLATE: 40 25 | MIN_SCALE: 0.75 26 | NUM_JOINTS: 14 27 | ROOT: 'data/coco' #Dataset Root Folder 28 | TEST: val2017 29 | TRAIN: train2017 30 | DEBUG: 31 | DEBUG: True 32 | SAVE_BATCH_IMAGES_GT: False 33 | SAVE_BATCH_IMAGES_PRED: False 34 | SAVE_HEATMAPS_GT: True 35 | SAVE_HEATMAPS_PRED: True 36 | SAVE_TAGMAPS_PRED: True 37 | LOSS: 38 | NUM_STAGES: 2 39 | AE_LOSS_TYPE: exp 40 
| WITH_AE_LOSS: [True, False] 41 | PUSH_LOSS_FACTOR: [0.001, 0.001] 42 | PULL_LOSS_FACTOR: [0.001, 0.001] 43 | WITH_HEATMAPS_LOSS: [True, True] 44 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 45 | MODEL: 46 | EXTRA: 47 | FINAL_CONV_KERNEL: 1 48 | PRETRAINED_LAYERS: ['*'] 49 | STEM_INPLANES: 64 50 | STAGE2: 51 | NUM_MODULES: 1 52 | NUM_BRANCHES: 2 53 | BLOCK: BASIC 54 | NUM_BLOCKS: 55 | - 2 56 | - 2 57 | NUM_CHANNELS: 58 | - 32 59 | - 64 60 | FUSE_METHOD: SUM 61 | STAGE3: 62 | NUM_MODULES: 2 63 | NUM_BRANCHES: 3 64 | BLOCK: BASIC 65 | NUM_BLOCKS: 66 | - 2 67 | - 2 68 | - 2 69 | NUM_CHANNELS: 70 | - 32 71 | - 64 72 | - 128 73 | FUSE_METHOD: SUM 74 | STAGE4: 75 | NUM_MODULES: 3 76 | NUM_BRANCHES: 4 77 | BLOCK: BASIC 78 | NUM_BLOCKS: 79 | - 2 80 | - 2 81 | - 2 82 | - 2 83 | NUM_CHANNELS: 84 | - 32 85 | - 64 86 | - 128 87 | - 256 88 | FUSE_METHOD: SUM 89 | DECONV: 90 | NUM_DECONVS: 1 91 | NUM_CHANNELS: 92 | - 32 93 | KERNEL_SIZE: 94 | - 4 95 | NUM_BASIC_BLOCKS: 2 96 | CAT_OUTPUT: 97 | - True 98 | INIT_WEIGHTS: True 99 | SCALE_FACTOR: -2 100 | WIDTH_MULT: 0.826 101 | DEPTH_MULT: 0.694 102 | NAME: pose_efficient_hrnet 103 | NUM_JOINTS: 17 104 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 105 | TAG_PER_JOINT: True 106 | TEST: 107 | FLIP_TEST: True 108 | IMAGES_PER_GPU: 1 109 | MODEL_FILE: '' 110 | SCALE_FACTOR: [1] 111 | DETECTION_THRESHOLD: 0.1 112 | WITH_HEATMAPS: (True, True) 113 | WITH_AE: (True, False) 114 | PROJECT2IMAGE: True 115 | NMS_KERNEL: 5 116 | NMS_PADDING: 2 117 | TRAIN: 118 | BEGIN_EPOCH: 0 119 | CHECKPOINT: '' 120 | END_EPOCH: 300 121 | GAMMA1: 0.99 122 | GAMMA2: 0.0 123 | IMAGES_PER_GPU: 8 124 | LR: 0.001 125 | LR_FACTOR: 0.1 126 | LR_STEP: [200, 260] 127 | MOMENTUM: 0.9 128 | NESTEROV: False 129 | OPTIMIZER: adam 130 | RESUME: False 131 | SHUFFLE: True 132 | WD: 0.0001 133 | WORKERS: 8 134 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-3 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 416 18 | OUTPUT_SIZE: [104, 208] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/pose/data/crowd_pose' #Dataset Root Folder 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 
70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -3 99 | WIDTH_MULT: 0.751 100 | DEPTH_MULT: 0.578 101 | NAME: pose_efficient_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: '' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.004 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/efficient_hrnet/H-4.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output_H-4 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: True 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: coco_kpt 14 | DATASET_TEST: coco 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 384 18 | OUTPUT_SIZE: [96, 192] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 'data/coco' #Dataset Root Folder 27 | TEST: val2017 28 | TRAIN: train2017 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 2 55 | - 2 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 2 66 | - 2 67 | - 2 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 2 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 2 79 | - 2 80 | - 2 81 | - 2 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 2 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | SCALE_FACTOR: -4 99 | WIDTH_MULT: 0.684 100 | DEPTH_MULT: 0.483 101 | NAME: pose_efficient_hrnet 102 | NUM_JOINTS: 17 103 | PRETRAINED: 'example_path/efficientnet-b0-4cfa50.pth' #Path to pretrained backbone model 104 | TAG_PER_JOINT: True 105 | 
TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 48 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 8 133 | 134 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w16_512_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '../data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 36 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 0 55 | - 4 56 | NUM_CHANNELS: 57 | - 18 58 | - 36 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 0 66 | - 0 67 | - 4 68 | NUM_CHANNELS: 69 | - 18 70 | - 36 71 | - 72 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 0 79 | - 0 80 | - 0 81 | - 4 82 | NUM_CHANNELS: 83 | - 18 84 | - 36 85 | - 72 86 | - 144 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 0 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: '../models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 24 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 4 130 | -------------------------------------------------------------------------------- 
/lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/pose/data/crowd_pose' 27 | TEST: search 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: '../models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | TEST: 103 | FLIP_TEST: True 104 | IMAGES_PER_GPU: 1 105 | MODEL_FILE: '' 106 | SCALE_FACTOR: [1] 107 | DETECTION_THRESHOLD: 0.1 108 | WITH_HEATMAPS: (True, True) 109 | WITH_AE: (True, False) 110 | PROJECT2IMAGE: True 111 | NMS_KERNEL: 5 112 | NMS_PADDING: 2 113 | TRAIN: 114 | BEGIN_EPOCH: 0 115 | CHECKPOINT: '' 116 | END_EPOCH: 300 117 | GAMMA1: 0.99 118 | GAMMA2: 0.0 119 | IMAGES_PER_GPU: 12 120 | LR: 0.001 121 | LR_FACTOR: 0.1 122 | LR_STEP: [200, 260] 123 | MOMENTUM: 0.9 124 | NESTEROV: False 125 | OPTIMIZER: adam 126 | RESUME: False 127 | SHUFFLE: True 128 | WD: 0.0001 129 | WORKERS: 4 130 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_coco.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 
'data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: 48 | - 'conv1' 49 | - 'bn1' 50 | - 'conv2' 51 | - 'bn2' 52 | - 'layer1' 53 | - 'transition1' 54 | - 'stage2' 55 | - 'transition2' 56 | - 'stage3' 57 | - 'transition3' 58 | - 'stage4' 59 | STEM_INPLANES: 64 60 | STAGE2: 61 | NUM_MODULES: 1 62 | NUM_BRANCHES: 2 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | NUM_CHANNELS: 68 | - 32 69 | - 64 70 | FUSE_METHOD: SUM 71 | STAGE3: 72 | NUM_MODULES: 4 73 | NUM_BRANCHES: 3 74 | BLOCK: BASIC 75 | NUM_BLOCKS: 76 | - 4 77 | - 4 78 | - 4 79 | NUM_CHANNELS: 80 | - 32 81 | - 64 82 | - 128 83 | FUSE_METHOD: SUM 84 | STAGE4: 85 | NUM_MODULES: 3 86 | NUM_BRANCHES: 4 87 | BLOCK: BASIC 88 | NUM_BLOCKS: 89 | - 4 90 | - 4 91 | - 4 92 | - 4 93 | NUM_CHANNELS: 94 | - 32 95 | - 64 96 | - 128 97 | - 256 98 | FUSE_METHOD: SUM 99 | DECONV: 100 | NUM_DECONVS: 1 101 | NUM_CHANNELS: 102 | - 32 103 | KERNEL_SIZE: 104 | - 4 105 | NUM_BASIC_BLOCKS: 4 106 | CAT_OUTPUT: 107 | - True 108 | INIT_WEIGHTS: True 109 | NAME: pose_higher_hrnet 110 | NUM_JOINTS: 14 111 | PRETRAINED: 'models/pytorch/pose_coco/pose_higher_hrnet_w32_512.pth' 112 | TAG_PER_JOINT: True 113 | TEST: 114 | FLIP_TEST: True 115 | IMAGES_PER_GPU: 1 116 | MODEL_FILE: '' 117 | SCALE_FACTOR: [1] 118 | DETECTION_THRESHOLD: 0.1 119 | WITH_HEATMAPS: (True, True) 120 | WITH_AE: (True, False) 121 | PROJECT2IMAGE: True 122 | NMS_KERNEL: 5 123 | NMS_PADDING: 2 124 | TRAIN: 125 | BEGIN_EPOCH: 0 126 | CHECKPOINT: '' 127 | END_EPOCH: 300 128 | GAMMA1: 0.99 129 | GAMMA2: 0.0 130 | IMAGES_PER_GPU: 12 131 | LR: 0.001 132 | LR_FACTOR: 0.1 133 | LR_STEP: [200, 260] 134 | MOMENTUM: 0.9 135 | NESTEROV: False 136 | OPTIMIZER: adam 137 | RESUME: False 138 | SHUFFLE: True 139 | WD: 0.0001 140 | WORKERS: 4 141 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_512_adam_lr1e-3_syncbn.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: 'data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | 
FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | STEM_INPLANES: 64 49 | STAGE2: 50 | NUM_MODULES: 1 51 | NUM_BRANCHES: 2 52 | BLOCK: BASIC 53 | NUM_BLOCKS: 54 | - 4 55 | - 4 56 | NUM_CHANNELS: 57 | - 32 58 | - 64 59 | FUSE_METHOD: SUM 60 | STAGE3: 61 | NUM_MODULES: 4 62 | NUM_BRANCHES: 3 63 | BLOCK: BASIC 64 | NUM_BLOCKS: 65 | - 4 66 | - 4 67 | - 4 68 | NUM_CHANNELS: 69 | - 32 70 | - 64 71 | - 128 72 | FUSE_METHOD: SUM 73 | STAGE4: 74 | NUM_MODULES: 3 75 | NUM_BRANCHES: 4 76 | BLOCK: BASIC 77 | NUM_BLOCKS: 78 | - 4 79 | - 4 80 | - 4 81 | - 4 82 | NUM_CHANNELS: 83 | - 32 84 | - 64 85 | - 128 86 | - 256 87 | FUSE_METHOD: SUM 88 | DECONV: 89 | NUM_DECONVS: 1 90 | NUM_CHANNELS: 91 | - 32 92 | KERNEL_SIZE: 93 | - 4 94 | NUM_BASIC_BLOCKS: 4 95 | CAT_OUTPUT: 96 | - True 97 | INIT_WEIGHTS: True 98 | NAME: pose_higher_hrnet 99 | NUM_JOINTS: 14 100 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 101 | TAG_PER_JOINT: True 102 | SYNC_BN: True 103 | TEST: 104 | FLIP_TEST: True 105 | IMAGES_PER_GPU: 1 106 | MODEL_FILE: '' 107 | SCALE_FACTOR: [1] 108 | DETECTION_THRESHOLD: 0.1 109 | WITH_HEATMAPS: (True, True) 110 | WITH_AE: (True, False) 111 | PROJECT2IMAGE: True 112 | NMS_KERNEL: 5 113 | NMS_PADDING: 2 114 | TRAIN: 115 | BEGIN_EPOCH: 0 116 | CHECKPOINT: '' 117 | END_EPOCH: 300 118 | GAMMA1: 0.99 119 | GAMMA2: 0.0 120 | IMAGES_PER_GPU: 12 121 | LR: 0.001 122 | LR_FACTOR: 0.1 123 | LR_STEP: [200, 260] 124 | MOMENTUM: 0.9 125 | NESTEROV: False 126 | OPTIMIZER: adam 127 | RESUME: False 128 | SHUFFLE: True 129 | WD: 0.0001 130 | WORKERS: 4 131 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w32_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: crowd_pose_kpt 17 | DATASET_TEST: crowd_pose 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 14 29 | ROOT: 'data/crowd_pose' 30 | TEST: test 31 | TRAIN: trainval 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 32 61 | - 64 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 32 73 | - 64 74 | - 128 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 32 87 | - 64 88 | - 128 89 | - 256 90 | FUSE_METHOD: SUM 91 | 
DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 32 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w32-36af842e.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 12 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/higher_hrnet/w48_640_adam_lr1e-3.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | FP16: 8 | ENABLED: True 9 | DYNAMIC_LOSS_SCALE: True 10 | CUDNN: 11 | BENCHMARK: True 12 | DETERMINISTIC: False 13 | ENABLED: True 14 | DATASET: 15 | SIGMA: 2 16 | DATASET: crowd_pose_kpt 17 | DATASET_TEST: crowd_pose 18 | DATA_FORMAT: jpg 19 | FLIP: 0.5 20 | INPUT_SIZE: 640 21 | OUTPUT_SIZE: [160, 320] 22 | MAX_NUM_PEOPLE: 30 23 | MAX_ROTATION: 30 24 | MAX_SCALE: 1.5 25 | SCALE_TYPE: 'short' 26 | MAX_TRANSLATE: 40 27 | MIN_SCALE: 0.75 28 | NUM_JOINTS: 14 29 | ROOT: 'data/crowd_pose' 30 | TEST: test 31 | TRAIN: trainval 32 | DEBUG: 33 | DEBUG: True 34 | SAVE_BATCH_IMAGES_GT: False 35 | SAVE_BATCH_IMAGES_PRED: False 36 | SAVE_HEATMAPS_GT: True 37 | SAVE_HEATMAPS_PRED: True 38 | SAVE_TAGMAPS_PRED: True 39 | LOSS: 40 | NUM_STAGES: 2 41 | AE_LOSS_TYPE: exp 42 | WITH_AE_LOSS: [True, False] 43 | PUSH_LOSS_FACTOR: [0.001, 0.001] 44 | PULL_LOSS_FACTOR: [0.001, 0.001] 45 | WITH_HEATMAPS_LOSS: [True, True] 46 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 47 | MODEL: 48 | EXTRA: 49 | FINAL_CONV_KERNEL: 1 50 | PRETRAINED_LAYERS: ['*'] 51 | STEM_INPLANES: 64 52 | STAGE2: 53 | NUM_MODULES: 1 54 | NUM_BRANCHES: 2 55 | BLOCK: BASIC 56 | NUM_BLOCKS: 57 | - 4 58 | - 4 59 | NUM_CHANNELS: 60 | - 48 61 | - 96 62 | FUSE_METHOD: SUM 63 | STAGE3: 64 | NUM_MODULES: 4 65 | NUM_BRANCHES: 3 66 | BLOCK: BASIC 67 | NUM_BLOCKS: 68 | - 4 69 | - 4 70 | - 4 71 | NUM_CHANNELS: 72 | - 48 73 | - 96 74 | - 192 75 | FUSE_METHOD: SUM 76 | STAGE4: 77 | NUM_MODULES: 3 78 | NUM_BRANCHES: 4 79 | BLOCK: BASIC 80 | NUM_BLOCKS: 81 | - 4 82 | - 4 83 | - 4 84 | - 4 85 | NUM_CHANNELS: 86 | - 48 87 | - 96 88 | - 192 89 | - 384 90 | FUSE_METHOD: SUM 91 | DECONV: 92 | NUM_DECONVS: 1 93 | NUM_CHANNELS: 94 | - 48 95 | KERNEL_SIZE: 96 | - 4 97 | NUM_BASIC_BLOCKS: 4 98 | CAT_OUTPUT: 99 | - True 100 | INIT_WEIGHTS: True 101 | NAME: pose_higher_hrnet 102 | NUM_JOINTS: 14 103 | PRETRAINED: 'models/pytorch/imagenet/hrnet_w48-8ef0771d.pth' 104 | TAG_PER_JOINT: True 105 | TEST: 106 | FLIP_TEST: True 107 | IMAGES_PER_GPU: 1 108 | MODEL_FILE: '' 109 | SCALE_FACTOR: [1] 110 | DETECTION_THRESHOLD: 0.1 111 | WITH_HEATMAPS: (True, True) 112 | WITH_AE: (True, False) 113 | PROJECT2IMAGE: True 114 | NMS_KERNEL: 5 115 | NMS_PADDING: 2 116 | TRAIN: 117 | BEGIN_EPOCH: 0 118 | CHECKPOINT: '' 119 | END_EPOCH: 300 120 | GAMMA1: 
0.99 121 | GAMMA2: 0.0 122 | IMAGES_PER_GPU: 10 123 | LR: 0.001 124 | LR_FACTOR: 0.1 125 | LR_STEP: [200, 260] 126 | MOMENTUM: 0.9 127 | NESTEROV: False 128 | OPTIMIZER: adam 129 | RESUME: False 130 | SHUFFLE: True 131 | WD: 0.0001 132 | WORKERS: 4 133 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/mobilenet/mobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 30 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 256 18 | OUTPUT_SIZE: [64, 128] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_mobilenet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 4e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/mobilenet/supermobile.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 50 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 
0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_supermobilenet 59 | NUM_JOINTS: 14 60 | PRETRAINED: 'pretrained_models/crowdpose-pretrain.pth.tar' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 2400 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 4e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [36000, 40000] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/resnet/resnet.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 30 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 256 18 | OUTPUT_SIZE: [64, 128] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 16 51 | - 24 52 | - 24 53 | NUM_DECONV_KERNELS: 54 | - 3 55 | - 3 56 | - 3 57 | INIT_WEIGHTS: True 58 | NAME: pose_resnet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 2e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/resnet/superresnet.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 30 7 | CUDNN: 8 | 
BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '/dataset/crowdpose' 27 | TEST: search 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 3 55 | - 3 56 | - 3 57 | INIT_WEIGHTS: True 58 | NAME: pose_superresnet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 2400 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 2e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [36000, 40000] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | -------------------------------------------------------------------------------- /lib/LitePose/experiments/crowd_pose/simplenet/simplenet.yaml: -------------------------------------------------------------------------------- 1 | AUTO_RESUME: True 2 | DATA_DIR: '' 3 | GPUS: (0,) 4 | LOG_DIR: log 5 | OUTPUT_DIR: output 6 | PRINT_FREQ: 100 7 | CUDNN: 8 | BENCHMARK: True 9 | DETERMINISTIC: False 10 | ENABLED: True 11 | DATASET: 12 | SIGMA: 2 13 | DATASET: crowd_pose_kpt 14 | DATASET_TEST: crowd_pose 15 | DATA_FORMAT: jpg 16 | FLIP: 0.5 17 | INPUT_SIZE: 512 18 | OUTPUT_SIZE: [128, 256] 19 | MAX_NUM_PEOPLE: 30 20 | MAX_ROTATION: 30 21 | MAX_SCALE: 1.5 22 | SCALE_TYPE: 'short' 23 | MAX_TRANSLATE: 40 24 | MIN_SCALE: 0.75 25 | NUM_JOINTS: 14 26 | ROOT: '../data/crowd_pose' 27 | TEST: test 28 | TRAIN: trainval 29 | DEBUG: 30 | DEBUG: True 31 | SAVE_BATCH_IMAGES_GT: False 32 | SAVE_BATCH_IMAGES_PRED: False 33 | SAVE_HEATMAPS_GT: True 34 | SAVE_HEATMAPS_PRED: True 35 | SAVE_TAGMAPS_PRED: True 36 | LOSS: 37 | NUM_STAGES: 2 38 | AE_LOSS_TYPE: exp 39 | WITH_AE_LOSS: [True, False] 40 | PUSH_LOSS_FACTOR: [0.001, 0.001] 41 | PULL_LOSS_FACTOR: [0.001, 0.001] 42 | WITH_HEATMAPS_LOSS: [True, True] 43 | HEATMAPS_LOSS_FACTOR: [1.0, 1.0] 44 | MODEL: 45 | EXTRA: 46 | FINAL_CONV_KERNEL: 1 47 | PRETRAINED_LAYERS: ['*'] 48 | NUM_DECONV_LAYERS: 3 49 | NUM_DECONV_FILTERS: 50 | - 64 51 | - 48 52 | - 32 53 | NUM_DECONV_KERNELS: 54 | - 4 55 | - 4 56 | - 4 57 | INIT_WEIGHTS: True 58 | NAME: pose_simplenet 59 | NUM_JOINTS: 14 60 | PRETRAINED: '' 61 | TAG_PER_JOINT: True 62 | TEST: 63 | FLIP_TEST: True 64 | IMAGES_PER_GPU: 1 65 | MODEL_FILE: '' 66 | SCALE_FACTOR: [1] 67 | DETECTION_THRESHOLD: 0.1 68 | WITH_HEATMAPS: (True, True) 69 | WITH_AE: (True, False) 
70 | PROJECT2IMAGE: True 71 | NMS_KERNEL: 5 72 | NMS_PADDING: 2 73 | TRAIN: 74 | BEGIN_EPOCH: 0 75 | CHECKPOINT: '' 76 | END_EPOCH: 500 77 | GAMMA1: 0.99 78 | GAMMA2: 0.0 79 | IMAGES_PER_GPU: 16 80 | LR: 1e-3 81 | LR_FACTOR: 0.1 82 | LR_STEP: [350, 480] 83 | MOMENTUM: 0.9 84 | NESTEROV: False 85 | OPTIMIZER: adam 86 | RESUME: False 87 | SHUFFLE: True 88 | WD: 1e-4 89 | WORKERS: 4 90 | 91 | 92 | 93 | -------------------------------------------------------------------------------- /lib/LitePose/lib/arch_manager.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import copy 4 | 5 | def rand(c): 6 | return random.randint(0, c - 1) 7 | 8 | def _make_divisible(v, divisor, min_value=None): 9 | if min_value is None: 10 | min_value = divisor 11 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 12 | # Make sure that round down does not go down by more than 10%. 13 | if new_v < 0.9 * v: 14 | new_v += divisor 15 | return new_v 16 | 17 | 18 | class ArchManager: 19 | def __init__(self, cfg): 20 | self.cfg = cfg 21 | self.expansion = [6] 22 | self.kernel_size = [7] 23 | self.input_channel = 24 24 | self.width_mult = [1.0, 0.75, 0.5, 0.25] 25 | self.deconv_setting = cfg.MODEL.EXTRA.NUM_DECONV_FILTERS 26 | self.is_search = False 27 | self.search_arch = None 28 | self.arch_setting = [ 29 | # c, n, s 30 | [32, 4, 2], 31 | [64, 6, 2], 32 | [96, 8, 2], 33 | [160, 8, 1] 34 | ] 35 | 36 | def rand_kernel_size(self): 37 | l = len(self.kernel_size) 38 | return self.kernel_size[rand(l)] 39 | 40 | def rand_expansion(self): 41 | l = len(self.expansion) 42 | return self.expansion[rand(l)] 43 | 44 | def rand_channel(self, c): 45 | l = len(self.width_mult) 46 | new_c = c * self.width_mult[rand(l)] 47 | return _make_divisible(new_c, 8) 48 | 49 | def random_sample(self): 50 | if self.is_search == True: 51 | return self.search_arch 52 | cfg_arch = {} 53 | cfg_arch['img_size'] = 256 + 64 * rand(5) 54 | cfg_arch['input_channel'] = self.rand_channel(self.input_channel) 55 | cfg_arch['deconv_setting'] = [] 56 | for i in range(len(self.deconv_setting)): 57 | cfg_arch['deconv_setting'].append(self.rand_channel(self.deconv_setting[i])) 58 | cfg_arch['backbone_setting'] = [] 59 | for i in range(len(self.arch_setting)): 60 | stage = {} 61 | c, n, s = self.arch_setting[i] 62 | stage['num_blocks'] = n 63 | stage['stride'] = s 64 | stage['channel'] = self.rand_channel(c) 65 | stage['block_setting'] = [] 66 | for j in range(stage['num_blocks']): 67 | stage['block_setting'].append([6, 7]) 68 | cfg_arch['backbone_setting'].append(stage) 69 | return cfg_arch 70 | 71 | def fixed_sample(self, reso=256, ratio=0.5): 72 | cfg_arch = {} 73 | cfg_arch['img_size'] = reso 74 | cfg_arch['input_channel'] = _make_divisible(self.input_channel * ratio, 8) 75 | cfg_arch['deconv_setting'] = [] 76 | for i in range(len(self.deconv_setting)): 77 | cfg_arch['deconv_setting'].append(_make_divisible(self.deconv_setting[i] * ratio, 8)) 78 | cfg_arch['backbone_setting'] = [] 79 | for i in range(len(self.arch_setting)): 80 | stage = {} 81 | c, n, s = self.arch_setting[i] 82 | stage['num_blocks'] = n 83 | stage['stride'] = s 84 | stage['channel'] = _make_divisible(c * ratio, 8) 85 | stage['block_setting'] = [] 86 | for j in range(stage['num_blocks']): 87 | stage['block_setting'].append([6, 7]) 88 | cfg_arch['backbone_setting'].append(stage) 89 | return cfg_arch 90 | 91 | -------------------------------------------------------------------------------- 
/lib/LitePose/lib/config/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .default import _C as cfg 8 | from .default import update_config 9 | from .default import check_config 10 | from .default import update_config_dict 11 | -------------------------------------------------------------------------------- /lib/LitePose/lib/config/models.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | from yacs.config import CfgNode as CN 12 | 13 | 14 | # pose_multi_resoluton_net related params 15 | POSE_HIGHER_RESOLUTION_NET = CN() 16 | POSE_HIGHER_RESOLUTION_NET.PRETRAINED_LAYERS = ['*'] 17 | POSE_HIGHER_RESOLUTION_NET.STEM_INPLANES = 64 18 | POSE_HIGHER_RESOLUTION_NET.FINAL_CONV_KERNEL = 1 19 | 20 | POSE_HIGHER_RESOLUTION_NET.STAGE1 = CN() 21 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_MODULES = 1 22 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BRANCHES = 1 23 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_BLOCKS = [4] 24 | POSE_HIGHER_RESOLUTION_NET.STAGE1.NUM_CHANNELS = [64] 25 | POSE_HIGHER_RESOLUTION_NET.STAGE1.BLOCK = 'BOTTLENECK' 26 | POSE_HIGHER_RESOLUTION_NET.STAGE1.FUSE_METHOD = 'SUM' 27 | 28 | POSE_HIGHER_RESOLUTION_NET.STAGE2 = CN() 29 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_MODULES = 1 30 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BRANCHES = 2 31 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_BLOCKS = [4, 4] 32 | POSE_HIGHER_RESOLUTION_NET.STAGE2.NUM_CHANNELS = [24, 48] 33 | POSE_HIGHER_RESOLUTION_NET.STAGE2.BLOCK = 'BOTTLENECK' 34 | POSE_HIGHER_RESOLUTION_NET.STAGE2.FUSE_METHOD = 'SUM' 35 | 36 | POSE_HIGHER_RESOLUTION_NET.STAGE3 = CN() 37 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_MODULES = 1 38 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BRANCHES = 3 39 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_BLOCKS = [4, 4, 4] 40 | POSE_HIGHER_RESOLUTION_NET.STAGE3.NUM_CHANNELS = [24, 48, 92] 41 | POSE_HIGHER_RESOLUTION_NET.STAGE3.BLOCK = 'BOTTLENECK' 42 | POSE_HIGHER_RESOLUTION_NET.STAGE3.FUSE_METHOD = 'SUM' 43 | 44 | POSE_HIGHER_RESOLUTION_NET.STAGE4 = CN() 45 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_MODULES = 1 46 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BRANCHES = 4 47 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_BLOCKS = [4, 4, 4, 4] 48 | POSE_HIGHER_RESOLUTION_NET.STAGE4.NUM_CHANNELS = [24, 48, 92, 192] 49 | POSE_HIGHER_RESOLUTION_NET.STAGE4.BLOCK = 'BOTTLENECK' 50 | POSE_HIGHER_RESOLUTION_NET.STAGE4.FUSE_METHOD = 'SUM' 51 | 52 | POSE_HIGHER_RESOLUTION_NET.DECONV = CN() 53 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_DCONVS = 2 54 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_CHANNELS = [32, 32] 55 | POSE_HIGHER_RESOLUTION_NET.DECONV.NUM_BASIC_BLOCKS = 4 56 | POSE_HIGHER_RESOLUTION_NET.DECONV.KERNEL_SIZE = [2, 2] 57 | POSE_HIGHER_RESOLUTION_NET.DECONV.CAT_OUTPUT = [True, True] 58 | 59 | 60 | MODEL_EXTRAS = { 61 | 'pose_multi_resolution_net_v16': 
POSE_HIGHER_RESOLUTION_NET, 62 | } 63 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/COCOKeypoints.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import logging 13 | 14 | import numpy as np 15 | 16 | import pycocotools 17 | from .COCODataset import CocoDataset 18 | from .target_generators import HeatmapGenerator 19 | 20 | 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | class CocoKeypoints(CocoDataset): 25 | def __init__(self, 26 | cfg, 27 | dataset_name, 28 | remove_images_without_annotations, 29 | heatmap_generator, 30 | joints_generator, 31 | transforms=None): 32 | super().__init__(cfg.DATASET.ROOT, 33 | dataset_name, 34 | cfg.DATASET.DATA_FORMAT) 35 | 36 | if cfg.DATASET.WITH_CENTER: 37 | assert cfg.DATASET.NUM_JOINTS == 18, 'Number of joint with center for COCO is 18' 38 | else: 39 | assert cfg.DATASET.NUM_JOINTS == 17, 'Number of joint for COCO is 17' 40 | 41 | self.num_scales = self._init_check(heatmap_generator, joints_generator) 42 | 43 | self.num_joints = cfg.DATASET.NUM_JOINTS 44 | self.with_center = cfg.DATASET.WITH_CENTER 45 | self.num_joints_without_center = self.num_joints - 1 \ 46 | if self.with_center else self.num_joints 47 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA 48 | self.base_sigma = cfg.DATASET.BASE_SIGMA 49 | self.base_size = cfg.DATASET.BASE_SIZE 50 | self.int_sigma = cfg.DATASET.INT_SIGMA 51 | 52 | if remove_images_without_annotations: 53 | self.ids = [ 54 | img_id 55 | for img_id in self.ids 56 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 57 | ] 58 | 59 | self.transforms = transforms 60 | self.heatmap_generator = heatmap_generator 61 | self.joints_generator = joints_generator 62 | 63 | def __getitem__(self, idx): 64 | img, anno = super().__getitem__(idx) 65 | 66 | mask = self.get_mask(anno, idx) 67 | 68 | anno = [ 69 | obj for obj in anno 70 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 71 | ] 72 | 73 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint 74 | joints = self.get_joints(anno) 75 | 76 | mask_list = [mask.copy() for _ in range(self.num_scales)] 77 | joints_list = [joints.copy() for _ in range(self.num_scales)] 78 | target_list = list() 79 | 80 | if self.transforms: 81 | img, mask_list, joints_list = self.transforms( 82 | img, mask_list, joints_list 83 | ) 84 | 85 | for scale_id in range(self.num_scales): 86 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id]) 87 | joints_t = self.joints_generator[scale_id](joints_list[scale_id]) 88 | 89 | target_list.append(target_t.astype(np.float32)) 90 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32) 91 | joints_list[scale_id] = joints_t.astype(np.int32) 92 | 93 | return img, target_list, mask_list, joints_list 94 | 95 | def get_joints(self, anno): 96 | num_people = len(anno) 97 | 98 | if self.scale_aware_sigma: 99 | joints = np.zeros((num_people, self.num_joints, 4)) 100 | else: 101 | joints = np.zeros((num_people, 
self.num_joints, 3)) 102 | 103 | for i, obj in enumerate(anno): 104 | joints[i, :self.num_joints_without_center, :3] = \ 105 | np.array(obj['keypoints']).reshape([-1, 3]) 106 | if self.with_center: 107 | joints_sum = np.sum(joints[i, :-1, :2], axis=0) 108 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0]) 109 | if num_vis_joints > 0: 110 | joints[i, -1, :2] = joints_sum / num_vis_joints 111 | joints[i, -1, 2] = 1 112 | if self.scale_aware_sigma: 113 | # get person box 114 | box = obj['bbox'] 115 | size = max(box[2], box[3]) 116 | sigma = size / self.base_size * self.base_sigma 117 | if self.int_sigma: 118 | sigma = int(np.round(sigma + 0.5)) 119 | assert sigma > 0, sigma 120 | joints[i, :, 3] = sigma 121 | 122 | return joints 123 | 124 | def get_mask(self, anno, idx): 125 | coco = self.coco 126 | img_info = coco.loadImgs(self.ids[idx])[0] 127 | 128 | m = np.zeros((img_info['height'], img_info['width'])) 129 | 130 | for obj in anno: 131 | if obj['iscrowd']: 132 | rle = pycocotools.mask.frPyObjects( 133 | obj['segmentation'], img_info['height'], img_info['width']) 134 | m += pycocotools.mask.decode(rle) 135 | elif obj['num_keypoints'] == 0: 136 | rles = pycocotools.mask.frPyObjects( 137 | obj['segmentation'], img_info['height'], img_info['width']) 138 | for rle in rles: 139 | m += pycocotools.mask.decode(rle) 140 | 141 | return m < 0.5 142 | 143 | def _init_check(self, heatmap_generator, joints_generator): 144 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple' 145 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple' 146 | assert len(heatmap_generator) == len(joints_generator), \ 147 | 'heatmap_generator and joints_generator should have same length,'\ 148 | 'got {} vs {}.'.format( 149 | len(heatmap_generator), len(joints_generator) 150 | ) 151 | return len(heatmap_generator) 152 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/CrowdPoseKeypoints.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bowen Cheng (bcheng9@illinois.edu) and Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import logging 12 | 13 | import numpy as np 14 | 15 | import crowdposetools 16 | from .CrowdPoseDataset import CrowdPoseDataset 17 | from .target_generators import HeatmapGenerator 18 | 19 | 20 | logger = logging.getLogger(__name__) 21 | 22 | 23 | class CrowdPoseKeypoints(CrowdPoseDataset): 24 | def __init__(self, 25 | cfg, 26 | dataset_name, 27 | remove_images_without_annotations, 28 | heatmap_generator, 29 | joints_generator, 30 | transforms=None): 31 | super().__init__(cfg.DATASET.ROOT, 32 | dataset_name, 33 | cfg.DATASET.DATA_FORMAT) 34 | 35 | if cfg.DATASET.WITH_CENTER: 36 | assert cfg.DATASET.NUM_JOINTS == 15, 'Number of joint with center for CrowdPose is 15' 37 | else: 38 | assert cfg.DATASET.NUM_JOINTS == 14, 'Number of joint for CrowdPose is 14' 39 | 40 | self.num_scales = self._init_check(heatmap_generator, joints_generator) 41 | 42 | self.num_joints = cfg.DATASET.NUM_JOINTS 43 | self.with_center = cfg.DATASET.WITH_CENTER 44 | self.num_joints_without_center = self.num_joints - 1 \ 45 | if self.with_center else self.num_joints 46 | self.scale_aware_sigma = cfg.DATASET.SCALE_AWARE_SIGMA 47 | self.base_sigma = cfg.DATASET.BASE_SIGMA 48 | self.base_size = cfg.DATASET.BASE_SIZE 49 | self.int_sigma = cfg.DATASET.INT_SIGMA 50 | 51 | if remove_images_without_annotations: 52 | self.ids = [ 53 | img_id 54 | for img_id in self.ids 55 | if len(self.coco.getAnnIds(imgIds=img_id, iscrowd=None)) > 0 56 | ] 57 | 58 | self.transforms = transforms 59 | self.heatmap_generator = heatmap_generator 60 | self.joints_generator = joints_generator 61 | 62 | def __getitem__(self, idx): 63 | img, anno = super().__getitem__(idx) 64 | 65 | mask = self.get_mask(anno, idx) 66 | 67 | anno = [ 68 | obj for obj in anno 69 | if obj['iscrowd'] == 0 or obj['num_keypoints'] > 0 70 | ] 71 | 72 | # TODO(bowen): to generate scale-aware sigma, modify `get_joints` to associate a sigma to each joint 73 | joints = self.get_joints(anno) 74 | 75 | mask_list = [mask.copy() for _ in range(self.num_scales)] 76 | joints_list = [joints.copy() for _ in range(self.num_scales)] 77 | target_list = list() 78 | 79 | if self.transforms: 80 | img, mask_list, joints_list = self.transforms( 81 | img, mask_list, joints_list 82 | ) 83 | 84 | for scale_id in range(self.num_scales): 85 | target_t = self.heatmap_generator[scale_id](joints_list[scale_id]) 86 | joints_t = self.joints_generator[scale_id](joints_list[scale_id]) 87 | 88 | target_list.append(target_t.astype(np.float32)) 89 | mask_list[scale_id] = mask_list[scale_id].astype(np.float32) 90 | joints_list[scale_id] = joints_t.astype(np.int32) 91 | 92 | return img, target_list, mask_list, joints_list 93 | 94 | def get_joints(self, anno): 95 | num_people = len(anno) 96 | 97 | if self.scale_aware_sigma: 98 | joints = np.zeros((num_people, self.num_joints, 4)) 99 | else: 100 | joints = np.zeros((num_people, self.num_joints, 3)) 101 | 102 | for i, obj in enumerate(anno): 103 | joints[i, :self.num_joints_without_center, :3] = \ 104 | np.array(obj['keypoints']).reshape([-1, 3]) 105 | if self.with_center: 106 | joints_sum = np.sum(joints[i, :-1, :2], axis=0) 107 | num_vis_joints = len(np.nonzero(joints[i, :-1, 2])[0]) 108 | if num_vis_joints > 0: 109 | joints[i, -1, :2] = joints_sum / 
num_vis_joints 110 | joints[i, -1, 2] = 1 111 | if self.scale_aware_sigma: 112 | # get person box 113 | box = obj['bbox'] 114 | size = max(box[2], box[3]) 115 | sigma = size / self.base_size * self.base_sigma 116 | if self.int_sigma: 117 | sigma = int(np.round(sigma + 0.5)) 118 | assert sigma > 0, sigma 119 | joints[i, :, 3] = sigma 120 | 121 | return joints 122 | 123 | def get_mask(self, anno, idx): 124 | coco = self.coco 125 | img_info = coco.loadImgs(self.ids[idx])[0] 126 | 127 | m = np.zeros((img_info['height'], img_info['width'])) 128 | 129 | return m < 0.5 130 | 131 | def _init_check(self, heatmap_generator, joints_generator): 132 | assert isinstance(heatmap_generator, (list, tuple)), 'heatmap_generator should be a list or tuple' 133 | assert isinstance(joints_generator, (list, tuple)), 'joints_generator should be a list or tuple' 134 | assert len(heatmap_generator) == len(joints_generator), \ 135 | 'heatmap_generator and joints_generator should have same length,'\ 136 | 'got {} vs {}.'.format( 137 | len(heatmap_generator), len(joints_generator) 138 | ) 139 | return len(heatmap_generator) 140 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from .COCOKeypoints import CocoKeypoints as coco 8 | # from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose 9 | from .build import make_dataloader 10 | from .build import make_test_dataloader 11 | from .build import make_train_dataloader 12 | 13 | # dataset dependent configuration for visualization 14 | coco_part_labels = [ 15 | 'nose', 'eye_l', 'eye_r', 'ear_l', 'ear_r', 16 | 'sho_l', 'sho_r', 'elb_l', 'elb_r', 'wri_l', 'wri_r', 17 | 'hip_l', 'hip_r', 'kne_l', 'kne_r', 'ank_l', 'ank_r' 18 | ] 19 | coco_part_idx = { 20 | b: a for a, b in enumerate(coco_part_labels) 21 | } 22 | coco_part_orders = [ 23 | ('nose', 'eye_l'), ('eye_l', 'eye_r'), ('eye_r', 'nose'), 24 | ('eye_l', 'ear_l'), ('eye_r', 'ear_r'), ('ear_l', 'sho_l'), 25 | ('ear_r', 'sho_r'), ('sho_l', 'sho_r'), ('sho_l', 'hip_l'), 26 | ('sho_r', 'hip_r'), ('hip_l', 'hip_r'), ('sho_l', 'elb_l'), 27 | ('elb_l', 'wri_l'), ('sho_r', 'elb_r'), ('elb_r', 'wri_r'), 28 | ('hip_l', 'kne_l'), ('kne_l', 'ank_l'), ('hip_r', 'kne_r'), 29 | ('kne_r', 'ank_r') 30 | ] 31 | 32 | crowd_pose_part_labels = [ 33 | 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 34 | 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 35 | 'left_knee', 'right_knee', 'left_ankle', 'right_ankle', 36 | 'head', 'neck' 37 | ] 38 | crowd_pose_part_idx = { 39 | b: a for a, b in enumerate(crowd_pose_part_labels) 40 | } 41 | crowd_pose_part_orders = [ 42 | ('head', 'neck'), ('neck', 'left_shoulder'), ('neck', 'right_shoulder'), 43 | ('left_shoulder', 'right_shoulder'), ('left_shoulder', 'left_hip'), 44 | ('right_shoulder', 'right_hip'), ('left_hip', 'right_hip'), ('left_shoulder', 'left_elbow'), 45 | ('left_elbow', 'left_wrist'), ('right_shoulder', 'right_elbow'), ('right_elbow', 'right_wrist'), 46 | ('left_hip', 'left_knee'), ('left_knee', 'left_ankle'), ('right_hip', 'right_knee'), 47 | ('right_knee', 'right_ankle') 48 | ] 49 | 50 | VIS_CONFIG = { 51 | 'COCO': { 52 | 
'part_labels': coco_part_labels, 53 | 'part_idx': coco_part_idx, 54 | 'part_orders': coco_part_orders 55 | }, 56 | 'CROWDPOSE': { 57 | 'part_labels': crowd_pose_part_labels, 58 | 'part_idx': crowd_pose_part_idx, 59 | 'part_orders': crowd_pose_part_orders 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/build.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import torch.utils.data 13 | 14 | from .COCODataset import CocoDataset as coco 15 | from .COCOKeypoints import CocoKeypoints as coco_kpt 16 | # # from .CrowdPoseDataset import CrowdPoseDataset as crowd_pose 17 | # from .CrowdPoseKeypoints import CrowdPoseKeypoints as crowd_pose_kpt 18 | from .transforms import build_transforms 19 | from .target_generators import HeatmapGenerator 20 | from .target_generators import ScaleAwareHeatmapGenerator 21 | from .target_generators import JointsGenerator 22 | # from .random_resolution_dataloader import RandomResolutionDataLoader 23 | 24 | 25 | def build_dataset(cfg, is_train): 26 | transforms = build_transforms(cfg, is_train) 27 | 28 | if cfg.DATASET.SCALE_AWARE_SIGMA: 29 | _HeatmapGenerator = ScaleAwareHeatmapGenerator 30 | else: 31 | _HeatmapGenerator = HeatmapGenerator 32 | 33 | heatmap_generator = [ 34 | _HeatmapGenerator( 35 | output_size, cfg.DATASET.NUM_JOINTS, cfg.DATASET.SIGMA 36 | ) for output_size in cfg.DATASET.OUTPUT_SIZE 37 | ] 38 | joints_generator = [ 39 | JointsGenerator( 40 | cfg.DATASET.MAX_NUM_PEOPLE, 41 | cfg.DATASET.NUM_JOINTS, 42 | output_size, 43 | cfg.MODEL.TAG_PER_JOINT 44 | ) for output_size in cfg.DATASET.OUTPUT_SIZE 45 | ] 46 | 47 | dataset_name = cfg.DATASET.TRAIN if is_train else cfg.DATASET.TEST 48 | 49 | dataset = eval(cfg.DATASET.DATASET)( 50 | cfg, 51 | dataset_name, 52 | is_train, 53 | heatmap_generator, 54 | joints_generator, 55 | transforms 56 | ) 57 | 58 | return dataset 59 | 60 | 61 | def make_dataloader(cfg, is_train=True, distributed=False): 62 | if is_train: 63 | images_per_gpu = cfg.TRAIN.IMAGES_PER_GPU 64 | shuffle = True 65 | else: 66 | images_per_gpu = cfg.TEST.IMAGES_PER_GPU 67 | shuffle = False 68 | images_per_batch = images_per_gpu * len(cfg.GPUS) 69 | 70 | dataset = build_dataset(cfg, is_train) 71 | 72 | if is_train and distributed: 73 | train_sampler = torch.utils.data.distributed.DistributedSampler( 74 | dataset 75 | ) 76 | shuffle = False 77 | else: 78 | train_sampler = None 79 | 80 | data_loader = torch.utils.data.DataLoader( 81 | # data_loader = RandomResolutionDataLoader( 82 | dataset, 83 | batch_size=images_per_batch, 84 | shuffle=shuffle, 85 | num_workers=cfg.WORKERS, 86 | pin_memory=cfg.PIN_MEMORY, 87 | sampler=train_sampler 88 | ) 89 | 90 | return data_loader 91 | 92 | def make_train_dataloader(cfg): 93 | is_train = True 94 | images_per_gpu = cfg.TRAIN.IMAGES_PER_GPU 95 | images_per_batch = images_per_gpu * len(cfg.GPUS) 96 | dataset = build_dataset(cfg, is_train) 97 | data_loader = torch.utils.data.DataLoader( 98 | dataset, 99 | batch_size=images_per_batch, 100 
| shuffle=False, 101 | num_workers=cfg.WORKERS, 102 | pin_memory=False 103 | ) 104 | return data_loader, dataset 105 | 106 | 107 | def make_test_dataloader(cfg): 108 | transforms = None 109 | dataset = eval(cfg.DATASET.DATASET_TEST)( 110 | cfg.DATASET.ROOT, 111 | cfg.DATASET.TEST, 112 | cfg.DATASET.DATA_FORMAT, 113 | transforms 114 | ) 115 | 116 | data_loader = torch.utils.data.DataLoader( 117 | dataset, 118 | batch_size=1, 119 | shuffle=False, 120 | num_workers=0, 121 | pin_memory=False 122 | ) 123 | 124 | return data_loader, dataset 125 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/target_generators/__init__.py: -------------------------------------------------------------------------------- 1 | from .target_generators import HeatmapGenerator 2 | from .target_generators import ScaleAwareHeatmapGenerator 3 | from .target_generators import JointsGenerator 4 | 5 | __all__ = ['HeatmapGenerator', 'ScaleAwareHeatmapGenerator', 'JointsGenerator'] 6 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/target_generators/target_generators.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import numpy as np 13 | 14 | 15 | class HeatmapGenerator(): 16 | def __init__(self, output_res, num_joints, sigma=-1): 17 | self.output_res = output_res 18 | self.num_joints = num_joints 19 | if sigma < 0: 20 | sigma = self.output_res/64 21 | self.sigma = sigma 22 | size = 6*sigma + 3 23 | x = np.arange(0, size, 1, float) 24 | y = x[:, np.newaxis] 25 | x0, y0 = 3*sigma + 1, 3*sigma + 1 26 | self.g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 27 | 28 | def __call__(self, joints): 29 | hms = np.zeros((self.num_joints, self.output_res, self.output_res), 30 | dtype=np.float32) 31 | sigma = self.sigma 32 | for p in joints: 33 | for idx, pt in enumerate(p): 34 | if pt[2] > 0: 35 | x, y = int(pt[0]), int(pt[1]) 36 | if x < 0 or y < 0 or \ 37 | x >= self.output_res or y >= self.output_res: 38 | continue 39 | 40 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1)) 41 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2)) 42 | 43 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0] 44 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1] 45 | 46 | cc, dd = max(0, ul[0]), min(br[0], self.output_res) 47 | aa, bb = max(0, ul[1]), min(br[1], self.output_res) 48 | hms[idx, aa:bb, cc:dd] = np.maximum( 49 | hms[idx, aa:bb, cc:dd], self.g[a:b, c:d]) 50 | return hms 51 | 52 | 53 | class ScaleAwareHeatmapGenerator(): 54 | def __init__(self, output_res, num_joints): 55 | self.output_res = output_res 56 | self.num_joints = num_joints 57 | 58 | def get_gaussian_kernel(self, sigma): 59 | size = 6*sigma + 3 60 | x = np.arange(0, size, 1, float) 61 | y = x[:, np.newaxis] 62 | x0, y0 = 3*sigma + 1, 3*sigma + 1 63 | g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2)) 64 | return g 65 | 66 | def __call__(self, joints): 67 | hms = 
np.zeros((self.num_joints, self.output_res, self.output_res), 68 | dtype=np.float32) 69 | for p in joints: 70 | sigma = p[0, 3] 71 | g = self.get_gaussian_kernel(sigma) 72 | for idx, pt in enumerate(p): 73 | if pt[2] > 0: 74 | x, y = int(pt[0]), int(pt[1]) 75 | if x < 0 or y < 0 or \ 76 | x >= self.output_res or y >= self.output_res: 77 | continue 78 | 79 | ul = int(np.round(x - 3 * sigma - 1)), int(np.round(y - 3 * sigma - 1)) 80 | br = int(np.round(x + 3 * sigma + 2)), int(np.round(y + 3 * sigma + 2)) 81 | 82 | c, d = max(0, -ul[0]), min(br[0], self.output_res) - ul[0] 83 | a, b = max(0, -ul[1]), min(br[1], self.output_res) - ul[1] 84 | 85 | cc, dd = max(0, ul[0]), min(br[0], self.output_res) 86 | aa, bb = max(0, ul[1]), min(br[1], self.output_res) 87 | hms[idx, aa:bb, cc:dd] = np.maximum( 88 | hms[idx, aa:bb, cc:dd], g[a:b, c:d]) 89 | return hms 90 | 91 | 92 | class JointsGenerator(): 93 | def __init__(self, max_num_people, num_joints, output_res, tag_per_joint): 94 | self.max_num_people = max_num_people 95 | self.num_joints = num_joints 96 | self.output_res = output_res 97 | self.tag_per_joint = tag_per_joint 98 | 99 | def __call__(self, joints): 100 | visible_nodes = np.zeros((self.max_num_people, self.num_joints, 2)) 101 | output_res = self.output_res 102 | for i in range(len(joints)): 103 | tot = 0 104 | for idx, pt in enumerate(joints[i]): 105 | x, y = int(pt[0]), int(pt[1]) 106 | if pt[2] > 0 and x >= 0 and y >= 0 \ 107 | and x < self.output_res and y < self.output_res: 108 | if self.tag_per_joint: 109 | visible_nodes[i][tot] = \ 110 | (idx * output_res**2 + y * output_res + x, 1) 111 | else: 112 | visible_nodes[i][tot] = \ 113 | (y * output_res + x, 1) 114 | tot += 1 115 | return visible_nodes 116 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .transforms import Compose 2 | from .transforms import RandomAffineTransform 3 | from .transforms import ToTensor 4 | from .transforms import Normalize 5 | from .transforms import RandomHorizontalFlip 6 | 7 | from .build import build_transforms 8 | from .build import FLIP_CONFIG 9 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/transforms/build.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | from . import transforms as T 13 | 14 | 15 | FLIP_CONFIG = { 16 | 'COCO': [ 17 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15 18 | ], 19 | 'COCO_WITH_CENTER': [ 20 | 0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15, 17 21 | ], 22 | 'CROWDPOSE': [ 23 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13 24 | ], 25 | 'CROWDPOSE_WITH_CENTER': [ 26 | 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 12, 13, 14 27 | ] 28 | } 29 | 30 | 31 | def build_transforms(cfg, is_train=True): 32 | assert is_train is True, 'Please only use build_transforms for training.' 
33 | assert isinstance(cfg.DATASET.OUTPUT_SIZE, (list, tuple)), 'DATASET.OUTPUT_SIZE should be list or tuple' 34 | if is_train: 35 | max_rotation = cfg.DATASET.MAX_ROTATION 36 | min_scale = cfg.DATASET.MIN_SCALE 37 | max_scale = cfg.DATASET.MAX_SCALE 38 | max_translate = cfg.DATASET.MAX_TRANSLATE 39 | input_size = cfg.DATASET.INPUT_SIZE 40 | output_size = cfg.DATASET.OUTPUT_SIZE 41 | flip = cfg.DATASET.FLIP 42 | scale_type = cfg.DATASET.SCALE_TYPE 43 | else: 44 | scale_type = cfg.DATASET.SCALE_TYPE 45 | max_rotation = 0 46 | min_scale = 1 47 | max_scale = 1 48 | max_translate = 0 49 | input_size = 512 50 | output_size = [128] 51 | flip = 0 52 | 53 | # coco_flip_index = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 54 | # if cfg.DATASET.WITH_CENTER: 55 | # coco_flip_index.append(17) 56 | if 'coco' in cfg.DATASET.DATASET: 57 | dataset_name = 'COCO' 58 | elif 'crowd_pose' in cfg.DATASET.DATASET: 59 | dataset_name = 'CROWDPOSE' 60 | else: 61 | raise ValueError('Please implement flip_index for new dataset: %s.' % cfg.DATASET.DATASET) 62 | if cfg.DATASET.WITH_CENTER: 63 | coco_flip_index = FLIP_CONFIG[dataset_name + '_WITH_CENTER'] 64 | else: 65 | coco_flip_index = FLIP_CONFIG[dataset_name] 66 | 67 | transforms = T.Compose( 68 | [ 69 | T.RandomAffineTransform( 70 | input_size, 71 | output_size, 72 | max_rotation, 73 | min_scale, 74 | max_scale, 75 | scale_type, 76 | max_translate, 77 | scale_aware_sigma=cfg.DATASET.SCALE_AWARE_SIGMA 78 | ), 79 | T.RandomHorizontalFlip(coco_flip_index, output_size, flip), 80 | T.ToTensor(), 81 | T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 82 | ] 83 | ) 84 | 85 | return transforms 86 | -------------------------------------------------------------------------------- /lib/LitePose/lib/dataset/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # Modified by Bowen Cheng (bcheng9@illinois.edu) 6 | # ------------------------------------------------------------------------------ 7 | 8 | from __future__ import absolute_import 9 | from __future__ import division 10 | from __future__ import print_function 11 | 12 | import random 13 | 14 | import cv2 15 | import numpy as np 16 | import torch 17 | import torchvision 18 | from torchvision.transforms import functional as F 19 | 20 | 21 | class Compose(object): 22 | def __init__(self, transforms): 23 | self.transforms = transforms 24 | 25 | def __call__(self, image, mask, joints): 26 | for t in self.transforms: 27 | image, mask, joints = t(image, mask, joints) 28 | return image, mask, joints 29 | 30 | def __repr__(self): 31 | format_string = self.__class__.__name__ + "(" 32 | for t in self.transforms: 33 | format_string += "\n" 34 | format_string += " {0}".format(t) 35 | format_string += "\n)" 36 | return format_string 37 | 38 | 39 | class ToTensor(object): 40 | def __call__(self, image, mask, joints): 41 | return F.to_tensor(image), mask, joints 42 | 43 | 44 | class Normalize(object): 45 | def __init__(self, mean, std): 46 | self.mean = mean 47 | self.std = std 48 | 49 | def __call__(self, image, mask, joints): 50 | image = F.normalize(image, mean=self.mean, std=self.std) 51 | return image, mask, joints 52 | 53 | 54 | class RandomHorizontalFlip(object): 55 | def __init__(self, flip_index, output_size, prob=0.5): 56 | self.flip_index = flip_index 57 | self.prob = prob 58 | self.output_size = output_size if isinstance(output_size, list) \ 59 | else [output_size] 60 | 61 | def __call__(self, image, mask, joints): 62 | assert isinstance(mask, list) 63 | assert isinstance(joints, list) 64 | assert len(mask) == len(joints) 65 | assert len(mask) == len(self.output_size) 66 | 67 | if random.random() < self.prob: 68 | image = image[:, ::-1] - np.zeros_like(image) 69 | for i, _output_size in enumerate(self.output_size): 70 | mask[i] = mask[i][:, ::-1] - np.zeros_like(mask[i]) 71 | joints[i] = joints[i][:, self.flip_index] 72 | joints[i][:, :, 0] = _output_size - joints[i][:, :, 0] - 1 73 | 74 | return image, mask, joints 75 | 76 | 77 | class RandomAffineTransform(object): 78 | def __init__(self, 79 | input_size, 80 | output_size, 81 | max_rotation, 82 | min_scale, 83 | max_scale, 84 | scale_type, 85 | max_translate, 86 | scale_aware_sigma=False): 87 | self.input_size = input_size 88 | self.output_size = output_size if isinstance(output_size, list) \ 89 | else [output_size] 90 | 91 | self.max_rotation = max_rotation 92 | self.min_scale = min_scale 93 | self.max_scale = max_scale 94 | self.scale_type = scale_type 95 | self.max_translate = max_translate 96 | self.scale_aware_sigma = scale_aware_sigma 97 | 98 | def _get_affine_matrix(self, center, scale, res, rot=0): 99 | # Generate transformation matrix 100 | h = 200 * scale 101 | t = np.zeros((3, 3)) 102 | t[0, 0] = float(res[1]) / h 103 | t[1, 1] = float(res[0]) / h 104 | t[0, 2] = res[1] * (-float(center[0]) / h + .5) 105 | t[1, 2] = res[0] * (-float(center[1]) / h + .5) 106 | t[2, 2] = 1 107 | if not rot == 0: 108 | rot = -rot # To match direction of rotation from cropping 109 | rot_mat = np.zeros((3, 3)) 110 | rot_rad = rot * np.pi / 180 111 | sn, cs = np.sin(rot_rad), np.cos(rot_rad) 112 | rot_mat[0, :2] = [cs, -sn] 113 | rot_mat[1, :2] = [sn, cs] 114 | rot_mat[2, 2] = 1 115 | # Need to rotate around center 116 | t_mat = np.eye(3) 117 | t_mat[0, 2] = -res[1]/2 118 | t_mat[1, 2] = -res[0]/2 119 
| t_inv = t_mat.copy() 120 | t_inv[:2, 2] *= -1 121 | t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))) 122 | return t 123 | 124 | def _affine_joints(self, joints, mat): 125 | joints = np.array(joints) 126 | shape = joints.shape 127 | joints = joints.reshape(-1, 2) 128 | return np.dot(np.concatenate( 129 | (joints, joints[:, 0:1]*0+1), axis=1), mat.T).reshape(shape) 130 | 131 | def __call__(self, image, mask, joints): 132 | assert isinstance(mask, list) 133 | assert isinstance(joints, list) 134 | assert len(mask) == len(joints) 135 | assert len(mask) == len(self.output_size) 136 | 137 | height, width = image.shape[:2] 138 | 139 | center = np.array((width/2, height/2)) 140 | if self.scale_type == 'long': 141 | scale = max(height, width)/200 142 | elif self.scale_type == 'short': 143 | scale = min(height, width)/200 144 | else: 145 | raise ValueError('Unkonw scale type: {}'.format(self.scale_type)) 146 | aug_scale = np.random.random() * (self.max_scale - self.min_scale) \ 147 | + self.min_scale 148 | scale *= aug_scale 149 | aug_rot = (np.random.random() * 2 - 1) * self.max_rotation 150 | 151 | if self.max_translate > 0: 152 | dx = np.random.randint( 153 | -self.max_translate*scale, self.max_translate*scale) 154 | dy = np.random.randint( 155 | -self.max_translate*scale, self.max_translate*scale) 156 | center[0] += dx 157 | center[1] += dy 158 | 159 | for i, _output_size in enumerate(self.output_size): 160 | mat_output = self._get_affine_matrix( 161 | center, scale, (_output_size, _output_size), aug_rot 162 | )[:2] 163 | mask[i] = cv2.warpAffine( 164 | (mask[i]*255).astype(np.uint8), mat_output, 165 | (_output_size, _output_size) 166 | ) / 255 167 | mask[i] = (mask[i] > 0.5).astype(np.float32) 168 | 169 | joints[i][:, :, 0:2] = self._affine_joints( 170 | joints[i][:, :, 0:2], mat_output 171 | ) 172 | if self.scale_aware_sigma: 173 | joints[i][:, :, 3] = joints[i][:, :, 3] / aug_scale 174 | 175 | mat_input = self._get_affine_matrix( 176 | center, scale, (self.input_size, self.input_size), aug_rot 177 | )[:2] 178 | image = cv2.warpAffine( 179 | image, mat_input, (self.input_size, self.input_size) 180 | ) 181 | 182 | return image, mask, joints 183 | -------------------------------------------------------------------------------- /lib/LitePose/lib/fp16_utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/robincourant/jaws/8d83a6483a0c481e015d9f4587fef33f013edee9/lib/LitePose/lib/fp16_utils/__init__.py -------------------------------------------------------------------------------- /lib/LitePose/lib/models/__init__.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (Bin.Xiao@microsoft.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import models.pose_higher_hrnet 12 | import models.pose_mobilenet 13 | import models.pose_simplenet 14 | import models.pose_supermobilenet 15 | import models.pose_resnet 16 | import models.pose_superresnet 17 | import models.pose_efficient_hrnet 18 | -------------------------------------------------------------------------------- /lib/LitePose/lib/models/layers/efficient_blocks.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | 7 | def conv(in_channels, out_channels, kernel_size=3, padding=1, bn=True, dilation=1, stride=1, relu=True, bias=True): 8 | modules = [nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding, dilation, bias=bias)] 9 | if bn: 10 | modules.append(nn.BatchNorm2d(out_channels)) 11 | if relu: 12 | modules.append(nn.ReLU(inplace=True)) 13 | return nn.Sequential(*modules) 14 | 15 | def conv_bn(inp, oup, stride, use_batch_norm=True): 16 | #ReLU = nn.ReLU if onnx_compatible else nn.ReLU6 17 | 18 | if use_batch_norm: 19 | return nn.Sequential( 20 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 21 | nn.BatchNorm2d(oup), 22 | nn.ReLU(inplace=True), 23 | ) 24 | else: 25 | return nn.Sequential( 26 | nn.Conv2d(inp, oup, 3, stride, 1, bias=False), 27 | nn.ReLU(inplace=True), 28 | ) 29 | 30 | def conv1(in_channels,out_channels,stride): 31 | return nn.Sequential( 32 | nn.Conv2d(in_channels,out_channels,3,stride,1,bias=False), 33 | nn.BatchNorm2d(out_channels), 34 | nn.ReLU6(inplace=True), 35 | ) 36 | 37 | def conv_pw(in_channels, out_channels): 38 | return nn.Sequential( 39 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 40 | nn.BatchNorm2d(out_channels), 41 | nn.ReLU6(inplace=True), 42 | ) 43 | 44 | def conv_dw_no_bn(in_channels, out_channels, kernel_size=3, padding=1, stride=1, dilation=1): 45 | return nn.Sequential( 46 | nn.Conv2d(in_channels, in_channels, kernel_size, stride, padding, dilation=dilation, groups=in_channels, bias=False), 47 | nn.ELU(inplace=True), 48 | 49 | nn.Conv2d(in_channels, out_channels, 1, 1, 0, bias=False), 50 | nn.ELU(inplace=True), 51 | ) 52 | 53 | def conv_bn_act(in_, out_, kernel_size, 54 | stride=1, groups=1, bias=True, 55 | eps=1e-3, momentum=0.01): 56 | return nn.Sequential( 57 | SamePadConv2d(in_, out_, kernel_size, stride, groups=groups, bias=bias), 58 | nn.BatchNorm2d(out_, eps, momentum), 59 | Swish() 60 | ) 61 | 62 | 63 | class SamePadConv2d(nn.Conv2d): 64 | """ 65 | Conv with TF padding='same' 66 | https://github.com/pytorch/pytorch/issues/3867#issuecomment-349279036 67 | """ 68 | 69 | def __init__(self, in_channels, out_channels, kernel_size, stride=1, dilation=1, groups=1, bias=True, padding_mode="zeros"): 70 | super().__init__(in_channels, out_channels, kernel_size, stride, 0, dilation, groups, bias, padding_mode) 71 | 72 | def get_pad_odd(self, in_, weight, stride, dilation): 73 | effective_filter_size_rows = (weight - 1) * dilation + 1 74 | out_rows = (in_ + stride - 1) // stride 75 | padding_needed = max(0, (out_rows - 1) * stride + effective_filter_size_rows - in_) 76 | padding_rows = max(0, (out_rows - 1) * stride + (weight - 1) * dilation + 1 - in_) 77 | rows_odd = (padding_rows % 2 != 0) 78 | return 
padding_rows, rows_odd 79 | 80 | def forward(self, x): 81 | padding_rows, rows_odd = self.get_pad_odd(x.shape[2], self.weight.shape[2], self.stride[0], self.dilation[0]) 82 | padding_cols, cols_odd = self.get_pad_odd(x.shape[3], self.weight.shape[3], self.stride[1], self.dilation[1]) 83 | 84 | if rows_odd or cols_odd: 85 | x = F.pad(x, [0, int(cols_odd), 0, int(rows_odd)]) 86 | 87 | return F.conv2d(x, self.weight, self.bias, self.stride, 88 | padding=(padding_rows // 2, padding_cols // 2), 89 | dilation=self.dilation, groups=self.groups) 90 | 91 | 92 | class Swish(nn.Module): 93 | def forward(self, x): 94 | return x * torch.sigmoid(x) 95 | 96 | 97 | class Flatten(nn.Module): 98 | def forward(self, x): 99 | return x.view(x.shape[0], -1) 100 | 101 | 102 | class SEModule(nn.Module): 103 | def __init__(self, in_, squeeze_ch): 104 | super().__init__() 105 | self.se = nn.Sequential( 106 | nn.AdaptiveAvgPool2d(1), 107 | nn.Conv2d(in_, squeeze_ch, kernel_size=1, stride=1, padding=0, bias=True), 108 | Swish(), 109 | nn.Conv2d(squeeze_ch, in_, kernel_size=1, stride=1, padding=0, bias=True), 110 | ) 111 | 112 | def forward(self, x): 113 | return x * torch.sigmoid(self.se(x)) 114 | 115 | 116 | class DropConnect(nn.Module): 117 | def __init__(self, ratio): 118 | super().__init__() 119 | self.ratio = 1.0 - ratio 120 | 121 | def forward(self, x): 122 | if not self.training: 123 | return x 124 | 125 | random_tensor = self.ratio 126 | random_tensor += torch.rand([x.shape[0], 1, 1, 1], dtype=torch.float, device=x.device) 127 | random_tensor.requires_grad_(False) 128 | return x / self.ratio * random_tensor.floor() -------------------------------------------------------------------------------- /lib/LitePose/lib/models/layers/layers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import logging 3 | 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.functional as F 7 | 8 | def _make_divisible(v, divisor, min_value=None): 9 | if min_value is None: 10 | min_value = divisor 11 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 12 | # Make sure that round down does not go down by more than 10%. 
13 | if new_v < 0.9 * v: 14 | new_v += divisor 15 | return new_v 16 | 17 | 18 | class convbnrelu(nn.Sequential): 19 | def __init__(self, inp, oup, ker=3, stride=1, groups=1): 20 | super(convbnrelu, self).__init__( 21 | nn.Conv2d(inp, oup, ker, stride, ker // 2, groups=groups, bias=False), 22 | nn.BatchNorm2d(oup), 23 | nn.ReLU6(inplace=True) 24 | ) 25 | 26 | class Bottleneck(nn.Module): 27 | 28 | def __init__(self, inp, oup, s=1, k=3, r=4): 29 | super(Bottleneck, self).__init__() 30 | mid_dim = oup // r 31 | if inp == oup and s == 1: 32 | self.residual = True 33 | else: 34 | self.residual = False 35 | self.conv1 = nn.Conv2d(inp, mid_dim, kernel_size=1, bias=False) 36 | self.bn1 = nn.BatchNorm2d(mid_dim) 37 | self.conv2 = nn.Conv2d(mid_dim, mid_dim, kernel_size=k, stride=s, padding=k//2, bias=False) 38 | self.bn2 = nn.BatchNorm2d(mid_dim) 39 | self.conv3 = nn.Conv2d(mid_dim, oup, kernel_size=1, bias=False) 40 | self.bn3 = nn.BatchNorm2d(oup) 41 | self.relu = nn.ReLU(inplace=True) 42 | 43 | def forward(self, x): 44 | residual = x 45 | out = self.conv1(x) 46 | out = self.bn1(out) 47 | out = self.relu(out) 48 | out = self.conv2(out) 49 | out = self.bn2(out) 50 | out = self.relu(out) 51 | out = self.conv3(out) 52 | out = self.bn3(out) 53 | if self.residual == True: 54 | out += residual 55 | out = self.relu(out) 56 | return out 57 | 58 | class UpConv(nn.Module): 59 | def __init__(self, inp, oup, k=3): 60 | super(UpConv, self).__init__() 61 | self.conv = nn.Conv2d(inp, oup, k, 1, k // 2, bias=False) 62 | def forward(self, x): 63 | x = F.interpolate(x, scale_factor=2) 64 | x = self.conv(x) 65 | return x 66 | 67 | class FusedMBConv(nn.Module): 68 | 69 | def __init__(self, inp, oup, s=1, k=3, r=4): 70 | super(FusedMBConv, self).__init__() 71 | feature_dim = _make_divisible(round(inp * r), 8) 72 | self.inv = nn.Sequential( 73 | nn.Conv2d(inp, feature_dim, k, s, k // 2, bias=False), 74 | nn.BatchNorm2d(feature_dim), 75 | nn.ReLU6(inplace = True) 76 | ) 77 | self.point_conv = nn.Sequential( 78 | nn.Conv2d(feature_dim, oup, 1, 1, 0, bias=False), 79 | nn.BatchNorm2d(oup) 80 | ) 81 | self.use_residual_connection = s == 1 and inp == oup 82 | 83 | def forward(self, x): 84 | out = self.inv(x) 85 | out = self.point_conv(out) 86 | if self.use_residual_connection: 87 | out += x 88 | return out 89 | 90 | class InvBottleneck(nn.Module): 91 | 92 | def __init__(self, inplanes, planes, stride=1, ker=3, exp=6): 93 | super(InvBottleneck, self).__init__() 94 | feature_dim = _make_divisible(round(inplanes * exp), 8) 95 | self.inv = nn.Sequential( 96 | nn.Conv2d(inplanes, feature_dim, 1, 1, 0, bias=False), 97 | nn.BatchNorm2d(feature_dim), 98 | nn.ReLU6(inplace = True) 99 | ) 100 | self.depth_conv = nn.Sequential( 101 | nn.Conv2d(feature_dim, feature_dim, ker, stride, ker // 2, groups=feature_dim, bias=False), 102 | nn.BatchNorm2d(feature_dim), 103 | nn.ReLU6(inplace = True) 104 | ) 105 | self.point_conv = nn.Sequential( 106 | nn.Conv2d(feature_dim, planes, 1, 1, 0, bias=False), 107 | nn.BatchNorm2d(planes) 108 | ) 109 | self.stride = stride 110 | self.use_residual_connection = stride == 1 and inplanes == planes 111 | 112 | def forward(self, x): 113 | out = self.inv(x) 114 | out = self.depth_conv(out) 115 | out = self.point_conv(out) 116 | if self.use_residual_connection: 117 | out += x 118 | return out 119 | 120 | class SepConv2d(nn.Module): 121 | def __init__(self, inp, oup, ker=3, stride=1): 122 | super(SepConv2d, self).__init__() 123 | conv = [ 124 | nn.Conv2d(inp, inp, ker, stride, ker // 2, groups=inp, bias=False), 
125 | nn.BatchNorm2d(inp), 126 | nn.ReLU(inplace=True), 127 | nn.Conv2d(inp, oup, 1, 1, 0, bias=False), 128 | ] 129 | self.conv = nn.Sequential(*conv) 130 | 131 | def forward(self, x): 132 | output = self.conv(x) 133 | return output -------------------------------------------------------------------------------- /lib/LitePose/lib/models/pose_resnet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import random 6 | import time 7 | from models.layers.layers import Bottleneck, convbnrelu, UpConv, FusedMBConv 8 | 9 | def rand(c): 10 | return random.randint(0, c - 1) 11 | 12 | def _make_divisible(v, divisor, min_value=None): 13 | if min_value is None: 14 | min_value = divisor 15 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 16 | # Make sure that round down does not go down by more than 10%. 17 | if new_v < 0.9 * v: 18 | new_v += divisor 19 | return new_v 20 | 21 | class LitePose(nn.Module): 22 | def __init__(self, cfg, width_mult=1.0, round_nearest=8): 23 | super(LitePose, self).__init__() 24 | input_channel = 16 25 | backbone_setting = [ 26 | # r, k, c, n, s 27 | [4, 7, 16, 4, 2], 28 | [4, 7, 32, 6, 2], 29 | [4, 5, 48, 8, 2], 30 | [4, 3, 80, 8, 1], 31 | ] 32 | # building first layer 33 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 34 | self.first = nn.Sequential( 35 | convbnrelu(3, 32, ker=7, stride=2), 36 | convbnrelu(32, input_channel, ker=7, stride=1) 37 | ) 38 | self.channel = [input_channel] 39 | self.stage = [] 40 | for id_stage in range(len(backbone_setting)): 41 | r, k, c, n, s = backbone_setting[id_stage] 42 | c = _make_divisible(c * width_mult, round_nearest) 43 | layer = [] 44 | for id_block in range(n): 45 | stride = s if id_block == 0 else 1 46 | layer.append(FusedMBConv(input_channel, c, s=stride, k=k, r=r)) 47 | input_channel = c 48 | layer = nn.Sequential(*layer) 49 | self.stage.append(layer) 50 | self.channel.append(c) 51 | self.stage = nn.ModuleList(self.stage) 52 | extra = cfg.MODEL.EXTRA 53 | self.filters = extra['NUM_DECONV_FILTERS'] 54 | self.inplanes = self.channel[-1] 55 | self.deconv_refined, self.deconv_raw, self.deconv_bnrelu = self._make_deconv_layers( 56 | extra.NUM_DECONV_LAYERS, 57 | self.filters, 58 | extra.NUM_DECONV_KERNELS, 59 | ) 60 | self.final_refined, self.final_raw, self.final_channel = self._make_final_layers(cfg, self.filters) 61 | self.num_deconv_layers = extra.NUM_DECONV_LAYERS 62 | self.loss_config = cfg.LOSS 63 | 64 | def _get_deconv_cfg(self, deconv_kernel): 65 | if deconv_kernel == 4: 66 | padding = 1 67 | output_padding = 0 68 | elif deconv_kernel == 3: 69 | padding = 1 70 | output_padding = 1 71 | elif deconv_kernel == 2: 72 | padding = 0 73 | output_padding = 0 74 | 75 | return deconv_kernel, padding, output_padding 76 | 77 | def _make_final_layers(self, cfg, num_filters): 78 | dim_tag = cfg.MODEL.NUM_JOINTS if cfg.MODEL.TAG_PER_JOINT else 1 79 | extra = cfg.MODEL.EXTRA 80 | final_raw= [] 81 | final_refined = [] 82 | final_channel = [] 83 | for i in range(1, extra.NUM_DECONV_LAYERS): 84 | # input_channels = num_filters[i] + self.channel[-i-3] 85 | oup_joint = cfg.MODEL.NUM_JOINTS if cfg.LOSS.WITH_HEATMAPS_LOSS[i-1] else 0 86 | oup_tag = dim_tag if cfg.LOSS.WITH_AE_LOSS[i-1] else 0 87 | final_refined.append(nn.Conv2d(num_filters[i], oup_joint + oup_tag, 3, 1, 1, bias=True)) 88 | final_raw.append(nn.Conv2d(self.channel[-i-3], oup_joint + oup_tag, 3, 1, 1, 
bias=True)) 89 | final_channel.append(oup_joint + oup_tag) 90 | 91 | return nn.ModuleList(final_refined), nn.ModuleList(final_raw), final_channel 92 | 93 | def _make_deconv_layers(self, num_layers, num_filters, num_kernels): 94 | deconv_refined = [] 95 | deconv_raw = [] 96 | deconv_bnrelu = [] 97 | for i in range(num_layers): 98 | kernel, padding, output_padding = \ 99 | self._get_deconv_cfg(num_kernels[i]) 100 | planes = num_filters[i] 101 | # inplanes = self.inplanes + self.channel[-i-2] 102 | layers = [] 103 | deconv_refined.append(UpConv(self.inplanes, planes, k=kernel)) 104 | deconv_raw.append(UpConv(self.channel[-i-2], planes, k=kernel)) 105 | layers.append(nn.BatchNorm2d(planes)) 106 | layers.append(nn.ReLU(inplace=True)) 107 | self.inplanes = planes 108 | deconv_bnrelu.append(nn.Sequential(*layers)) 109 | 110 | return nn.ModuleList(deconv_refined), nn.ModuleList(deconv_raw), nn.ModuleList(deconv_bnrelu) 111 | 112 | def forward(self, x): 113 | x = self.first(x) 114 | x_list = [x] 115 | for i in range(len(self.stage)): 116 | tmp = self.stage[i](x_list[-1]) 117 | x_list.append(tmp) 118 | final_outputs = [] 119 | input_refined = x_list[-1] 120 | input_raw = x_list[-2] 121 | for i in range(self.num_deconv_layers): 122 | next_input_refined = self.deconv_refined[i](input_refined) 123 | next_input_raw = self.deconv_raw[i](input_raw) 124 | input_refined= self.deconv_bnrelu[i](next_input_refined + next_input_raw) 125 | input_raw = x_list[-i-3] 126 | if i > 0: 127 | final_refined = self.final_refined[i-1](input_refined) 128 | final_raw = self.final_raw[i-1](input_raw) 129 | final_outputs.append(final_refined + final_raw) 130 | 131 | return final_outputs 132 | 133 | def get_pose_net(cfg, is_train=False, cfg_arch=None): 134 | model = LitePose(cfg) 135 | if is_train and cfg.MODEL.INIT_WEIGHTS: 136 | print(cfg.MODEL.PRETRAINED) 137 | if os.path.isfile(cfg.MODEL.PRETRAINED): 138 | print("load pre-train model") 139 | need_init_state_dict = {} 140 | state_dict = torch.load(cfg.MODEL.PRETRAINED, map_location=torch.device('cpu')) 141 | for key, value in state_dict.items(): 142 | if 'deconv' in key: 143 | continue 144 | if 'final' in key: 145 | continue 146 | need_init_state_dict[key] = value 147 | try: 148 | model.load_state_dict(need_init_state_dict, strict=False) 149 | except: 150 | print("Error load!") 151 | return model -------------------------------------------------------------------------------- /lib/LitePose/lib/models/pose_simplenet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import random 6 | from models.layers.layers import InvBottleneck, convbnrelu, SepConv2d 7 | 8 | 9 | def rand(c): 10 | return random.randint(0, c - 1) 11 | 12 | 13 | def _make_divisible(v, divisor, min_value=None): 14 | if min_value is None: 15 | min_value = divisor 16 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 17 | # Make sure that round down does not go down by more than 10%. 
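# (i.e. if rounding to the nearest multiple of `divisor` lost more than 10% of the requested width, bump up by one divisor step)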
18 | if new_v < 0.9 * v: 19 | new_v += divisor 20 | return new_v 21 | 22 | 23 | class LitePose(nn.Module): 24 | def __init__(self, cfg, width_mult=1.0, round_nearest=8, cfg_arch=None): 25 | super(LitePose, self).__init__() 26 | # input_channel = 24 27 | # inverted_residual_setting = [ 28 | # # t, c, n, s 29 | # [6, 32, 6, 2], 30 | # [6, 64, 8, 2], 31 | # [6, 96, 10, 2], 32 | # [6, 160, 10, 1], 33 | # ] 34 | backbone_setting = cfg_arch["backbone_setting"] 35 | input_channel = cfg_arch["input_channel"] 36 | # building first layer 37 | input_channel = _make_divisible(input_channel * width_mult, round_nearest) 38 | self.first = nn.Sequential( 39 | convbnrelu(3, 32, ker=3, stride=2), 40 | convbnrelu(32, 32, ker=3, stride=1, groups=32), 41 | nn.Conv2d(32, input_channel, 1, 1, 0, bias=False), 42 | nn.BatchNorm2d(input_channel), 43 | ) 44 | self.channel = [input_channel] 45 | # building inverted residual blocks 46 | self.stage = [] 47 | for id_stage in range(len(backbone_setting)): 48 | n = backbone_setting[id_stage]["num_blocks"] 49 | s = backbone_setting[id_stage]["stride"] 50 | c = backbone_setting[id_stage]["channel"] 51 | c = _make_divisible(c * width_mult, round_nearest) 52 | block_setting = backbone_setting[id_stage]["block_setting"] 53 | layer = [] 54 | for id_block in range(n): 55 | t, k = block_setting[id_block] 56 | stride = s if id_block == 0 else 1 57 | layer.append(InvBottleneck(input_channel, c, stride, ker=k, exp=t)) 58 | input_channel = c 59 | layer = nn.Sequential(*layer) 60 | self.stage.append(layer) 61 | self.channel.append(c) 62 | self.stage = nn.ModuleList(self.stage) 63 | extra = cfg.MODEL.EXTRA 64 | self.filters = cfg_arch["deconv_setting"] 65 | self.inplanes = self.channel[-1] 66 | self.deconv_refined, self.deconv_bnrelu = self._make_deconv_layers( 67 | extra.NUM_DECONV_LAYERS, 68 | self.filters, 69 | extra.NUM_DECONV_KERNELS, 70 | ) 71 | self.final_refined, self.final_channel = self._make_final_layers( 72 | cfg, self.filters 73 | ) 74 | self.num_deconv_layers = extra.NUM_DECONV_LAYERS 75 | self.loss_config = cfg.LOSS 76 | 77 | def _get_deconv_cfg(self, deconv_kernel): 78 | if deconv_kernel == 4: 79 | padding = 1 80 | output_padding = 0 81 | elif deconv_kernel == 3: 82 | padding = 1 83 | output_padding = 1 84 | elif deconv_kernel == 2: 85 | padding = 0 86 | output_padding = 0 87 | 88 | return deconv_kernel, padding, output_padding 89 | 90 | def _make_final_layers(self, cfg, num_filters): 91 | dim_tag = cfg.MODEL.NUM_JOINTS if cfg.MODEL.TAG_PER_JOINT else 1 92 | extra = cfg.MODEL.EXTRA 93 | final_refined = [] 94 | final_channel = [] 95 | for i in range(1, extra.NUM_DECONV_LAYERS): 96 | # input_channels = num_filters[i] + self.channel[-i-3] 97 | oup_joint = ( 98 | cfg.MODEL.NUM_JOINTS if cfg.LOSS.WITH_HEATMAPS_LOSS[i - 1] else 0 99 | ) 100 | oup_tag = dim_tag if cfg.LOSS.WITH_AE_LOSS[i - 1] else 0 101 | final_refined.append(SepConv2d(num_filters[i], oup_joint + oup_tag, ker=5)) 102 | final_channel.append(oup_joint + oup_tag) 103 | 104 | return nn.ModuleList(final_refined), final_channel 105 | 106 | def _make_deconv_layers(self, num_layers, num_filters, num_kernels): 107 | deconv_refined = [] 108 | deconv_bnrelu = [] 109 | for i in range(num_layers): 110 | kernel, padding, output_padding = self._get_deconv_cfg(num_kernels[i]) 111 | planes = num_filters[i] 112 | layers = [] 113 | deconv_refined.append( 114 | nn.ConvTranspose2d( 115 | in_channels=self.inplanes, 116 | out_channels=planes, 117 | kernel_size=kernel, 118 | stride=2, 119 | padding=padding, 120 | 
output_padding=output_padding, 121 | bias=False, 122 | ) 123 | ) 124 | layers.append(nn.BatchNorm2d(planes)) 125 | layers.append(nn.ReLU(inplace=True)) 126 | self.inplanes = planes 127 | deconv_bnrelu.append(nn.Sequential(*layers)) 128 | 129 | return nn.ModuleList(deconv_refined), nn.ModuleList(deconv_bnrelu) 130 | 131 | def forward(self, x): 132 | x = self.first(x) 133 | x_list = [x] 134 | for i in range(len(self.stage)): 135 | tmp = self.stage[i](x_list[-1]) 136 | x_list.append(tmp) 137 | final_outputs = [] 138 | input_refined = x_list[-1] 139 | for i in range(self.num_deconv_layers): 140 | next_input_refined = self.deconv_refined[i](input_refined) 141 | input_refined = self.deconv_bnrelu[i](next_input_refined) 142 | if i > 0: 143 | final_refined = self.final_refined[i - 1](input_refined) 144 | final_outputs.append(final_refined) 145 | 146 | return final_outputs 147 | 148 | 149 | def get_pose_net(cfg, is_train=False, cfg_arch=None): 150 | model = LitePose(cfg, cfg_arch=cfg_arch) 151 | if is_train and cfg.MODEL.INIT_WEIGHTS: 152 | print(cfg.MODEL.PRETRAINED) 153 | if os.path.isfile(cfg.MODEL.PRETRAINED): 154 | print("load pre-train model") 155 | need_init_state_dict = {} 156 | state_dict = torch.load(cfg.MODEL.PRETRAINED) 157 | for key, value in state_dict.items(): 158 | if "deconv" in key: 159 | continue 160 | if "final" in key: 161 | continue 162 | need_init_state_dict[key] = value 163 | model.load_state_dict(need_init_state_dict, strict=False) 164 | return model 165 | -------------------------------------------------------------------------------- /lib/LitePose/lib/utils/utils.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import logging 13 | import time 14 | from collections import namedtuple 15 | from pathlib import Path 16 | from ptflops import get_model_complexity_info 17 | 18 | import torch 19 | import torch.optim as optim 20 | import torch.nn as nn 21 | 22 | 23 | 24 | def setup_logger(final_output_dir, rank, phase): 25 | time_str = time.strftime('%Y-%m-%d-%H-%M') 26 | log_file = '{}_{}_rank{}.log'.format(phase, time_str, rank) 27 | final_log_file = os.path.join(final_output_dir, log_file) 28 | head = '%(asctime)-15s %(message)s' 29 | # logging.basicConfig(format=head) 30 | logging.basicConfig(filename=str(final_log_file), 31 | format=head) 32 | logger = logging.getLogger() 33 | logger.setLevel(logging.INFO) 34 | console = logging.StreamHandler() 35 | logging.getLogger('').addHandler(console) 36 | 37 | return logger, time_str 38 | 39 | 40 | def create_logger(cfg, cfg_name, phase='train'): 41 | root_output_dir = Path(cfg.OUTPUT_DIR) 42 | # set up logger 43 | if not root_output_dir.exists() and cfg.RANK == 0: 44 | print('=> creating {}'.format(root_output_dir)) 45 | root_output_dir.mkdir() 46 | else: 47 | while not root_output_dir.exists(): 48 | print('=> wait for {} created'.format(root_output_dir)) 49 | time.sleep(30) 50 | 51 | dataset = cfg.DATASET.DATASET 52 | dataset = dataset.replace(':', '_') 53 | model = cfg.MODEL.NAME 54 | cfg_name = os.path.basename(cfg_name).split('.')[0] 55 | 56 | final_output_dir = root_output_dir / dataset / model / cfg_name 57 | 58 | if cfg.RANK == 0: 59 | print('=> creating {}'.format(final_output_dir)) 60 | final_output_dir.mkdir(parents=True, exist_ok=True) 61 | else: 62 | while not final_output_dir.exists(): 63 | print('=> wait for {} created'.format(final_output_dir)) 64 | time.sleep(5) 65 | 66 | logger, time_str = setup_logger(final_output_dir, cfg.RANK, phase) 67 | 68 | tensorboard_log_dir = Path(cfg.LOG_DIR) / dataset / model / \ 69 | (cfg_name + '_' + time_str) 70 | 71 | print('=> creating {}'.format(tensorboard_log_dir)) 72 | tensorboard_log_dir.mkdir(parents=True, exist_ok=True) 73 | 74 | return logger, str(final_output_dir), str(tensorboard_log_dir) 75 | 76 | 77 | def get_optimizer(cfg, model): 78 | optimizer = None 79 | if cfg.TRAIN.OPTIMIZER == 'sgd': 80 | optimizer = optim.SGD( 81 | model.parameters(), 82 | lr=cfg.TRAIN.LR, 83 | momentum=cfg.TRAIN.MOMENTUM, 84 | weight_decay=cfg.TRAIN.WD, 85 | nesterov=cfg.TRAIN.NESTEROV 86 | ) 87 | elif cfg.TRAIN.OPTIMIZER == 'adam': 88 | optimizer = optim.Adam( 89 | model.parameters(), 90 | lr=cfg.TRAIN.LR 91 | ) 92 | 93 | return optimizer 94 | 95 | 96 | def save_checkpoint(states, is_best, output_dir, 97 | filename='checkpoint.pth.tar'): 98 | torch.save(states, os.path.join(output_dir, filename)) 99 | 100 | if is_best and 'state_dict' in states: 101 | torch.save( 102 | states['best_state_dict'], 103 | os.path.join(output_dir, 'model_best.pth.tar') 104 | ) 105 | 106 | 107 | def get_model_summary(input_size, model, *input_tensors, item_length=26, verbose=False): 108 | """ 109 | :param model: 110 | :param input_tensors: 111 | :param item_length: 112 | :return: 113 | """ 114 | 115 | summary = [] 116 | 117 | ModuleDetails = namedtuple( 118 | "Layer", ["name", "input_size", "output_size", "num_parameters", "multiply_adds"]) 119 | hooks = [] 120 | 
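# NOTE: `summary`, `hooks` and `layer_instances` are left empty in this version; the parameter and multiply-add totals reported below come from ptflops.get_model_complexity_info.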
layer_instances = {} 121 | 122 | model.eval() 123 | 124 | space_len = item_length 125 | 126 | details = '' 127 | if verbose: 128 | details = "Model Summary" + \ 129 | os.linesep + \ 130 | "Name{}Input Size{}Output Size{}Parameters{}Multiply Adds (Flops){}".format( 131 | ' ' * (space_len - len("Name")), 132 | ' ' * (space_len - len("Input Size")), 133 | ' ' * (space_len - len("Output Size")), 134 | ' ' * (space_len - len("Parameters")), 135 | ' ' * (space_len - len("Multiply Adds (Flops)"))) \ 136 | + os.linesep + '-' * space_len * 5 + os.linesep 137 | 138 | params_sum = 0 139 | flops_sum = 0 140 | for layer in summary: 141 | params_sum += layer.num_parameters 142 | if layer.multiply_adds != "Not Available": 143 | flops_sum += layer.multiply_adds 144 | if verbose: 145 | details += "{}{}{}{}{}{}{}{}{}{}".format( 146 | layer.name, 147 | ' ' * (space_len - len(layer.name)), 148 | layer.input_size, 149 | ' ' * (space_len - len(str(layer.input_size))), 150 | layer.output_size, 151 | ' ' * (space_len - len(str(layer.output_size))), 152 | layer.num_parameters, 153 | ' ' * (space_len - len(str(layer.num_parameters))), 154 | layer.multiply_adds, 155 | ' ' * (space_len - len(str(layer.multiply_adds)))) \ 156 | + os.linesep + '-' * space_len * 5 + os.linesep 157 | 158 | macs, params = get_model_complexity_info(model, (3,input_size,input_size), print_per_layer_stat=False, as_strings=True, verbose=True) 159 | 160 | details += os.linesep + "Total Parameters: {}".format(params) + os.linesep + '-' * space_len * 5 + os.linesep 161 | details += "Total Multiply Adds: {}".format(macs) + os.linesep + '-' * space_len * 5 + os.linesep 162 | details += "Number of Layers" + os.linesep 163 | for layer in layer_instances: 164 | details += "{} : {} layers ".format(layer, layer_instances[layer]) 165 | 166 | return details 167 | 168 | 169 | class AverageMeter(object): 170 | """Computes and stores the average and current value""" 171 | def __init__(self): 172 | self.reset() 173 | 174 | def reset(self): 175 | self.val = 0 176 | self.avg = 0 177 | self.sum = 0 178 | self.count = 0 179 | 180 | def update(self, val, n=1): 181 | self.val = val 182 | self.sum += val * n 183 | self.count += n 184 | self.avg = self.sum / self.count if self.count != 0 else 0 185 | -------------------------------------------------------------------------------- /lib/LitePose/lib/utils/zipreader.py: -------------------------------------------------------------------------------- 1 | # ------------------------------------------------------------------------------ 2 | # Copyright (c) Microsoft 3 | # Licensed under the MIT License. 
4 | # Written by Bin Xiao (leoxiaobin@gmail.com) 5 | # ------------------------------------------------------------------------------ 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import os 12 | import zipfile 13 | import xml.etree.ElementTree as ET 14 | 15 | import cv2 16 | import numpy as np 17 | 18 | _im_zfile = [] 19 | _xml_path_zip = [] 20 | _xml_zfile = [] 21 | 22 | 23 | def imread(filename, flags=cv2.IMREAD_COLOR): 24 | global _im_zfile 25 | path = filename 26 | pos_at = path.find('@') 27 | if pos_at == -1: 28 | print("character '@' is not found from the given path '%s'"%(path)) 29 | assert 0 30 | path_zip = path[0: pos_at] 31 | path_img = path[pos_at + 1:] 32 | if not os.path.isfile(path_zip): 33 | print("zip file '%s' is not found"%(path_zip)) 34 | assert 0 35 | for i in range(len(_im_zfile)): 36 | if _im_zfile[i]['path'] == path_zip: 37 | data = _im_zfile[i]['zipfile'].read(path_img) 38 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 39 | 40 | _im_zfile.append({ 41 | 'path': path_zip, 42 | 'zipfile': zipfile.ZipFile(path_zip, 'r') 43 | }) 44 | data = _im_zfile[-1]['zipfile'].read(path_img) 45 | 46 | return cv2.imdecode(np.frombuffer(data, np.uint8), flags) 47 | 48 | 49 | def xmlread(filename): 50 | global _xml_path_zip 51 | global _xml_zfile 52 | path = filename 53 | pos_at = path.find('@') 54 | if pos_at == -1: 55 | print("character '@' is not found from the given path '%s'"%(path)) 56 | assert 0 57 | path_zip = path[0: pos_at] 58 | path_xml = path[pos_at + 2:] 59 | if not os.path.isfile(path_zip): 60 | print("zip file '%s' is not found"%(path_zip)) 61 | assert 0 62 | for i in range(len(_xml_path_zip)): 63 | if _xml_path_zip[i] == path_zip: 64 | data = _xml_zfile[i].open(path_xml) 65 | return ET.fromstring(data.read()) 66 | _xml_path_zip.append(path_zip) 67 | print("read new xml file '%s'"%(path_zip)) 68 | _xml_zfile.append(zipfile.ZipFile(path_zip, 'r')) 69 | data = _xml_zfile[-1].open(path_xml) 70 | return ET.fromstring(data.read()) 71 | -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/prune-L.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 24, "deconv_setting": [64, 48, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 64, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 96, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 160, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/prune-M.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 24, "deconv_setting": [48, 40, 24], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 24, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 72, "block_setting": [[6, 7], [6,
7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 120, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/prune-S.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 16, "deconv_setting": [32, 24, 16], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 16, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 80, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-L.json: -------------------------------------------------------------------------------- 1 | {"img_size": 512, "input_channel": 24, "deconv_setting": [64, 40, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 24, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 64, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 96, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 160, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-M.json: -------------------------------------------------------------------------------- 1 | {"img_size": 448, "input_channel": 16, "deconv_setting": [64, 40, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 24, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 72, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 120, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-S.json: -------------------------------------------------------------------------------- 1 | {"img_size": 448, "input_channel": 16, "deconv_setting": [32, 24, 32], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 16, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 120, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 
7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /lib/LitePose/mobile_configs/search-XS.json: -------------------------------------------------------------------------------- 1 | {"img_size": 256, "input_channel": 16, "deconv_setting": [16, 24, 24], "backbone_setting": [{"num_blocks": 6, "stride": 2, "channel": 16, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 8, "stride": 2, "channel": 32, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 2, "channel": 48, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}, {"num_blocks": 10, "stride": 1, "channel": 80, "block_setting": [[6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7], [6, 7]]}]} -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | hydra-core 2 | pytorch_lightning 3 | wandb 4 | imageio 5 | yacs 6 | pycocotools 7 | json_tricks 8 | ptflops 9 | kornia 10 | # git+https://github.com/pvigier/perlin-numpy -------------------------------------------------------------------------------- /run_demo.py: -------------------------------------------------------------------------------- 1 | import subprocess 2 | import os 3 | 4 | import hydra 5 | from omegaconf import DictConfig 6 | 7 | 8 | def get_losses(loss_type): 9 | if loss_type == "pixel": 10 | return ["true", "false", "false"] 11 | if loss_type == "pose": 12 | return ["false", "true", "false"] 13 | if loss_type == "flow": 14 | return ["false", "false", "true"] 15 | if loss_type == "pose_flow": 16 | return ["false", "true", "true"] 17 | 18 | 19 | def run_demo( 20 | data_dir: str, 21 | model: str, 22 | num_steps: int, 23 | seed: int, 24 | xpname: str, 25 | datamodule: str, 26 | alpha_losses: float, 27 | alpha_two_strokes: float, 28 | init_idx: int, 29 | init_focal_search: bool, 30 | two_strokes: float, 31 | flow_loss_type: str, 32 | guidance_type: str, 33 | learning_rate: float, 34 | num_epochs: int, 35 | num_mixed_grad: int, 36 | target_dir_name: str, 37 | loss_type: str, 38 | ): 39 | """ 40 | Run dolly zoom demo with the given parameters. 41 | 42 | :param data_dir: path to the data directory. 43 | :param model: model name. 44 | :param num_steps: number of steps sampled per ray. 45 | :param seed: random seed. 46 | :param xpname: experiment name. 47 | :param datamodule: datamodule name. 48 | :param alpha_losses: weight of the losses. 49 | :param alpha_two_strokes: weight of the two strokes (spatial and temporal). 50 | :param init_idx: index of the initial camera. 51 | :param init_focal_search: whether to search for the initial focal length. 52 | :param two_strokes: whether to uncouple spatial and temporal optimization. 53 | :param flow_loss_type: type of the flow metric (end-to-end, angular). 54 | :param guidance_type: type of the guidance (guidance map, inerf, random). 55 | :param learning_rate: learning rate. 56 | :param num_epochs: number of epochs. 57 | :param num_mixed_grad: number of guidance points. 58 | :param target_dir_name: name of the target directory. 59 | :param loss_type: type of the loss (pixel, pose, flow, pose_flow). 
60 | """ 61 | losses = get_losses(loss_type) 62 | command = ( 63 | f"python {os.path.dirname(os.path.abspath(__file__))}/jaws/run.py \ 64 | --config-name batch_jaws \ 65 | run_type=jaws \ 66 | data_dir={data_dir} \ 67 | dynamic=true \ 68 | group_name=demo \ 69 | model={model} \ 70 | num_steps={num_steps} \ 71 | seed={seed} \ 72 | xp_name={xpname} \ 73 | datamodule={datamodule} \ 74 | datamodule.alpha_losses={alpha_losses} \ 75 | datamodule.alpha_two_strokes={alpha_two_strokes} \ 76 | datamodule.blur_pred=false \ 77 | datamodule.init_cam_idx={init_idx} \ 78 | datamodule.only_init_focal_search={init_focal_search} \ 79 | datamodule.two_strokes={two_strokes} \ 80 | jaws.diff_focal=true \ 81 | jaws.diff_temporal=false \ 82 | jaws.flow_loss={losses[2]} \ 83 | jaws.flow_loss_type={flow_loss_type} \ 84 | jaws.grad_norm=true \ 85 | jaws.guidance_type={guidance_type} \ 86 | jaws.learning_rate={learning_rate} \ 87 | jaws.num_epochs={num_epochs} \ 88 | jaws.num_sample_grad={num_mixed_grad} \ 89 | jaws.pixel_loss={losses[0]} \ 90 | jaws.pose_loss={losses[1]} \ 91 | jaws.target_dir={target_dir_name}", 92 | ) 93 | subprocess.call(command, shell=True) 94 | 95 | 96 | @hydra.main( 97 | config_path="./jaws/configs", 98 | config_name="demo_jaws.yaml", 99 | version_base="1.2", 100 | ) 101 | def main(config: DictConfig): 102 | run_demo( 103 | data_dir=config.data_dir, 104 | xpname=config.xp_name, 105 | model=config.model, 106 | target_dir_name=config.target_dir_video, 107 | datamodule=config.datamodule, 108 | num_epochs=config.num_epochs, 109 | num_mixed_grad=config.num_mixed_grad, 110 | num_steps=config.num_steps, 111 | learning_rate=config.lr, 112 | seed=config.seed, 113 | loss_type=config.loss_type, 114 | init_idx=config.init_cam_idx_same, 115 | init_focal_search=config.init_focal_search, 116 | two_strokes=config.two_strokes, 117 | flow_loss_type=config.flow_loss_type, 118 | alpha_losses=config.alpha_losses, 119 | alpha_two_strokes=config.alpha_two_strokes, 120 | guidance_type=config.guidance_type, 121 | ) 122 | 123 | 124 | if __name__ == "__main__": 125 | main() 126 | -------------------------------------------------------------------------------- /utils/data_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import trimesh 4 | 5 | 6 | def nerf_matrix_to_ngp(pose, scale=0.33): 7 | # for the fox dataset, 0.33 scales camera radius to ~ 2 8 | new_pose = np.array( 9 | [ 10 | [pose[1, 0], -pose[1, 1], -pose[1, 2], pose[1, 3] * scale], 11 | [pose[2, 0], -pose[2, 1], -pose[2, 2], pose[2, 3] * scale], 12 | [pose[0, 0], -pose[0, 1], -pose[0, 2], pose[0, 3] * scale], 13 | [0, 0, 0, 1], 14 | ], 15 | dtype=np.float32, 16 | ) 17 | return new_pose 18 | 19 | 20 | def visualize_poses(poses, size=0.1): 21 | # poses: [B, 4, 4] 22 | 23 | axes = trimesh.creation.axis(axis_length=4) 24 | sphere = trimesh.creation.icosphere(radius=1) 25 | objects = [axes, sphere] 26 | 27 | for pose in poses: 28 | # a camera is visualized with 8 line segments. 
29 | pos = pose[:3, 3] 30 | a = pos + size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 31 | b = pos - size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 32 | c = pos - size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 33 | d = pos + size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 34 | 35 | segs = np.array( 36 | [ 37 | [pos, a], 38 | [pos, b], 39 | [pos, c], 40 | [pos, d], 41 | [a, b], 42 | [b, c], 43 | [c, d], 44 | [d, a], 45 | ] 46 | ) 47 | segs = trimesh.load_path(segs) 48 | objects.append(segs) 49 | 50 | trimesh.Scene(objects).show() 51 | 52 | 53 | def rand_poses( 54 | size, 55 | radius=1, 56 | theta_range=[np.pi / 3, 2 * np.pi / 3], 57 | phi_range=[0, 2 * np.pi], 58 | ): 59 | """generate random poses from an orbit camera 60 | Args: 61 | size: batch size of generated poses. 62 | radius: camera radius 63 | theta_range: [min, max], should be in [0, \pi] 64 | phi_range: [min, max], should be in [0, 2\pi] 65 | Return: 66 | poses: [size, 4, 4] 67 | """ 68 | 69 | def normalize(vectors): 70 | return vectors / (torch.norm(vectors, dim=-1, keepdim=True) + 1e-10) 71 | 72 | thetas = ( 73 | torch.rand(size) * (theta_range[1] - theta_range[0]) + theta_range[0] 74 | ) 75 | phis = torch.rand(size) * (phi_range[1] - phi_range[0]) + phi_range[0] 76 | 77 | centers = torch.stack( 78 | [ 79 | radius * torch.sin(thetas) * torch.sin(phis), 80 | radius * torch.cos(thetas), 81 | radius * torch.sin(thetas) * torch.cos(phis), 82 | ], 83 | dim=-1, 84 | ) # [B, 3] 85 | 86 | # lookat 87 | forward_vector = -normalize(centers) 88 | up_vector = ( 89 | torch.FloatTensor([0, -1, 0]).unsqueeze(0).repeat(size, 1) 90 | ) # confused at the coordinate system... 91 | right_vector = normalize(torch.cross(forward_vector, up_vector, dim=-1)) 92 | up_vector = normalize(torch.cross(right_vector, forward_vector, dim=-1)) 93 | 94 | poses = torch.eye(4, dtype=torch.float).unsqueeze(0).repeat(size, 1, 1) 95 | poses[:, :3, :3] = torch.stack( 96 | (right_vector, up_vector, forward_vector), dim=-1 97 | ) 98 | poses[:, :3, 3] = centers 99 | 100 | return poses 101 | -------------------------------------------------------------------------------- /utils/file_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import pickle 4 | import subprocess 5 | from typing import Any 6 | 7 | import pandas as pd 8 | import torch 9 | 10 | 11 | def create_dir(dir_name: str): 12 | """Create a directory if it does not exist yet.""" 13 | if not osp.exists(dir_name): 14 | os.makedirs(dir_name) 15 | 16 | 17 | def move_files(source_path: str, destpath: str): 18 | """Move files from `source_path` to `dest_path`.""" 19 | subprocess.call(["mv", source_path, destpath]) 20 | 21 | 22 | def load_pickle(pickle_path: str) -> Any: 23 | """Load a pickle file.""" 24 | with open(pickle_path, "rb") as f: 25 | data = pickle.load(f) 26 | return data 27 | 28 | 29 | def save_pickle(data: Any, pickle_path: str): 30 | """Save data in a pickle file.""" 31 | with open(pickle_path, "wb") as f: 32 | pickle.dump(data, f, protocol=4) 33 | 34 | 35 | def load_txt(txt_path: str): 36 | """Load a txt file.""" 37 | with open(txt_path, "r") as f: 38 | data = f.read() 39 | return data 40 | 41 | 42 | def save_txt(data: str, txt_path: str): 43 | """Save data in a txt file.""" 44 | with open(txt_path, "w") as f: 45 | f.write(data) 46 | 47 | 48 | def load_pth(pth_path: str) -> Any: 49 | """Load a pth (PyTorch) file.""" 50 | data = torch.load(pth_path) 51 | return data 52 | 
53 | 54 | def save_pth(data: Any, pth_path: str): 55 | """Save data in a pth (PyTorch) file.""" 56 | torch.save(data, pth_path) 57 | 58 | 59 | def load_csv(csv_path: str, header: Any = None) -> pd.DataFrame: 60 | """Load a csv file.""" 61 | try: 62 | data = pd.read_csv(csv_path, header=header) 63 | except pd.errors.EmptyDataError: 64 | data = pd.DataFrame() 65 | return data 66 | 67 | 68 | def save_csv(data: Any, csv_path: str): 69 | """Save data in a csv file.""" 70 | pd.DataFrame(data).to_csv(csv_path, header=False, index=False) 71 | -------------------------------------------------------------------------------- /utils/misc_utils.py: -------------------------------------------------------------------------------- 1 | from typing import Sequence, Dict 2 | 3 | import numpy as np 4 | from omegaconf import DictConfig, OmegaConf 5 | from pytorch_lightning.utilities import rank_zero_only 6 | import rich.tree 7 | import rich.syntax 8 | import cv2 9 | import os.path as osp 10 | import imageio 11 | import trimesh 12 | import torch 13 | 14 | from utils.file_utils import save_pickle 15 | 16 | 17 | @rank_zero_only 18 | def print_config( 19 | config: DictConfig, 20 | fields: Sequence[str] = ( 21 | "compnode", 22 | "model", 23 | "datamodule", 24 | "jaws", 25 | "xp_name", 26 | "seed", 27 | ), 28 | resolve: bool = True, 29 | ) -> None: 30 | """ 31 | Adapted from: https://github.com/ashleve/lightning-hydra-template. 32 | Prints content of DictConfig using Rich library and its tree structure. 33 | 34 | :param config: configuration composed by Hydra. 35 | :param fields: determines which main fields from config will be printed and 36 | in what order. 37 | :param resolve: whether to resolve reference fields of DictConfig. 38 | """ 39 | style = "dim" 40 | tree = rich.tree.Tree("CONFIG", style=style, guide_style=style) 41 | 42 | for field in fields: 43 | branch = tree.add(field, style=style, guide_style=style) 44 | 45 | config_section = config.get(field) 46 | branch_content = str(config_section) 47 | if isinstance(config_section, DictConfig): 48 | branch_content = OmegaConf.to_yaml(config_section, resolve=resolve) 49 | 50 | branch.add(rich.syntax.Syntax(branch_content, "yaml")) 51 | 52 | rich.print(tree) 53 | 54 | with open("config_tree.log", "w") as fp: 55 | rich.print(tree, file=fp) 56 | 57 | 58 | def divide(a: np.array, b: np.array) -> np.array: 59 | """Perform array element-wise division, 0 when dividing by 0.""" 60 | res = np.divide( 61 | a, 62 | b, 63 | out=np.zeros_like(a, dtype=np.float64), 64 | where=(b != 0), 65 | ) 66 | return res 67 | 68 | 69 | def save_video( 70 | chunk, filename, over_write=False, is_resize=False, size=(224, 224) 71 | ): 72 | # for idx, chunk in enumerate(lchunks): 73 | if osp.exists(filename) and not over_write: 74 | return 75 | out = cv2.VideoWriter( 76 | filename + ".mp4", cv2.VideoWriter_fourcc(*"mp4v"), 5, (224, 224) 77 | ) 78 | for frm in chunk: 79 | # print("np.shape frm: ", np.shape(frm)) 80 | if np.shape(frm) != (size[0], size[1], 3): 81 | frm = cv2.resize(frm, size) 82 | out.write(frm) 83 | out.release() 84 | 85 | 86 | def save_nerf_img(_frm, filename): 87 | # to8b 88 | frm = (_frm.detach().cpu().numpy() * 255).astype(np.uint8) 89 | imageio.imwrite(filename, frm) 90 | 91 | 92 | def save_gif(imgs, filename, fps=5, is_resize=False, size=(224, 224)): 93 | final_imgs = [] 94 | for _frm in imgs: 95 | # to8b 96 | frm = (_frm.detach().cpu().numpy() * 255).astype(np.uint8) 97 | if np.shape(frm) != (size[0], size[1], 3) and is_resize: 98 | final_imgs.append(cv2.resize(frm, 
size)) 99 | else: 100 | final_imgs.append(frm) 101 | imageio.mimwrite(filename, final_imgs, fps=fps) 102 | 103 | 104 | def save_traj(poses: torch.Tensor, filename: str, saving_format="pkl"): 105 | if saving_format == "pkl": 106 | dict_save = {} 107 | for idx, pose in enumerate(poses): 108 | dict_save[idx] = pose.cpu().numpy() 109 | save_pickle(dict_save, filename + "." + saving_format) 110 | 111 | 112 | def draw_axis(img, R, t, K): 113 | # unit is mm 114 | rotV, _ = cv2.Rodrigues(R) 115 | points = np.float32( 116 | [[100, 0, 0], [0, 100, 0], [0, 0, 100], [0, 0, 0]] 117 | ).reshape(-1, 3) 118 | axisPoints, _ = cv2.projectPoints(points, rotV, t, K, (0, 0, 0, 0)) 119 | img = cv2.line( 120 | img, 121 | tuple(axisPoints[3].ravel()), 122 | tuple(axisPoints[0].ravel()), 123 | (255, 0, 0), 124 | 3, 125 | ) 126 | img = cv2.line( 127 | img, 128 | tuple(axisPoints[3].ravel()), 129 | tuple(axisPoints[1].ravel()), 130 | (0, 255, 0), 131 | 3, 132 | ) 133 | img = cv2.line( 134 | img, 135 | tuple(axisPoints[3].ravel()), 136 | tuple(axisPoints[2].ravel()), 137 | (0, 0, 255), 138 | 3, 139 | ) 140 | return img 141 | 142 | 143 | def render_cam_pose(pose, intrinsics, render_pose, img): 144 | inv_render_pose = np.linalg.inv(render_pose) 145 | pose_2_render = pose @ inv_render_pose 146 | t = pose_2_render[:3, 3] 147 | R = pose_2_render[:3, :3] 148 | 149 | K = np.zeros((3, 3)) 150 | K[0, 0] = intrinsics[0] # fx 151 | K[1, 1] = intrinsics[1] # fy 152 | K[0, 2] = intrinsics[2] # cx 153 | K[1, 2] = intrinsics[3] # cy 154 | K[2, 2] = 1 155 | 156 | draw_axis(img, R, t, K) 157 | 158 | # uvz = K@pose_2_render 159 | # x = uvz[0]/uvz[2] 160 | # y = uvz[1]/uvz[2] 161 | 162 | 163 | def visualize_poses(poses, file_name, size=0.1): 164 | # poses: [B, 4, 4] 165 | 166 | axes = trimesh.creation.axis(axis_length=4) 167 | # sphere = trimesh.creation.icosphere(radius=1) 168 | objects = [axes] 169 | 170 | for pose in poses: 171 | # a camera is visualized with 8 line segments. 172 | pos = pose[:3, 3] 173 | a = pos + size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 174 | b = pos - size * pose[:3, 0] + size * pose[:3, 1] + size * pose[:3, 2] 175 | c = pos - size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 176 | d = pos + size * pose[:3, 0] - size * pose[:3, 1] + size * pose[:3, 2] 177 | 178 | segs = np.array( 179 | [ 180 | [pos, a], 181 | [pos, b], 182 | [pos, c], 183 | [pos, d], 184 | [a, b], 185 | [b, c], 186 | [c, d], 187 | [d, a], 188 | ] 189 | ) 190 | segs = trimesh.load_path(segs) 191 | objects.append(segs) 192 | scene = trimesh.Scene(objects) 193 | png = scene.save_image(resolution=[800, 800], visible=True) 194 | with open(file_name, "wb") as f: 195 | f.write(png) 196 | f.close() 197 | 198 | 199 | def cfg2dict(cfg: DictConfig) -> Dict: 200 | """ 201 | Recursively convert OmegaConf to vanilla dict 202 | :param cfg: 203 | :return: 204 | """ 205 | cfg_dict = {} 206 | for k, v in cfg.items(): 207 | if type(v) == DictConfig: 208 | cfg_dict[k] = cfg2dict(v) 209 | else: 210 | cfg_dict[k] = v 211 | return cfg_dict 212 | --------------------------------------------------------------------------------