├── .DS_Store ├── .gitignore ├── .idea ├── LifelongReID.iml ├── inspectionProfiles │ ├── Project_Default.xml │ └── profiles_settings.xml ├── misc.xml ├── modules.xml ├── other.xml ├── vcs.xml └── workspace.xml ├── LICENSE ├── README.md ├── apex ├── .nojekyll ├── LICENSE ├── README.md ├── apex │ ├── RNN │ │ ├── README.md │ │ ├── RNNBackend.py │ │ ├── __init__.py │ │ ├── cells.py │ │ └── models.py │ ├── __init__.py │ ├── amp │ │ ├── README.md │ │ ├── __init__.py │ │ ├── __version__.py │ │ ├── _amp_state.py │ │ ├── _initialize.py │ │ ├── _process_optimizer.py │ │ ├── amp.py │ │ ├── compat.py │ │ ├── frontend.py │ │ ├── handle.py │ │ ├── lists │ │ │ ├── __init__.py │ │ │ ├── functional_overrides.py │ │ │ ├── tensor_overrides.py │ │ │ └── torch_overrides.py │ │ ├── opt.py │ │ ├── rnn_compat.py │ │ ├── scaler.py │ │ ├── utils.py │ │ └── wrap.py │ ├── contrib │ │ ├── __init__.py │ │ ├── csrc │ │ │ ├── groupbn │ │ │ │ ├── batch_norm.cu │ │ │ │ ├── batch_norm.h │ │ │ │ ├── batch_norm_add_relu.cu │ │ │ │ ├── batch_norm_add_relu.h │ │ │ │ ├── cuda_utils.h │ │ │ │ ├── interface.cpp │ │ │ │ ├── ipc.cu │ │ │ │ └── nhwc_batch_norm_kernel.h │ │ │ ├── layer_norm │ │ │ │ ├── ln_api.cpp │ │ │ │ ├── ln_bwd_semi_cuda_kernel.cu │ │ │ │ ├── ln_fwd_cuda_kernel.cu │ │ │ │ ├── ln_kernel_traits.h │ │ │ │ └── utils.cuh │ │ │ ├── multihead_attn │ │ │ │ ├── additive_masked_softmax_dropout.cpp │ │ │ │ ├── additive_masked_softmax_dropout_cuda.cu │ │ │ │ ├── dropout.h │ │ │ │ ├── encdec_multihead_attn.cpp │ │ │ │ ├── encdec_multihead_attn_cuda.cu │ │ │ │ ├── encdec_multihead_attn_norm_add.cpp │ │ │ │ ├── encdec_multihead_attn_norm_add_cuda.cu │ │ │ │ ├── layer_norm.h │ │ │ │ ├── masked_softmax_dropout.cpp │ │ │ │ ├── masked_softmax_dropout_cuda.cu │ │ │ │ ├── philox.h │ │ │ │ ├── self_multihead_attn.cpp │ │ │ │ ├── self_multihead_attn_bias.cpp │ │ │ │ ├── self_multihead_attn_bias_additive_mask.cpp │ │ │ │ ├── self_multihead_attn_bias_additive_mask_cuda.cu │ │ │ │ ├── self_multihead_attn_bias_cuda.cu │ │ │ │ ├── self_multihead_attn_cuda.cu │ │ │ │ ├── self_multihead_attn_norm_add.cpp │ │ │ │ ├── self_multihead_attn_norm_add_cuda.cu │ │ │ │ ├── softmax.h │ │ │ │ └── strided_batched_gemm.h │ │ │ ├── optimizers │ │ │ │ ├── fused_adam_cuda.cpp │ │ │ │ ├── fused_adam_cuda_kernel.cu │ │ │ │ ├── fused_lamb_cuda.cpp │ │ │ │ ├── fused_lamb_cuda_kernel.cu │ │ │ │ ├── multi_tensor_distopt_adam.cpp │ │ │ │ ├── multi_tensor_distopt_adam_kernel.cu │ │ │ │ ├── multi_tensor_distopt_lamb.cpp │ │ │ │ └── multi_tensor_distopt_lamb_kernel.cu │ │ │ └── xentropy │ │ │ │ ├── interface.cpp │ │ │ │ └── xentropy_kernel.cu │ │ ├── examples │ │ │ └── multihead_attn │ │ │ │ ├── func_test_multihead_attn.py │ │ │ │ └── perf_test_multihead_attn.py │ │ ├── groupbn │ │ │ ├── __init__.py │ │ │ └── batch_norm.py │ │ ├── layer_norm │ │ │ ├── __init__.py │ │ │ └── layer_norm.py │ │ ├── multihead_attn │ │ │ ├── MHA_bwd.png │ │ │ ├── MHA_fwd.png │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── encdec_multihead_attn.py │ │ │ ├── encdec_multihead_attn_func.py │ │ │ ├── fast_encdec_multihead_attn_func.py │ │ │ ├── fast_encdec_multihead_attn_norm_add_func.py │ │ │ ├── fast_self_multihead_attn_func.py │ │ │ ├── fast_self_multihead_attn_norm_add_func.py │ │ │ ├── mask_softmax_dropout_func.py │ │ │ ├── self_multihead_attn.py │ │ │ └── self_multihead_attn_func.py │ │ ├── optimizers │ │ │ ├── __init__.py │ │ │ ├── distributed_fused_adam.py │ │ │ ├── distributed_fused_adam_v2.py │ │ │ ├── distributed_fused_adam_v3.py │ │ │ ├── distributed_fused_lamb.py │ │ │ ├── 
fp16_optimizer.py │ │ │ ├── fused_adam.py │ │ │ ├── fused_lamb.py │ │ │ └── fused_sgd.py │ │ ├── sparsity │ │ │ ├── README.md │ │ │ ├── __init__.py │ │ │ ├── asp.py │ │ │ ├── sparse_masklib.py │ │ │ └── test │ │ │ │ ├── checkpointing_test_part1.py │ │ │ │ ├── checkpointing_test_part2.py │ │ │ │ ├── checkpointing_test_reference.py │ │ │ │ └── toy_problem.py │ │ ├── test │ │ │ ├── layer_norm │ │ │ │ └── test_fast_layer_norm.py │ │ │ ├── multihead_attn │ │ │ │ ├── test_encdec_multihead_attn.py │ │ │ │ ├── test_encdec_multihead_attn_norm_add.py │ │ │ │ ├── test_fast_self_multihead_attn_bias.py │ │ │ │ ├── test_mha_fused_softmax.py │ │ │ │ ├── test_self_multihead_attn.py │ │ │ │ └── test_self_multihead_attn_norm_add.py │ │ │ └── test_label_smoothing.py │ │ └── xentropy │ │ │ ├── __init__.py │ │ │ └── softmax_xentropy.py │ ├── fp16_utils │ │ ├── README.md │ │ ├── __init__.py │ │ ├── fp16_optimizer.py │ │ ├── fp16util.py │ │ └── loss_scaler.py │ ├── mlp │ │ ├── __init__.py │ │ └── mlp.py │ ├── multi_tensor_apply │ │ ├── __init__.py │ │ └── multi_tensor_apply.py │ ├── normalization │ │ ├── __init__.py │ │ └── fused_layer_norm.py │ ├── optimizers │ │ ├── __init__.py │ │ ├── fused_adagrad.py │ │ ├── fused_adam.py │ │ ├── fused_lamb.py │ │ ├── fused_novograd.py │ │ └── fused_sgd.py │ ├── parallel │ │ ├── LARC.py │ │ ├── README.md │ │ ├── __init__.py │ │ ├── distributed.py │ │ ├── multiproc.py │ │ ├── optimized_sync_batchnorm.py │ │ ├── optimized_sync_batchnorm_kernel.py │ │ ├── sync_batchnorm.py │ │ └── sync_batchnorm_kernel.py │ ├── pyprof │ │ ├── FAQs.md │ │ ├── README.md │ │ ├── __init__.py │ │ ├── examples │ │ │ ├── .gitignore │ │ │ ├── apex │ │ │ │ ├── README.md │ │ │ │ ├── fused_adam.py │ │ │ │ ├── fused_layer_norm.py │ │ │ │ └── test.sh │ │ │ ├── custom_func_module │ │ │ │ ├── README.md │ │ │ │ ├── custom_function.py │ │ │ │ ├── custom_module.py │ │ │ │ └── test.sh │ │ │ ├── imagenet │ │ │ │ ├── imagenet.py │ │ │ │ └── test.sh │ │ │ ├── jit │ │ │ │ ├── README.md │ │ │ │ ├── jit_script_function.py │ │ │ │ ├── jit_script_method.py │ │ │ │ ├── jit_trace_function.py │ │ │ │ ├── jit_trace_method.py │ │ │ │ └── test.sh │ │ │ ├── lenet.py │ │ │ ├── operators.py │ │ │ ├── simple.py │ │ │ └── user_annotation │ │ │ │ ├── README.md │ │ │ │ ├── resnet.py │ │ │ │ └── test.sh │ │ ├── nvtx │ │ │ ├── __init__.py │ │ │ └── nvmarker.py │ │ ├── parse │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── db.py │ │ │ ├── kernel.py │ │ │ ├── nvvp.py │ │ │ └── parse.py │ │ └── prof │ │ │ ├── __init__.py │ │ │ ├── __main__.py │ │ │ ├── activation.py │ │ │ ├── base.py │ │ │ ├── blas.py │ │ │ ├── conv.py │ │ │ ├── convert.py │ │ │ ├── data.py │ │ │ ├── dropout.py │ │ │ ├── embedding.py │ │ │ ├── index_slice_join_mutate.py │ │ │ ├── linear.py │ │ │ ├── loss.py │ │ │ ├── misc.py │ │ │ ├── normalization.py │ │ │ ├── optim.py │ │ │ ├── output.py │ │ │ ├── pointwise.py │ │ │ ├── pooling.py │ │ │ ├── prof.py │ │ │ ├── randomSample.py │ │ │ ├── recurrentCell.py │ │ │ ├── reduction.py │ │ │ ├── softmax.py │ │ │ ├── usage.py │ │ │ └── utility.py │ └── reparameterization │ │ ├── README.md │ │ ├── __init__.py │ │ ├── reparameterization.py │ │ └── weight_norm.py ├── csrc │ ├── amp_C_frontend.cpp │ ├── compat.h │ ├── flatten_unflatten.cpp │ ├── layer_norm_cuda.cpp │ ├── layer_norm_cuda_kernel.cu │ ├── mlp.cpp │ ├── mlp_cuda.cu │ ├── multi_tensor_adagrad.cu │ ├── multi_tensor_adam.cu │ ├── multi_tensor_apply.cuh │ ├── multi_tensor_axpby_kernel.cu │ ├── multi_tensor_l2norm_kernel.cu │ ├── multi_tensor_lamb.cu │ ├── 
multi_tensor_lamb_stage_1.cu │ ├── multi_tensor_lamb_stage_2.cu │ ├── multi_tensor_novograd.cu │ ├── multi_tensor_scale_kernel.cu │ ├── multi_tensor_sgd_kernel.cu │ ├── syncbn.cpp │ ├── type_shim.h │ └── welford.cu ├── docs │ ├── Makefile │ └── source │ │ ├── _static │ │ ├── css │ │ │ └── pytorch_theme.css │ │ └── img │ │ │ └── nv-pytorch2.png │ │ ├── _templates │ │ └── layout.html │ │ ├── advanced.rst │ │ ├── amp.rst │ │ ├── conf.py │ │ ├── fp16_utils.rst │ │ ├── index.rst │ │ ├── layernorm.rst │ │ ├── optimizers.rst │ │ └── parallel.rst ├── examples │ ├── README.md │ ├── dcgan │ │ ├── README.md │ │ └── main_amp.py │ ├── docker │ │ ├── Dockerfile │ │ └── README.md │ ├── imagenet │ │ ├── README.md │ │ └── main_amp.py │ └── simple │ │ └── distributed │ │ ├── README.md │ │ ├── distributed_data_parallel.py │ │ └── run.sh ├── requirements.txt ├── requirements_dev.txt ├── setup.py └── tests │ ├── L0 │ ├── run_amp │ │ ├── __init__.py │ │ ├── test_add_param_group.py │ │ ├── test_basic_casts.py │ │ ├── test_cache.py │ │ ├── test_checkpointing.py │ │ ├── test_fused_sgd.py │ │ ├── test_larc.py │ │ ├── test_multi_tensor_axpby.py │ │ ├── test_multi_tensor_l2norm.py │ │ ├── test_multi_tensor_scale.py │ │ ├── test_multiple_models_optimizers_losses.py │ │ ├── test_promotion.py │ │ ├── test_rnn.py │ │ └── utils.py │ ├── run_fp16util │ │ ├── __init__.py │ │ └── test_fp16util.py │ ├── run_fused_layer_norm │ │ └── test_fused_layer_norm.py │ ├── run_mlp │ │ └── test_mlp.py │ ├── run_optimizers │ │ ├── __init__.py │ │ ├── test_dist_adam.py │ │ ├── test_fused_optimizer.py │ │ └── test_lamb.py │ ├── run_pyprof_data │ │ ├── __init__.py │ │ └── test_pyprof_data.py │ ├── run_pyprof_nvtx │ │ ├── __init__.py │ │ └── test_pyprof_nvtx.py │ └── run_test.py │ ├── L1 │ ├── common │ │ ├── compare.py │ │ ├── main_amp.py │ │ └── run_test.sh │ ├── cross_product │ │ └── run.sh │ └── cross_product_distributed │ │ └── run.sh │ ├── distributed │ ├── DDP │ │ ├── ddp_race_condition_test.py │ │ └── run_race_test.sh │ ├── amp_master_params │ │ ├── amp_master_params.py │ │ ├── compare.py │ │ └── run.sh │ └── synced_batchnorm │ │ ├── python_single_gpu_unit_test.py │ │ ├── single_gpu_unit_test.py │ │ ├── test_batchnorm1d.py │ │ ├── test_groups.py │ │ ├── two_gpu_test_different_batch_size.py │ │ ├── two_gpu_unit_test.py │ │ └── unit_test.sh │ └── docker_extension_builds │ └── run.sh ├── docs └── aka.png ├── lreid ├── __init__.py ├── core │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── base_metagraph_p_s.cpython-37.pyc │ │ └── lr_schedulers.cpython-37.pyc │ ├── base_metagraph_p_s.py │ ├── lr_schedulers.py │ └── torch16_lr_scheduler.py ├── data │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── datamanager.cpython-37.pyc │ │ ├── sampler.cpython-37.pyc │ │ └── transforms.cpython-37.pyc │ ├── datamanager.py │ ├── datasets │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── dataset.cpython-37.pyc │ │ │ └── ps_dataset.cpython-37.pyc │ │ ├── dataset.py │ │ ├── image │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── cuhk01.cpython-37.pyc │ │ │ │ ├── cuhk02.cpython-37.pyc │ │ │ │ ├── cuhk03.cpython-37.pyc │ │ │ │ ├── dukemtmcreid.cpython-37.pyc │ │ │ │ ├── grid.cpython-37.pyc │ │ │ │ ├── ilids.cpython-37.pyc │ │ │ │ ├── market1501.cpython-37.pyc │ │ │ │ ├── msmt17.cpython-37.pyc │ │ │ │ ├── prid.cpython-37.pyc │ │ │ │ ├── sensereid.cpython-37.pyc │ │ │ │ └── viper.cpython-37.pyc │ │ │ ├── cuhk01.py │ │ │ ├── cuhk02.py │ │ 
│ ├── cuhk03.py │ │ │ ├── dukemtmcreid.py │ │ │ ├── grid.py │ │ │ ├── ilids.py │ │ │ ├── market1501.py │ │ │ ├── msmt17.py │ │ │ ├── prid.py │ │ │ ├── sensereid.py │ │ │ └── viper.py │ │ ├── ps_dataset.py │ │ └── video │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── dukemtmcvidreid.cpython-37.pyc │ │ │ ├── ilidsvid.cpython-37.pyc │ │ │ ├── mars.cpython-37.pyc │ │ │ └── prid2011.cpython-37.pyc │ │ │ ├── dukemtmcvidreid.py │ │ │ ├── ilidsvid.py │ │ │ ├── mars.py │ │ │ └── prid2011.py │ ├── sampler.py │ └── transforms.py ├── data_loader │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── customed_loaders.cpython-37.pyc │ │ ├── dataset.cpython-37.pyc │ │ ├── incremental_datasets.cpython-37.pyc │ │ ├── incremental_reid_loaders.cpython-37.pyc │ │ ├── loader.cpython-37.pyc │ │ ├── reid_loaders.cpython-37.pyc │ │ └── transforms2.cpython-37.pyc │ ├── customed_loaders.py │ ├── dataset.py │ ├── incremental_datasets.py │ ├── incremental_reid_loaders.py │ ├── loader.py │ ├── reid_loaders.py │ └── transforms2.py ├── datasets │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── cuhk01.cpython-37.pyc │ │ ├── cuhk02.cpython-37.pyc │ │ ├── cuhk03.cpython-37.pyc │ │ ├── cuhksysu.cpython-37.pyc │ │ ├── dukemtmcreid.cpython-37.pyc │ │ ├── grid.cpython-37.pyc │ │ ├── ilids.cpython-37.pyc │ │ ├── market1501.cpython-37.pyc │ │ ├── mix.cpython-37.pyc │ │ ├── msmt17.cpython-37.pyc │ │ ├── prid.cpython-37.pyc │ │ ├── sensereid.cpython-37.pyc │ │ └── viper.cpython-37.pyc │ ├── cuhk01.py │ ├── cuhk02.py │ ├── cuhk03.py │ ├── cuhksysu.py │ ├── dukemtmcreid.py │ ├── grid.py │ ├── ilids.py │ ├── market1501.py │ ├── mix.py │ ├── msmt17.py │ ├── prid.py │ ├── sensereid.py │ └── viper.py ├── evaluation │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── classification.cpython-37.pyc │ │ ├── distance.cpython-37.pyc │ │ ├── metric.cpython-37.pyc │ │ ├── rank.cpython-37.pyc │ │ └── reid.cpython-37.pyc │ ├── classification.py │ ├── distance.py │ ├── metric.py │ ├── rank.py │ ├── rank_cylib │ │ ├── Makefile │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-37.pyc │ │ ├── rank_cy.c │ │ ├── rank_cy.cpython-37m-x86_64-linux-gnu.so │ │ ├── rank_cy.pyx │ │ ├── setup.py │ │ └── test_cython.py │ └── reid.py ├── losses │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── loss.cpython-37.pyc │ │ └── ranked_list_loss.cpython-37.pyc │ ├── loss.py │ └── ranked_list_loss.py ├── methods │ ├── __init__.py │ ├── backbone.py │ ├── drop_grad.py │ ├── maml.py │ └── meta_template.py ├── models │ ├── LwFnet.py │ ├── __init__.py │ ├── __pycache__ │ │ ├── LwFnet.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── bnneck.cpython-37.pyc │ │ └── metagraph_fd.cpython-37.pyc │ ├── bnneck.py │ └── metagraph_fd.py ├── operation │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── test_continual_operation_neck.cpython-37.pyc │ │ ├── test_p_s.cpython-37.pyc │ │ └── train_p_s.cpython-37.pyc │ ├── test_continual_operation_neck.py │ ├── test_p_s.py │ └── train_p_s.py ├── tools │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── meter.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── mean_variance.py │ ├── meter.py │ └── utils.py ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── avgmeter.cpython-37.pyc │ │ ├── loggers.cpython-37.pyc │ │ ├── model_complexity.cpython-37.pyc │ │ ├── reidtools.cpython-37.pyc │ │ ├── 
rerank.cpython-37.pyc │ │ ├── tools.cpython-37.pyc │ │ └── torchtools.cpython-37.pyc │ ├── avgmeter.py │ ├── loggers.py │ ├── model_complexity.py │ ├── reidtools.py │ ├── rerank.py │ ├── tools.py │ └── torchtools.py └── visualization │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-37.pyc │ ├── logger.cpython-37.pyc │ ├── visualising_rank.cpython-37.pyc │ ├── visualize.cpython-37.pyc │ └── visualize_featuremap.cpython-37.pyc │ ├── logger.py │ ├── visdom_show.py │ ├── visualising_rank.py │ ├── visualize.py │ └── visualize_featuremap.py ├── setup.py └── train_test.py

/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/.DS_Store
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea/
--------------------------------------------------------------------------------
/.idea/LifelongReID.iml:
--------------------------------------------------------------------------------
(XML content not captured in this export)
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
(XML content not captured in this export)
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
(XML content not captured in this export)
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
(XML content not captured in this export)
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
(XML content not captured in this export)
--------------------------------------------------------------------------------
/.idea/other.xml:
--------------------------------------------------------------------------------
(XML content not captured in this export)
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
(XML content not captured in this export)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 Nan Pu
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # LifelongReID
2 | Official implementation of our [Lifelong Person Re-Identification via Adaptive Knowledge Accumulation](https://arxiv.org/abs/2103.12462) in CVPR 2021,
3 | by [Nan Pu](https://tpcd.github.io/), Wei Chen, [Yu Liu](https://visionyuliu.github.io/), [Erwin M. Bakker](https://www.universiteitleiden.nl/en/staffmembers/erwin-bakker/publications#tab-4) and [Michael S. Lew](http://liacs.leidenuniv.nl/~lewms/).
4 |
5 | We provide a lifelong person re-identification toolbox, [lreid](https://github.com/TPCD/LifelongReID), in this repo.
6 |
7 | For more details, please see our paper.
8 |
9 | ![Framework](docs/aka.png)
10 | ## Citation
11 | ```
12 | @InProceedings{pu_cvpr2021,
13 | author = {Pu, Nan and Chen, Wei and Liu, Yu and Bakker, Erwin M. and Lew, Michael S.},
14 | title = {Lifelong Person Re-Identification via Adaptive Knowledge Accumulation},
15 | booktitle = {IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
16 | year = {2021}
17 | }
18 | ```
19 | # Install
20 | ## Environment
21 | ```bash
22 | conda create -n lreid python=3.7
23 | conda activate lreid
24 | conda install pytorch==1.4.0 torchvision==0.5.0 cudatoolkit=10.0 -c pytorch
25 | conda install opencv
26 | pip install Cython sklearn numpy prettytable easydict tqdm matplotlib
27 | ```
28 | For visualization, you might need to install visdom:
29 | ```bash
30 | pip install visdom
31 | ```
32 |
33 | If you want to use fp16, please follow https://github.com/NVIDIA/apex to install apex, which is an optional package.
34 | The following commands work in our environment, but they may not work in other environments.
35 | ```bash
36 | git clone https://github.com/NVIDIA/apex
37 | cd apex
38 | pip install -v --disable-pip-version-check --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" ./
39 | ```
40 | ## lreid toolbox
41 | Then, clone our project and install lreid:
42 | ```bash
43 | git clone https://github.com/TPCD/LifelongReID
44 | cd LifelongReID
45 | python setup.py develop
46 | ```
47 |
48 | ## Dataset preparation
49 | Please follow [Torchreid_Dataset_Doc](https://kaiyangzhou.github.io/deep-person-reid/datasets.html) to download datasets and unzip them to your data path (we refer to it as 'machine_dataset_path' in train_test.py). Alternatively, you can download some of the unseen-domain datasets from [DualNorm](https://github.com/BJTUJia/person_reID_DualNorm).
50 |
51 | ## Train & Test
52 |
53 |     python train_test.py
54 |
55 | # Acknowledgement
56 | The code is based on the PyTorch implementations of [Torchreid](https://github.com/KaiyangZhou/deep-person-reid) and [Person_reID_baseline_pytorch](https://github.com/layumi/Person_reID_baseline_pytorch).
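
As a concrete reference for the optional fp16 note in the Install section above, here is a minimal mixed-precision sketch using apex's `amp` API; the model, optimizer, and shapes below are toy placeholders for illustration, not lreid's actual training loop:

```python
# Minimal apex.amp sketch (placeholder model/optimizer, not lreid's training code).
import torch
from apex import amp

model = torch.nn.Linear(128, 10).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# opt_level="O1" patches common ops to run in fp16 where it is numerically safe.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

inputs = torch.randn(8, 128).cuda()
loss = model(inputs).sum()

# Scale the loss to avoid fp16 gradient underflow, then step as usual.
optimizer.zero_grad()
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
optimizer.step()
```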
57 | -------------------------------------------------------------------------------- /apex/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/.nojekyll -------------------------------------------------------------------------------- /apex/LICENSE: -------------------------------------------------------------------------------- 1 | All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 4 | 5 | 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 6 | 7 | 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 8 | 9 | 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 10 | 11 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /apex/apex/RNN/README.md: -------------------------------------------------------------------------------- 1 | Under construction... 
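
While this README is under construction, the multiplicative LSTM (mLSTM) cell implemented in `cells.py` below computes the following update, read directly off `mLSTMCell` (subscripts follow the code's weight names; $\sigma$ is the logistic sigmoid and $\odot$ is elementwise multiplication):

$$m_t = (W_{mih}\, x_t) \odot (W_{mhh}\, h_{t-1})$$
$$[\,i_t, f_t, g_t, o_t\,] = W_{ih}\, x_t + b_{ih} + W_{hh}\, m_t + b_{hh}$$
$$c_t = \sigma(f_t) \odot c_{t-1} + \sigma(i_t) \odot \tanh(g_t)$$
$$h_t = \sigma(o_t) \odot \tanh(c_t)$$

The fused CUDA path in `mLSTMCell` computes the same update through `fusedBackend.LSTMFused`, with the biases applied inside the fused kernel.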
2 | -------------------------------------------------------------------------------- /apex/apex/RNN/__init__.py: -------------------------------------------------------------------------------- 1 | from .models import LSTM, GRU, ReLU, Tanh, mLSTM 2 | 3 | __all__ = ['models'] 4 | -------------------------------------------------------------------------------- /apex/apex/RNN/cells.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .RNNBackend import RNNCell 6 | 7 | from torch.nn._functions.thnn import rnnFusedPointwise as fusedBackend 8 | 9 | import math 10 | 11 | 12 | class mLSTMRNNCell(RNNCell): 13 | """ 14 | mLSTMRNNCell 15 | """ 16 | 17 | def __init__(self, input_size, hidden_size, bias = False, output_size = None): 18 | gate_multiplier = 4 19 | super(mLSTMRNNCell, self).__init__(gate_multiplier, input_size, hidden_size, mLSTMCell, n_hidden_states = 2, bias = bias, output_size = output_size) 20 | 21 | self.w_mih = nn.Parameter(torch.Tensor(self.output_size, self.input_size)) 22 | self.w_mhh = nn.Parameter(torch.Tensor(self.output_size, self.output_size)) 23 | 24 | self.reset_parameters() 25 | 26 | def forward(self, input): 27 | """ 28 | mLSTMRNNCell.forward() 29 | """ 30 | #if not inited or bsz has changed this will create hidden states 31 | self.init_hidden(input.size()[0]) 32 | 33 | hidden_state = self.hidden[0] if self.n_hidden_states == 1 else self.hidden 34 | 35 | self.hidden = list( 36 | self.cell(input, hidden_state, self.w_ih, self.w_hh, self.w_mih, self.w_mhh, 37 | b_ih=self.b_ih, b_hh=self.b_hh) 38 | ) 39 | 40 | if self.output_size != self.hidden_size: 41 | self.hidden[0] = F.linear(self.hidden[0], self.w_ho) 42 | return tuple(self.hidden) 43 | 44 | 45 | def new_like(self, new_input_size=None): 46 | if new_input_size is None: 47 | new_input_size = self.input_size 48 | 49 | return type(self)( 50 | new_input_size, 51 | self.hidden_size, 52 | self.bias, 53 | self.output_size) 54 | 55 | def mLSTMCell(input, hidden, w_ih, w_hh, w_mih, w_mhh, b_ih=None, b_hh=None): 56 | """ 57 | mLSTMCell 58 | """ 59 | 60 | if input.is_cuda: 61 | igates = F.linear(input, w_ih) 62 | m = F.linear(input, w_mih) * F.linear(hidden[0], w_mhh) 63 | hgates = F.linear(m, w_hh) 64 | 65 | state = fusedBackend.LSTMFused.apply 66 | return state(igates, hgates, hidden[1], b_ih, b_hh) 67 | 68 | hx, cx = hidden 69 | 70 | m = F.linear(input, w_mih) * F.linear(hidden[0], w_mhh) 71 | gates = F.linear(input, w_ih, b_ih) + F.linear(m, w_hh, b_hh) 72 | 73 | ingate, forgetgate, cellgate, outgate = gates.chunk(4, 1) 74 | 75 | ingate = F.sigmoid(ingate) 76 | forgetgate = F.sigmoid(forgetgate) 77 | cellgate = F.tanh(cellgate) 78 | outgate = F.sigmoid(outgate) 79 | 80 | cy = (forgetgate * cx) + (ingate * cellgate) 81 | hy = outgate * F.tanh(cy) 82 | 83 | return hy, cy 84 | 85 | -------------------------------------------------------------------------------- /apex/apex/RNN/models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from torch.nn._functions.rnn import LSTMCell, RNNReLUCell, RNNTanhCell, GRUCell 4 | 5 | from .RNNBackend import bidirectionalRNN, stackedRNN, RNNCell 6 | from .cells import mLSTMRNNCell, mLSTMCell 7 | 8 | def toRNNBackend(inputRNN, num_layers, bidirectional=False, dropout = 0): 9 | """ 10 | :class:`toRNNBackend` 11 | """ 12 | 13 | if bidirectional: 14 | return bidirectionalRNN(inputRNN, num_layers, dropout = dropout) 15 
| else: 16 | return stackedRNN(inputRNN, num_layers, dropout = dropout) 17 | 18 | 19 | def LSTM(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None): 20 | """ 21 | :class:`LSTM` 22 | """ 23 | inputRNN = RNNCell(4, input_size, hidden_size, LSTMCell, 2, bias, output_size) 24 | return toRNNBackend(inputRNN, num_layers, bidirectional, dropout=dropout) 25 | 26 | def GRU(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None): 27 | """ 28 | :class:`GRU` 29 | """ 30 | inputRNN = RNNCell(3, input_size, hidden_size, GRUCell, 1, bias, output_size) 31 | return toRNNBackend(inputRNN, num_layers, bidirectional, dropout=dropout) 32 | 33 | def ReLU(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None): 34 | """ 35 | :class:`ReLU` 36 | """ 37 | inputRNN = RNNCell(1, input_size, hidden_size, RNNReLUCell, 1, bias, output_size) 38 | return toRNNBackend(inputRNN, num_layers, bidirectional, dropout=dropout) 39 | 40 | def Tanh(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None): 41 | """ 42 | :class:`Tanh` 43 | """ 44 | inputRNN = RNNCell(1, input_size, hidden_size, RNNTanhCell, 1, bias, output_size) 45 | return toRNNBackend(inputRNN, num_layers, bidirectional, dropout=dropout) 46 | 47 | def mLSTM(input_size, hidden_size, num_layers, bias=True, batch_first=False, dropout=0, bidirectional=False, output_size = None): 48 | """ 49 | :class:`mLSTM` 50 | """ 51 | inputRNN = mLSTMRNNCell(input_size, hidden_size, bias=bias, output_size=output_size) 52 | return toRNNBackend(inputRNN, num_layers, bidirectional, dropout=dropout) 53 | 54 | 55 | -------------------------------------------------------------------------------- /apex/apex/__init__.py: -------------------------------------------------------------------------------- 1 | # May help avoid undefined symbol errors https://pytorch.org/cppdocs/notes/faq.html#undefined-symbol-errors-from-pytorch-aten 2 | import torch 3 | import warnings 4 | 5 | if torch.distributed.is_available(): 6 | from . import parallel 7 | 8 | from . import amp 9 | from . import fp16_utils 10 | 11 | # For optimizers and normalization there is no Python fallback. 12 | # Absence of cuda backend is a hard error. 13 | # I would like the errors from importing fused_adam_cuda or fused_layer_norm_cuda 14 | # to be triggered lazily, because if someone has installed with --cpp_ext and --cuda_ext 15 | # so they expect those backends to be available, but for some reason they actually aren't 16 | # available (for example because they built improperly in a way that isn't revealed until 17 | # load time) the error message is timely and visible. 18 | from . import optimizers 19 | from . import normalization 20 | from . import pyprof 21 | -------------------------------------------------------------------------------- /apex/apex/amp/README.md: -------------------------------------------------------------------------------- 1 | # amp: Automatic Mixed Precision 2 | 3 | ## Annotating User Functions 4 | 5 | Nearly all PyTorch user code needs nothing more than the two steps 6 | above to use amp. After all, custom layers are built out of simpler 7 | PyTorch components, and amp already can see those. 8 | 9 | However, any custom C++ or CUDA code is outside of amp's (default) 10 | view of things. 
For example, suppose I implemented a new recurrent
11 | cell called a "forgetful recurrent unit" that calls directly into a
12 | CUDA backend:
13 |
14 | ```python
15 | from backend import FRUBackend
16 |
17 | def fru(input, hidden, weight, bias):
18 |     # call to CUDA code
19 |     FRUBackend(input, hidden, weight, bias)
20 | ```
21 |
22 | In this case, it is possible to get a runtime type mismatch. For
23 | example, you might have `input` in fp16, and `weight` in fp32, and amp
24 | doesn't have the visibility to insert an appropriate cast.
25 |
26 | amp exposes two ways to handle "invisible" backend code: function
27 | annotations and explicit registration.
28 |
29 | #### Function annotation
30 |
31 | The first way to handle backend code is a set of function annotations:
32 |
33 | - `@amp.half_function`
34 | - `@amp.float_function`
35 | - `@amp.promote_function`
36 |
37 | These correspond to:
38 |
39 | - Cast all arguments to fp16
40 | - Cast all arguments to fp32
41 | - If there are any type mismatches, cast everything to the widest type
42 |
43 | In our example, we believe that the FRU unit is fp16-safe and will get
44 | performance gains from casting its arguments to fp16, so we write:
45 |
46 | ```python
47 | @amp.half_function
48 | def fru(input, hidden, weight, bias):
49 |     #...
50 | ```
51 |
52 | #### Explicit registration
53 |
54 | The other way to handle backend code is with explicit function
55 | registration:
56 |
57 | - `amp.register_half_function(module, function_name)`
58 | - `amp.register_float_function(module, function_name)`
59 | - `amp.register_promote_function(module, function_name)`
60 |
61 | When using this API, `module` is the containing class or module for
62 | the function, and `function_name` is the _string_ name of the
63 | function. Note that the function must be registered before the call to
64 | `amp.initialize()`.
65 |
66 | For our FRU unit, we can register the backend function directly:
67 |
68 | ```python
69 | import backend
70 |
71 | amp.register_half_function(backend, 'FRUBackend')
72 | ```
73 |
--------------------------------------------------------------------------------
/apex/apex/amp/__init__.py:
--------------------------------------------------------------------------------
1 | from .amp import init, half_function, float_function, promote_function,\
2 |     register_half_function, register_float_function, register_promote_function
3 | from .handle import scale_loss, disable_casts
4 | from .frontend import initialize, state_dict, load_state_dict
5 | from ._amp_state import master_params, _amp_state
6 |
--------------------------------------------------------------------------------
/apex/apex/amp/__version__.py:
--------------------------------------------------------------------------------
1 | VERSION = (0, 1, 0)
2 | __version__ = '.'.join(map(str, VERSION))
3 |
--------------------------------------------------------------------------------
/apex/apex/amp/_amp_state.py:
--------------------------------------------------------------------------------
1 | # This is a "header object" that allows different amp modules to communicate.
2 | # I'm a C++ guy, not a python guy. I decided this approach because it seemed most C++-like.
3 | # But apparently it's ok: 4 | # http://effbot.org/pyfaq/how-do-i-share-global-variables-across-modules.htm 5 | import os 6 | import torch 7 | 8 | TORCH_MAJOR = int(torch.__version__.split('.')[0]) 9 | TORCH_MINOR = int(torch.__version__.split('.')[1]) 10 | 11 | 12 | if TORCH_MAJOR == 1 and TORCH_MINOR < 8: 13 | from torch._six import container_abcs 14 | else: 15 | import collections.abc as container_abcs 16 | 17 | 18 | class AmpState(object): 19 | def __init__(self): 20 | self.hard_override=False 21 | self.allow_incoming_model_not_fp32 = False 22 | self.verbosity=1 23 | 24 | 25 | # Attribute stash. Could also just stash things as global module attributes. 26 | _amp_state = AmpState() 27 | 28 | 29 | def warn_or_err(msg): 30 | if _amp_state.hard_override: 31 | print("Warning: " + msg) 32 | else: 33 | raise RuntimeError(msg) 34 | # I'm not sure if allowing hard_override is a good idea. 35 | # + " If you're sure you know what you're doing, supply " + 36 | # "hard_override=True to amp.initialize.") 37 | 38 | 39 | def maybe_print(msg, rank0=False): 40 | distributed = torch.distributed.is_available() and \ 41 | torch.distributed.is_initialized() and \ 42 | torch.distributed.get_world_size() > 1 43 | if _amp_state.verbosity > 0: 44 | if rank0: 45 | if distributed: 46 | if torch.distributed.get_rank() == 0: 47 | print(msg) 48 | else: 49 | print(msg) 50 | else: 51 | print(msg) 52 | 53 | 54 | # def iter_params(param_groups): 55 | # for group in param_groups: 56 | # for p in group['params']: 57 | # yield p 58 | 59 | 60 | def master_params(optimizer): 61 | """ 62 | Generator expression that iterates over the params owned by ``optimizer``. 63 | 64 | Args: 65 | optimizer: An optimizer previously returned from ``amp.initialize``. 66 | """ 67 | for group in optimizer.param_groups: 68 | for p in group['params']: 69 | yield p 70 | -------------------------------------------------------------------------------- /apex/apex/amp/compat.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | # True for post-0.4, when Variables/Tensors merged. 4 | def variable_is_tensor(): 5 | v = torch.autograd.Variable() 6 | return isinstance(v, torch.Tensor) 7 | 8 | def tensor_is_variable(): 9 | x = torch.Tensor() 10 | return type(x) == torch.autograd.Variable 11 | 12 | # False for post-0.4 13 | def tensor_is_float_tensor(): 14 | x = torch.Tensor() 15 | return type(x) == torch.FloatTensor 16 | 17 | # Akin to `torch.is_tensor`, but returns True for Variable 18 | # objects in pre-0.4. 19 | def is_tensor_like(x): 20 | return torch.is_tensor(x) or isinstance(x, torch.autograd.Variable) 21 | 22 | # Wraps `torch.is_floating_point` if present, otherwise checks 23 | # the suffix of `x.type()`. 24 | def is_floating_point(x): 25 | if hasattr(torch, 'is_floating_point'): 26 | return torch.is_floating_point(x) 27 | try: 28 | torch_type = x.type() 29 | return torch_type.endswith('FloatTensor') or \ 30 | torch_type.endswith('HalfTensor') or \ 31 | torch_type.endswith('DoubleTensor') 32 | except AttributeError: 33 | return False 34 | 35 | def scalar_python_val(x): 36 | if hasattr(x, 'item'): 37 | return x.item() 38 | else: 39 | if isinstance(x, torch.autograd.Variable): 40 | return x.data[0] 41 | else: 42 | return x[0] 43 | 44 | # Accounts for the possibility that some ops may be removed from a namespace. 
45 | def filter_attrs(module, attrs): 46 | return list(attrname for attrname in attrs if hasattr(module, attrname)) 47 | -------------------------------------------------------------------------------- /apex/apex/amp/lists/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/apex/amp/lists/__init__.py -------------------------------------------------------------------------------- /apex/apex/amp/lists/functional_overrides.py: -------------------------------------------------------------------------------- 1 | 2 | # TODO: think about the following two. They do weird things. 3 | # - torch.nn.utils.clip_grad (but it should always be fp32 anyway) 4 | # - torch.nn.utils.weight_norm 5 | 6 | # Notes: 7 | # F.instance_norm uses batch_norm internally. Which correctly handles 8 | # fp16 in/out with fp32 weights. So we shouldn't do anything for 9 | # either of these. 10 | # F.normalize calls `input.norm()` internally, so it's redundant, but 11 | # kept here in case impl. changes. 12 | # F.cosine_similarity is same: calls `x.norm()` internally. 13 | 14 | import torch.nn.functional 15 | 16 | MODULE = torch.nn.functional 17 | 18 | FP16_FUNCS = [ 19 | 'conv1d', 20 | 'conv2d', 21 | 'conv3d', 22 | 'conv_transpose1d', 23 | 'conv_transpose2d', 24 | 'conv_transpose3d', 25 | 'conv_tbc', # Undocumented / maybe new? 26 | 'linear', 27 | ] 28 | 29 | FP32_FUNCS = [ 30 | 31 | # Interpolation/Upsampling TODO: Remove for 1.2 32 | 'interpolate', 33 | 'grid_sample', 34 | 35 | # Pointwise 36 | 'softplus', 37 | 'softmin', 38 | 'log_softmax', 39 | 'softmax', 40 | 'gelu', 41 | 42 | # Normalization 43 | 'layer_norm', 44 | 'group_norm', 45 | 'local_response_norm', 46 | 'normalize', 47 | 'cosine_similarity', 48 | 49 | # Loss functions 50 | # TODO: which of these can be fp16? 51 | 'poisson_nll_loss', 52 | 'cosine_embedding_loss', 53 | 'cross_entropy', 54 | 'hinge_embedding_loss', 55 | 'kl_div', 56 | 'l1_loss', 57 | 'mse_loss', 58 | 'margin_ranking_loss', 59 | 'multilabel_margin_loss', 60 | 'multilabel_soft_margin_loss', 61 | 'multi_margin_loss', 62 | 'nll_loss', 63 | 'binary_cross_entropy_with_logits', 64 | 'smooth_l1_loss', 65 | 'soft_margin_loss', 66 | 'triplet_margin_loss', 67 | 'ctc_loss' 68 | ] 69 | 70 | BANNED_FUNCS = [ 71 | ('binary_cross_entropy', 72 | ("\namp does not work out-of-the-box with `F.binary_cross_entropy` or `torch.nn.BCELoss.` " 73 | "It requires that the output of the previous function be already a FloatTensor. \n\n" 74 | "Most models have a Sigmoid right before BCELoss. In that case, you can use\n" 75 | " torch.nn.BCEWithLogitsLoss\nto combine Sigmoid+BCELoss into a single layer " 76 | "that is compatible with amp.\nAnother option is to add\n" 77 | " amp.register_float_function(torch, 'sigmoid')\nbefore calling `amp.init()`.\n" 78 | "If you _really_ know what you are doing, you can disable this warning by passing " 79 | "allow_banned=True to `amp.init()`.")) 80 | ] 81 | -------------------------------------------------------------------------------- /apex/apex/amp/lists/tensor_overrides.py: -------------------------------------------------------------------------------- 1 | from .. import compat 2 | from . 
import torch_overrides 3 | 4 | import importlib 5 | 6 | import torch 7 | 8 | # if compat.variable_is_tensor() and not compat.tensor_is_variable(): 9 | MODULE = torch.Tensor 10 | # else: 11 | # MODULE = torch.autograd.Variable 12 | 13 | 14 | FP16_FUNCS = compat.filter_attrs(MODULE, [ 15 | '__matmul__', 16 | ]) 17 | 18 | FP32_FUNCS = compat.filter_attrs(MODULE, [ 19 | '__ipow__', 20 | '__pow__', 21 | '__rpow__', 22 | 23 | # Cast to fp32 before transfer to CPU 24 | 'cpu', 25 | ]) 26 | 27 | CASTS = compat.filter_attrs(MODULE, [ 28 | '__add__', 29 | '__div__', 30 | '__eq__', 31 | '__ge__', 32 | '__gt__', 33 | '__iadd__', 34 | '__idiv__', 35 | '__imul__', 36 | '__isub__', 37 | '__itruediv__', 38 | '__le__', 39 | '__lt__', 40 | '__mul__', 41 | '__ne__', 42 | '__radd__', 43 | '__rdiv__', 44 | '__rmul__', 45 | '__rsub__', 46 | '__rtruediv__', 47 | '__sub__', 48 | '__truediv__', 49 | ]) 50 | 51 | # None of these, but here to make code cleaner. 52 | SEQUENCE_CASTS = [] 53 | 54 | # We need to grab all the methods from torch_overrides and add them to 55 | # the Tensor lists as well, as almost all methods are duplicated 56 | # between `torch` and `torch.Tensor` (and check with `hasattr`, 57 | # because a few random ones aren't defined on Tensor) 58 | _self_mod = importlib.import_module(__name__) 59 | for attrname in ['FP16_FUNCS', 'FP32_FUNCS', 'CASTS', 'SEQUENCE_CASTS']: 60 | lst = getattr(_self_mod, attrname) 61 | for fn in getattr(torch_overrides, attrname): 62 | if hasattr(MODULE, fn): 63 | lst.append(fn) 64 | -------------------------------------------------------------------------------- /apex/apex/amp/lists/torch_overrides.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from .. import utils 4 | 5 | MODULE = torch 6 | 7 | FP16_FUNCS = [ 8 | # Low level functions wrapped by torch.nn layers. 9 | # The wrapper layers contain the weights which are then passed in as a parameter 10 | # to these functions. 11 | 'conv1d', 12 | 'conv2d', 13 | 'conv3d', 14 | 'conv_transpose1d', 15 | 'conv_transpose2d', 16 | 'conv_transpose3d', 17 | 'conv_tbc', 18 | 'prelu', 19 | 20 | # BLAS 21 | 'addmm', 22 | 'addmv', 23 | 'addr', 24 | 'matmul', 25 | 'mm', 26 | 'mv', 27 | ] 28 | 29 | FP32_FUNCS = [ 30 | # Pointwise 31 | 'acos', 32 | 'asin', 33 | 'cosh', 34 | 'erfinv', 35 | 'exp', 36 | 'expm1', 37 | 'log', 38 | 'log10', 39 | 'log2', 40 | 'reciprocal', 41 | 'rsqrt', 42 | 'sinh', 43 | 'tan', 44 | 45 | # Other math 46 | 'pow', 47 | 48 | # Reduction 49 | 'cumprod', 50 | 'cumsum', 51 | 'dist', 52 | # 'mean', 53 | 'norm', 54 | 'prod', 55 | 'std', 56 | 'sum', 57 | 'var', 58 | 59 | # Misc 60 | 'renorm' 61 | ] 62 | 63 | version_strings = torch.__version__.split('.') 64 | version_major = version_strings[0] 65 | version_minor = version_strings[1] 66 | version_num = float(version_major + "." + version_minor) 67 | # Before torch 1.1, mean must be blacklisted. 68 | if version_num < 1.1: 69 | FP32_FUNCS.append('mean') 70 | 71 | # Before CUDA 9.1, batched matmul was missing fast FP16 kernels. We 72 | # check the CUDA version -- if at least 9.1, then put the bmm 73 | # functions on the fp16 list. Otherwise, put them on the fp32 list. 
74 | _bmms = ['addbmm', 75 | 'baddbmm', 76 | 'bmm'] 77 | 78 | if utils.is_cuda_enabled(): 79 | # workaround https://github.com/facebookresearch/maskrcnn-benchmark/issues/802 80 | if utils.get_cuda_version() >= (9, 1, 0): 81 | FP16_FUNCS.extend(_bmms) 82 | else: 83 | FP32_FUNCS.extend(_bmms) 84 | 85 | # Multi-tensor fns that may need type promotion 86 | CASTS = [ 87 | # Multi-tensor math 88 | 'addcdiv', 89 | 'addcmul', 90 | 'atan2', 91 | 'cross', 92 | 'bilinear', 93 | 'dot', 94 | 95 | # Element-wise _or_ tensor-wise math 96 | 'add', 97 | 'div', 98 | 'mul', 99 | 100 | # Comparison 101 | 'eq', 102 | 'equal', 103 | 'ge', 104 | 'gt', 105 | 'le', 106 | 'lt', 107 | 'ne' 108 | ] 109 | 110 | # Functions that take sequence arguments. We need to inspect the whole 111 | # sequence and cast to the widest type. 112 | SEQUENCE_CASTS = [ 113 | 'cat', 114 | 'stack' 115 | ] 116 | -------------------------------------------------------------------------------- /apex/apex/amp/rnn_compat.py: -------------------------------------------------------------------------------- 1 | from . import utils, wrap 2 | 3 | import torch 4 | _VF = torch._C._VariableFunctions 5 | RNN_NAMES = ['rnn_relu', 'rnn_tanh', 'gru', 'lstm'] 6 | 7 | def _gen_VF_wrapper(name): 8 | def wrapper(*args, **kwargs): 9 | return getattr(_VF, name)(*args, **kwargs) 10 | return wrapper 11 | 12 | # Some python magic to generate an object that has the rnn cell functions 13 | # defined on it, all of which call into corresponding _VF version. 14 | # Intended to patch torch.nn.modules.rnn._VF (aka, the ref named "_VF" 15 | # imported at module scope within torch.nn.modules.rnn). This should 16 | # not affect third-party importers of _VF.py. 17 | class VariableFunctionsShim(object): 18 | def __init__(self): 19 | for name in RNN_NAMES: 20 | for suffix in ['', '_cell']: 21 | fn_name = name + suffix 22 | setattr(self, fn_name, _gen_VF_wrapper(fn_name)) 23 | 24 | def has_old_rnns(): 25 | try: 26 | torch.nn.backends.thnn.backend.LSTMCell 27 | return True 28 | except: 29 | return False 30 | 31 | def whitelist_rnn_cells(handle, verbose): 32 | # Different module + function names in old/new RNN cases 33 | if has_old_rnns(): 34 | fn_names = ['RNNReLUCell', 'RNNTanhCell', 'LSTMCell', 'GRUCell'] 35 | mod = torch.nn.backends.thnn.backend 36 | else: 37 | fn_names = [x + '_cell' for x in RNN_NAMES] 38 | mod = torch.nn.modules.rnn._VF 39 | assert isinstance(mod, VariableFunctionsShim) 40 | 41 | # Insert casts on cell functions 42 | for fn in fn_names: 43 | wrap.cached_cast(mod, fn, utils.maybe_half, handle, 44 | try_caching=True, verbose=verbose) 45 | 46 | if has_old_rnns(): 47 | # Special handling of `backward` for fused gru / lstm: 48 | # The `backward` method calls Tensor.sum() (blacklist) internally, 49 | # and then the resulting grad_input has the wrong type. 50 | # TODO: where else is this a problem? 
51 |         for rnn_type in ['GRUFused', 'LSTMFused']:
52 |             mod = getattr(torch.nn._functions.thnn.rnnFusedPointwise, rnn_type)
53 |             wrap.disable_casts(mod, 'backward', handle)
54 |
--------------------------------------------------------------------------------
/apex/apex/contrib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/apex/contrib/__init__.py
--------------------------------------------------------------------------------
/apex/apex/contrib/csrc/groupbn/cuda_utils.h:
--------------------------------------------------------------------------------
1 | #include <ATen/cuda/CUDAContext.h>
2 | #ifndef CUDA_UTILS_H
3 | #define CUDA_UTILS_H
4 |
5 | namespace at {
6 | namespace cuda {
7 |
8 | namespace utils {
9 |
10 | static inline int MaxSharedMemoryPerMultiprocessor(int device_id) {
11 |   return getDeviceProperties(device_id)->sharedMemPerMultiprocessor;
12 | }
13 |
14 |
15 | }
16 | }
17 | }
18 |
19 |
20 | #endif
21 |
--------------------------------------------------------------------------------
/apex/apex/contrib/csrc/layer_norm/ln_kernel_traits.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 |
3 | constexpr uint32_t THREADS_PER_WARP = 32;
4 |
5 | template <typename base_t_, uint32_t COLS_, uint32_t WARPS_M_,
6 |           uint32_t WARPS_N_, uint32_t BYTES_PER_LDG_>
7 | struct Kernel_traits {
8 |   enum { WARPS_M = WARPS_M_ };
9 |   enum { WARPS_N = WARPS_N_ };
10 |   enum { COLS = COLS_ };
11 |   enum { BYTES_PER_LDG = BYTES_PER_LDG_ };
12 |
13 |   using Vec = Vec<base_t_, BYTES_PER_LDG_>;
14 |
15 |   using vec_t = typename Vec::vec_t;
16 |   using base_t = typename Vec::base_t;
17 |   using packed_t = typename Vec::packed_t;
18 |   using compute_t = typename Vec::compute_t;
19 |   using packed_compute_t = typename Vec::packed_compute_t;
20 |
21 |   enum { THREADS_PER_ROW = WARPS_N * THREADS_PER_WARP };
22 |   enum { THREADS_PER_CTA = WARPS_M * THREADS_PER_ROW };
23 |   enum { ROWS_PER_CTA = WARPS_M };
24 |
25 |   enum { BYTES_PER_ROW = COLS * sizeof(base_t) };
26 |   enum { BYTES_PER_ROW_PER_CTA = THREADS_PER_ROW * BYTES_PER_LDG };
27 |   enum { SMEM_BYTES = ROWS_PER_CTA * COLS * sizeof(compute_t) };
28 | };
29 |
--------------------------------------------------------------------------------
/apex/apex/contrib/csrc/multihead_attn/philox.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | //Philox CUDA.
3 | 4 | class Philox { 5 | public: 6 | __device__ inline Philox(unsigned long long seed, 7 | unsigned long long subsequence, 8 | unsigned long long offset) { 9 | key.x = (unsigned int)seed; 10 | key.y = (unsigned int)(seed >> 32); 11 | counter = make_uint4(0, 0, 0, 0); 12 | counter.z = (unsigned int)(subsequence); 13 | counter.w = (unsigned int)(subsequence >> 32); 14 | STATE = 0; 15 | incr_n(offset / 4); 16 | } 17 | __device__ inline uint4 operator()() { 18 | if(STATE == 0) { 19 | uint4 counter_ = counter; 20 | uint2 key_ = key; 21 | //7-round philox 22 | for(int i = 0; i < 6; i++) { 23 | counter_ = single_round(counter_, key_); 24 | key_.x += (kPhilox10A); key_.y += (kPhilox10B); 25 | } 26 | output = single_round(counter_, key_); 27 | incr(); 28 | } 29 | //return a float4 directly 30 | //unsigned long ret; 31 | //switch(STATE) { 32 | // case 0: ret = output.x; break; 33 | // case 1: ret = output.y; break; 34 | // case 2: ret = output.z; break; 35 | // case 3: ret = output.w; break; 36 | //} 37 | //STATE = (STATE + 1) % 4; 38 | return output; 39 | } 40 | private: 41 | uint4 counter; 42 | uint4 output; 43 | uint2 key; 44 | unsigned int STATE; 45 | __device__ inline void incr_n(unsigned long long n) { 46 | unsigned int nlo = (unsigned int)(n); 47 | unsigned int nhi = (unsigned int)(n >> 32); 48 | counter.x += nlo; 49 | if (counter.x < nlo) 50 | nhi++; 51 | counter.y += nhi; 52 | if (nhi <= counter.y) 53 | return; 54 | if (++counter.z) 55 | return; 56 | ++counter.w; 57 | } 58 | __device__ inline void incr() { 59 | if (++counter.x) 60 | return; 61 | if (++counter.y) 62 | return; 63 | if (++counter.z) 64 | return; 65 | ++counter.w; 66 | } 67 | __device__ unsigned int mulhilo32(unsigned int a, unsigned int b, 68 | unsigned int *result_high) { 69 | *result_high = __umulhi(a, b); 70 | return a*b; 71 | } 72 | __device__ inline uint4 single_round(uint4 ctr, uint2 key) { 73 | unsigned int hi0; 74 | unsigned int hi1; 75 | unsigned int lo0 = mulhilo32(kPhiloxSA, ctr.x, &hi0); 76 | unsigned int lo1 = mulhilo32(kPhiloxSB, ctr.z, &hi1); 77 | uint4 ret = {hi1 ^ ctr.y ^ key.x, lo1, hi0 ^ ctr.w ^ key.y, lo0}; 78 | return ret; 79 | } 80 | static const unsigned long kPhilox10A = 0x9E3779B9; 81 | static const unsigned long kPhilox10B = 0xBB67AE85; 82 | static const unsigned long kPhiloxSA = 0xD2511F53; 83 | static const unsigned long kPhiloxSB = 0xCD9E8D57; 84 | }; 85 | // Inverse of 2^32. 
86 | #define M_RAN_INVM32 2.3283064e-10f
87 | __device__ __inline__ float4 uniform4(uint4 x) {
88 |   return make_float4(x.x * M_RAN_INVM32, x.y * M_RAN_INVM32, x.z * M_RAN_INVM32, x.w * M_RAN_INVM32);
89 |
90 | }
91 |
--------------------------------------------------------------------------------
/apex/apex/contrib/csrc/optimizers/fused_lamb_cuda.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | void multi_tensor_lamb_cuda(
4 |     int chunk_size,
5 |     at::Tensor noop_flag,
6 |     std::vector<std::vector<at::Tensor>> tensor_lists,
7 |     const float lr,
8 |     const float beta1,
9 |     const float beta2,
10 |     const float epsilon,
11 |     const int step,
12 |     const int bias_correction,
13 |     const float weight_decay,
14 |     const int grad_averaging,
15 |     const int mode,
16 |     const float global_grad_norm,
17 |     const float max_grad_norm);
18 |
19 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
20 |   m.def("lamb", &multi_tensor_lamb_cuda, "Computes and applies updates for the LAMB optimizer");
21 | }
22 |
--------------------------------------------------------------------------------
/apex/apex/contrib/csrc/optimizers/multi_tensor_distopt_adam.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | void multi_tensor_fused_adam_cuda(
4 |     int chunk_size,
5 |     at::Tensor noop_flag,
6 |     std::vector<std::vector<at::Tensor>> tensor_lists,
7 |     at::Tensor per_tensor_beta1,
8 |     at::Tensor per_tensor_beta2,
9 |     at::Tensor per_tensor_bias_correction,
10 |     at::Tensor per_tensor_eps,
11 |     at::Tensor per_tensor_weight_decay,
12 |     float lr,
13 |     float grad_scale,
14 |     int step,
15 |     int mode);
16 |
17 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
18 |   m.def("multi_tensor_fused_adam", &multi_tensor_fused_adam_cuda,
19 |         "Multi tensor Adam optimized CUDA implementation.");
20 | }
21 |
--------------------------------------------------------------------------------
/apex/apex/contrib/csrc/optimizers/multi_tensor_distopt_lamb.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | void multi_tensor_lamb_compute_update_term_cuda(
4 |     int chunk_size,
5 |     at::Tensor noop_flag,
6 |     std::vector<std::vector<at::Tensor>> tensor_lists,
7 |     at::Tensor per_tensor_beta1,
8 |     at::Tensor per_tensor_beta2,
9 |     at::Tensor per_tensor_beta3,
10 |     at::Tensor per_tensor_bias_correction,
11 |     const int step,
12 |     at::Tensor per_tensor_epsilon,
13 |     const int mode,
14 |     at::Tensor per_tensor_decay,
15 |     const float grad_scale);
16 |
17 | void multi_tensor_lamb_update_weights_cuda(
18 |     int chunk_size,
19 |     at::Tensor noop_flag,
20 |     std::vector<std::vector<at::Tensor>> tensor_lists,
21 |     at::Tensor per_tensor_param_norm,
22 |     at::Tensor per_tensor_update_norm,
23 |     const float learning_rate,
24 |     at::Tensor per_tensor_decay,
25 |     bool use_nvlamb);
26 |
27 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
28 |   m.def("multi_tensor_lamb_compute_update_term", &multi_tensor_lamb_compute_update_term_cuda,
29 |         "Computes update term for LAMB optimizer");
30 |   m.def("multi_tensor_lamb_update_weights", &multi_tensor_lamb_update_weights_cuda,
31 |         "Applies update term for LAMB optimizer");
32 | }
33 |
--------------------------------------------------------------------------------
/apex/apex/contrib/csrc/xentropy/interface.cpp:
--------------------------------------------------------------------------------
1 | #include <torch/extension.h>
2 |
3 | // CUDA forward declarations
4 |
5 | std::vector<at::Tensor> softmax_xentropy_cuda(
6 |     const at::Tensor &input,
7 |     const at::Tensor &labels,
8 |     const float smoothing,
9 |     const bool half_to_float);
10 |
11 | at::Tensor softmax_xentropy_backward_cuda(
12 |     const at::Tensor &grad_loss,
13 |     const at::Tensor &logits,
14 |     const at::Tensor &max_log_sum_exp,
15 |     const at::Tensor &labels,
16 |     const float smoothing);
17 |
18 | // C++ interface
19 |
20 | #define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be a CUDA tensor")
21 | #define CHECK_CONTIGUOUS(x) AT_ASSERTM(x.is_contiguous(), #x " must be contiguous")
22 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x)
23 |
24 | std::vector<at::Tensor> softmax_xentropy_forward(
25 |     const at::Tensor &input,
26 |     const at::Tensor &labels,
27 |     const float smoothing,
28 |     const bool half_to_float) {
29 |   CHECK_CUDA(input);
30 |   CHECK_INPUT(labels);
31 |
32 |   return softmax_xentropy_cuda(input, labels, smoothing, half_to_float);
33 | }
34 |
35 | at::Tensor softmax_xentropy_backward(
36 |     const at::Tensor &grad_loss,
37 |     const at::Tensor &logits,
38 |     const at::Tensor &max_log_sum_exp,
39 |     const at::Tensor &labels,
40 |     const float smoothing) {
41 |   CHECK_CUDA(grad_loss);
42 |   CHECK_CUDA(logits);
43 |   CHECK_INPUT(max_log_sum_exp);
44 |   CHECK_INPUT(labels);
45 |
46 |   return softmax_xentropy_backward_cuda(grad_loss, logits, max_log_sum_exp, labels, smoothing);
47 | }
48 |
49 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
50 |   m.def("forward", &softmax_xentropy_forward, "Softmax cross entropy loss with label smoothing forward (CUDA)");
51 |   m.def("backward", &softmax_xentropy_backward, "Softmax cross entropy loss with label smoothing backward (CUDA)");
52 | }
53 |
--------------------------------------------------------------------------------
/apex/apex/contrib/groupbn/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 |     import torch
3 |     import bnp
4 |     from .batch_norm import BatchNorm2d_NHWC
5 |     del torch
6 |     del bnp
7 |     del batch_norm
8 | except ImportError as err:
9 |     print("apex was installed without --bnp flag, contrib.groupbn is not available")
--------------------------------------------------------------------------------
/apex/apex/contrib/layer_norm/__init__.py:
--------------------------------------------------------------------------------
1 | from .layer_norm import FastLayerNorm
2 |
--------------------------------------------------------------------------------
/apex/apex/contrib/layer_norm/layer_norm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch.nn import init
3 |
4 | import fast_layer_norm
5 |
6 | class FastLayerNormFN(torch.autograd.Function):
7 |     @staticmethod
8 |     def forward(ctx, x, gamma, beta, epsilon):
9 |         x = x.contiguous()
10 |         gamma = gamma.contiguous()
11 |         beta = beta.contiguous()
12 |         hidden_size = gamma.numel()
13 |         xmat = x.view((-1, hidden_size))
14 |         ymat, mu, rsigma = fast_layer_norm.ln_fwd(xmat, gamma, beta, epsilon)
15 |         ctx.save_for_backward(x, gamma, mu, rsigma)
16 |         return ymat.view(x.shape)
17 |
18 |     @staticmethod
19 |     def backward(ctx, dy):
20 |         #assert dy.is_contiguous()
21 |         dy = dy.contiguous()  # this happens!
22 | x, gamma, mu, rsigma = ctx.saved_tensors 23 | 24 | hidden_size = gamma.numel() 25 | xmat = x.view((-1, hidden_size)) 26 | dymat = dy.view(xmat.shape) 27 | dxmat, dgamma, dbeta = fast_layer_norm.ln_bwd(dymat, xmat, mu, rsigma, gamma) 28 | dx = dxmat.view(x.shape) 29 | return dx, dgamma, dbeta, None 30 | 31 | class FastLayerNorm(torch.nn.Module): 32 | def __init__(self, hidden_size, eps=1e-5): 33 | super(FastLayerNorm, self).__init__() 34 | self.epsilon = eps 35 | self.weight = torch.nn.Parameter(torch.Tensor(hidden_size)) 36 | self.bias = torch.nn.Parameter(torch.Tensor(hidden_size)) 37 | self.reset_parameters() 38 | 39 | def reset_parameters(self): 40 | init.ones_(self.weight) 41 | init.zeros_(self.bias) 42 | 43 | def forward(self, x): 44 | return FastLayerNormFN.apply(x, self.weight, self.bias, self.epsilon) 45 | -------------------------------------------------------------------------------- /apex/apex/contrib/multihead_attn/MHA_bwd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/apex/contrib/multihead_attn/MHA_bwd.png -------------------------------------------------------------------------------- /apex/apex/contrib/multihead_attn/MHA_fwd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/apex/contrib/multihead_attn/MHA_fwd.png -------------------------------------------------------------------------------- /apex/apex/contrib/multihead_attn/README.md: -------------------------------------------------------------------------------- 1 | # Fast Multihead Attention 2 | 3 | This implementation has two main features: 4 | * A C++ implementation to avoid the CPU overheads of PyTorch found with smaller batch sizes. 5 | * The removal of all copies and transposes found in standard implementations of Multihead Attention. 6 | 7 | | | Python Version | C++ Version | 8 | | :----------------------------------------- | :------------: | :---------: | 9 | | Layer Norm and Residual Add Variant | X | X | 10 | | Includes Linear Biases | X | | 11 | | Reduces CPU Overheads | | X | 12 | | Fuses masking with Softmax | | X | 13 | | Removes Transposes and Copies | X | X | 14 | | Includes Self and Encoder/Decoder Variants | X | X | 15 | 16 | ## How to Instantiate 17 | 18 | `SelfMultiheadAttn(` _hidden dim_, _heads_, _dropout=prob_, _bias=bool_, _include_norm_add=bool_, _impl='fast'_ `)` 19 | `EncdecMultiheadAttn(` _hidden dim_, _heads_, _dropout=prob_, _bias=bool_, _include_norm_add=bool_, _impl='fast'_ `)` 20 | 21 | `impl` has two options: 22 | * `fast` uses C++ Version 23 | * `default` uses Python Version 24 | 25 | ## Instructions to build on Linux 26 | 27 | ``` 28 | $ git clone https://github.com/NVIDIA/apex 29 | $ cd apex 30 | $ pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" --global-option="--fast_multihead_attn" ./ 31 | ``` 32 | ## Try Performance Tests Yourself! 33 | Perf test script is found here!
34 | ``` 35 | cd contrib/examples/multihead_attn 36 | ``` 37 | #### Fast Multihead Attention (Python reference) 38 | ``` 39 | python perf_test_multihead_attn.py --ref 40 | ``` 41 | #### Fast Multihead Attention with C++ Implementation 42 | ``` 43 | python perf_test_multihead_attn.py 44 | ``` 45 | #### Compare with `torch.nn.MultiheadAttention` 46 | ``` 47 | python perf_test_multihead_attn.py --native 48 | ``` 49 | #### Test your own range! 50 | ``` 51 | python perf_test_multihead_attn.py --seq-length 64 --num-seqs-start 10 --num-seqs-stop 120 --num-seqs-inc 5 52 | ``` 53 | 54 | ## Performance Comparisons 55 | 56 | * Performance was measured with 64 token sequence lengths on an NVIDIA TitanV card. 57 | * Time is measured across multiple layers to simulate an in-model scenario. 58 | 59 | ![Multihead Attention Forward](MHA_fwd.png) 60 | ![Multihead Attention Backward](MHA_bwd.png) 61 | -------------------------------------------------------------------------------- /apex/apex/contrib/multihead_attn/__init__.py: -------------------------------------------------------------------------------- 1 | from .self_multihead_attn import SelfMultiheadAttn 2 | from .encdec_multihead_attn import EncdecMultiheadAttn 3 | from .mask_softmax_dropout_func import fast_mask_softmax_dropout_func 4 | -------------------------------------------------------------------------------- /apex/apex/contrib/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .fp16_optimizer import FP16_Optimizer 2 | from .fused_adam import FusedAdam 3 | from .fused_lamb import FusedLAMB 4 | -------------------------------------------------------------------------------- /apex/apex/contrib/sparsity/__init__.py: -------------------------------------------------------------------------------- 1 | from .sparse_masklib import create_mask 2 | from .asp import ASP 3 | -------------------------------------------------------------------------------- /apex/apex/contrib/test/multihead_attn/test_mha_fused_softmax.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import unittest 3 | import torch.nn.functional as F 4 | from apex.contrib.multihead_attn import fast_mask_softmax_dropout_func 5 | 6 | class FusedSoftmaxTest(unittest.TestCase): 7 | def setUp(self, seed=1234): 8 | torch.manual_seed(seed) 9 | torch.cuda.manual_seed_all(seed) 10 | 11 | self.seq_length = 80 12 | self.sequences = 10 13 | self.hidden_dim = 1024 14 | self.heads = 16 15 | self.dropout_prob = 0.0 16 | 17 | self.mask = (torch.randn(self.sequences,self.seq_length)>0).cuda() 18 | self.mask = self.mask.half()*-10000 19 | self.ref_inputs = torch.randn(self.heads * self.sequences, self.seq_length, self.seq_length, 20 | dtype=torch.float16, device=torch.device("cuda")).requires_grad_(True) 21 | 22 | self.tst_inputs = self.ref_inputs.clone().detach().requires_grad_(True) 23 | 24 | def test_fused_softmax(self): 25 | grads = torch.randn_like(self.tst_inputs) 26 | y_ref = self.ref_inputs.view(self.sequences, self.heads, self.seq_length, self.seq_length) 27 | y_ref = y_ref + self.mask.unsqueeze(1).unsqueeze(2) 28 | y_ref = y_ref.view(self.sequences*self.heads, self.seq_length, self.seq_length) 29 | y_ref = F.softmax(y_ref, dim=-1) 30 | y_ref = torch._fused_dropout(y_ref, 1.0) 31 | 32 | y_tst = fast_mask_softmax_dropout_func(True, self.heads, self.tst_inputs, self.mask, True, 0.0) 33 | y_ref[0].backward(grads) 34 | y_tst.backward(grads) 35 | 36 | self.assertTrue(torch.allclose(self.ref_inputs,
self.tst_inputs, atol=1e-5, rtol=1e-5)) 37 | self.assertTrue(torch.allclose(y_ref[0], y_tst, atol=1e-3, rtol=1e-3)) 38 | self.assertTrue(torch.allclose(self.ref_inputs.grad, self.tst_inputs.grad, atol=1e-3, rtol=1e-3)) 39 | 40 | 41 | if __name__ == '__main__': 42 | unittest.main() 43 | -------------------------------------------------------------------------------- /apex/apex/contrib/xentropy/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import torch 3 | import xentropy_cuda 4 | from .softmax_xentropy import SoftmaxCrossEntropyLoss 5 | del torch 6 | del xentropy_cuda 7 | del softmax_xentropy 8 | except ImportError as err: 9 | print("apex was installed without --xentropy flag, contrib.xentropy is not available") 10 | -------------------------------------------------------------------------------- /apex/apex/contrib/xentropy/softmax_xentropy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import xentropy_cuda 3 | 4 | class SoftmaxCrossEntropyLoss(torch.autograd.Function): 5 | @staticmethod 6 | def forward(ctx, logits, labels, smoothing=0.0, padding_idx=0, half_to_float=False): 7 | losses, max_log_sum_exp = xentropy_cuda.forward( 8 | logits, labels, smoothing, half_to_float) 9 | losses.masked_fill_(labels==padding_idx, 0) 10 | 11 | ctx.save_for_backward(logits, max_log_sum_exp, labels, 12 | torch.FloatTensor([smoothing]), 13 | torch.LongTensor([padding_idx])) 14 | 15 | return losses 16 | 17 | @staticmethod 18 | def backward(ctx, grad_loss): 19 | logits, max_log_sum_exp, labels, smoothing, padding_idx = ctx.saved_tensors 20 | 21 | if not grad_loss.is_contiguous(): 22 | grad_loss = grad_loss.contiguous() 23 | grad_loss.masked_fill_(labels==padding_idx.item(), 0) 24 | grad_logits = xentropy_cuda.backward( 25 | grad_loss.contiguous(), logits, max_log_sum_exp, 26 | labels, smoothing.item()) 27 | 28 | return grad_logits, None, None, None, None 29 | -------------------------------------------------------------------------------- /apex/apex/fp16_utils/README.md: -------------------------------------------------------------------------------- 1 | fp16_optimizer.py contains `FP16_Optimizer`, a Python class designed to wrap an existing Pytorch optimizer and automatically enable master parameters and loss scaling in a manner transparent to the user. To use `FP16_Optimizer`, only two lines of one's Python model need to change. 2 | 3 | #### [FP16_Optimizer API documentation](https://nvidia.github.io/apex/fp16_utils.html#automatic-management-of-master-params-loss-scaling) 4 | 5 | #### [Simple examples with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/FP16_Optimizer_simple) 6 | 7 | #### [Imagenet with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/imagenet) 8 | 9 | #### [word_language_model with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/word_language_model) 10 | 11 | 12 | fp16_util.py contains a number of utilities to manually manage master parameters and loss scaling, if the user chooses. 
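For illustration, here is a minimal sketch of the manual recipe these utilities support, using the helpers exported from `apex.fp16_utils` (the toy model, static loss-scale value, and data below are placeholders, not part of the library):

```python
import torch
from apex.fp16_utils import (network_to_half, prep_param_lists,
                             model_grads_to_master_grads,
                             master_params_to_model_params)

model = network_to_half(torch.nn.Linear(10, 10).cuda())   # fp16 model (batchnorm, if any, stays fp32)
model_params, master_params = prep_param_lists(model)     # fp16 model params + fp32 master copies
optimizer = torch.optim.SGD(master_params, lr=1e-2)       # optimizer steps on the fp32 masters
loss_scale = 128.0                                        # illustrative static loss scale

output = model(torch.randn(4, 10, device="cuda"))
loss = output.float().sum()
optimizer.zero_grad()
(loss * loss_scale).backward()                            # scale to keep fp16 grads from underflowing
model_grads_to_master_grads(model_params, master_params)  # copy fp16 grads onto the fp32 masters
for param in master_params:
    param.grad.data.mul_(1.0 / loss_scale)                # unscale in fp32
optimizer.step()
master_params_to_model_params(model_params, master_params)  # push updated weights back to fp16
```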
13 | 14 | #### [Manual management documentation](https://nvidia.github.io/apex/fp16_utils.html#manual-master-parameter-management) 15 | 16 | The [Imagenet with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/imagenet) and [word_language_model with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/word_language_model) directories also contain `main.py` files that demonstrate manual management of master parameters and static loss scaling. These examples illustrate what sort of operations `FP16_Optimizer` is performing automatically. 17 | -------------------------------------------------------------------------------- /apex/apex/fp16_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .fp16util import ( 2 | BN_convert_float, 3 | network_to_half, 4 | prep_param_lists, 5 | model_grads_to_master_grads, 6 | master_params_to_model_params, 7 | tofp16, 8 | to_python_float, 9 | clip_grad_norm, 10 | convert_module, 11 | convert_network, 12 | FP16Model, 13 | ) 14 | 15 | from .fp16_optimizer import FP16_Optimizer 16 | from .loss_scaler import LossScaler, DynamicLossScaler 17 | -------------------------------------------------------------------------------- /apex/apex/mlp/__init__.py: -------------------------------------------------------------------------------- 1 | from .mlp import * 2 | -------------------------------------------------------------------------------- /apex/apex/mlp/mlp.py: -------------------------------------------------------------------------------- 1 | from copy import copy 2 | import math 3 | import torch 4 | from torch import nn 5 | import mlp_cuda 6 | from .. import amp 7 | 8 | class MlpFunction(torch.autograd.Function): 9 | @staticmethod 10 | def forward(ctx, bias, activation, *args): 11 | output = mlp_cuda.forward(bias, activation, args) 12 | ctx.save_for_backward(*args) 13 | ctx.outputs = output 14 | ctx.bias = bias 15 | ctx.activation = activation 16 | return output[0] 17 | 18 | @staticmethod 19 | def backward(ctx, grad_o): 20 | grads = mlp_cuda.backward(ctx.bias, ctx.activation, grad_o, ctx.outputs, ctx.saved_tensors) 21 | del ctx.outputs 22 | return (None, None, *grads) 23 | 24 | mlp_function = amp.half_function(MlpFunction.apply) 25 | 26 | class MLP(torch.nn.Module): 27 | """Launch MLP in C++ 28 | 29 | Args: 30 | mlp_sizes (list of int): MLP sizes. 
Example: [1024,1024,1024] will create 2 MLP layers with shape 1024x1024 31 | bias (bool): Default True 32 | activation (str): 'none', 'relu' or 'sigmoid'. Default 'relu' 33 | """ 34 | def __init__(self, mlp_sizes, bias=True, activation='relu'): 35 | super(MLP, self).__init__() 36 | self.num_layers = len(mlp_sizes) - 1 37 | self.mlp_sizes = copy(mlp_sizes) 38 | self.bias = 1 if bias else 0 39 | 40 | if activation == 'none': 41 | self.activation = 0 42 | elif activation == 'relu': 43 | self.activation = 1 44 | elif activation == 'sigmoid': 45 | self.activation = 2 46 | else: 47 | raise TypeError("activation must be 'none', 'relu' or 'sigmoid'.") 48 | 49 | self.weights = [] 50 | self.biases = [] 51 | for i in range(self.num_layers): 52 | w = torch.nn.Parameter(torch.empty(mlp_sizes[i+1], mlp_sizes[i])) 53 | self.weights.append(w) 54 | name = 'weight_{}'.format(i) 55 | setattr(self, name, w) 56 | if self.bias: 57 | b = torch.nn.Parameter(torch.empty(mlp_sizes[i+1])) 58 | self.biases.append(b) 59 | name = 'bias_{}'.format(i) 60 | setattr(self, name, b) 61 | 62 | self.reset_parameters() 63 | 64 | def reset_parameters(self): 65 | for weight in self.weights: 66 | dimsum = weight.size(0) + weight.size(1) 67 | std = math.sqrt(2. / float(dimsum)) 68 | nn.init.normal_(weight, 0., std) 69 | if self.bias: 70 | for bias in self.biases: 71 | std = math.sqrt(1. / float(bias.size(0))) 72 | nn.init.normal_(bias, 0., std) 73 | 74 | def forward(self, input): 75 | return mlp_function(self.bias, self.activation, input, *self.weights, *self.biases) 76 | 77 | def extra_repr(self): 78 | s = F"MLP sizes: {self.mlp_sizes}, Bias={self.bias}, activation={self.activation}" 79 | return s 80 | -------------------------------------------------------------------------------- /apex/apex/multi_tensor_apply/__init__.py: -------------------------------------------------------------------------------- 1 | from .multi_tensor_apply import MultiTensorApply 2 | 3 | multi_tensor_applier = MultiTensorApply(2048*32) 4 | 5 | -------------------------------------------------------------------------------- /apex/apex/multi_tensor_apply/multi_tensor_apply.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class MultiTensorApply(object): 4 | available = False 5 | warned = False 6 | 7 | def __init__(self, chunk_size): 8 | try: 9 | import amp_C 10 | MultiTensorApply.available = True 11 | self.chunk_size = chunk_size 12 | except ImportError as err: 13 | MultiTensorApply.available = False 14 | MultiTensorApply.import_err = err 15 | 16 | def check_avail(self): 17 | if not MultiTensorApply.available: 18 | raise RuntimeError( 19 | "Attempted to call MultiTensorApply method, but MultiTensorApply " 20 | "is not available, possibly because Apex was installed without " 21 | "--cpp_ext --cuda_ext.
Original import error message:", 22 | MultiTensorApply.import_err) 23 | 24 | def __call__(self, op, noop_flag_buffer, tensor_lists, *args): 25 | self.check_avail() 26 | 27 | return op(self.chunk_size, 28 | noop_flag_buffer, 29 | tensor_lists, 30 | *args) 31 | -------------------------------------------------------------------------------- /apex/apex/normalization/__init__.py: -------------------------------------------------------------------------------- 1 | from .fused_layer_norm import FusedLayerNorm 2 | -------------------------------------------------------------------------------- /apex/apex/optimizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .fused_sgd import FusedSGD 2 | from .fused_adam import FusedAdam 3 | from .fused_novograd import FusedNovoGrad 4 | from .fused_lamb import FusedLAMB 5 | from .fused_adagrad import FusedAdagrad -------------------------------------------------------------------------------- /apex/apex/parallel/README.md: -------------------------------------------------------------------------------- 1 | ## Distributed Data Parallel 2 | 3 | distributed.py contains the source code for `apex.parallel.DistributedDataParallel`, a module wrapper that enables multi-process multi-GPU data parallel training optimized for NVIDIA's NCCL communication library. 4 | 5 | `apex.parallel.DistributedDataParallel` achieves high performance by overlapping communication with 6 | computation in the backward pass and bucketing smaller transfers to reduce the total number of 7 | transfers required. 8 | 9 | multiproc.py contains the source code for `apex.parallel.multiproc`, a launch utility that places one process on each of the node's available GPUs. 10 | 11 | #### [API Documentation](https://nvidia.github.io/apex/parallel.html) 12 | 13 | #### [Example/Walkthrough](https://github.com/NVIDIA/apex/tree/master/examples/distributed) 14 | 15 | #### [Imagenet example with Mixed Precision](https://github.com/NVIDIA/apex/tree/master/examples/imagenet) 16 | 17 | #### [Simple example with FP16_Optimizer](https://github.com/NVIDIA/apex/tree/master/examples/FP16_Optimizer_simple/distributed_apex) 18 | 19 | ### Synchronized Batch Normalization 20 | 21 | `apex.parallel.SyncBatchNorm` has a similar API to `torch.nn.BatchNorm*N*d`. 22 | It reduces stats on the first (channel) dimension of the Tensor and accepts 23 | arbitrary spatial dimensions. 24 | 25 | #### Installation 26 | 27 | Apex provides two sync BN implementations: 28 | 29 | 1. There is the Python-only implementation, which is the default implementation 30 | when installing with `python setup.py install`. 31 | It uses PyTorch primitive operations and the distributed communication package from 32 | `torch.distributed`. 33 | 34 | - _The Python-only implementation requires the input tensor to be of the same data type as 35 | the layer_ 36 | 37 | 2. We also provide an implementation with custom kernels through a CUDA/C++ extension with 38 | improved performance. We are experimenting with Welford's algorithm and Kahan summation for the reduction, 39 | hoping to get better accuracy. 40 | To use the kernel implementation, users need to install Apex with the CUDA extension 41 | enabled: `python setup.py install --cuda_ext`. 42 | 43 | - _The custom kernel implementation supports fp16 input with an fp32 layer, as cuDNN does. 44 | This is required to run the imagenet example in fp16._ 45 | 46 | - _Currently the kernel implementation only supports GPU._ 47 | 48 | #### HowTo 49 | 50 | 1.
Users can use `apex.parallel.SyncBatchNorm` by building their module with 51 | the layer explicitly. 52 | 53 | ``` 54 | import apex 55 | input_t = torch.randn(3, 5, 20).cuda() 56 | sbn = apex.parallel.SyncBatchNorm(5).cuda() 57 | output_t = sbn(input_t) 58 | ``` 59 | 60 | 2. Users can also take a constructed `torch.nn.Module` and replace all its `torch.nn.BatchNorm*N*d` modules with `apex.parallel.SyncBatchNorm` through the utility function `apex.parallel.convert_syncbn_model`. 61 | 62 | ``` 63 | # model is an instance of torch.nn.Module 64 | import apex 65 | sync_bn_model = apex.parallel.convert_syncbn_model(model) 66 | ``` 67 | -------------------------------------------------------------------------------- /apex/apex/parallel/multiproc.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import sys 3 | import subprocess 4 | 5 | def docstring_hack(): 6 | """ 7 | Multiproc file which will launch a set of processes locally for multi-gpu 8 | usage: python -m apex.parallel.multiproc main.py ... 9 | """ 10 | pass 11 | 12 | argslist = list(sys.argv)[1:] 13 | world_size = torch.cuda.device_count() 14 | 15 | if '--world-size' in argslist: 16 | world_size = int(argslist[argslist.index('--world-size')+1]) 17 | else: 18 | argslist.append('--world-size') 19 | argslist.append(str(world_size)) 20 | 21 | workers = [] 22 | 23 | for i in range(world_size): 24 | if '--rank' in argslist: 25 | argslist[argslist.index('--rank')+1] = str(i) 26 | else: 27 | argslist.append('--rank') 28 | argslist.append(str(i)) 29 | stdout = None if i == 0 else open("GPU_"+str(i)+".log", "w") 30 | print(argslist) 31 | p = subprocess.Popen([str(sys.executable)]+argslist, stdout=stdout) 32 | workers.append(p) 33 | 34 | for p in workers: 35 | p.wait() 36 | -------------------------------------------------------------------------------- /apex/apex/pyprof/FAQs.md: -------------------------------------------------------------------------------- 1 | 1. How do I intercept the Adam optimizer in APEX? 2 | 3 | ```python 4 | from apex import pyprof 5 | import fused_adam_cuda 6 | pyprof.nvtx.wrap(fused_adam_cuda, 'adam') 7 | ``` 8 | 9 | 2. If you are using JIT and/or AMP, the correct initialization sequence is 10 | 1. Let any JIT compilation finish. 11 | 2. Initialize pyprof: `pyprof.nvtx.init()`. 12 | 3. Initialize AMP. 13 | 14 | 3. How do I profile with `torch.distributed.launch`? 15 | 16 | ```sh 17 | nvprof -f -o net%p.sql \ 18 | --profile-from-start off \ 19 | --profile-child-processes \ 20 | python -m torch.distributed.launch net.py 21 | ``` 22 | -------------------------------------------------------------------------------- /apex/apex/pyprof/__init__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | from . import nvtx, prof 4 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.sql 3 | *.dict 4 | *.csv 5 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/apex/README.md: -------------------------------------------------------------------------------- 1 | This directory has examples of how to use `pyprof` with APEX extensions e.g. `fused_adam_cuda` and `fused_layer_norm_cuda`.
2 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/apex/fused_adam.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import fused_adam_cuda 3 | from apex.optimizers import FusedAdam, FP16_Optimizer 4 | from apex import pyprof 5 | 6 | pyprof.nvtx.init() 7 | pyprof.nvtx.wrap(fused_adam_cuda, 'adam') 8 | 9 | model = torch.nn.Linear(10, 20).cuda().half() 10 | criterion = torch.nn.CrossEntropyLoss().cuda() 11 | optimizer = FusedAdam(model.parameters()) 12 | optimizer = FP16_Optimizer(optimizer) 13 | 14 | x = torch.ones(32, 10).cuda().half() 15 | target = torch.empty(32, dtype=torch.long).random_(20).cuda() 16 | y = model(x) 17 | loss = criterion(y, target) 18 | optimizer.zero_grad() 19 | loss.backward() 20 | optimizer.step() 21 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/apex/fused_layer_norm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import fused_layer_norm_cuda 3 | from apex.normalization import FusedLayerNorm 4 | from apex import pyprof 5 | 6 | pyprof.nvtx.init() 7 | pyprof.nvtx.wrap(fused_layer_norm_cuda, 'forward') 8 | pyprof.nvtx.wrap(fused_layer_norm_cuda, 'backward') 9 | pyprof.nvtx.wrap(fused_layer_norm_cuda, 'forward_affine') 10 | pyprof.nvtx.wrap(fused_layer_norm_cuda, 'backward_affine') 11 | 12 | input = torch.randn(20, 5, 10, 10).cuda() 13 | 14 | # With Learnable Parameters 15 | m = FusedLayerNorm(input.size()[1:]).cuda() 16 | output = m(input) 17 | 18 | # Without Learnable Parameters 19 | m = FusedLayerNorm(input.size()[1:], elementwise_affine=False).cuda() 20 | output = m(input) 21 | 22 | # Normalize over last two dimensions 23 | m = FusedLayerNorm([10, 10]).cuda() 24 | output = m(input) 25 | 26 | # Normalize over last dimension of size 10 27 | m = FusedLayerNorm(10).cuda() 28 | output = m(input) 29 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/apex/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT=`realpath $0` 6 | SCRIPTPATH=`dirname $SCRIPT` 7 | PYPROF="$SCRIPTPATH/../.." 8 | 9 | parse="python $PYPROF/parse/parse.py" 10 | prof="python $PYPROF/prof/prof.py" 11 | 12 | for f in *.py 13 | do 14 | base=`basename $f .py` 15 | sql=$base.sql 16 | dict=$base.dict 17 | 18 | #NVprof 19 | echo "nvprof -fo $sql python $f" 20 | nvprof -fo $sql python $f 21 | 22 | #Parse 23 | echo $parse $sql 24 | $parse $sql > $dict 25 | 26 | #Prof 27 | echo $prof $dict 28 | $prof -w 130 $dict 29 | \rm $sql $dict 30 | done 31 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/custom_func_module/README.md: -------------------------------------------------------------------------------- 1 | This directory has examples which show how to intercept (monkey patch) custom functions and modules with `pyprof`. No changes are required in `pyprof/parse`; however, users can add support for bytes and flops calculations for custom functions and modules in `pyprof/prof` by extending the `OperatorLayerBase` class.
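As a hypothetical illustration of that last point, a handler for a made-up elementwise `foo` operator could subclass `OperatorLayerBase` as sketched below, modeled on the handlers shipped in `pyprof/prof` (the op name and the flop/byte cost model are invented for the example):

```python
from collections import OrderedDict
from .utility import Utility
from .base import OperatorLayerBase

class Foo(OperatorLayerBase):
    """Cost model for a hypothetical elementwise 'foo' op."""

    def __init__(self, d):
        marker = eval(d.argMarker[0])
        self.mod_ = marker['mod']
        self.op_ = marker['op']
        t = marker['args'][0]          # assume the first argument is the input tensor
        assert t['type'] == "tensor"
        self.shape = t['shape']
        self.type = t['dtype']

    def params(self):
        return OrderedDict([('T', self.shape), ('type', self.type)])

    def tc(self):
        return "-"                     # tensor cores not applicable

    def op(self):
        return self.op_

    def mod(self):
        return self.mod_

    def flops(self):
        return Utility.numElems(self.shape)  # one flop per element (illustrative)

    def bytes(self):
        # one read plus one write of an input-sized tensor
        return 2 * Utility.numElems(self.shape) * Utility.typeToBytes(self.type)
```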
2 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/custom_func_module/custom_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | import torch.cuda.profiler as profiler 5 | from apex import pyprof 6 | #Initialize pyprof 7 | pyprof.nvtx.init() 8 | 9 | class Foo(torch.autograd.Function): 10 | @staticmethod 11 | def forward(ctx, in1, in2): 12 | out = in1 + in2 #This could be a custom C/C++ function. 13 | return out 14 | 15 | @staticmethod 16 | def backward(ctx, grad): 17 | in1_grad = grad #This could be a custom C/C++ function. 18 | in2_grad = grad #This could be a custom C/C++ function. 19 | return in1_grad, in2_grad 20 | 21 | #Hook the forward and backward functions to pyprof 22 | pyprof.nvtx.wrap(Foo, 'forward') 23 | pyprof.nvtx.wrap(Foo, 'backward') 24 | 25 | foo = Foo.apply 26 | 27 | x = torch.ones(4,4).cuda() 28 | y = torch.ones(4,4).cuda() 29 | 30 | with torch.autograd.profiler.emit_nvtx(): 31 | profiler.start() 32 | z = foo(x,y) 33 | profiler.stop() 34 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/custom_func_module/custom_module.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | import torch.cuda.profiler as profiler 5 | from apex import pyprof 6 | pyprof.nvtx.init() 7 | 8 | class Foo(torch.nn.Module): 9 | def __init__(self, size): 10 | super(Foo, self).__init__() 11 | self.n = torch.nn.Parameter(torch.ones(size)) 12 | self.m = torch.nn.Parameter(torch.ones(size)) 13 | 14 | def forward(self, input): 15 | return self.n*input + self.m 16 | 17 | #Hook the forward function to pyprof 18 | pyprof.nvtx.wrap(Foo, 'forward') 19 | 20 | foo = Foo(4) 21 | foo.cuda() 22 | x = torch.ones(4).cuda() 23 | 24 | with torch.autograd.profiler.emit_nvtx(): 25 | profiler.start() 26 | z = foo(x) 27 | profiler.stop() 28 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/custom_func_module/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT=`realpath $0` 6 | SCRIPTPATH=`dirname $SCRIPT` 7 | PYPROF="$SCRIPTPATH/../.." 8 | 9 | parse="python $PYPROF/parse/parse.py" 10 | prof="python $PYPROF/prof/prof.py" 11 | 12 | for f in *.py 13 | do 14 | base=`basename $f .py` 15 | sql=$base.sql 16 | dict=$base.dict 17 | 18 | #NVprof 19 | echo "nvprof -fo $sql python $f" 20 | nvprof -fo $sql python $f 21 | 22 | #Parse 23 | echo $parse $sql 24 | $parse $sql > $dict 25 | 26 | #Prof 27 | echo $prof $dict 28 | $prof -w 130 $dict 29 | \rm $sql $dict 30 | done 31 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/imagenet/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT=`realpath $0` 6 | SCRIPTPATH=`dirname $SCRIPT` 7 | PYPROF="$SCRIPTPATH/../.." 
8 | 9 | parse="python -m apex.pyprof.parse" 10 | prof="python -m apex.pyprof.prof" 11 | 12 | for net in "resnet50" 13 | do 14 | for optim in adam sgd 15 | do 16 | for batch in 32 64 17 | do 18 | base="torchvision".$net.$optim.$batch 19 | sql=$base.sql 20 | dict=$base.dict 21 | 22 | #NVprof 23 | echo "nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch" 24 | nvprof -fo $sql --profile-from-start off python imagenet.py -m ${net} -o $optim -b $batch 25 | 26 | #Parse 27 | echo $parse $sql 28 | $parse $sql > $dict 29 | 30 | #Prof 31 | echo $prof $dict 32 | $prof -w 130 $dict 33 | # \rm $sql $dict 34 | done 35 | done 36 | done 37 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/jit/README.md: -------------------------------------------------------------------------------- 1 | *As of this writing, these examples do not work 2 | because of changes being proposed in PyTorch.* 3 | 4 | There are two ways to use PyTorch JIT 5 | - Scripting 6 | - Tracing 7 | 8 | In addition, we can JIT a 9 | - Stand alone function 10 | - Class / class method 11 | 12 | This directory has an example for each of the 4 cases. 13 | Intercepting (monkey patching) JITted code has a few extra steps, 14 | which are explained through comments. 15 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/jit/jit_script_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | import torch.cuda.profiler as profiler 5 | from apex import pyprof 6 | 7 | #The following creates an object "foo" of type ScriptModule 8 | #The new object has a function called "forward" 9 | 10 | @torch.jit.script 11 | def foo(x, y): 12 | return torch.sigmoid(x) + y 13 | 14 | #Initialize pyprof after the JIT step 15 | pyprof.nvtx.init() 16 | 17 | #Assign a name to the object "foo" 18 | foo.__name__ = "foo" 19 | 20 | #Hook up the forward function to pyprof 21 | pyprof.nvtx.wrap(foo, 'forward') 22 | 23 | x = torch.zeros(4,4).cuda() 24 | y = torch.ones(4,4).cuda() 25 | 26 | with torch.autograd.profiler.emit_nvtx(): 27 | profiler.start() 28 | z = foo(x, y) 29 | profiler.stop() 30 | print(z) 31 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/jit/jit_script_method.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | import torch.cuda.profiler as profiler 5 | from apex import pyprof 6 | 7 | class Foo(torch.jit.ScriptModule): 8 | def __init__(self, size): 9 | super(Foo, self).__init__() 10 | self.n = torch.nn.Parameter(torch.ones(size)) 11 | self.m = torch.nn.Parameter(torch.ones(size)) 12 | 13 | @torch.jit.script_method 14 | def forward(self, input): 15 | return self.n*input + self.m 16 | 17 | #Initialize pyprof after the JIT step 18 | pyprof.nvtx.init() 19 | 20 | #Hook up the forward function to pyprof 21 | pyprof.nvtx.wrap(Foo, 'forward') 22 | 23 | foo = Foo(4) 24 | foo.cuda() 25 | x = torch.ones(4).cuda() 26 | 27 | with torch.autograd.profiler.emit_nvtx(): 28 | profiler.start() 29 | z = foo(x) 30 | profiler.stop() 31 | print(z) 32 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/jit/jit_trace_function.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import 
torch 4 | import torch.cuda.profiler as profiler 5 | from apex import pyprof 6 | 7 | def foo(x, y): 8 | return torch.sigmoid(x) + y 9 | 10 | x = torch.zeros(4,4).cuda() 11 | y = torch.ones(4,4).cuda() 12 | 13 | #JIT the function using tracing 14 | #This returns an object of type ScriptModule with a forward method. 15 | traced_foo = torch.jit.trace(foo, (x,y)) 16 | 17 | #Initialize pyprof after the JIT step 18 | pyprof.nvtx.init() 19 | 20 | #Assign a name to the object "traced_foo" 21 | traced_foo.__dict__['__name__'] = "foo" 22 | 23 | #Hook up the forward function to pyprof 24 | pyprof.nvtx.wrap(traced_foo, 'forward') 25 | 26 | with torch.autograd.profiler.emit_nvtx(): 27 | profiler.start() 28 | z = traced_foo(x, y) 29 | profiler.stop() 30 | print(z) 31 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/jit/jit_trace_method.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | import torch.cuda.profiler as profiler 5 | from apex import pyprof 6 | 7 | class Foo(torch.nn.Module): 8 | def __init__(self, size): 9 | super(Foo, self).__init__() 10 | self.n = torch.nn.Parameter(torch.ones(size)) 11 | self.m = torch.nn.Parameter(torch.ones(size)) 12 | 13 | def forward(self, input): 14 | return self.n*input + self.m 15 | 16 | foo = Foo(4) 17 | foo.cuda() 18 | x = torch.ones(4).cuda() 19 | 20 | #JIT the class using tracing 21 | traced_foo = torch.jit.trace(foo, x) 22 | 23 | #Initialize pyprof after the JIT step 24 | pyprof.nvtx.init() 25 | 26 | #Assign a name to the object "traced_foo" 27 | traced_foo.__dict__['__name__'] = "foo" 28 | 29 | #Hook up the forward function to pyprof 30 | pyprof.nvtx.wrap(traced_foo, 'forward') 31 | 32 | with torch.autograd.profiler.emit_nvtx(): 33 | profiler.start() 34 | z = traced_foo(x) 35 | profiler.stop() 36 | print(z) 37 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/jit/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT=`realpath $0` 6 | SCRIPTPATH=`dirname $SCRIPT` 7 | PYPROF="$SCRIPTPATH/../.." 
8 | 9 | parse="python $PYPROF/parse/parse.py" 10 | prof="python $PYPROF/prof/prof.py" 11 | 12 | for f in *.py 13 | do 14 | base=`basename $f .py` 15 | sql=$base.sql 16 | dict=$base.dict 17 | 18 | #NVprof 19 | echo "nvprof -fo $sql python $f" 20 | nvprof -fo $sql python $f 21 | 22 | #Parse 23 | echo $parse $sql 24 | $parse $sql > $dict 25 | 26 | #Prof 27 | echo $prof $dict 28 | $prof -w 130 $dict 29 | \rm $sql $dict 30 | done 31 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/lenet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import torch.cuda.profiler as profiler 7 | import torch.optim as optim 8 | 9 | from apex import pyprof 10 | pyprof.nvtx.init() 11 | 12 | class LeNet5(nn.Module): 13 | def __init__(self): 14 | super(LeNet5, self).__init__() 15 | # 1 input image channel, 6 output channels, 5x5 square convolution 16 | # kernel 17 | self.conv1 = nn.Conv2d(1, 6, 5) 18 | self.conv2 = nn.Conv2d(6, 16, 5) 19 | # an affine operation: y = Wx + b 20 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 21 | self.fc2 = nn.Linear(120, 84) 22 | self.fc3 = nn.Linear(84, 10) 23 | 24 | def forward(self, x): 25 | # Max pooling over a (2, 2) window 26 | x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) 27 | # If the size is a square you can only specify a single number 28 | x = F.max_pool2d(F.relu(self.conv2(x)), 2) 29 | x = x.view(-1, self.num_flat_features(x)) 30 | x = F.relu(self.fc1(x)) 31 | x = F.relu(self.fc2(x)) 32 | x = self.fc3(x) 33 | return x 34 | 35 | def num_flat_features(self, x): 36 | size = x.size()[1:] # all dimensions except the batch dimension 37 | num_features = 1 38 | for s in size: 39 | num_features *= s 40 | return num_features 41 | 42 | with torch.autograd.profiler.emit_nvtx(): 43 | 44 | net = LeNet5().cuda() 45 | 46 | input = torch.randn(1, 1, 32, 32).cuda() 47 | out = net(input) 48 | 49 | target = torch.randn(10) # a dummy target, for example 50 | target = target.view(1, -1).cuda() # make it the same shape as output 51 | criterion = nn.MSELoss() 52 | 53 | # create your optimizer 54 | optimizer = optim.SGD(net.parameters(), lr=0.01) 55 | 56 | # in your training loop: 57 | optimizer.zero_grad() # zero the gradient buffers 58 | 59 | profiler.start() 60 | output = net(input) 61 | loss = criterion(output, target) 62 | loss.backward() 63 | optimizer.step() # Does the update 64 | profiler.stop() 65 | 66 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/simple.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | """ 4 | This simple file provides an example of how to 5 | - import the pyprof library and initialize it 6 | - use the emit_nvtx context manager 7 | - start and stop the profiler 8 | 9 | Only kernels within profiler.start and profiler.stop calls are profiled. 
10 | To profile 11 | $ nvprof -f -o simple.sql --profile-from-start off ./simple.py 12 | """ 13 | 14 | import sys 15 | import torch 16 | import torch.cuda.profiler as profiler 17 | 18 | #Import and initialize pyprof 19 | from apex import pyprof 20 | pyprof.nvtx.init() 21 | 22 | a = torch.randn(5, 5).cuda() 23 | b = torch.randn(5, 5).cuda() 24 | 25 | #Context manager 26 | with torch.autograd.profiler.emit_nvtx(): 27 | 28 | #Start profiler 29 | profiler.start() 30 | 31 | c = a + b 32 | c = torch.mul(a,b) 33 | c = torch.matmul(a,b) 34 | c = torch.argmax(a, dim=1) 35 | c = torch.nn.functional.pad(a, (1,1)) 36 | 37 | #Stop profiler 38 | profiler.stop() 39 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/user_annotation/README.md: -------------------------------------------------------------------------------- 1 | Nvidia NVTX range markers (https://docs.nvidia.com/gameworks/content/gameworkslibrary/nvtx/nvidia_tools_extension_library_nvtx.htm) 2 | are a useful tool to capture and observe events, code ranges, etc. 3 | Using PyTorch APIs e.g., `torch.cuda.nvtx.range_push("xxx")` and `torch.cuda.nvtx.range_pop()`, users can easily add their own NVTX range markers. These markers can then be observed in the Nvidia Visual Profiler (NVVP). 4 | 5 | While inserting NVTX markers (strings), if the users follow a specific string pattern `"layer:your_string_here"` e.g. `"layer:conv1"` or `"layer:encoder_layer_3_self_attention"`, then `pyprof` will display the strings `conv1` and `encoder_layer_3_self_attention` next to the associated kernels in the output of `prof.py` when used with the `-c layer` option. 6 | 7 | NVTX range markers can be nested and if users follow the above string pattern, the output of `prof.py` will show all the markers associated with a kernel. 8 | 9 | The file `resnet.py` (a simplified version of the torchvision model) shows an example of how users can add (nested) NVTX markers with information which can greatly aid in understanding and analysis of networks. 10 | 11 | Note that the pattern `"layer:your_string_here"` was chosen to aid information extraction by `pyprof`. The tool will work seamlessly even if there are other markers or no markers at all. 12 | 13 | ### To run 14 | 15 | ```sh 16 | nvprof -fo resnet.sql --profile-from-start off python resnet.py 17 | parse.py resnet.sql > resnet.dict 18 | prof.py --csv -c idx,layer,dir,mod,op,kernel,params,sil resnet.dict 19 | ``` 20 | 21 | The file `resnet.sql` can also be opened with NVVP as usual. 22 | -------------------------------------------------------------------------------- /apex/apex/pyprof/examples/user_annotation/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | SCRIPT=`realpath $0` 6 | SCRIPTPATH=`dirname $SCRIPT` 7 | PYPROF="$SCRIPTPATH/../.."
8 | 9 | parse="python $PYPROF/parse/parse.py" 10 | prof="python $PYPROF/prof/prof.py" 11 | 12 | for f in *.py 13 | do 14 | base=`basename $f .py` 15 | sql=$base.sql 16 | dict=$base.dict 17 | 18 | #NVprof 19 | echo "nvprof -fo $sql --profile-from-start off python $f" 20 | nvprof -fo $sql --profile-from-start off python $f 21 | 22 | #Parse 23 | echo $parse $sql 24 | $parse $sql > $dict 25 | 26 | #Prof 27 | echo $prof $dict 28 | #$prof -w 130 $dict 29 | $prof --csv -c idx,layer,dir,mod,op,kernel,params,sil $dict 30 | \rm $sql $dict 31 | done 32 | -------------------------------------------------------------------------------- /apex/apex/pyprof/nvtx/__init__.py: -------------------------------------------------------------------------------- 1 | from .nvmarker import init 2 | from .nvmarker import add_wrapper as wrap 3 | -------------------------------------------------------------------------------- /apex/apex/pyprof/parse/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/apex/pyprof/parse/__init__.py -------------------------------------------------------------------------------- /apex/apex/pyprof/parse/__main__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | try: 4 | from .parse import main 5 | except ImportError as e: 6 | warnings.warn("Did you make sure to install PyProf dependencies by using the --pyprof flag during Apex installation?") 7 | raise e 8 | 9 | if __name__ == '__main__': 10 | main() 11 | -------------------------------------------------------------------------------- /apex/apex/pyprof/parse/db.py: -------------------------------------------------------------------------------- 1 | import sys, sqlite3 2 | 3 | class DB(object): 4 | """ 5 | This class provides functions for DB operations 6 | with exception handling. 7 | """ 8 | 9 | def __init__(self, dbFile): 10 | try: 11 | conn = sqlite3.connect(dbFile) 12 | conn.row_factory = sqlite3.Row 13 | c = conn.cursor() 14 | except: 15 | print("Error opening {}".format(dbFile)) 16 | sys.exit(1) 17 | 18 | self.conn = conn 19 | self.c = c 20 | 21 | def select(self, cmd): 22 | try: 23 | self.c.execute(cmd) 24 | #rows = self.c.fetchall() 25 | rows = [dict(row) for row in self.c.fetchall()] 26 | except sqlite3.Error as e: 27 | print(e) 28 | sys.exit(1) 29 | except: 30 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 31 | sys.exit(1) 32 | 33 | #print(rows) 34 | return rows 35 | 36 | def insert(self, cmd, data): 37 | try: 38 | self.c.execute(cmd, data) 39 | except sqlite3.Error as e: 40 | print(e) 41 | sys.exit(1) 42 | except: 43 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 44 | sys.exit(1) 45 | 46 | def execute(self, cmd): 47 | try: 48 | self.c.execute(cmd) 49 | except sqlite3.Error as e: 50 | print(e) 51 | sys.exit(1) 52 | except: 53 | print("Uncaught error in SQLite access while executing {}".format(cmd)) 54 | sys.exit(1) 55 | 56 | def commit(self): 57 | self.conn.commit() 58 | 59 | def close(self): 60 | self.c.close() 61 | self.conn.close() 62 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/__init__.py: -------------------------------------------------------------------------------- 1 | from .
import data, prof 2 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/__main__.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | try: 4 | from .prof import main 5 | except ImportError as e: 6 | warnings.warn("Did you make sure to install PyProf dependencies by using the --pyprof flag during Apex installation?") 7 | raise e 8 | 9 | if __name__ == '__main__': 10 | main() 11 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/activation.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | class Activation(OperatorLayerBase): 6 | """ 7 | This class handles the various activation functions. 8 | """ 9 | 10 | ops = ["celu", "elu", "elu_", "hardshrink", "hardtanh", "hardtanh_", "leaky_relu", "leaky_relu_", "logsigmoid", "prelu", "relu", "relu_", "relu6", "rrelu", "rrelu_", "selu", "sigmoid", "softplus", "softshrink", "softsign", "tanh", "tanhshrink", "threshold", "threshold_"] 11 | 12 | def __init__(self, d): 13 | marker = eval(d.argMarker[0]) 14 | mod = marker['mod'] 15 | op = marker['op'] 16 | args = marker['args'] 17 | 18 | self.marker = marker 19 | self.mod_ = mod 20 | self.op_ = op 21 | self.args = args 22 | 23 | assert (mod in ["torch.nn.functional", "torch", "Tensor"]) 24 | 25 | #Filter out named parameters 26 | args = list(filter(lambda x : x['name'] == '', args)) 27 | 28 | assert (len(args) >= 1) 29 | arg = args[0] 30 | assert (arg['type'] == "tensor") 31 | 32 | self.i = arg 33 | self.dir = d.dir 34 | 35 | def params(self): 36 | p = OrderedDict([('T', self.i['shape']),('type', self.i['dtype'])]) 37 | return p 38 | 39 | def flops(self): 40 | direction = self.dir 41 | tensor = self.i['shape'] 42 | t = self.i['dtype'] 43 | 44 | # TODO: revise 45 | elems = Utility.numElems(tensor) 46 | return elems 47 | 48 | def bytes(self): 49 | direction = self.dir 50 | tensor = self.i['shape'] 51 | t = self.i['dtype'] 52 | 53 | elems = Utility.numElems(tensor) 54 | elems = elems * (2 if direction == "fprop" else 3) 55 | 56 | return elems * Utility.typeToBytes(t) 57 | 58 | def tc(self): 59 | return "-" 60 | 61 | def op(self): 62 | return self.op_ 63 | 64 | def mod(self): 65 | return self.mod_ 66 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/base.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | class OperatorLayerBase(ABC): 4 | """ 5 | Base class for all layers and operators. 6 | Every derived class should have the following functions. 7 | """ 8 | 9 | @abstractmethod 10 | def tc(self): 11 | """ 12 | Tensor core usage by the kernel. 13 | Return "1" (yes), "0" (no, but possible), "-" (not applicable) 14 | """ 15 | pass 16 | 17 | @abstractmethod 18 | def params(self): 19 | """ 20 | Kernel parameters to be printed. 21 | """ 22 | pass 23 | 24 | @abstractmethod 25 | def flops(self): 26 | """ 27 | Note that 1 FMA = 2 flops. 28 | """ 29 | pass 30 | 31 | @abstractmethod 32 | def bytes(self): 33 | pass 34 | 35 | @abstractmethod 36 | def mod(self): 37 | """ 38 | Name of the module/class e.g. torch.nn.functional. 39 | """ 40 | pass 41 | 42 | @abstractmethod 43 | def op(self): 44 | """ 45 | Name of the operator e.g. sigmoid. 
46 | """ 47 | pass 48 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/convert.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | class Convert(OperatorLayerBase): 6 | """ 7 | Class to handle convert operations. 8 | """ 9 | ops = ["byte", "char", "double", "float", "half", "int", "long", "short", "to"] 10 | 11 | def __init__(self, d): 12 | marker = eval(d.argMarker[0]) 13 | mod = marker['mod'] 14 | op = marker['op'] 15 | args = marker['args'] 16 | 17 | self.marker = marker 18 | self.mod_ = mod 19 | self.op_ = op 20 | self.args = args 21 | 22 | assert (mod == "Tensor") 23 | assert (op in Convert.ops) 24 | assert (len(args) == 1) 25 | 26 | #The argument could be a tensor or scalar 27 | t = args[0] 28 | if t['type'] == "tensor": 29 | shape = t['shape'] 30 | stype = t['dtype'] 31 | else: 32 | shape = (1,) 33 | stype = t['type'] 34 | if self.op_ == "to": 35 | op = stype 36 | 37 | self.shape = shape 38 | self.stype = stype 39 | self.dtype = op 40 | 41 | def params(self): 42 | p = OrderedDict([('T', self.shape), ('stype', self.stype), ('dtype', self.dtype)]) 43 | return p 44 | 45 | def op(self): 46 | return self.op_ 47 | 48 | def mod(self): 49 | return self.mod_ 50 | 51 | def tc(self): 52 | return "-" 53 | 54 | def elems(self): 55 | return Utility.numElems(self.shape) 56 | 57 | def flops(self): 58 | return 0 59 | 60 | def bytes(self): 61 | b = self.elems() * (Utility.typeToBytes(self.stype) + Utility.typeToBytes(self.dtype)) 62 | return b 63 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/data.py: -------------------------------------------------------------------------------- 1 | from .utility import Utility 2 | 3 | class Data(object): 4 | """ 5 | Class to store all the data for every kernel e.g. name, bytes, flops, device, stream etc. 
6 | """ 7 | def __init__(self, kernel): 8 | #Available from NVprof 9 | self.tid = kernel['tid'] 10 | self.device = kernel['device'] 11 | self.stream = kernel['stream'] 12 | self.grid = str(kernel['grid']).replace(" ","").replace("(","").replace(")","") 13 | self.block = str(kernel['block']).replace(" ","").replace("(","").replace(")","") 14 | self.name = kernel['kShortName'].replace(" ","_") 15 | self.lName = kernel['kLongName'] 16 | self.sil = kernel['kDuration'] #units ns 17 | 18 | self.index = None 19 | 20 | #Markers 21 | self.argMarker = kernel['marker'] 22 | self.modMarker = kernel['reprMarkers'] 23 | self.seqMarker = kernel['seqMarker'] 24 | 25 | self.layer = kernel['layer'] 26 | self.trace = kernel['trace'] 27 | 28 | self.seqId = kernel['seqId'] 29 | self.altSeqId = kernel['altSeqId'] 30 | 31 | self.dir = kernel['dir'] 32 | self.sub = kernel['subSeqId'] 33 | 34 | self.mod = "na" 35 | self.op = "na" 36 | self.params = {"na":"na"} 37 | self.tc = "na" 38 | self.flops = 0 39 | self.bytes = 0 40 | 41 | def setParams(self, params): 42 | #Remove space from params 43 | qaz = "" 44 | for key,value in params.items(): 45 | if "type" not in key: 46 | qaz += "{}={},".format(key,value) 47 | else: 48 | if type(value) is str: 49 | qaz += "{},".format(Utility.typeToString(value)) 50 | else: 51 | qaz += "{}".format(value) 52 | 53 | self.params = qaz.replace(" ", "") 54 | 55 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/dropout.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | class Dropout(OperatorLayerBase): 6 | 7 | def __init__(self, d): 8 | marker = eval(d.argMarker[0]) 9 | mod = marker['mod'] 10 | op = marker['op'] 11 | args = marker['args'] 12 | 13 | self.marker = marker 14 | self.mod_ = mod 15 | self.op_ = op 16 | self.args = args 17 | 18 | assert (mod == "torch.nn.functional") 19 | assert (op == "dropout") 20 | #assert (len(args) == 1) 21 | 22 | self.shape = args[0]['shape'] 23 | self.type = args[0]['dtype'] 24 | self.dir = d.dir 25 | 26 | return 27 | 28 | def params(self): 29 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 30 | return p 31 | 32 | def op(self): 33 | return self.op_ 34 | 35 | def mod(self): 36 | return self.mod_ 37 | 38 | def tc(self): 39 | return "-" 40 | 41 | def elems(self): 42 | return Utility.numElems(self.shape) 43 | 44 | def bytes(self): 45 | #Ignoring the cost of writing and reading the mask 46 | return Utility.typeToBytes(self.type) * self.elems() * 2 47 | 48 | def flops(self): 49 | # Note: This is approximate and depends on the RNG 50 | return 5*self.elems() 51 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/embedding.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | class Embedding(OperatorLayerBase): 6 | 7 | def __init__(self, d): 8 | marker = eval(d.argMarker[0]) 9 | mod = marker['mod'] 10 | op = marker['op'] 11 | args = marker['args'] 12 | 13 | self.marker = marker 14 | self.mod_ = mod 15 | self.op_ = op 16 | self.args = args 17 | 18 | assert (mod == "torch.nn.functional") 19 | assert (op == "embedding") 20 | 21 | self.ishape = args[0]['shape'] 22 | self.itype = args[0]['dtype'] 23 | 24 | self.eshape = args[1]['shape'] 25 | 
self.etype = args[1]['dtype'] 26 | 27 | assert (len(self.eshape) == 2) 28 | 29 | self.dir = d.dir 30 | self.sub = d.sub 31 | return 32 | 33 | def params(self): 34 | p = OrderedDict([('I', self.ishape), ('itype', self.itype), ('E', self.eshape), ('etype', self.etype)]) 35 | return p 36 | 37 | def op(self): 38 | return self.op_ 39 | 40 | def mod(self): 41 | return self.mod_ 42 | 43 | def tc(self): 44 | return "-" 45 | 46 | def bytes(self): 47 | ishape = self.ishape 48 | itype = self.itype 49 | eshape = self.eshape 50 | etype = self.etype 51 | 52 | ielems = Utility.numElems(ishape) 53 | 54 | b = 0 55 | if self.dir == "fprop": 56 | #indices 57 | b += ielems * Utility.typeToBytes(itype) 58 | #read and write the embedding matrix 59 | b += ielems * eshape[1] * 2 * Utility.typeToBytes(etype) 60 | else: 61 | #3 times the size of the incoming gradient 62 | b = ielems * eshape[1] * 3 * Utility.typeToBytes(etype) 63 | 64 | if self.sub > 0: 65 | b = 0 66 | 67 | return b 68 | 69 | def flops(self): 70 | # Note: not implemented yet 71 | return 0 72 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/loss.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | #TODO: Add support for additional loss functions. 6 | 7 | class MSELoss(OperatorLayerBase): 8 | 9 | def __init__(self, d): 10 | marker = eval(d.argMarker[0]) 11 | mod = marker['mod'] 12 | op = marker['op'] 13 | args = marker['args'] 14 | 15 | self.marker = marker 16 | self.mod_ = mod 17 | self.op_ = op 18 | self.args = args 19 | 20 | assert (mod == "torch.nn.functional") 21 | assert (op == "mse_loss") 22 | assert (len(args) == 3) 23 | 24 | #Get input, target and reduction 25 | if (args[0]['name'] == ""): 26 | x = args[0] 27 | else: 28 | x = list(filter(lambda x : x['name'] == "input", args))[0] 29 | 30 | if (args[1]['name'] == ""): 31 | y = args[1] 32 | else: 33 | y = list(filter(lambda x : x['name'] == "target", args))[0] 34 | 35 | if (args[2]['name'] == ""): 36 | r = args[2] 37 | else: 38 | r = list(filter(lambda x : x['name'] == "reduction", args))[0] 39 | 40 | assert (x['type'] == y['type'] == "tensor") 41 | assert (x['shape'] == y['shape']) 42 | assert (x['dtype'] == y['dtype']) 43 | assert (r['type'] == "str") 44 | assert (r['value'] in ["none", "mean", "sum"]) 45 | 46 | self.shape = x['shape'] 47 | self.type = x['dtype'] 48 | self.red = r['value'] 49 | self.dir = d.dir 50 | 51 | def params(self): 52 | p = OrderedDict([('T', self.shape), ('type', self.type), ('red', self.red)]) 53 | return p 54 | 55 | def elems(self): 56 | red = self.red 57 | e = Utility.numElems(self.shape) 58 | 59 | if self.dir == "fprop": 60 | if red == "none": 61 | e *= 3 62 | else: 63 | e *= 2 64 | else: 65 | if red == "none": 66 | e *= 4 67 | else: 68 | e *= 3 69 | return e 70 | 71 | def bytes(self): 72 | return self.elems() * Utility.typeToBytes(self.type) 73 | 74 | def flops(self): 75 | return self.elems() * 2 + 1 76 | 77 | def tc(self): 78 | return "-" 79 | 80 | def op(self): 81 | return self.op_ 82 | 83 | def mod(self): 84 | return self.mod_ 85 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/normalization.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | 
class BatchNorm(OperatorLayerBase): 6 | 7 | def __init__(self, d): 8 | marker = eval(d.argMarker[0]) 9 | mod = marker['mod'] 10 | op = marker['op'] 11 | args = marker['args'] 12 | 13 | self.marker = marker 14 | self.mod_ = mod 15 | self.op_ = op 16 | self.args = args 17 | 18 | assert (op == "batch_norm") 19 | assert (len(args) == 8) 20 | i = args[0] 21 | assert (i['type'] == "tensor") 22 | 23 | self.shape = i['shape'] 24 | self.type = i['dtype'] 25 | self.dir = d.dir 26 | 27 | def params(self): 28 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 29 | return p 30 | 31 | def tc(self): 32 | return "-" 33 | 34 | def op(self): 35 | return self.op_ 36 | 37 | def mod(self): 38 | return self.mod_ 39 | 40 | def elems(self): 41 | return Utility.numElems(self.shape) 42 | 43 | def flops(self): 44 | # Variance algo-dependent, but this is a reasonable value. 45 | return self.elems() * 8 46 | 47 | def bytes(self): 48 | e = self.elems() 49 | if self.dir == "fprop": 50 | e *= 4 51 | else: 52 | e *= 5 53 | 54 | return e * Utility.typeToBytes(self.type) 55 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/optim.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | #TODO: Add support for other optimizers. 6 | 7 | class Adam(OperatorLayerBase): 8 | 9 | def __init__(self, d): 10 | marker = eval(d.argMarker[0]) 11 | mod = marker['mod'] 12 | op = marker['op'] 13 | args = marker['args'] 14 | 15 | self.marker = marker 16 | self.mod_ = mod 17 | self.op_ = op 18 | self.args = args 19 | 20 | assert(op == "adam") 21 | assert (len(args) == 12) or (len(args) == 14) 22 | w, hw, m, v, g = args[0:5] 23 | assert (w['shape'] == m['shape'] == v['shape'] == g['shape']) 24 | assert (hw['shape'] == w['shape']) or (hw['shape'] == (0,)) #hw could be null 25 | assert (w['type'] == m['type'] == v['type'] == g['type'] == hw['type'] == "tensor") 26 | assert (w['dtype'] == m['dtype'] == v['dtype'] == "float32") 27 | 28 | self.w = w 29 | self.g = g 30 | 31 | def params(self): 32 | p = OrderedDict([('T',self.w['shape']), ('wtype',self.w['dtype']), ('gtype',self.g['dtype'])]) 33 | return p 34 | 35 | def flops(self): 36 | return 0 37 | 38 | def bytes(self): 39 | wshape = self.w['shape'] 40 | wtype = self.w['dtype'] 41 | gtype = self.g['dtype'] 42 | b = 0 43 | 44 | elems = Utility.numElems(wshape) 45 | 46 | #Get time to stream read/write w, m, v 47 | b += 6 * elems * Utility.typeToBytes(wtype) 48 | 49 | #Get time to read "g" 50 | b += elems * Utility.typeToBytes(gtype) 51 | 52 | if wtype != gtype: #mixed precision 53 | #Get time to write "hw" 54 | b += elems * Utility.typeToBytes(gtype) 55 | 56 | return b 57 | 58 | def tc(self): 59 | return "-" 60 | 61 | def op(self): 62 | return self.op_ 63 | 64 | def mod(self): 65 | return self.mod_ 66 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/pooling.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | 4 | # Work in progress.
5 | 6 | #poolFuncs = ["max_pool2d_with_indices_forward", "max_pool2d_with_indices"] 7 | class MaxPool2d(object): 8 | 9 | def parse(marker): 10 | 11 | def convert2Tuple(arg): 12 | assert (arg['type'] in ["int", "tuple"]) 13 | if arg['type'] == "int": 14 | return (arg['value'], arg['value']) 15 | else: 16 | return arg['value'] 17 | 18 | mod = marker['mod'] 19 | op = marker['op'] 20 | args = marker['args'] 21 | assert (mod == "torch.nn.functional") 22 | assert (op == "max_pool2d") 23 | assert (len(args) >= 2) 24 | 25 | #input 26 | assert (args[0]['name'] == "") 27 | inp = args[0] 28 | assert (inp['type'] == "tensor") 29 | i = inp['shape'] 30 | t = inp['dtype'] 31 | assert (len(i) == 4) #nchw tensor 32 | 33 | #kernel 34 | if (args[1]['name'] == ""): 35 | k = args[1] 36 | else: 37 | k = list(filter(lambda x : x['name'] == "kernel_size", args))[0] 38 | k = convert2Tuple(k) 39 | 40 | #stride (positional or keyword argument) 41 | s = k #default value 42 | if ((len(args) >= 3) and (args[2]['name'] == "")): 43 | s = args[2] 44 | s = convert2Tuple(s) 45 | elif any(x['name'] == "stride" for x in args): 46 | s = list(filter(lambda x : x['name'] == "stride", args))[0] 47 | s = convert2Tuple(s) 48 | 49 | #padding (positional or keyword argument) 50 | p = (0,0) 51 | if ((len(args) >= 4) and (args[3]['name'] == "")): 52 | p = args[3] 53 | p = convert2Tuple(p) 54 | elif any(x['name'] == "padding" for x in args): 55 | p = list(filter(lambda x : x['name'] == "padding", args))[0] 56 | p = convert2Tuple(p) 57 | 58 | params = OrderedDict([('T', i), ('K', k), ('s',s), ('p',p), ('type', t)]) 59 | return params 60 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/randomSample.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | class RandPerm(OperatorLayerBase): 6 | 7 | def __init__(self, d): 8 | marker = eval(d.argMarker[0]) 9 | mod = marker['mod'] 10 | op = marker['op'] 11 | args = marker['args'] 12 | 13 | self.marker = marker 14 | self.mod_ = mod 15 | self.op_ = op 16 | self.args = args 17 | 18 | assert (mod == "torch") 19 | assert (op == "randperm") 20 | assert (len(args) == 1) 21 | n = args[0] 22 | assert n['type'] == "int" 23 | self.n = n['value'] 24 | 25 | def params(self): 26 | p = OrderedDict([('N', self.n)]) 27 | return p 28 | 29 | def tc(self): 30 | return "-" 31 | 32 | def op(self): 33 | return self.op_ 34 | 35 | def mod(self): 36 | return self.mod_ 37 | 38 | def bytes(self): 39 | return self.n * Utility.typeToBytes("int64") 40 | 41 | def flops(self): 42 | # Depends on the RNG, but this is probably a reasonable assumption.
43 | return self.n * 3 44 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/softmax.py: -------------------------------------------------------------------------------- 1 | from collections import OrderedDict 2 | from .utility import Utility 3 | from .base import OperatorLayerBase 4 | 5 | class Softmax(OperatorLayerBase): 6 | 7 | def __init__(self, d): 8 | marker = eval(d.argMarker[0]) 9 | mod = marker['mod'] 10 | op = marker['op'] 11 | args = marker['args'] 12 | 13 | self.marker = marker 14 | self.mod_ = mod 15 | self.op_ = op 16 | self.args = args 17 | 18 | assert (mod == "torch.nn.functional") 19 | assert (op == "softmax") 20 | 21 | #Filter out named parameters 22 | args = list(filter(lambda x : x['name'] == '', args)) 23 | 24 | assert (len(args) <= 2) 25 | self.shape = args[0]['shape'] 26 | self.type = args[0]['dtype'] 27 | self.dir = d.dir 28 | 29 | return 30 | 31 | def op(self): 32 | return self.op_ 33 | 34 | def mod(self): 35 | return self.mod_ 36 | 37 | def tc(self): 38 | return "-" 39 | 40 | def params(self): 41 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 42 | return p 43 | 44 | def elems(self): 45 | return Utility.numElems(self.shape) 46 | 47 | def flops(self): 48 | # Note: exp, sum-reduce, divide 49 | #flops = elems * 3 50 | return 0 51 | 52 | def bytes(self): 53 | b = self.elems() * Utility.typeToBytes(self.type) 54 | b *= 3 if self.dir == "fprop" else 5 #verify 55 | return b 56 | 57 | class LogSoftmax(OperatorLayerBase): 58 | 59 | def __init__(self, d): 60 | marker = eval(d.argMarker[0]) 61 | mod = marker['mod'] 62 | op = marker['op'] 63 | args = marker['args'] 64 | 65 | self.marker = marker 66 | self.mod_ = mod 67 | self.op_ = op 68 | self.args = args 69 | 70 | assert (mod == "torch.nn.functional") 71 | assert (op == "log_softmax") 72 | 73 | #Filter out named parameters 74 | args = list(filter(lambda x : x['name'] == '', args)) 75 | 76 | assert (len(args) <= 2) 77 | 78 | #Get input 79 | if (args[0]['name'] == ""): 80 | i = args[0] 81 | else: 82 | i = list(filter(lambda x : x['name'] == "input", args))[0] 83 | 84 | t = i['dtype'] 85 | 86 | self.shape = i['shape'] 87 | self.type = i['dtype'] 88 | self.dir = d.dir 89 | return 90 | 91 | def op(self): 92 | return self.op_ 93 | 94 | def mod(self): 95 | return self.mod_ 96 | 97 | def tc(self): 98 | return "-" 99 | 100 | def params(self): 101 | p = OrderedDict([('T', self.shape), ('type', self.type)]) 102 | return p 103 | 104 | def elems(self): 105 | return Utility.numElems(self.shape) 106 | 107 | def flops(self): 108 | # Note: exp, sum-reduce, divide, log 109 | #flops = elems * 4 110 | return 0 111 | 112 | def bytes(self): 113 | b = self.elems() * Utility.typeToBytes(self.type) 114 | b *= 3 if self.dir == "fprop" else 5 #verify 115 | return b 116 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/usage.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | 4 | def parseArgs(): 5 | """ 6 | Print usage and parse arguments. 7 | """ 8 | 9 | def check_cols(value): 10 | valid = ["idx", "seq", "altseq", "tid", "layer", "trace", "dir", "sub", "mod", "op", "kernel", "params", "sil", "tc", "device", "stream", "grid", "block", "flops", "bytes"] 11 | cols = value.split(",") 12 | for col in cols: 13 | if col not in valid: 14 | raise argparse.ArgumentTypeError("{} is not a valid column name. 
Valid column names are {}.".format(col, ",".join(valid))) 15 | return cols 16 | 17 | def openFile(f): 18 | try: 19 | d = open(f, "r") 20 | return d 21 | except IOError: 22 | print("Error opening file {}. Exiting.".format(f), file=sys.stderr) 23 | sys.exit(1) 24 | 25 | parser = argparse.ArgumentParser(prog=sys.argv[0], description="PyTorch Profiler", formatter_class=argparse.RawTextHelpFormatter) 26 | parser.add_argument("file", 27 | nargs='?', 28 | type=str, 29 | default=None, 30 | help="Output of parse.py (Python dictionary).") 31 | 32 | parser.add_argument("-c", 33 | type=check_cols, 34 | default="idx,dir,sub,mod,op,kernel,params,sil", 35 | help='''Comma-separated names of columns to print. 36 | idx: Index 37 | seq: PyTorch Sequence Id 38 | altseq: PyTorch Alternate Sequence Id 39 | tid: Thread Id 40 | layer: User annotated NVTX string (can be nested) 41 | trace: Function Call Trace 42 | dir: Direction 43 | sub: Sub Sequence Id 44 | mod: Module 45 | op: Operation 46 | kernel: Kernel Name 47 | params: Parameters 48 | sil: Silicon Time (in ns) 49 | tc: Tensor Core Usage 50 | device: GPU Device Id 51 | stream: Stream Id 52 | grid: Grid Dimensions 53 | block: Block Dimensions 54 | flops: Floating point ops (FMA = 2 FLOPs) 55 | bytes: Number of bytes in and out of DRAM 56 | e.g. -c idx,kernel,sil''') 57 | 58 | group = parser.add_mutually_exclusive_group() 59 | group.add_argument("--csv", 60 | action="store_true", 61 | default=False, 62 | help="Print a CSV output.") 63 | group.add_argument("-w", 64 | type=int, 65 | default=0, 66 | help="Width of columnar output.") 67 | 68 | args = parser.parse_args() 69 | if args.file is None: 70 | args.file = sys.stdin 71 | else: 72 | args.file = openFile(args.file) 73 | return args 74 | -------------------------------------------------------------------------------- /apex/apex/pyprof/prof/utility.py: -------------------------------------------------------------------------------- 1 | from functools import reduce 2 | 3 | class Utility(object): 4 | 5 | @staticmethod 6 | def numElems(shape): 7 | assert (type(shape) == tuple) 8 | return reduce(lambda x,y: x*y, shape, 1) 9 | 10 | @staticmethod 11 | def typeToBytes(t): 12 | if (t in ["uint8", "int8", "byte", "char", "bool"]): 13 | return 1 14 | elif (t in ["float16", "half", "int16", "short"]): 15 | return 2 16 | elif (t in ["float32", "float", "int32", "int"]): 17 | return 4 18 | elif (t in ["int64", "long", "float64", "double"]): 19 | return 8 20 | assert False 21 | 22 | @staticmethod 23 | def typeToString(t): 24 | if (t in ["uint8", "byte", "char",]): 25 | return "uint8" 26 | elif (t in ["int8",]): 27 | return "int8" 28 | elif (t in ["int16", "short",]): 29 | return "int16" 30 | elif (t in ["float16", "half"]): 31 | return "fp16" 32 | elif (t in ["float32", "float"]): 33 | return "fp32" 34 | elif (t in ["int32", "int",]): 35 | return "int32" 36 | elif (t in ["int64", "long"]): 37 | return "int64" 38 | elif (t in ["float64", "double",]): 39 | return "fp64" 40 | elif (t in ["bool",]): 41 | return "bool" 42 | assert False 43 | 44 | @staticmethod 45 | def hasNVTX(marker): 46 | if type(marker) is str: 47 | try: 48 | marker = eval(marker) 49 | except: 50 | return False 51 | 52 | if type(marker) is dict: 53 | keys = marker.keys() 54 | return ("mod" in keys) and ("op" in keys) and ("args" in keys) 55 | else: 56 | return False 57 | 58 | @staticmethod 59 | def isscalar(t): 60 | return (t in ["float", "int"]) 61 |
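For orientation, here is a minimal sketch of how the `Utility` helpers above combine into the `bytes()` estimates used throughout these prof classes. The shape and dtype are made-up example values, and it assumes the installed `apex` package exposes this module as `apex.pyprof.prof.utility`:

```python
from apex.pyprof.prof.utility import Utility

# Hypothetical activation tensor: (64, 1024) in float16.
shape, dtype = (64, 1024), "float16"

elems = Utility.numElems(shape)        # 64 * 1024 = 65536 elements
per_elem = Utility.typeToBytes(dtype)  # 2 bytes per fp16 element

# A simple "read input, write output" op touches each element twice.
approx_bytes = 2 * elems * per_elem    # 262144 bytes of DRAM traffic
print(Utility.typeToString(dtype), approx_bytes)  # prints: fp16 262144
```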
-------------------------------------------------------------------------------- /apex/apex/reparameterization/README.md: -------------------------------------------------------------------------------- 1 | Under construction... 2 | -------------------------------------------------------------------------------- /apex/csrc/compat.h: -------------------------------------------------------------------------------- 1 | #ifndef TORCH_CHECK 2 | #define TORCH_CHECK AT_CHECK 3 | #endif 4 | 5 | #ifdef VERSION_GE_1_3 6 | #define DATA_PTR data_ptr 7 | #else 8 | #define DATA_PTR data 9 | #endif 10 | -------------------------------------------------------------------------------- /apex/csrc/flatten_unflatten.cpp: -------------------------------------------------------------------------------- 1 | #include <torch/extension.h> 2 | #include <torch/csrc/utils/tensor_flatten.h> 3 | // https://github.com/pytorch/pytorch/blob/master/torch/csrc/utils/tensor_flatten.h 4 | 5 | at::Tensor flatten(std::vector<at::Tensor> tensors) 6 | { 7 | return torch::utils::flatten_dense_tensors(tensors); 8 | } 9 | 10 | std::vector<at::Tensor> unflatten(at::Tensor flat, std::vector<at::Tensor> tensors) 11 | { 12 | return torch::utils::unflatten_dense_tensors(flat, tensors); 13 | } 14 | 15 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 16 | m.def("flatten", &flatten, "Flatten dense tensors"); 17 | m.def("unflatten", &unflatten, "Unflatten dense tensors"); 18 | } 19 | -------------------------------------------------------------------------------- /apex/docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SPHINXPROJ = NVIDIAAPEX 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | gh-pages: 16 | git checkout gh-pages 17 | rm -rf build 18 | rm -rf source 19 | git checkout master -- . 20 | make html 21 | rm -rf ../_modules ../_sources ../_static 22 | mv -fv build/html/* ../ 23 | rm -rf build 24 | git add -A 25 | git commit -m "Generated gh-pages for `git log master -1 --pretty=short --abbrev-commit`" && git push origin gh-pages ; git checkout master 26 | 27 | .PHONY: help Makefile 28 | 29 | # Catch-all target: route all unknown targets to Sphinx using the new 30 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
31 | %: Makefile 32 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 33 | -------------------------------------------------------------------------------- /apex/docs/source/_static/css/pytorch_theme.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; 3 | } 4 | 5 | /* Default header fonts are ugly */ 6 | h1, h2, .rst-content .toctree-wrapper p.caption, h3, h4, h5, h6, legend, p.caption { 7 | font-family: "Lato","proxima-nova","Helvetica Neue",Arial,sans-serif; 8 | } 9 | 10 | /* Use white for docs background */ 11 | .wy-side-nav-search { 12 | background-color: #fff; 13 | } 14 | 15 | .wy-nav-content-wrap, .wy-menu li.current > a { 16 | background-color: #fff; 17 | } 18 | 19 | @media screen and (min-width: 1400px) { 20 | .wy-nav-content-wrap { 21 | background-color: rgba(0, 0, 0, 0.0470588); 22 | } 23 | 24 | .wy-nav-content { 25 | background-color: #fff; 26 | } 27 | } 28 | 29 | /* Fixes for mobile */ 30 | .wy-nav-top { 31 | background-color: #fff; 32 | background-image: url('../img/apex.jpg'); 33 | background-repeat: no-repeat; 34 | background-position: center; 35 | padding: 0; 36 | margin: 0.4045em 0.809em; 37 | color: #333; 38 | } 39 | 40 | .wy-nav-top > a { 41 | display: none; 42 | } 43 | 44 | @media screen and (max-width: 768px) { 45 | .wy-side-nav-search>a img.logo { 46 | height: 60px; 47 | } 48 | } 49 | 50 | /* This is needed to ensure that logo above search scales properly */ 51 | .wy-side-nav-search a { 52 | display: block; 53 | } 54 | 55 | /* This ensures that multiple constructors will remain in separate lines. */ 56 | .rst-content dl:not(.docutils) dt { 57 | display: table; 58 | } 59 | 60 | /* Use our red for literals (it's very similar to the original color) */ 61 | .rst-content tt.literal, .rst-content tt.literal, .rst-content code.literal { 62 | color: #F05732; 63 | } 64 | 65 | .rst-content tt.xref, a .rst-content tt, .rst-content tt.xref, 66 | .rst-content code.xref, a .rst-content tt, a .rst-content code { 67 | color: #404040; 68 | } 69 | 70 | /* Change link colors (except for the menu) */ 71 | 72 | a { 73 | color: #F05732; 74 | } 75 | 76 | a:hover { 77 | color: #F05732; 78 | } 79 | 80 | 81 | a:visited { 82 | color: #D44D2C; 83 | } 84 | 85 | .wy-menu a { 86 | color: #b3b3b3; 87 | } 88 | 89 | .wy-menu a:hover { 90 | color: #b3b3b3; 91 | } 92 | 93 | /* Default footer text is quite big */ 94 | footer { 95 | font-size: 80%; 96 | } 97 | 98 | footer .rst-footer-buttons { 99 | font-size: 125%; /* revert footer settings - 1/80% = 125% */ 100 | } 101 | 102 | footer p { 103 | font-size: 100%; 104 | } 105 | 106 | /* For hidden headers that appear in TOC tree */ 107 | /* see http://stackoverflow.com/a/32363545/3343043 */ 108 | .rst-content .hidden-section { 109 | display: none; 110 | } 111 | 112 | nav .hidden-section { 113 | display: inherit; 114 | } 115 | 116 | .wy-side-nav-search>div.version { 117 | color: #000; 118 | } 119 | -------------------------------------------------------------------------------- /apex/docs/source/_static/img/nv-pytorch2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/docs/source/_static/img/nv-pytorch2.png -------------------------------------------------------------------------------- /apex/docs/source/_templates/layout.html: 
-------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block sidebartitle %} {{ super() }} 3 | 4 | 32 | {% endblock %} 33 | 34 | {% block footer %} {{ super() }} 35 | 36 | 51 | {% endblock %} 52 | -------------------------------------------------------------------------------- /apex/docs/source/fp16_utils.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | apex.fp16_utils 5 | =================================== 6 | 7 | This submodule contains utilities designed to streamline the mixed precision training recipe 8 | presented by NVIDIA `on Parallel Forall`_ and in GTC 2018 Sessions 9 | `Training Neural Networks with Mixed Precision: Theory and Practice`_ and 10 | `Training Neural Networks with Mixed Precision: Real Examples`_. 11 | For Pytorch users, Real Examples in particular is recommended. 12 | 13 | Full runnable Python scripts demonstrating ``apex.fp16_utils`` 14 | can be found on the Github page: 15 | 16 | | `Simple FP16_Optimizer demos`_ 17 | | 18 | | `Distributed Mixed Precision Training with imagenet`_ 19 | | 20 | | `Mixed Precision Training with word_language_model`_ 21 | | 22 | | 23 | 24 | .. _`on Parallel Forall`: 25 | https://devblogs.nvidia.com/mixed-precision-training-deep-neural-networks/ 26 | .. _`Training Neural Networks with Mixed Precision: Theory and Practice`: 27 | http://on-demand.gputechconf.com/gtc/2018/video/S8923/ 28 | .. _`Training Neural Networks with Mixed Precision: Real Examples`: 29 | http://on-demand.gputechconf.com/gtc/2018/video/S81012/ 30 | .. _`Simple FP16_Optimizer demos`: 31 | https://github.com/NVIDIA/apex/tree/master/examples/FP16_Optimizer_simple 32 | .. _`Distributed Mixed Precision Training with imagenet`: 33 | https://github.com/NVIDIA/apex/tree/master/examples/imagenet 34 | .. _`Mixed Precision Training with word_language_model`: 35 | https://github.com/NVIDIA/apex/tree/master/examples/word_language_model 36 | 37 | .. automodule:: apex.fp16_utils 38 | .. currentmodule:: apex.fp16_utils 39 | 40 | Automatic management of master params + loss scaling 41 | ---------------------------------------------------- 42 | 43 | .. autoclass:: FP16_Optimizer 44 | :members: 45 | 46 | .. autoclass:: LossScaler 47 | :members: 48 | 49 | .. autoclass:: DynamicLossScaler 50 | :members: 51 | 52 | Manual master parameter management 53 | ---------------------------------- 54 | 55 | .. autofunction:: prep_param_lists 56 | 57 | .. autofunction:: master_params_to_model_params 58 | 59 | .. autofunction:: model_grads_to_master_grads 60 | -------------------------------------------------------------------------------- /apex/docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. PyTorch documentation master file, created by 2 | sphinx-quickstart on Fri Dec 23 13:31:47 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | :github_url: https://github.com/nvidia/apex 7 | 8 | Apex (A PyTorch Extension) 9 | =================================== 10 | 11 | This site contains the API documentation for Apex (https://github.com/nvidia/apex), 12 | a Pytorch extension with NVIDIA-maintained utilities to streamline mixed precision and distributed training. Some of the code here will be included in upstream Pytorch eventually. 
The intention of Apex is to make up-to-date utilities available to users as quickly as possible. 13 | 14 | Installation instructions can be found here: https://github.com/NVIDIA/apex#quick-start. 15 | 16 | Some other useful material, including GTC 2019 and Pytorch DevCon 2019 Slides, can be found here: https://github.com/mcarilli/mixed_precision_references. 17 | 18 | .. toctree:: 19 | :maxdepth: 1 20 | :caption: AMP: Automatic Mixed Precision 21 | 22 | amp 23 | 24 | .. toctree:: 25 | :maxdepth: 1 26 | :caption: Distributed Training 27 | 28 | parallel 29 | 30 | .. toctree:: 31 | :maxdepth: 1 32 | :caption: Fused Optimizers 33 | 34 | optimizers 35 | 36 | .. toctree:: 37 | :maxdepth: 1 38 | :caption: Fused Layer Norm 39 | 40 | layernorm 41 | 42 | .. .. toctree:: 43 | :maxdepth: 1 44 | :caption: Deprecated mixed precision API 45 | fp16_util 46 | 47 | .. reparameterization 48 | .. RNN 49 | 50 | Indices and tables 51 | ================== 52 | 53 | * :ref:`genindex` 54 | * :ref:`modindex` 55 | -------------------------------------------------------------------------------- /apex/docs/source/layernorm.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | apex.normalization.fused_layer_norm 5 | =================================== 6 | 7 | .. automodule:: apex.normalization 8 | .. currentmodule:: apex.normalization 9 | 10 | .. FusedAdam 11 | ---------- 12 | 13 | .. autoclass:: FusedLayerNorm 14 | :members: 15 | -------------------------------------------------------------------------------- /apex/docs/source/optimizers.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | apex.optimizers 5 | =================================== 6 | 7 | .. automodule:: apex.optimizers 8 | .. currentmodule:: apex.optimizers 9 | 10 | .. FusedAdam 11 | ---------- 12 | 13 | .. autoclass:: FusedAdam 14 | :members: 15 | 16 | .. autoclass:: FusedLAMB 17 | :members: 18 | 19 | .. autoclass:: FusedNovoGrad 20 | :members: 21 | 22 | .. autoclass:: FusedSGD 23 | :members: 24 | -------------------------------------------------------------------------------- /apex/docs/source/parallel.rst: -------------------------------------------------------------------------------- 1 | .. role:: hidden 2 | :class: hidden-section 3 | 4 | apex.parallel 5 | =================================== 6 | 7 | .. automodule:: apex.parallel 8 | .. currentmodule:: apex.parallel 9 | 10 | .. DistributedDataParallel 11 | ---------- 12 | 13 | .. autoclass:: DistributedDataParallel 14 | :members: 15 | 16 | .. autoclass:: Reducer 17 | :members: 18 | 19 | .. autoclass:: SyncBatchNorm 20 | :members: 21 | 22 | Utility functions 23 | ---------------------------------- 24 | 25 | .. autofunction:: convert_syncbn_model 26 | -------------------------------------------------------------------------------- /apex/examples/README.md: -------------------------------------------------------------------------------- 1 | This directory contains examples illustrating Apex mixed precision and distributed tools. 2 | 3 | **Note for users of the pre-unification API**: 4 | `deprecated_api` contains examples illustrating the old (pre-unified) APIs. These APIs will be removed soon, and users are strongly encouraged to switch. The separate mixed precision tools called `Amp` and `FP16_Optimizer` in the old API are exposed via different flags/optimization levels in the new API. 
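To make the old-vs-new distinction concrete, here is a minimal sketch of the unified API with a toy model (the tensor sizes and `opt_level` choice are illustrative only):

```python
import torch
from apex import amp

model = torch.nn.Linear(1024, 16).cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# One entry point replaces the separate Amp and FP16_Optimizer tools:
# O0 = pure FP32, O1 = recommended mixed precision, O2 = "almost FP16", O3 = pure FP16.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")

x = torch.randn(64, 1024, device="cuda")
loss = model(x).sum()
with amp.scale_loss(loss, optimizer) as scaled_loss:
    scaled_loss.backward()
optimizer.step()
```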
5 | -------------------------------------------------------------------------------- /apex/examples/dcgan/README.md: -------------------------------------------------------------------------------- 1 | # Mixed Precision DCGAN Training in PyTorch 2 | 3 | `main_amp.py` is based on [https://github.com/pytorch/examples/tree/master/dcgan](https://github.com/pytorch/examples/tree/master/dcgan). 4 | It implements Automatic Mixed Precision (Amp) training of the DCGAN example for different datasets. Command-line flags forwarded to `amp.initialize` are used to easily manipulate and switch between various pure and mixed precision "optimization levels" or `opt_level`s. For a detailed explanation of `opt_level`s, see the [updated API guide](https://nvidia.github.io/apex/amp.html). 5 | 6 | We introduce these changes to the PyTorch DCGAN example as described in the [Multiple models/optimizers/losses](https://nvidia.github.io/apex/advanced.html#multiple-models-optimizers-losses) section of the documentation: 7 | ``` 8 | # Added after models and optimizers construction 9 | [netD, netG], [optimizerD, optimizerG] = amp.initialize( 10 | [netD, netG], [optimizerD, optimizerG], opt_level=opt.opt_level, num_losses=3) 11 | ... 12 | # loss.backward() changed to: 13 | with amp.scale_loss(errD_real, optimizerD, loss_id=0) as errD_real_scaled: 14 | errD_real_scaled.backward() 15 | ... 16 | with amp.scale_loss(errD_fake, optimizerD, loss_id=1) as errD_fake_scaled: 17 | errD_fake_scaled.backward() 18 | ... 19 | with amp.scale_loss(errG, optimizerG, loss_id=2) as errG_scaled: 20 | errG_scaled.backward() 21 | ``` 22 | 23 | Note that we use different `loss_scalers` for each computed loss. 24 | Using a separate loss scaler per loss is [optional, not required](https://nvidia.github.io/apex/advanced.html#optionally-have-amp-use-a-different-loss-scaler-per-loss). 25 | 26 | To improve the numerical stability, we swapped `nn.Sigmoid() + nn.BCELoss()` for `nn.BCEWithLogitsLoss()`. 27 | 28 | With the new Amp API **you never need to explicitly convert your model, or the input data, to half().** 29 | 30 | "Pure FP32" training: 31 | ``` 32 | $ python main_amp.py --opt_level O0 33 | ``` 34 | Recommended mixed precision training: 35 | ``` 36 | $ python main_amp.py --opt_level O1 37 | ``` 38 | 39 | Have a look at the original [DCGAN example](https://github.com/pytorch/examples/tree/master/dcgan) for more information about the arguments used. 40 | 41 | To enable mixed precision training, we introduce the `--opt_level` argument. 42 | -------------------------------------------------------------------------------- /apex/examples/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Base image must at least have pytorch and CUDA installed. 2 | ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:19.07-py3 3 | FROM $BASE_IMAGE 4 | ARG BASE_IMAGE 5 | RUN echo "Installing Apex on top of ${BASE_IMAGE}" 6 | # make sure we don't overwrite some existing directory called "apex" 7 | WORKDIR /tmp/unique_for_apex 8 | # uninstall Apex if present, twice to make absolutely sure :) 9 | RUN pip uninstall -y apex || : 10 | RUN pip uninstall -y apex || : 11 | # SHA is something the user can touch to force recreation of this Docker layer, 12 | # and therefore force cloning of the latest version of Apex 13 | RUN SHA=ToUcHMe git clone https://github.com/NVIDIA/apex.git 14 | WORKDIR /tmp/unique_for_apex/apex 15 | RUN pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" .
16 | WORKDIR /workspace 17 | -------------------------------------------------------------------------------- /apex/examples/docker/README.md: -------------------------------------------------------------------------------- 1 | ## Option 1: Create a new container with Apex 2 | 3 | **Dockerfile** installs the latest Apex on top of an existing image. Run 4 | ``` 5 | docker build -t new_image_with_apex . 6 | ``` 7 | By default, **Dockerfile** uses NVIDIA's Pytorch container as the base image, 8 | which requires an NVIDIA GPU Cloud (NGC) account. If you don't have an NGC account, you can sign up for free by following the instructions [here](https://docs.nvidia.com/ngc/ngc-getting-started-guide/index.html#generating-api-key). 9 | 10 | Alternatively, you can supply your own base image via the `BASE_IMAGE` build-arg. 11 | `BASE_IMAGE` must have Pytorch and Cuda installed. For example, any 12 | `-devel` image for Pytorch 1.0 and later from the 13 | [official Pytorch Dockerhub](https://hub.docker.com/r/pytorch/pytorch) may be used: 14 | ``` 15 | docker build --build-arg BASE_IMAGE=pytorch/pytorch:1.3-cuda10.1-cudnn7-devel -t new_image_with_apex . 16 | ``` 17 | 18 | If you want to rebuild your image, and force the latest Apex to be cloned and installed, make any small change to the `SHA` variable in **Dockerfile**. 19 | 20 | **Warning:** 21 | Currently, the non-`-devel` images on Pytorch Dockerhub do not contain the Cuda compiler `nvcc`. Therefore, 22 | images whose name does not contain `-devel` are not eligible candidates for `BASE_IMAGE`. 23 | 24 | ### Running your Apex container 25 | 26 | Like any Cuda-enabled Pytorch container, a container with Apex should be run via [nvidia-docker](https://github.com/NVIDIA/nvidia-docker), for example: 27 | ``` 28 | docker run --runtime=nvidia -it --rm --ipc=host new_image_with_apex 29 | ``` 30 | 31 | ## Option 2: Install Apex in a running container 32 | 33 | Instead of building a new container, it is also a viable option to `git clone https://github.com/NVIDIA/apex.git` on bare metal and mount the Apex repo into your container at launch by running, for example, 34 | ``` 35 | docker run --runtime=nvidia -it --rm --ipc=host -v /bare/metal/apex:/apex/in/container <base image> 36 | ``` 37 | then go to /apex/in/container within the running container and 38 | ``` 39 | pip install -v --no-cache-dir --global-option="--cpp_ext" --global-option="--cuda_ext" . 40 | ``` 41 | -------------------------------------------------------------------------------- /apex/examples/simple/distributed/README.md: -------------------------------------------------------------------------------- 1 | **distributed_data_parallel.py** and **run.sh** show an example using Amp with 2 | [apex.parallel.DistributedDataParallel](https://nvidia.github.io/apex/parallel.html) or 3 | [torch.nn.parallel.DistributedDataParallel](https://pytorch.org/docs/stable/nn.html#distributeddataparallel) 4 | and the Pytorch multiprocess launcher script, 5 | [torch.distributed.launch](https://pytorch.org/docs/master/distributed.html#launch-utility). 6 | The use of `Amp` with DistributedDataParallel does not need to change from ordinary 7 | single-process use. The only gotcha is that wrapping your model with `DistributedDataParallel` must 8 | come after the call to `amp.initialize`.
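Condensed, the required ordering is just the following (a sketch; it assumes `model` and `optimizer` are already built and the process group is already initialized, as in the full script below):

```python
from apex import amp
from apex.parallel import DistributedDataParallel

# amp.initialize first, then wrap for distributed training.
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")
model = DistributedDataParallel(model)
```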
Test via 9 | ```bash 10 | bash run.sh 11 | ``` 12 | 13 | **This is intended purely as an instructional example, not a performance showcase.** 14 | -------------------------------------------------------------------------------- /apex/examples/simple/distributed/distributed_data_parallel.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import argparse 3 | import os 4 | from apex import amp 5 | # FOR DISTRIBUTED: (can also use torch.nn.parallel.DistributedDataParallel instead) 6 | from apex.parallel import DistributedDataParallel 7 | 8 | parser = argparse.ArgumentParser() 9 | # FOR DISTRIBUTED: Parse for the local_rank argument, which will be supplied 10 | # automatically by torch.distributed.launch. 11 | parser.add_argument("--local_rank", default=0, type=int) 12 | args = parser.parse_args() 13 | 14 | # FOR DISTRIBUTED: If we are running under torch.distributed.launch, 15 | # the 'WORLD_SIZE' environment variable will also be set automatically. 16 | args.distributed = False 17 | if 'WORLD_SIZE' in os.environ: 18 | args.distributed = int(os.environ['WORLD_SIZE']) > 1 19 | 20 | if args.distributed: 21 | # FOR DISTRIBUTED: Set the device according to local_rank. 22 | torch.cuda.set_device(args.local_rank) 23 | 24 | # FOR DISTRIBUTED: Initialize the backend. torch.distributed.launch will provide 25 | # environment variables, and requires that you use init_method=`env://`. 26 | torch.distributed.init_process_group(backend='nccl', 27 | init_method='env://') 28 | 29 | torch.backends.cudnn.benchmark = True 30 | 31 | N, D_in, D_out = 64, 1024, 16 32 | 33 | # Each process receives its own batch of "fake input data" and "fake target data." 34 | # The "training loop" in each process just uses this fake batch over and over. 35 | # https://github.com/NVIDIA/apex/tree/master/examples/imagenet provides a more realistic 36 | # example of distributed data sampling for both training and validation. 37 | x = torch.randn(N, D_in, device='cuda') 38 | y = torch.randn(N, D_out, device='cuda') 39 | 40 | model = torch.nn.Linear(D_in, D_out).cuda() 41 | optimizer = torch.optim.SGD(model.parameters(), lr=1e-3) 42 | 43 | model, optimizer = amp.initialize(model, optimizer, opt_level="O1") 44 | 45 | if args.distributed: 46 | # FOR DISTRIBUTED: After amp.initialize, wrap the model with 47 | # apex.parallel.DistributedDataParallel. 
48 | model = DistributedDataParallel(model) 49 | # torch.nn.parallel.DistributedDataParallel is also fine, with some added args: 50 | # model = torch.nn.parallel.DistributedDataParallel(model, 51 | # device_ids=[args.local_rank], 52 | # output_device=args.local_rank) 53 | 54 | loss_fn = torch.nn.MSELoss() 55 | 56 | for t in range(500): 57 | optimizer.zero_grad() 58 | y_pred = model(x) 59 | loss = loss_fn(y_pred, y) 60 | with amp.scale_loss(loss, optimizer) as scaled_loss: 61 | scaled_loss.backward() 62 | optimizer.step() 63 | 64 | if args.local_rank == 0: 65 | print("final loss = ", loss) 66 | -------------------------------------------------------------------------------- /apex/examples/simple/distributed/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python -m torch.distributed.launch --nproc_per_node=2 distributed_data_parallel.py 3 | -------------------------------------------------------------------------------- /apex/requirements.txt: -------------------------------------------------------------------------------- 1 | cxxfilt>=0.2.0 2 | tqdm>=4.28.1 3 | numpy>=1.15.3 4 | PyYAML>=5.1 5 | pytest>=3.5.1 6 | -------------------------------------------------------------------------------- /apex/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | -r requirements.txt 2 | flake8>=3.7.9 3 | Sphinx>=3.0.3 -------------------------------------------------------------------------------- /apex/tests/L0/run_amp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/tests/L0/run_amp/__init__.py -------------------------------------------------------------------------------- /apex/tests/L0/run_amp/test_larc.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import torch 4 | from torch import nn 5 | from torch.nn import Parameter 6 | 7 | from apex import amp 8 | from apex.parallel.LARC import LARC 9 | from utils import common_init 10 | 11 | 12 | class MyModel(torch.nn.Module): 13 | def __init__(self, unique): 14 | super(MyModel, self).__init__() 15 | self.weight0 = Parameter( 16 | unique + torch.arange(2, device="cuda", dtype=torch.float32) 17 | ) 18 | 19 | def forward(self, input): 20 | return (input * self.weight0).sum() 21 | 22 | 23 | class TestLARC(unittest.TestCase): 24 | def setUp(self): 25 | self.x = torch.ones((2), device="cuda", dtype=torch.float32) 26 | common_init(self) 27 | 28 | def tearDown(self): 29 | pass 30 | 31 | def test_larc_mixed_precision(self): 32 | for opt_level in ["O0", "O1", "O2", "O3"]: 33 | model = MyModel(1) 34 | 35 | optimizer = LARC( 36 | torch.optim.SGD( 37 | [{"params": model.parameters(), "lr": 0.25}], momentum=0.125 38 | ) 39 | ) 40 | 41 | model, optimizer = amp.initialize( 42 | model, optimizer, opt_level=opt_level, verbosity=0 43 | ) 44 | 45 | optimizer.zero_grad() 46 | loss = model(self.x) 47 | with amp.scale_loss(loss, optimizer) as scaled_loss: 48 | scaled_loss.backward() 49 | optimizer.step() 50 | 51 | 52 | if __name__ == "__main__": 53 | unittest.main() 54 | -------------------------------------------------------------------------------- /apex/tests/L0/run_amp/test_promotion.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import itertools as it 4 | 5 | from apex import amp 6 | import 
torch 7 | from torch import nn 8 | import torch.nn.functional as F 9 | 10 | from utils import common_init, HALF, FLOAT, DTYPES 11 | 12 | class TestPromotion(unittest.TestCase): 13 | def setUp(self): 14 | self.handle = amp.init(enabled=True) 15 | common_init(self) 16 | 17 | def tearDown(self): 18 | self.handle._deactivate() 19 | 20 | def run_binary_promote_test(self, fns, input_shape, x_inplace=False): 21 | type_pairs = it.product(DTYPES, DTYPES) 22 | for fn, (xtype, ytype) in it.product(fns, type_pairs): 23 | x = torch.randn(input_shape, dtype=xtype).requires_grad_() 24 | x_leaf = x 25 | if x_inplace: 26 | # We need a non-leaf to call in place on 27 | x = x.clone() 28 | y = torch.randn(input_shape, dtype=ytype) 29 | out = fn(x, y) 30 | if x_inplace: 31 | # In place: always match xtype 32 | self.assertEqual(out.type(), x.type()) 33 | else: 34 | # Out of place: match widest type 35 | if xtype == torch.float or ytype == torch.float: 36 | self.assertEqual(out.type(), FLOAT) 37 | else: 38 | self.assertEqual(out.type(), HALF) 39 | out.float().sum().backward() 40 | self.assertEqual(x_leaf.grad.dtype, xtype) 41 | 42 | def test_atan2_matches_widest(self): 43 | fns = [lambda x, y : torch.atan2(x, y), 44 | lambda x, y : x.atan2(y)] 45 | self.run_binary_promote_test(fns, (self.b,)) 46 | 47 | def test_mul_matches_widest(self): 48 | fns = [lambda x, y : torch.mul(x, y), 49 | lambda x, y: x.mul(y)] 50 | self.run_binary_promote_test(fns, (self.b,)) 51 | 52 | def test_cat_matches_widest(self): 53 | shape = self.b 54 | ys = [torch.randn(shape, dtype=torch.half) for _ in range(5)] 55 | x_float = torch.randn(shape) 56 | out = torch.cat(ys + [x_float]) 57 | self.assertEqual(out.type(), FLOAT) 58 | x_half = torch.randn(shape, dtype=torch.half) 59 | out = torch.cat(ys + [x_half]) 60 | self.assertEqual(out.type(), HALF) 61 | 62 | def test_inplace_exp_is_error_for_half(self): 63 | xs = torch.randn(self.b) 64 | xs.exp_() 65 | self.assertEqual(xs.type(), FLOAT) 66 | xs = torch.randn(self.b, dtype=torch.half) 67 | with self.assertRaises(NotImplementedError): 68 | xs.exp_() 69 | 70 | def test_inplace_add_matches_self(self): 71 | fn = lambda x, y: x.add_(y) 72 | self.run_binary_promote_test([fn], (self.b,), x_inplace=True) 73 | 74 | if __name__ == '__main__': 75 | unittest.main() 76 | -------------------------------------------------------------------------------- /apex/tests/L0/run_amp/utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | HALF = 'torch.cuda.HalfTensor' 4 | FLOAT = 'torch.cuda.FloatTensor' 5 | 6 | DTYPES = [torch.half, torch.float] 7 | 8 | ALWAYS_HALF = {torch.float: HALF, 9 | torch.half: HALF} 10 | ALWAYS_FLOAT = {torch.float: FLOAT, 11 | torch.half: FLOAT} 12 | MATCH_INPUT = {torch.float: FLOAT, 13 | torch.half: HALF} 14 | 15 | def common_init(test_case): 16 | test_case.h = 64 17 | test_case.b = 16 18 | test_case.c = 16 19 | test_case.k = 3 20 | test_case.t = 10 21 | torch.set_default_tensor_type(torch.cuda.FloatTensor) 22 | -------------------------------------------------------------------------------- /apex/tests/L0/run_fp16util/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/tests/L0/run_fp16util/__init__.py -------------------------------------------------------------------------------- /apex/tests/L0/run_fp16util/test_fp16util.py: 
-------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from apex.fp16_utils import FP16Model 7 | 8 | 9 | class DummyBlock(nn.Module): 10 | def __init__(self): 11 | super(DummyBlock, self).__init__() 12 | 13 | self.conv = nn.Conv2d(10, 10, 2) 14 | self.bn = nn.BatchNorm2d(10, affine=True) 15 | 16 | def forward(self, x): 17 | return self.conv(self.bn(x)) 18 | 19 | 20 | class DummyNet(nn.Module): 21 | def __init__(self): 22 | super(DummyNet, self).__init__() 23 | 24 | self.conv1 = nn.Conv2d(3, 10, 2) 25 | self.bn1 = nn.BatchNorm2d(10, affine=False) 26 | self.db1 = DummyBlock() 27 | self.db2 = DummyBlock() 28 | 29 | def forward(self, x): 30 | out = x 31 | out = self.conv1(out) 32 | out = self.bn1(out) 33 | out = self.db1(out) 34 | out = self.db2(out) 35 | return out 36 | 37 | 38 | class DummyNetWrapper(nn.Module): 39 | def __init__(self): 40 | super(DummyNetWrapper, self).__init__() 41 | 42 | self.bn = nn.BatchNorm2d(3, affine=True) 43 | self.dn = DummyNet() 44 | 45 | def forward(self, x): 46 | return self.dn(self.bn(x)) 47 | 48 | 49 | class TestFP16Model(unittest.TestCase): 50 | def setUp(self): 51 | self.N = 64 52 | self.C_in = 3 53 | self.H_in = 16 54 | self.W_in = 32 55 | self.in_tensor = torch.randn((self.N, self.C_in, self.H_in, self.W_in)).cuda() 56 | self.orig_model = DummyNetWrapper().cuda() 57 | self.fp16_model = FP16Model(self.orig_model) 58 | 59 | def test_params_and_buffers(self): 60 | exempted_modules = [ 61 | self.fp16_model.network.bn, 62 | self.fp16_model.network.dn.db1.bn, 63 | self.fp16_model.network.dn.db2.bn, 64 | ] 65 | for m in self.fp16_model.modules(): 66 | expected_dtype = torch.float if (m in exempted_modules) else torch.half 67 | for p in m.parameters(recurse=False): 68 | assert p.dtype == expected_dtype 69 | for b in m.buffers(recurse=False): 70 | assert b.dtype in (expected_dtype, torch.int64) 71 | 72 | def test_output_is_half(self): 73 | out_tensor = self.fp16_model(self.in_tensor) 74 | assert out_tensor.dtype == torch.half 75 | 76 | -------------------------------------------------------------------------------- /apex/tests/L0/run_fused_layer_norm/test_fused_layer_norm.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import os 3 | import random 4 | 5 | import torch 6 | import apex 7 | from torch.autograd import Variable 8 | 9 | 10 | class TestFusedLayerNorm(unittest.TestCase): 11 | def setUp(self): 12 | # bias and weight are set to 0 and 1 respectively, so no need to copy parameters from cpu module to the gpu one 13 | self.module_cpu_ = apex.normalization.FusedLayerNorm(normalized_shape=[32, 16], elementwise_affine=False).cpu() 14 | self.module_cuda_ = apex.normalization.FusedLayerNorm(normalized_shape=[32, 16], elementwise_affine=False).cuda() 15 | 16 | def _test_same_output(self, batch_size): 17 | torch.cuda.manual_seed(42) 18 | self.input_ = torch.randn((batch_size, *self.module_cpu_.normalized_shape), device="cpu").requires_grad_(True) 19 | self.input_cuda_ = self.input_.cuda().detach().requires_grad_(True) 20 | out_cpu_ = self.module_cpu_(self.input_) 21 | gO = torch.rand_like(out_cpu_) 22 | out_cpu_.backward(gO) 23 | out_cuda_ = self.module_cuda_(self.input_cuda_) 24 | gO = gO.cuda() 25 | out_cuda_.backward(gO) 26 | assert out_cpu_.is_cuda == False 27 | assert out_cuda_.is_cuda == True 28 | torch.testing.assert_allclose(out_cpu_, out_cuda_.cpu()) 29 | torch.testing.assert_allclose(self.input_.grad, 
self.input_cuda_.grad.cpu()) 30 | 31 | def test_layer_norm(self): 32 | self._test_same_output(16) 33 | 34 | def test_large_batch(self): 35 | self._test_same_output(65536) 36 | 37 | 38 | class TestFusedLayerNormElemWise(TestFusedLayerNorm): 39 | def setUp(self): 40 | self.module_cpu_ = apex.normalization.FusedLayerNorm(normalized_shape=[32, 16], elementwise_affine=True).cpu() 41 | self.module_cuda_ = apex.normalization.FusedLayerNorm(normalized_shape=[32, 16], elementwise_affine=True).cuda() 42 | 43 | -------------------------------------------------------------------------------- /apex/tests/L0/run_optimizers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/tests/L0/run_optimizers/__init__.py -------------------------------------------------------------------------------- /apex/tests/L0/run_pyprof_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/apex/tests/L0/run_pyprof_data/__init__.py -------------------------------------------------------------------------------- /apex/tests/L0/run_pyprof_nvtx/__init__.py: -------------------------------------------------------------------------------- 1 | from test_pyprof_nvtx import TestPyProfNvtx 2 | -------------------------------------------------------------------------------- /apex/tests/L0/run_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import sys 3 | 4 | test_dirs = ["run_amp", "run_fp16util", "run_optimizers", "run_fused_layer_norm", "run_pyprof_nvtx", "run_pyprof_data", "run_mlp"] 5 | 6 | runner = unittest.TextTestRunner(verbosity=2) 7 | 8 | errcode = 0 9 | 10 | for test_dir in test_dirs: 11 | suite = unittest.TestLoader().discover(test_dir) 12 | 13 | print("\nExecuting tests from " + test_dir) 14 | 15 | result = runner.run(suite) 16 | 17 | if not result.wasSuccessful(): 18 | errcode = 1 19 | 20 | sys.exit(errcode) 21 | -------------------------------------------------------------------------------- /apex/tests/L1/common/compare.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | parser = argparse.ArgumentParser(description='Compare') 5 | parser.add_argument('--opt-level', type=str) 6 | parser.add_argument('--keep-batchnorm-fp32', type=str, default=None) 7 | parser.add_argument('--loss-scale', type=str, default=None) 8 | parser.add_argument('--fused-adam', action='store_true') 9 | parser.add_argument('--use_baseline', action='store_true') 10 | args = parser.parse_args() 11 | 12 | base_file = str(args.opt_level) + "_" +\ 13 | str(args.loss_scale) + "_" +\ 14 | str(args.keep_batchnorm_fp32) + "_" +\ 15 | str(args.fused_adam) 16 | 17 | file_e = "True_" + base_file 18 | file_p = "False_" + base_file 19 | if args.use_baseline: 20 | file_b = "baselines/True_" + base_file 21 | 22 | dict_e = torch.load(file_e) 23 | dict_p = torch.load(file_p) 24 | if args.use_baseline: 25 | dict_b = torch.load(file_b) 26 | 27 | torch.set_printoptions(precision=10) 28 | 29 | print(file_e) 30 | print(file_p) 31 | if args.use_baseline: 32 | print(file_b) 33 | 34 | # ugly duplication here...
35 | if not args.use_baseline: 36 | for n, (i_e, i_p) in enumerate(zip(dict_e["Iteration"], dict_p["Iteration"])): 37 | assert i_e == i_p, "i_e = {}, i_p = {}".format(i_e, i_p) 38 | 39 | loss_e = dict_e["Loss"][n] 40 | loss_p = dict_p["Loss"][n] 41 | assert loss_e == loss_p, "Iteration {}, loss_e = {}, loss_p = {}".format(i_e, loss_e, loss_p) 42 | print("{:4} {:15.10f} {:15.10f} {:15.10f} {:15.10f}".format( 43 | i_e, 44 | loss_e, 45 | loss_p, 46 | dict_e["Speed"][n], 47 | dict_p["Speed"][n])) 48 | else: 49 | for n, (i_e, i_p) in enumerate(zip(dict_e["Iteration"], dict_p["Iteration"])): 50 | assert i_e == i_p, "i_e = {}, i_p = {}".format(i_e, i_p) 51 | 52 | loss_e = dict_e["Loss"][n] 53 | loss_p = dict_p["Loss"][n] 54 | loss_b = dict_b["Loss"][n] 55 | assert loss_e == loss_p, "Iteration {}, loss_e = {}, loss_p = {}".format(i_e, loss_e, loss_p) 56 | assert loss_e == loss_b, "Iteration {}, loss_e = {}, loss_b = {}".format(i_e, loss_e, loss_b) 57 | print("{:4} {:15.10f} {:15.10f} {:15.10f} {:15.10f} {:15.10f} {:15.10f}".format( 58 | i_e, 59 | loss_b, 60 | loss_e, 61 | loss_p, 62 | dict_b["Speed"][n], 63 | dict_e["Speed"][n], 64 | dict_p["Speed"][n])) 65 | -------------------------------------------------------------------------------- /apex/tests/L1/cross_product/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # DATADIR="/home/mcarilli/Desktop/pt18data/apex_stale/examples/imagenet/bare_metal_train_val/" 4 | # DATADIR="/opt/home/apex/examples/imagenet/" 5 | cp ../common/* . 6 | bash run_test.sh single_gpu $1 7 | -------------------------------------------------------------------------------- /apex/tests/L1/cross_product_distributed/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cp ../common/* . 
4 | bash run_test.sh distributed $1 5 | -------------------------------------------------------------------------------- /apex/tests/distributed/DDP/ddp_race_condition_test.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributed as dist 3 | from torch.nn import Parameter 4 | from torch.nn import Module 5 | from apex.parallel import DistributedDataParallel as DDP 6 | import argparse 7 | import os 8 | 9 | 10 | parser = argparse.ArgumentParser(description='allreduce hook example') 11 | parser.add_argument("--local_rank", default=0, type=int) 12 | args = parser.parse_args() 13 | 14 | args.distributed = False 15 | if 'WORLD_SIZE' in os.environ: 16 | args.distributed = int(os.environ['WORLD_SIZE']) > 1 17 | 18 | if args.distributed: 19 | args.gpu = args.local_rank % torch.cuda.device_count() 20 | torch.cuda.set_device(args.gpu) 21 | torch.distributed.init_process_group(backend='nccl', 22 | init_method='env://') 23 | args.world_size = torch.distributed.get_world_size() 24 | 25 | torch.set_printoptions(precision=10) 26 | torch.manual_seed(args.local_rank) 27 | 28 | class Model(Module): 29 | def __init__(self): 30 | super(Model, self).__init__() 31 | self.a = Parameter(torch.cuda.FloatTensor(4096*4096).fill_(1.0)) 32 | self.b = Parameter(torch.cuda.FloatTensor(4096*4096).fill_(2.0)) 33 | def forward(self, input): 34 | return (input*self.a)*self.b 35 | 36 | model = Model() 37 | # model = DDP(model, message_size=1, gradient_predivide_factor=8.0) 38 | # model = DDP(model, delay_allreduce=True) 39 | # model = DDP(model, message_size=1, allreduce_trigger_params=[model.b]) 40 | model = DDP(model, message_size=1, allreduce_trigger_params=[model.b], num_allreduce_streams=3) 41 | 42 | x = torch.cuda.FloatTensor(4096*4096) 43 | 44 | passed = True 45 | torch.cuda.cudart().cudaProfilerStart() 46 | for i in range(10): 47 | x.fill_(i + args.local_rank) # fill x with new values every iteration for sanity 48 | model.zero_grad() 49 | out = model(x) 50 | loss = out.sum() 51 | # torch.cuda.nvtx.range_push("backward") 52 | loss.backward() 53 | # torch.cuda.nvtx.range_pop() 54 | 55 | # torch.cuda.nvtx.range_push("synchronize() + info") 56 | # torch.cuda.synchronize() 57 | print("i = {}".format(i)) 58 | def info(name, param, val): 59 | expected = val*4096*4096*(2.*i+1)/2. 
60 | actual = param.grad.data.sum().item() 61 | print(name+": grad.data_ptr() = {}, expected sum {}, got {}".format( 62 | param.grad.data_ptr(), expected, actual)) 63 | return (expected == actual) 64 | if not info("model.a", model.module.a, 2.): passed = False 65 | if not info("model.b", model.module.b, 1.): passed = False 66 | # torch.cuda.nvtx.range_pop() 67 | torch.cuda.cudart().cudaProfilerStop() 68 | 69 | print("passed = ", passed) 70 | -------------------------------------------------------------------------------- /apex/tests/distributed/DDP/run_race_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | CUDA_VISIBLE_DEVICES=0,1 python -m torch.distributed.launch --nproc_per_node=2 ddp_race_condition_test.py 4 | -------------------------------------------------------------------------------- /apex/tests/distributed/amp_master_params/compare.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | model_params_rank0 = torch.load("rank0model.pth", 4 | map_location = lambda storage, loc: storage.cuda(0)) 5 | model_params_rank1 = torch.load("rank1model.pth", 6 | map_location = lambda storage, loc: storage.cuda(0)) 7 | master_params_rank0 = torch.load("rank0master.pth", 8 | map_location = lambda storage, loc: storage.cuda(0)) 9 | master_params_rank1 = torch.load("rank1master.pth", 10 | map_location = lambda storage, loc: storage.cuda(0)) 11 | 12 | for model_rank0, model_rank1, master_rank0, master_rank1 in zip( 13 | model_params_rank0, 14 | model_params_rank1, 15 | master_params_rank0, 16 | master_params_rank1): 17 | assert torch.allclose(model_rank0, model_rank1), "Model param mismatch" 18 | assert torch.allclose(master_rank0, master_rank1), "Master param mismatch" 19 | # Some debugging/investigation assistance code: 20 | # maxval, maxind = torch.max(((torch.abs(model_rank0).float())/torch.abs(master_rank0)).view(-1), 0) 21 | # offending_val_half = model_rank0.view(-1)[maxind.item()] 22 | # offending_val_float = master_rank0.view(-1)[maxind.item()] 23 | # print(maxval.item(), maxind.item(), offending_val_half.item(), offending_val_float.item(), 24 | # offending_val_float.half().item()) 25 | # rtol needs to be > 2^-11 because of denormals... 
26 | assert torch.allclose(model_rank0, master_rank0.half(), rtol=.005), "Model-master mismatch" 27 | 28 | print("OK: Model and master params match across ranks.") 29 | -------------------------------------------------------------------------------- /apex/tests/distributed/amp_master_params/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python -m torch.distributed.launch --nproc_per_node=2 amp_master_params.py 3 | 4 | python compare.py 5 | -------------------------------------------------------------------------------- /apex/tests/distributed/synced_batchnorm/test_batchnorm1d.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import apex 3 | 4 | model = apex.parallel.SyncBatchNorm(4).cuda() 5 | model.weight.data.uniform_() 6 | model.bias.data.uniform_() 7 | data = torch.rand((8,4)).cuda() 8 | 9 | model_ref = torch.nn.BatchNorm1d(4).cuda() 10 | model_ref.load_state_dict(model.state_dict()) 11 | data_ref = data.clone() 12 | 13 | output = model(data) 14 | output_ref = model_ref(data_ref) 15 | 16 | assert(output.allclose(output_ref)) 17 | assert(model.running_mean.allclose(model_ref.running_mean)) 18 | assert(model.running_var.allclose(model_ref.running_var)) 19 | -------------------------------------------------------------------------------- /apex/tests/distributed/synced_batchnorm/unit_test.sh: -------------------------------------------------------------------------------- 1 | python python_single_gpu_unit_test.py 2 | python single_gpu_unit_test.py 3 | python test_batchnorm1d.py 4 | python -m torch.distributed.launch --nproc_per_node=2 two_gpu_unit_test.py 5 | python -m torch.distributed.launch --nproc_per_node=2 two_gpu_unit_test.py --fp16 6 | python -m torch.distributed.launch --nproc_per_node=2 two_gpu_test_different_batch_size.py --apex 7 | #beware, you need a system with at least 4 gpus to test group_size<world_size 8 | python -m torch.distributed.launch --nproc_per_node=4 two_gpu_unit_test.py --group_size=2 9 | -------------------------------------------------------------------------------- /lreid/data/datasets/image/dukemtmcreid.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import re 3 | import glob 4 | import os.path as osp 5 | 6 | from ..dataset import ImageDataset 7 | 8 | 9 | class DukeMTMCreID(ImageDataset): 10 | """DukeMTMC-reID. 11 | 12 | Reference: 13 | - Ristani et al. Performance Measures and a Data Set for Multi-Target, Multi-Camera Tracking. ECCVW 2016. 14 | - Zheng et al. Unlabeled Samples Generated by GAN Improve the Person Re-identification Baseline in vitro. ICCV 2017. 15 | 16 | URL: ``_ 17 | 18 | Dataset statistics: 19 | - identities: 1404 (train + query). 20 | - images: 16522 (train) + 2228 (query) + 17661 (gallery). 21 | - cameras: 8.
22 | """ 23 | dataset_dir = 'dukemtmc-reid' 24 | dataset_url = 'http://vision.cs.duke.edu/DukeMTMC/data/misc/DukeMTMC-reID.zip' 25 | 26 | def __init__(self, root='', **kwargs): 27 | self.root = osp.abspath(osp.expanduser(root)) 28 | self.dataset_dir = osp.join(self.root, self.dataset_dir) 29 | self.download_dataset(self.dataset_dir, self.dataset_url) 30 | self.train_dir = osp.join( 31 | self.dataset_dir, 'DukeMTMC-reID/bounding_box_train' 32 | ) 33 | self.query_dir = osp.join(self.dataset_dir, 'DukeMTMC-reID/query') 34 | self.gallery_dir = osp.join( 35 | self.dataset_dir, 'DukeMTMC-reID/bounding_box_test' 36 | ) 37 | 38 | required_files = [ 39 | self.dataset_dir, self.train_dir, self.query_dir, self.gallery_dir 40 | ] 41 | self.check_before_run(required_files) 42 | 43 | train = self.process_dir(self.train_dir, relabel=True) 44 | query = self.process_dir(self.query_dir, relabel=False) 45 | gallery = self.process_dir(self.gallery_dir, relabel=False) 46 | 47 | super(DukeMTMCreID, self).__init__(train, query, gallery, **kwargs) 48 | 49 | def process_dir(self, dir_path, relabel=False): 50 | img_paths = glob.glob(osp.join(dir_path, '*.jpg')) 51 | pattern = re.compile(r'([-\d]+)_c(\d)') 52 | 53 | pid_container = set() 54 | for img_path in img_paths: 55 | pid, _ = map(int, pattern.search(img_path).groups()) 56 | pid_container.add(pid) 57 | pid2label = {pid: label for label, pid in enumerate(pid_container)} 58 | 59 | data = [] 60 | for img_path in img_paths: 61 | pid, camid = map(int, pattern.search(img_path).groups()) 62 | assert 1 <= camid <= 8 63 | camid -= 1 # index starts from 0 64 | if relabel: 65 | pid = pid2label[pid] 66 | data.append((img_path, pid, camid)) 67 | 68 | return data 69 | -------------------------------------------------------------------------------- /lreid/data/datasets/image/sensereid.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import copy 3 | import glob 4 | import os.path as osp 5 | 6 | from ..dataset import ImageDataset 7 | 8 | 9 | class SenseReID(ImageDataset): 10 | """SenseReID. 11 | 12 | This dataset is used for test purpose only. 13 | 14 | Reference: 15 | Zhao et al. Spindle Net: Person Re-identification with Human Body 16 | Region Guided Feature Decomposition and Fusion. CVPR 2017. 17 | 18 | URL: ``_ 19 | 20 | Dataset statistics: 21 | - query: 522 ids, 1040 images. 22 | - gallery: 1717 ids, 3388 images. 
23 | """ 24 | dataset_dir = 'sensereid' 25 | dataset_url = None 26 | 27 | def __init__(self, root='', **kwargs): 28 | self.root = osp.abspath(osp.expanduser(root)) 29 | self.dataset_dir = osp.join(self.root, self.dataset_dir) 30 | self.download_dataset(self.dataset_dir, self.dataset_url) 31 | 32 | self.query_dir = osp.join(self.dataset_dir, 'SenseReID', 'test_probe') 33 | self.gallery_dir = osp.join( 34 | self.dataset_dir, 'SenseReID', 'test_gallery' 35 | ) 36 | 37 | required_files = [self.dataset_dir, self.query_dir, self.gallery_dir] 38 | self.check_before_run(required_files) 39 | 40 | query = self.process_dir(self.query_dir) 41 | gallery = self.process_dir(self.gallery_dir) 42 | 43 | # relabel 44 | g_pids = set() 45 | for _, pid, _ in gallery: 46 | g_pids.add(pid) 47 | pid2label = {pid: i for i, pid in enumerate(g_pids)} 48 | 49 | query = [ 50 | (img_path, pid2label[pid], camid) for img_path, pid, camid in query 51 | ] 52 | gallery = [ 53 | (img_path, pid2label[pid], camid) 54 | for img_path, pid, camid in gallery 55 | ] 56 | train = copy.deepcopy(query) + copy.deepcopy(gallery) # dummy variable 57 | 58 | super(SenseReID, self).__init__(train, query, gallery, **kwargs) 59 | 60 | def process_dir(self, dir_path): 61 | img_paths = glob.glob(osp.join(dir_path, '*.jpg')) 62 | data = [] 63 | 64 | for img_path in img_paths: 65 | img_name = osp.splitext(osp.basename(img_path))[0] 66 | pid, camid = img_name.split('_') 67 | pid, camid = int(pid), int(camid) 68 | data.append((img_path, pid, camid)) 69 | 70 | return data 71 | -------------------------------------------------------------------------------- /lreid/data/datasets/video/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .mars import Mars 4 | from .ilidsvid import iLIDSVID 5 | from .prid2011 import PRID2011 6 | from .dukemtmcvidreid import DukeMTMCVidReID 7 | -------------------------------------------------------------------------------- /lreid/data/datasets/video/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data/datasets/video/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data/datasets/video/__pycache__/dukemtmcvidreid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data/datasets/video/__pycache__/dukemtmcvidreid.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data/datasets/video/__pycache__/ilidsvid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data/datasets/video/__pycache__/ilidsvid.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data/datasets/video/__pycache__/mars.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data/datasets/video/__pycache__/mars.cpython-37.pyc 
-------------------------------------------------------------------------------- /lreid/data/datasets/video/__pycache__/prid2011.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data/datasets/video/__pycache__/prid2011.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .reid_loaders import ReIDLoaders 4 | from .incremental_reid_loaders import IncrementalReIDLoaders 5 | from .customed_loaders import CustomedLoaders 6 | from .transforms2 import RandomErasing 7 | from .incremental_datasets import IncrementalReIDDataSet, Incremental_combine_train_samples, Incremental_combine_test_samples -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/customed_loaders.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/customed_loaders.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/dataset.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/incremental_datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/incremental_datasets.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/incremental_reid_loaders.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/incremental_reid_loaders.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/loader.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/loader.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/reid_loaders.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/reid_loaders.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/__pycache__/transforms2.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/data_loader/__pycache__/transforms2.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/data_loader/customed_loaders.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torchvision 3 | from torchvision import transforms 4 | from .dataset import PersonReIDDataSet 5 | 6 | class CustomedLoaders: 7 | ''' 8 | load a custom dataset 9 | query_path and gallery_path should have the following structure 10 | |____ data_path/ 11 | |____ person_id_1/ 12 | |____ 1.jpg 13 | |____ 2.jpg 14 | ...... 15 | |____ person_id_2/ 16 | |____ person_id_3/ 17 | ...... 18 | ''' 19 | 20 | def __init__(self, config): 21 | 22 | self.config = config 23 | self.query_path = config.query_path 24 | self.gallery_path = config.gallery_path 25 | self.transform_test = transforms.Compose([ 26 | transforms.Resize(config.image_size, interpolation=3), 27 | transforms.ToTensor(), 28 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 29 | ]) 30 | 31 | query_dataset = torchvision.datasets.ImageFolder(self.query_path) 32 | gallery_dataset = torchvision.datasets.ImageFolder(self.gallery_path) 33 | self.query_samples = [list(sample)+[1] for sample in query_dataset.samples] # set all camera ids to 1 34 | self.gallery_samples = [list(sample)+[1] for sample in gallery_dataset.samples] # set all camera ids to 1 35 | self.query_dataset = PersonReIDDataSet(self.query_samples, self.transform_test) 36 | self.gallery_dataset = PersonReIDDataSet(self.gallery_samples, self.transform_test) 37 | 38 | self.query_loader = \ 39 | torch.utils.data.DataLoader(self.query_dataset, batch_size=64, num_workers=8, drop_last=False, shuffle=False) 40 | self.gallery_loader = \ 41 | torch.utils.data.DataLoader(self.gallery_dataset, batch_size=64, num_workers=8, drop_last=False, shuffle=False) 42 | -------------------------------------------------------------------------------- /lreid/data_loader/transforms2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import math 4 | 5 | 6 | class RandomErasing(object): 7 | """ Randomly selects a rectangle region in an image and erases its pixels. 8 | 'Random Erasing Data Augmentation' by Zhong et al. 9 | See https://arxiv.org/pdf/1708.04896.pdf 10 | Args: 11 | probability: The probability that the Random Erasing operation will be performed. 12 | sl: Minimum proportion of erased area against input image. 13 | sh: Maximum proportion of erased area against input image. 14 | r1: Minimum aspect ratio of erased area. 15 | mean: Erasing value.
16 | """ 17 | 18 | def __init__(self, probability=0.5, sl=0.02, sh=0.4, r1=0.3, mean=(0.4914, 0.4822, 0.4465)): 19 | self.probability = probability 20 | self.mean = mean 21 | self.sl = sl 22 | self.sh = sh 23 | self.r1 = r1 24 | 25 | def __call__(self, img): 26 | 27 | if random.uniform(0, 1) >= self.probability: 28 | return img 29 | 30 | for attempt in range(100): 31 | area = img.size()[1] * img.size()[2] 32 | 33 | target_area = random.uniform(self.sl, self.sh) * area 34 | aspect_ratio = random.uniform(self.r1, 1 / self.r1) 35 | 36 | h = int(round(math.sqrt(target_area * aspect_ratio))) 37 | w = int(round(math.sqrt(target_area / aspect_ratio))) 38 | 39 | if w < img.size()[2] and h < img.size()[1]: 40 | x1 = random.randint(0, img.size()[1] - h) 41 | y1 = random.randint(0, img.size()[2] - w) 42 | if img.size()[0] == 3: 43 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 44 | img[1, x1:x1 + h, y1:y1 + w] = self.mean[1] 45 | img[2, x1:x1 + h, y1:y1 + w] = self.mean[2] 46 | else: 47 | img[0, x1:x1 + h, y1:y1 + w] = self.mean[0] 48 | return img 49 | 50 | return img -------------------------------------------------------------------------------- /lreid/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .grid import IncrementalSamples4grid 4 | from .prid import IncrementalSamples4prid 5 | from .ilids import IncrementalSamples4ilids 6 | from .viper import IncrementalSamples4viper 7 | from .cuhk01 import IncrementalSamples4cuhk01 8 | from .cuhk02 import IncrementalSamples4cuhk02 9 | from .cuhk03 import IncrementalSamples4cuhk03 10 | from .msmt17 import IncrementalSamples4msmt17 11 | from .sensereid import IncrementalSamples4sensereid 12 | from .market1501 import IncrementalSamples4market 13 | from .dukemtmcreid import IncrementalSamples4duke 14 | from .cuhksysu import IncrementalSamples4subcuhksysu 15 | from .mix import IncrementalSamples4mix 16 | -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhk01.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/cuhk01.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhk02.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/cuhk02.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhk03.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/cuhk03.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhksysu.cpython-37.pyc: 
-------------------------------------------------------------------------------- /lreid/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .grid import IncrementalSamples4grid 4 | from .prid import IncrementalSamples4prid 5 | from .ilids import IncrementalSamples4ilids 6 | from .viper import IncrementalSamples4viper 7 | from .cuhk01 import IncrementalSamples4cuhk01 8 | from .cuhk02 import IncrementalSamples4cuhk02 9 | from .cuhk03 import IncrementalSamples4cuhk03 10 | from .msmt17 import IncrementalSamples4msmt17 11 | from .sensereid import IncrementalSamples4sensereid 12 | from .market1501 import IncrementalSamples4market 13 | from .dukemtmcreid import IncrementalSamples4duke 14 | from .cuhksysu import IncrementalSamples4subcuhksysu 15 | from .mix import IncrementalSamples4mix 16 | -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhk01.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/cuhk01.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhk02.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/cuhk02.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhk03.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/cuhk03.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/cuhksysu.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/cuhksysu.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/dukemtmcreid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/dukemtmcreid.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/grid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/grid.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/ilids.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/ilids.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/market1501.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/market1501.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/mix.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/mix.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/msmt17.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/msmt17.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/prid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/prid.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/sensereid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/sensereid.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/datasets/__pycache__/viper.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/datasets/__pycache__/viper.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/__init__.py:
-------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .classification import accuracy 4 | from .reid import ReIDEvaluator, PrecisionRecall, np_cosine_dist, np_euclidean_dist 5 | from .rank import fast_evaluate_rank 6 | from .metric import tensor_euclidean_dist, tensor_cosine_dist 7 | from .distance import compute_distance_matrix -------------------------------------------------------------------------------- /lreid/evaluation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/__pycache__/classification.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/__pycache__/classification.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/__pycache__/distance.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/__pycache__/distance.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/__pycache__/metric.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/__pycache__/metric.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/__pycache__/rank.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/__pycache__/rank.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/__pycache__/reid.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/__pycache__/reid.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/classification.py: -------------------------------------------------------------------------------- 1 | def accuracy(output, target, topk=[1]): 2 | """Computes the top-k accuracy for the specified values of k""" 3 | maxk = max(topk) 4 | batch_size = target.size(0) 5 | 6 | _, pred = output.topk(maxk, 1, True, True) 7 | pred = pred.t() 8 | correct = pred.eq(target.view(1, -1).expand_as(pred)) 9 | 10 | res = [] 11 | for k in topk: 12 | correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True) 13 | res.append(correct_k.mul_(100.0 / batch_size)) 14 | return res -------------------------------------------------------------------------------- /lreid/evaluation/distance.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function, absolute_import 2 | import torch 3 | from
torch.nn import functional as F 4 | 5 | 6 | def compute_distance_matrix(input1, input2, metric='euclidean'): 7 | """A wrapper function for computing distance matrix. 8 | 9 | Args: 10 | input1 (torch.Tensor): 2-D feature matrix. 11 | input2 (torch.Tensor): 2-D feature matrix. 12 | metric (str, optional): "euclidean" or "cosine". 13 | Default is "euclidean". 14 | 15 | Returns: 16 | torch.Tensor: distance matrix. 17 | 18 | 19 | """ 20 | # check input 21 | assert isinstance(input1, torch.Tensor) 22 | assert isinstance(input2, torch.Tensor) 23 | assert input1.dim() == 2, 'Expected 2-D tensor, but got {}-D'.format( 24 | input1.dim() 25 | ) 26 | assert input2.dim() == 2, 'Expected 2-D tensor, but got {}-D'.format( 27 | input2.dim() 28 | ) 29 | assert input1.size(1) == input2.size(1) 30 | 31 | if metric == 'euclidean': 32 | distmat = euclidean_squared_distance(input1, input2) 33 | elif metric == 'cosine': 34 | distmat = cosine_distance(input1, input2) 35 | else: 36 | raise ValueError( 37 | 'Unknown distance metric: {}. ' 38 | 'Please choose either "euclidean" or "cosine"'.format(metric) 39 | ) 40 | 41 | return distmat 42 | 43 | 44 | def euclidean_squared_distance(input1, input2): 45 | """Computes euclidean squared distance. 46 | 47 | Args: 48 | input1 (torch.Tensor): 2-D feature matrix. 49 | input2 (torch.Tensor): 2-D feature matrix. 50 | 51 | Returns: 52 | torch.Tensor: distance matrix. 53 | """ 54 | m, n = input1.size(0), input2.size(0) 55 | mat1 = torch.pow(input1, 2).sum(dim=1, keepdim=True).expand(m, n) 56 | mat2 = torch.pow(input2, 2).sum(dim=1, keepdim=True).expand(n, m).t() 57 | distmat = mat1 + mat2 58 | distmat.addmm_(1, -2, input1, input2.t()) 59 | return distmat 60 | 61 | 62 | def cosine_distance(input1, input2): 63 | """Computes cosine distance. 64 | 65 | Args: 66 | input1 (torch.Tensor): 2-D feature matrix. 67 | input2 (torch.Tensor): 2-D feature matrix. 68 | 69 | Returns: 70 | torch.Tensor: distance matrix. 
71 | """ 72 | input1_normed = F.normalize(input1, p=2, dim=1) 73 | input2_normed = F.normalize(input2, p=2, dim=1) 74 | distmat = 1 - torch.mm(input1_normed, input2_normed.t()) 75 | return distmat 76 | -------------------------------------------------------------------------------- /lreid/evaluation/metric.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | def tensor_cosine_dist(x, y): 5 | ''' 6 | compute cosine distance between two matrix x and y 7 | with size (n1, d) and (n2, d) and type torch.tensor 8 | return a matrix (n1, n2) 9 | ''' 10 | 11 | x = F.normalize(x, dim=1) 12 | y = F.normalize(y, dim=1) 13 | return torch.matmul(x, y.transpose(0,1)) 14 | 15 | 16 | def tensor_euclidean_dist(x, y): 17 | """ 18 | compute euclidean distance between two matrix x and y 19 | with size (n1, d) and (n2, d) and type torch.tensor 20 | return a matrix (n1, n2) 21 | """ 22 | m, n = x.size(0), y.size(0) 23 | xx = torch.pow(x, 2).sum(1, keepdim=True).expand(m, n) 24 | yy = torch.pow(y, 2).sum(1, keepdim=True).expand(n, m).t() 25 | dist = xx + yy 26 | dist.addmm_(1, -2, x, y.t()) 27 | dist = dist.clamp(min=1e-12).sqrt() # for numerical stability 28 | return dist 29 | -------------------------------------------------------------------------------- /lreid/evaluation/rank_cylib/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | $(PYTHON) setup.py build_ext --inplace 3 | rm -rf build 4 | clean: 5 | rm -rf build 6 | rm -f rank_cy.c *.so -------------------------------------------------------------------------------- /lreid/evaluation/rank_cylib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/rank_cylib/__init__.py -------------------------------------------------------------------------------- /lreid/evaluation/rank_cylib/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/rank_cylib/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/evaluation/rank_cylib/rank_cy.cpython-37m-x86_64-linux-gnu.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/evaluation/rank_cylib/rank_cy.cpython-37m-x86_64-linux-gnu.so -------------------------------------------------------------------------------- /lreid/evaluation/rank_cylib/setup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from distutils.core import setup 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | 7 | def numpy_include(): 8 | try: 9 | numpy_include = np.get_include() 10 | except AttributeError: 11 | numpy_include = np.get_numpy_include() 12 | return numpy_include 13 | 14 | 15 | ext_modules = [ 16 | Extension( 17 | 'rank_cy', 18 | ['rank_cy.pyx'], 19 | include_dirs=[numpy_include()], 20 | ) 21 | ] 22 | 23 | setup( 24 | name='Cython-based reid evaluation code', 25 | ext_modules=cythonize(ext_modules) 26 | ) 27 | 
-------------------------------------------------------------------------------- /lreid/evaluation/rank_cylib/test_cython.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import numpy as np 4 | import timeit 5 | import os.path as osp 6 | 7 | sys.path.insert(0, osp.dirname(osp.abspath(__file__)) + '/../../..') 8 | 9 | from lreid import metrics 10 | """ 11 | Test the speed of cython-based evaluation code. The speed improvements 12 | can be much bigger when using the real reid data, which contains a larger 13 | amount of query and gallery images. 14 | 15 | Note: you might encounter the following error: 16 | 'AssertionError: Error: all query identities do not appear in gallery'. 17 | This is normal because the inputs are random numbers. Just try again. 18 | """ 19 | 20 | print('*** Compare running time ***') 21 | 22 | setup = ''' 23 | import sys 24 | import os.path as osp 25 | import numpy as np 26 | sys.path.insert(0, osp.dirname(osp.abspath(__file__)) + '/../../..') 27 | from lreid import metrics 28 | num_q = 30 29 | num_g = 300 30 | max_rank = 5 31 | distmat = np.random.rand(num_q, num_g) * 20 32 | q_pids = np.random.randint(0, num_q, size=num_q) 33 | g_pids = np.random.randint(0, num_g, size=num_g) 34 | q_camids = np.random.randint(0, 5, size=num_q) 35 | g_camids = np.random.randint(0, 5, size=num_g) 36 | ''' 37 | 38 | print('=> Using market1501\'s metric') 39 | pytime = timeit.timeit( 40 | 'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=False)', 41 | setup=setup, 42 | number=20 43 | ) 44 | cytime = timeit.timeit( 45 | 'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=True)', 46 | setup=setup, 47 | number=20 48 | ) 49 | print('Python time: {} s'.format(pytime)) 50 | print('Cython time: {} s'.format(cytime)) 51 | print('Cython is {} times faster than python\n'.format(pytime / cytime)) 52 | 53 | print('=> Using cuhk03\'s metric') 54 | pytime = timeit.timeit( 55 | 'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03=True, use_cython=False)', 56 | setup=setup, 57 | number=20 58 | ) 59 | cytime = timeit.timeit( 60 | 'metrics.evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03=True, use_cython=True)', 61 | setup=setup, 62 | number=20 63 | ) 64 | print('Python time: {} s'.format(pytime)) 65 | print('Cython time: {} s'.format(cytime)) 66 | print('Cython is {} times faster than python\n'.format(pytime / cytime)) 67 | """ 68 | print("=> Check precision") 69 | 70 | num_q = 30 71 | num_g = 300 72 | max_rank = 5 73 | distmat = np.random.rand(num_q, num_g) * 20 74 | q_pids = np.random.randint(0, num_q, size=num_q) 75 | g_pids = np.random.randint(0, num_g, size=num_g) 76 | q_camids = np.random.randint(0, 5, size=num_q) 77 | g_camids = np.random.randint(0, 5, size=num_g) 78 | 79 | cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=False) 80 | print("Python:\nmAP = {} \ncmc = {}\n".format(mAP, cmc)) 81 | cmc, mAP = evaluate(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=True) 82 | print("Cython:\nmAP = {} \ncmc = {}\n".format(mAP, cmc)) 83 | """ 84 | -------------------------------------------------------------------------------- /lreid/losses/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 |
from .loss import TripletLoss, CrossEntropyLabelSmooth, VAE_Kl_Loss, PlasticityLoss 4 | from .ranked_list_loss import RankedLoss -------------------------------------------------------------------------------- /lreid/losses/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/losses/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/losses/__pycache__/loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/losses/__pycache__/loss.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/losses/__pycache__/ranked_list_loss.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/losses/__pycache__/ranked_list_loss.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/methods/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from . import meta_template 4 | from . import maml 5 | -------------------------------------------------------------------------------- /lreid/methods/drop_grad.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | class DropGrad(torch.nn.Module): 5 | def __init__(self, method='gaussian', rate=0.1, schedule='constant'): 6 | super(DropGrad, self).__init__() 7 | self.method = method 8 | self.rate = rate if self.method != 'gaussian' else np.sqrt(rate/(1 - rate)) 9 | self.schedule = schedule 10 | 11 | def update_rate(self, epoch, stop_epoch): 12 | if self.schedule == 'constant': 13 | self.cur_rate = self.rate 14 | elif self.schedule == 'linear': 15 | self.cur_rate = self.rate * epoch / (stop_epoch - 1) 16 | else: 17 | raise Exception('no such DropGrad schedule') 18 | 19 | def forward(self, input): 20 | if self.method == 'binary': 21 | output = input * (torch.gt(torch.rand_like(input), self.cur_rate).float() * (1 / (1 - self.cur_rate))) 22 | elif self.method == 'gaussian': 23 | output = input * torch.normal(mean=torch.ones_like(input), std=torch.ones_like(input)*self.cur_rate) 24 | elif self.method == 'none': 25 | output = input 26 | else: 27 | raise Exception('no such DropGrad method') 28 | return output 29 | -------------------------------------------------------------------------------- /lreid/models/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .LwFnet import LwFNet, LwFNet_without_bn_bias, LwFNet_without_bn 4 | from .metagraph_fd import MetaGraph_fd, FixedMetaGraph 5 | 6 | -------------------------------------------------------------------------------- /lreid/models/__pycache__/LwFnet.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/models/__pycache__/LwFnet.cpython-37.pyc
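DropGrad above regularizes by injecting noise into gradients rather than activations; its forward reads self.cur_rate, so update_rate must have been called (typically once per epoch) before the module is applied. A minimal sketch, with illustrative epoch numbers and gradient shape:

import torch
from lreid.methods.drop_grad import DropGrad

dropgrad = DropGrad(method='gaussian', rate=0.1, schedule='linear')
dropgrad.update_rate(epoch=5, stop_epoch=50)  # sets cur_rate for the current epoch
grad = torch.randn(8, 16)                     # stands in for a parameter's gradient
noisy_grad = dropgrad(grad)                   # multiplicative Gaussian noise centered at 1
print(noisy_grad.shape)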
-------------------------------------------------------------------------------- /lreid/models/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/models/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/models/__pycache__/bnneck.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/models/__pycache__/bnneck.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/models/__pycache__/metagraph_fd.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/models/__pycache__/metagraph_fd.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/operation/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | from .train_p_s import train_p_s_an_epoch 3 | from .test_continual_operation_neck import plot_prerecall_curve, test_continual_neck, fast_test_continual_neck, output_featuremaps_from_fixed 4 | from .test_p_s import fast_test_p_s, save_and_fast_test_p_s 5 | -------------------------------------------------------------------------------- /lreid/operation/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/operation/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/operation/__pycache__/test_continual_operation_neck.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/operation/__pycache__/test_continual_operation_neck.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/operation/__pycache__/test_p_s.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/operation/__pycache__/test_p_s.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/operation/__pycache__/train_p_s.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/operation/__pycache__/train_p_s.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/tools/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .utils import * 4 | from .meter import * -------------------------------------------------------------------------------- /lreid/tools/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/tools/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/tools/__pycache__/meter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/tools/__pycache__/meter.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/tools/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/tools/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/tools/mean_variance.py: -------------------------------------------------------------------------------- 1 | """ 2 | Compute channel-wise mean and standard deviation of a dataset. 3 | Usage: 4 | $ python mean_variance.py DATASET_ROOT DATASET_KEY 5 | - The first argument points to the root path where you put the datasets. 6 | - The second argument is the key of the specific dataset. 7 | For instance, if your datasets are put under $DATA and you want to 8 | compute the statistics of Market1501, do 9 | $ python mean_variance.py $DATA market1501 10 | """ 11 | import argparse 12 | 13 | import lreid 14 | 15 | 16 | def main(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('root', type=str) 19 | parser.add_argument('sources', type=str) 20 | args = parser.parse_args() 21 | 22 | datamanager = lreid.data.ImageDataManager( 23 | root=args.root, 24 | sources=args.sources, 25 | targets=None, 26 | height=256, 27 | width=128, 28 | batch_size_train=100, 29 | batch_size_test=100, 30 | transforms=None, 31 | norm_mean=[0., 0., 0.], 32 | norm_std=[1., 1., 1.], 33 | train_sampler='SequentialSampler' 34 | ) 35 | train_loader = datamanager.train_loader 36 | 37 | print('Computing mean and std ...') 38 | mean = 0. 39 | std = 0. 40 | n_samples = 0.
41 | for data in train_loader: 42 | data = data['img'] 43 | batch_size = data.size(0) 44 | data = data.view(batch_size, data.size(1), -1) 45 | mean += data.mean(2).sum(0) 46 | std += data.std(2).sum(0) 47 | n_samples += batch_size 48 | 49 | mean /= n_samples 50 | std /= n_samples 51 | print('Mean: {}'.format(mean)) 52 | print('Std: {}'.format(std)) 53 | 54 | 55 | if __name__ == '__main__': 56 | main() -------------------------------------------------------------------------------- /lreid/tools/meter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class CatMeter: 5 | ''' 6 | Concatenate Meter for torch.Tensor 7 | ''' 8 | def __init__(self): 9 | self.reset() 10 | 11 | def reset(self): 12 | self.val = None 13 | 14 | def update(self, val): 15 | if self.val is None: 16 | self.val = val 17 | else: 18 | self.val = torch.cat([self.val, val], dim=0) 19 | def get_val(self): 20 | return self.val 21 | 22 | def get_val_numpy(self): 23 | return self.val.data.cpu().numpy() 24 | 25 | 26 | class MultiItemAverageMeter: 27 | 28 | def __init__(self): 29 | self.content = {} 30 | 31 | def update(self, val): 32 | ''' 33 | :param val: dict, keys are strs, values are torch.Tensor or np.array 34 | ''' 35 | for key in list(val.keys()): 36 | value = val[key] 37 | if key not in list(self.content.keys()): 38 | self.content[key] = {'avg': value, 'sum': value, 'count': 1.0} 39 | else: 40 | self.content[key]['sum'] += value 41 | self.content[key]['count'] += 1.0 42 | self.content[key]['avg'] = self.content[key]['sum'] / self.content[key]['count'] 43 | 44 | def get_val(self): 45 | keys = list(self.content.keys()) 46 | values = [] 47 | for key in keys: 48 | try: 49 | values.append(self.content[key]['avg'].data.cpu().numpy()) 50 | except: 51 | values.append(self.content[key]['avg']) 52 | return keys, values 53 | 54 | def get_value_dict(self): 55 | keys = list(self.content.keys()) 56 | result_dict = {} 57 | for key in keys: 58 | try: 59 | result_dict[key] = self.content[key]['avg'].data.cpu().numpy() 60 | except: 61 | result_dict[key] = self.content[key]['avg'] 62 | return result_dict 63 | 64 | def get_str(self): 65 | 66 | result = '' 67 | keys, values = self.get_val() 68 | 69 | for key, value in zip(keys, values): 70 | result += key 71 | result += ': ' 72 | result += str(value) 73 | result += '; ' 74 | 75 | return result 76 | 77 | -------------------------------------------------------------------------------- /lreid/tools/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | 5 | def os_walk(folder_dir): 6 | for root, dirs, files in os.walk(folder_dir): 7 | files = sorted(files, reverse=True) 8 | dirs = sorted(dirs, reverse=True) 9 | return root, dirs, files 10 | 11 | 12 | def time_now(): 13 | '''return current time in format of 2000-01-01 12:01:01''' 14 | return time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()) 15 | 16 | 17 | def make_dirs(dir): 18 | if not os.path.exists(dir): 19 | os.makedirs(dir) 20 | print('Successfully make dirs: {}'.format(dir)) 21 | else: 22 | print('Existed dirs: {}'.format(dir)) 23 | 24 | 25 | -------------------------------------------------------------------------------- /lreid/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from .tools import * 4 | from .rerank import re_ranking 5 | from .loggers import * 6 | from .avgmeter import * 7 | from .reidtools 
import * 8 | from .torchtools import * 9 | from .model_complexity import compute_model_complexity 10 | -------------------------------------------------------------------------------- /lreid/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/__pycache__/avgmeter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/avgmeter.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/__pycache__/loggers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/loggers.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/__pycache__/model_complexity.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/model_complexity.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/__pycache__/reidtools.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/reidtools.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/__pycache__/rerank.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/rerank.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/__pycache__/tools.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/tools.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/__pycache__/torchtools.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/utils/__pycache__/torchtools.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/utils/avgmeter.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, absolute_import 2 | from collections import defaultdict 3 | import torch 4 | 5 | __all__ = ['AverageMeter', 'MetricMeter'] 6 | 7 | 8 | class AverageMeter(object): 9 | """Computes and stores the average and current value. 
10 | 11 | Examples:: 12 | >>> # Initialize a meter to record loss 13 | >>> losses = AverageMeter() 14 | >>> # Update meter after every minibatch update 15 | >>> losses.update(loss_value, batch_size) 16 | """ 17 | 18 | def __init__(self): 19 | self.reset() 20 | 21 | def reset(self): 22 | self.val = 0 23 | self.avg = 0 24 | self.sum = 0 25 | self.count = 0 26 | 27 | def update(self, val, n=1): 28 | self.val = val 29 | self.sum += val * n 30 | self.count += n 31 | self.avg = self.sum / self.count 32 | 33 | 34 | class MetricMeter(object): 35 | """A collection of metrics. 36 | 37 | Source: https://github.com/KaiyangZhou/Dassl.pytorch 38 | 39 | Examples:: 40 | >>> # 1. Create an instance of MetricMeter 41 | >>> metric = MetricMeter() 42 | >>> # 2. Update using a dictionary as input 43 | >>> input_dict = {'loss_1': value_1, 'loss_2': value_2} 44 | >>> metric.update(input_dict) 45 | >>> # 3. Convert to string and print 46 | >>> print(str(metric)) 47 | """ 48 | 49 | def __init__(self, delimiter='\t'): 50 | self.meters = defaultdict(AverageMeter) 51 | self.delimiter = delimiter 52 | 53 | def update(self, input_dict): 54 | if input_dict is None: 55 | return 56 | 57 | if not isinstance(input_dict, dict): 58 | raise TypeError( 59 | 'Input to MetricMeter.update() must be a dictionary' 60 | ) 61 | 62 | for k, v in input_dict.items(): 63 | if isinstance(v, torch.Tensor): 64 | v = v.item() 65 | self.meters[k].update(v) 66 | 67 | def __str__(self): 68 | output_str = [] 69 | for name, meter in self.meters.items(): 70 | output_str.append( 71 | '{} {:.4f} ({:.4f})'.format(name, meter.val, meter.avg) 72 | ) 73 | return self.delimiter.join(output_str) 74 | -------------------------------------------------------------------------------- /lreid/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, absolute_import 2 | 3 | from .visualize import visualize 4 | from .visualising_rank import visualize_ranked_results 5 | from .logger import VisdomPlotLogger, Logger, VisdomFeatureMapsLogger 6 | from .visualize_featuremap import featuremaps2heatmaps -------------------------------------------------------------------------------- /lreid/visualization/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/visualization/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/visualization/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/visualization/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/visualization/__pycache__/visualising_rank.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/visualization/__pycache__/visualising_rank.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/visualization/__pycache__/visualize.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/visualization/__pycache__/visualize.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/visualization/__pycache__/visualize_featuremap.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/TPCD/LifelongReID/cb33f9c29fe398e7546db345fab1c338dda8252f/lreid/visualization/__pycache__/visualize_featuremap.cpython-37.pyc -------------------------------------------------------------------------------- /lreid/visualization/visualize.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from lreid.tools import CatMeter 3 | from lreid.evaluation.metric import tensor_cosine_dist, tensor_euclidean_dist 4 | from .visualising_rank import visualize_ranked_results 5 | 6 | 7 | def visualize(config, base, loaders): 8 | 9 | base.set_eval() 10 | 11 | # meters 12 | query_features_meter, query_pids_meter, query_cids_meter = CatMeter(), CatMeter(), CatMeter() 13 | gallery_features_meter, gallery_pids_meter, gallery_cids_meter = CatMeter(), CatMeter(), CatMeter() 14 | 15 | # init dataset 16 | if config.visualize_dataset == 'market': 17 | _datasets = [loaders.market_query_samples, loaders.market_gallery_samples] 18 | _loaders = [loaders.market_query_loader, loaders.market_gallery_loader] 19 | elif config.visualize_dataset == 'duke': 20 | _datasets = [loaders.duke_query_samples, loaders.duke_gallery_samples] 21 | _loaders = [loaders.duke_query_loader, loaders.duke_gallery_loader] 22 | elif config.visualize_dataset == 'customed': 23 | _datasets = [loaders.query_samples, loaders.gallery_samples] 24 | _loaders = [loaders.query_loader, loaders.gallery_loader] 25 | 26 | # compute query and gallery features 27 | with torch.no_grad(): 28 | for loader_id, loader in enumerate(_loaders): 29 | for data in loader: 30 | # compute features 31 | images, pids, cids = data 32 | images = images.cuda() 33 | features = base.model(images) 34 | # save as query features 35 | if loader_id == 0: 36 | query_features_meter.update(features.data) 37 | query_pids_meter.update(pids) 38 | query_cids_meter.update(cids) 39 | # save as gallery features 40 | elif loader_id == 1: 41 | gallery_features_meter.update(features.data) 42 | gallery_pids_meter.update(pids) 43 | gallery_cids_meter.update(cids) 44 | 45 | # compute distance 46 | query_features = query_features_meter.get_val() 47 | gallery_features = gallery_features_meter.get_val() 48 | 49 | if config.test_metric == 'cosine': 50 | distance = tensor_cosine_dist(query_features, gallery_features).data.cpu().numpy() 51 | 52 | elif config.test_metric == 'euclidean': 53 | distance = tensor_euclidean_dist(query_features, gallery_features).data.cpu().numpy() 54 | 55 | # visualize 56 | visualize_ranked_results(distance, _datasets, config.visualize_output_path, mode=config.visualize_mode, only_show=config.visualize_mode_onlyshow) 57 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from setuptools import setup, find_packages 3 | from distutils.extension import Extension 4 | from Cython.Build import cythonize 5 | 6 | def numpy_include(): 7 | try: 8 | numpy_include = np.get_include() 9 | except AttributeError: 10 | numpy_include = np.get_numpy_include() 11 | return numpy_include 12 |
13 | 14 | ext_modules = [ 15 | Extension( 16 | 'lreid.evaluation.rank_cylib.rank_cy', 17 | ['lreid/evaluation/rank_cylib/rank_cy.pyx'], 18 | include_dirs=[numpy_include()], 19 | ) 20 | ] 21 | __version__ = '1.0.0' 22 | 23 | setup( 24 | name='lreid', 25 | version=__version__, 26 | description='A library for Lifelong Person Re-Identification in PyTorch', 27 | author='Nan Pu', 28 | license='MIT', 29 | packages=find_packages(), 30 | keywords=['Person Re-Identification', 'Deep Learning', 'Computer Vision'], 31 | ext_modules=cythonize(ext_modules) 32 | ) 33 | --------------------------------------------------------------------------------
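Given the setup.py above, a plausible workflow is an editable install (for example `pip install -e .`, with Cython and numpy available at build time), which also cythonizes the rank_cy extension declared in ext_modules. A minimal smoke test, a sketch rather than a script from the repo, checking that the documented entry points resolve:

import torch
from lreid.evaluation import compute_distance_matrix

# A (2, 3) distance matrix confirms the package and its evaluation API are importable.
print(compute_distance_matrix(torch.randn(2, 8), torch.randn(3, 8)).shape)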