├── LICENSE
├── README.md
├── agent
    ├── dreamer.py
    ├── dreamer.yaml
    ├── dreamer_utils.py
    ├── genrl.py
    ├── genrl.yaml
    ├── plan2explore.py
    ├── plan2explore.yaml
    └── video_utils.py
├── assets
    ├── GenRL_cover.gif
    ├── dashboard.png
    ├── stickman_run.gif
    └── video_samples
    │   ├── a_spider_walking_on_the_floor.mp4
    │   ├── backflip.mp4
    │   ├── dancing.mp4
    │   ├── dead_spider_white.gif
    │   ├── dog_running_seen_from_the_side.mp4
    │   ├── doing_splits.mp4
    │   ├── flex.mp4
    │   ├── guy_walking.gif
    │   ├── headstand.mp4
    │   ├── karate_kick.mp4
    │   ├── lying_down_with_legs_up.mp4
    │   ├── open_microwave.gif
    │   ├── person_standing_up_with_hands_up_seen_from_the_side.mp4
    │   ├── punching.mp4
    │   └── spider_draw.gif
├── collect_data.py
├── collect_data.yaml
├── conf
    ├── defaults
    │   ├── dreamer_v2.yaml
    │   ├── dreamer_v3.yaml
    │   └── genrl.yaml
    ├── env
    │   └── dmc_pixels.yaml
    └── train_mode
    │   ├── train_behavior.yaml
    │   └── train_model.yaml
├── data
    └── stickman_example
    │   └── 1000-20240504T040956-d7ee0ea24b3e4863b1ef5e5bf1849924-501.npz
├── demo
    ├── app.py
    ├── demo_test.py
    └── t2v.py
├── envs
    ├── __init__.py
    ├── custom_dmc_tasks
    │   ├── __init__.py
    │   ├── cheetah.py
    │   ├── cheetah.xml
    │   ├── jaco.py
    │   ├── quadruped.py
    │   ├── quadruped.xml
    │   ├── stickman.py
    │   ├── stickman.xml
    │   ├── walker.py
    │   └── walker.xml
    ├── kitchen_extra.py
    └── main.py
├── notebooks
    ├── demo_videoclip.ipynb
    ├── text2video.ipynb
    ├── video2video.ipynb
    ├── visualize_dataset_episodes.ipynb
    └── visualize_env.ipynb
├── process_dataset.py
├── process_dataset.yaml
├── requirements.txt
├── test
    ├── pytest.ini
    └── test_env.py
├── third_party
    ├── InternVideo
    │   ├── .gitignore
    │   ├── .gitmodules
    │   ├── Data
    │   │   ├── InternVid
    │   │   │   ├── README.md
    │   │   │   ├── README_CN.md
    │   │   │   ├── demo.ipynb
    │   │   │   ├── div_sampling.py
    │   │   │   ├── example1.mp4
    │   │   │   ├── start_annotation_prototype.sh
    │   │   │   ├── utils
    │   │   │   │   ├── basic_utils.py
    │   │   │   │   ├── config.py
    │   │   │   │   ├── config_utils.py
    │   │   │   │   ├── distributed.py
    │   │   │   │   ├── easydict.py
    │   │   │   │   ├── logger.py
    │   │   │   │   ├── optimizer.py
    │   │   │   │   └── scheduler.py
    │   │   │   └── viclip
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   ├── simple_tokenizer.py
    │   │   │   │   ├── viclip.py
    │   │   │   │   ├── viclip_text.py
    │   │   │   │   └── viclip_vision.py
    │   │   └── instruction_data
    │   │   │   ├── README.md
    │   │   │   └── assert
    │   │   │       ├── conversation.png
    │   │   │       └── detailed_description.png
    │   ├── InternVideo1
    │   │   ├── Downstream
    │   │   │   ├── Open-Set-Action-Recognition
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── README.md
    │   │   │   │   ├── configs
    │   │   │   │   │   ├── localization
    │   │   │   │   │   │   ├── bmn
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   └── bmn_400x100_2x8_9e_activitynet_feature.py
    │   │   │   │   │   │   ├── bsn
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── bsn_pem_400x100_1x16_20e_activitynet_feature.py
    │   │   │   │   │   │   │   ├── bsn_pgm_400x100_activitynet_feature.py
    │   │   │   │   │   │   │   └── bsn_tem_400x100_1x16_20e_activitynet_feature.py
    │   │   │   │   │   │   └── ssn
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── ssn_r50_450e_thumos14_rgb_test.py
    │   │   │   │   │   │   │   └── ssn_r50_450e_thumos14_rgb_train.py
    │   │   │   │   │   ├── recognition
    │   │   │   │   │   │   ├── c3d
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   └── c3d_sports1m_16x1x1_45e_ucf101_rgb.py
    │   │   │   │   │   │   ├── csn
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── finetune_ucf101_csn_dnn.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_csn_edlnokl_avuc_debias.py
    │   │   │   │   │   │   │   ├── inference_csn_dnn.py
    │   │   │   │   │   │   │   ├── inference_csn_enn.py
    │   │   │   │   │   │   │   ├── ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb.py
    │   │   │   │   │   │   │   └── ircsn_ig65m_pretrained_r152_32x2x1_58e_kinetics400_rgb.py
    │   │   │   │   │   │   ├── i3d
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── finetune_ucf101_i3d_bnn.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_i3d_dnn.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_i3d_edlnokl.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_i3d_edlnokl_avuc_debias.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_i3d_rpl.py
    │   │   │   │   │   │   │   ├── i3d_nl_dot_product_r50_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_nl_embedded_gaussian_r50_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_nl_gaussian_r50_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_r50_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_r50_dense_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_r50_heavy_8x8x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_r50_lazy_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_r50_video_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_r50_video_heavy_8x8x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── i3d_r50_video_inference_32x2x1_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── inference_i3d_bnn.py
    │   │   │   │   │   │   │   ├── inference_i3d_dnn.py
    │   │   │   │   │   │   │   ├── inference_i3d_enn.py
    │   │   │   │   │   │   │   ├── inference_i3d_rpl.py
    │   │   │   │   │   │   │   ├── train_kinetics10_i3d_DEAR.py
    │   │   │   │   │   │   │   └── train_kinetics10_i3d_DEAR_noDebias.py
    │   │   │   │   │   │   ├── mae
    │   │   │   │   │   │   │   ├── finetune_ucf101_mae_edlnokl.py
    │   │   │   │   │   │   │   └── inference_mae_enn.py
    │   │   │   │   │   │   ├── omnisource
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── pipeline.png
    │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_minikinetics
    │   │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_minikinetics_googleimage_rgb.py
    │   │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_minikinetics_insvideo_rgb.py
    │   │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_minikinetics_kineticsraw_rgb.py
    │   │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_minikinetics_omnisource_rgb.py
    │   │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_minikinetics_rgb.py
    │   │   │   │   │   │   │   │   └── slowonly_r50_8x8x1_256e_minikinetics_webimage_rgb.py
    │   │   │   │   │   │   │   └── tsn_r50_1x1x8_100e_minikinetics
    │   │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_100e_minikinetics_googleimage_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_100e_minikinetics_insvideo_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_100e_minikinetics_kineticsraw_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_100e_minikinetics_omnisource_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_100e_minikinetics_rgb.py
    │   │   │   │   │   │   │   │   └── tsn_r50_1x1x8_100e_minikinetics_webimage_rgb.py
    │   │   │   │   │   │   ├── r2plus1d
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── r2plus1d_r34_32x2x1_180e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── r2plus1d_r34_8x8x1_180e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── r2plus1d_r34_video_8x8x1_180e_kinetics400_rgb.py
    │   │   │   │   │   │   │   └── r2plus1d_r34_video_inference_8x8x1_180e_kinetics400_rgb.py
    │   │   │   │   │   │   ├── slowfast
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── finetune_ucf101_slowfast_bnn.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_slowfast_dnn.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_slowfast_edlnokl_avuc_debias.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_slowfast_rpl.py
    │   │   │   │   │   │   │   ├── inference_slowfast_bnn.py
    │   │   │   │   │   │   │   ├── inference_slowfast_dnn.py
    │   │   │   │   │   │   │   ├── inference_slowfast_enn.py
    │   │   │   │   │   │   │   ├── inference_slowfast_rpl.py
    │   │   │   │   │   │   │   ├── slowfast_r50_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowfast_r50_8x8x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowfast_r50_video_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   └── slowfast_r50_video_inference_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   ├── slowonly
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── data_benchmark
    │   │   │   │   │   │   │   │   ├── slowonly_r50_randomresizedcrop_256p_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── slowonly_r50_randomresizedcrop_320p_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   └── slowonly_r50_randomresizedcrop_340x256_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_imagenet_pretrained_r50_4x16x1_120e_gym99_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_imagenet_pretrained_r50_4x16x1_150e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_imagenet_pretrained_r50_8x8x1_150e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_kinetics_pretrained_r50_4x16x1_120e_gym99_flow.py
    │   │   │   │   │   │   │   ├── slowonly_r101_8x8x1_196e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_r50_4x16x1_256e_kinetics400_flow.py
    │   │   │   │   │   │   │   ├── slowonly_r50_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_kinetics400_flow.py
    │   │   │   │   │   │   │   ├── slowonly_r50_8x8x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_r50_video_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_r50_video_8x8x1_256e_kinetics600_rgb.py
    │   │   │   │   │   │   │   ├── slowonly_r50_video_8x8x1_256e_kinetics700_rgb.py
    │   │   │   │   │   │   │   └── slowonly_r50_video_inference_4x16x1_256e_kinetics400_rgb.py
    │   │   │   │   │   │   ├── tin
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── tin_r50_1x1x8_40e_sthv1_rgb.py
    │   │   │   │   │   │   │   ├── tin_r50_1x1x8_40e_sthv2_rgb.py
    │   │   │   │   │   │   │   └── tin_tsm_finetune_r50_1x1x8_50e_kinetics400_rgb.py
    │   │   │   │   │   │   ├── tpn
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── finetune_ucf101_tpn_slowonly_rpl.py
    │   │   │   │   │   │   │   ├── inference_tpn_slowonly_bnn.py
    │   │   │   │   │   │   │   ├── inference_tpn_slowonly_dnn.py
    │   │   │   │   │   │   │   ├── inference_tpn_slowonly_enn.py
    │   │   │   │   │   │   │   ├── inference_tpn_slowonly_rpl.py
    │   │   │   │   │   │   │   ├── tpn_imagenet_pretrained_slowonly_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_bnn_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_celoss_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_edlloss_avuc_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_edlloss_nokl_avuc_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_edlloss_nokl_avuc_rebias_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_edlloss_nokl_davuc_debias_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_edlloss_nokl_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_edlloss_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   ├── tpn_slowonly_r50_8x8x1_150e_kinetics_rgb.py
    │   │   │   │   │   │   │   └── tpn_tsm_r50_1x1x8_150e_sthv1_rgb.py
    │   │   │   │   │   │   ├── tsm
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── finetune_ucf101_tsm_bnn.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_tsm_dnn.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_tsm_edlnokl_avuc_debias.py
    │   │   │   │   │   │   │   ├── finetune_ucf101_tsm_rpl.py
    │   │   │   │   │   │   │   ├── inference_tsm_bnn.py
    │   │   │   │   │   │   │   ├── inference_tsm_dnn.py
    │   │   │   │   │   │   │   ├── inference_tsm_enn.py
    │   │   │   │   │   │   │   ├── inference_tsm_rpl.py
    │   │   │   │   │   │   │   ├── train_kinetics10_tsm_DEAR.py
    │   │   │   │   │   │   │   ├── train_kinetics10_tsm_DEAR_noDebias.py
    │   │   │   │   │   │   │   ├── tsm_nl_dot_product_r50_1x1x8_50e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsm_nl_embedded_gaussian_r50_1x1x8_50e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsm_nl_gaussian_r50_1x1x8_50e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r101_1x1x8_50e_sthv1_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r101_1x1x8_50e_sthv2_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_1x1x16_50e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_1x1x16_50e_sthv1_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_1x1x16_50e_sthv2_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_1x1x8_50e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_1x1x8_50e_sthv1_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_1x1x8_50e_sthv2_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_dense_1x1x8_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_video_1x1x8_50e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsm_r50_video_inference_1x1x8_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   └── tsm_temporal_pool_r50_1x1x8_50e_kinetics400_rgb.py
    │   │   │   │   │   │   ├── tsn
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── data_benchmark
    │   │   │   │   │   │   │   │   ├── tsn_r50_multiscalecrop_256p_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_multiscalecrop_320p_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_multiscalecrop_340x256_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_randomresizedcrop_256p_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_randomresizedcrop_320p_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_randomresizedcrop_340x256_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_test_256p_1x1x25_10crop_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_test_256p_1x1x25_3crop_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_test_320p_1x1x25_10crop_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_test_320p_1x1x25_3crop_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r50_test_340x256_1x1x25_10crop_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   │   └── tsn_r50_test_340x256_1x1x25_3crop_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── hvu
    │   │   │   │   │   │   │   │   ├── tsn_r18_1x1x8_100e_hvu_action_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r18_1x1x8_100e_hvu_attribute_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r18_1x1x8_100e_hvu_concept_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r18_1x1x8_100e_hvu_event_rgb.py
    │   │   │   │   │   │   │   │   ├── tsn_r18_1x1x8_100e_hvu_object_rgb.py
    │   │   │   │   │   │   │   │   └── tsn_r18_1x1x8_100e_hvu_scene_rgb.py
    │   │   │   │   │   │   │   ├── tsn_fp16_r50_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r101_1x1x5_50e_mmit_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x16_50e_sthv1_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x16_50e_sthv2_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x3_75e_ucf101_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x6_100e_mit_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_50e_hmdb51_imagenet_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_50e_hmdb51_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_50e_hmdb51_mit_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_50e_sthv1_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_1x1x8_50e_sthv2_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x3_110e_kinetics400_flow.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x8_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x8_110e_kinetics400_flow.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x8_150e_activitynet_clip_flow.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x8_150e_activitynet_video_flow.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x8_50e_activitynet_clip_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_320p_1x1x8_50e_activitynet_video_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_dense_1x1x5_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_dense_1x1x8_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_inference_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_video_1x1x8_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_video_1x1x8_100e_kinetics600_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_video_1x1x8_100e_kinetics700_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_video_320p_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   ├── tsn_r50_video_dense_1x1x8_100e_kinetics400_rgb.py
    │   │   │   │   │   │   │   └── tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py
    │   │   │   │   │   │   └── x3d
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── x3d_m_16x5x1_facebook_kinetics400_rgb.py
    │   │   │   │   │   │   │   └── x3d_s_13x6x1_facebook_kinetics400_rgb.py
    │   │   │   │   │   └── recognition_audio
    │   │   │   │   │   │   ├── audioonly
    │   │   │   │   │   │       └── audioonly_r50_64x1x1_100e_kinetics400_audio_feature.py
    │   │   │   │   │   │   └── resnet
    │   │   │   │   │   │       ├── README.md
    │   │   │   │   │   │       ├── tsn_r18_64x1x1_100e_kinetics400_audio_feature.py
    │   │   │   │   │   │       └── tsn_r50_64x1x1_100e_kinetics400_audio.py
    │   │   │   │   ├── docker
    │   │   │   │   │   └── Dockerfile
    │   │   │   │   ├── docs
    │   │   │   │   │   ├── Makefile
    │   │   │   │   │   ├── api.rst
    │   │   │   │   │   ├── benchmark.md
    │   │   │   │   │   ├── changelog.md
    │   │   │   │   │   ├── conf.py
    │   │   │   │   │   ├── config.md
    │   │   │   │   │   ├── data_preparation.md
    │   │   │   │   │   ├── faq.md
    │   │   │   │   │   ├── getting_started.md
    │   │   │   │   │   ├── imgs
    │   │   │   │   │   │   ├── acc_curve.png
    │   │   │   │   │   │   ├── data_pipeline.png
    │   │   │   │   │   │   ├── mmaction2_logo.png
    │   │   │   │   │   │   └── mmaction2_overview.gif
    │   │   │   │   │   ├── index.rst
    │   │   │   │   │   ├── install.md
    │   │   │   │   │   ├── make.bat
    │   │   │   │   │   ├── merge_docs.sh
    │   │   │   │   │   ├── stat.py
    │   │   │   │   │   ├── supported_datasets.md
    │   │   │   │   │   ├── tutorials
    │   │   │   │   │   │   ├── customize_runtime.md
    │   │   │   │   │   │   ├── data_pipeline.md
    │   │   │   │   │   │   ├── export_model.md
    │   │   │   │   │   │   ├── finetune.md
    │   │   │   │   │   │   ├── new_dataset.md
    │   │   │   │   │   │   └── new_modules.md
    │   │   │   │   │   └── useful_tools.md
    │   │   │   │   ├── experiments
    │   │   │   │   │   ├── analyze_features.py
    │   │   │   │   │   ├── baseline_openmax.py
    │   │   │   │   │   ├── baseline_rpl.py
    │   │   │   │   │   ├── baseline_softmax.py
    │   │   │   │   │   ├── compare_openness.py
    │   │   │   │   │   ├── compare_openness_new.py
    │   │   │   │   │   ├── csn
    │   │   │   │   │   │   ├── baseline_csn_openmax.sh
    │   │   │   │   │   │   ├── baseline_csn_softmax.sh
    │   │   │   │   │   │   ├── evaluate_csn_dnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_csn_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── finetune_csn_dnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_csn_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── run_get_threshold.sh
    │   │   │   │   │   │   ├── run_ood_detection.sh
    │   │   │   │   │   │   ├── run_openness.sh
    │   │   │   │   │   │   └── run_reliability_evaluation.sh
    │   │   │   │   │   ├── demo.py
    │   │   │   │   │   ├── draw_confusion_matrix.py
    │   │   │   │   │   ├── draw_fig7cd.py
    │   │   │   │   │   ├── draw_ood_hist.py
    │   │   │   │   │   ├── draw_openness_curves.py
    │   │   │   │   │   ├── draw_performance_gain.py
    │   │   │   │   │   ├── eval_debias.py
    │   │   │   │   │   ├── evaluate_calibration.py
    │   │   │   │   │   ├── get_threshold.py
    │   │   │   │   │   ├── get_threshold_dist.py
    │   │   │   │   │   ├── i3d
    │   │   │   │   │   │   ├── baseline_i3d_openmax.sh
    │   │   │   │   │   │   ├── baseline_i3d_rpl.sh
    │   │   │   │   │   │   ├── baseline_i3d_softmax.sh
    │   │   │   │   │   │   ├── evaluate_i3d_bnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_i3d_dnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_i3d_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_i3d_edlnokl_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_i3d_rpl_ucf101.sh
    │   │   │   │   │   │   ├── finetune_i3d_bnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_i3d_dnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_i3d_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── finetune_i3d_edlnokl_ucf101.sh
    │   │   │   │   │   │   ├── finetune_i3d_rpl_ucf101.sh
    │   │   │   │   │   │   ├── run_draw_confmat.sh
    │   │   │   │   │   │   ├── run_get_threshold.sh
    │   │   │   │   │   │   ├── run_ood_detection.sh
    │   │   │   │   │   │   ├── run_openness.sh
    │   │   │   │   │   │   ├── run_openness_new.sh
    │   │   │   │   │   │   ├── run_reliability_evaluation.sh
    │   │   │   │   │   │   ├── train_i3d_DEAR_kinetics10.sh
    │   │   │   │   │   │   └── train_i3d_DEAR_noDebias_kinetics10.sh
    │   │   │   │   │   ├── libMR
    │   │   │   │   │   │   ├── COPYRIGHT_Libmr.txt
    │   │   │   │   │   │   ├── Makefile
    │   │   │   │   │   │   ├── MetaRecognition.cpp
    │   │   │   │   │   │   ├── MetaRecognition.h
    │   │   │   │   │   │   ├── build_libmr_python.sh
    │   │   │   │   │   │   ├── compile.sh
    │   │   │   │   │   │   ├── estimate_wscores.py
    │   │   │   │   │   │   ├── libmr.c
    │   │   │   │   │   │   ├── libmr.cpp
    │   │   │   │   │   │   ├── libmr.pxd
    │   │   │   │   │   │   ├── libmr.pyx
    │   │   │   │   │   │   ├── malloc.h
    │   │   │   │   │   │   ├── setup.py
    │   │   │   │   │   │   ├── test_libmr.py
    │   │   │   │   │   │   ├── weibull.c
    │   │   │   │   │   │   └── weibull.h
    │   │   │   │   │   ├── mae
    │   │   │   │   │   │   ├── finetune_mae_edlnokl_ucf101.sh
    │   │   │   │   │   │   ├── run_get_mae_threshold.sh
    │   │   │   │   │   │   └── run_ood_mae_dist_detection.sh
    │   │   │   │   │   ├── ood_detection.py
    │   │   │   │   │   ├── ood_detection_dist.py
    │   │   │   │   │   ├── slowfast
    │   │   │   │   │   │   ├── baseline_slowfast_openmax.sh
    │   │   │   │   │   │   ├── baseline_slowfast_rpl.sh
    │   │   │   │   │   │   ├── baseline_slowfast_softmax.sh
    │   │   │   │   │   │   ├── evaluate_slowfast_bnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_slowfast_dnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_slowfast_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_slowfast_rpl_ucf101.sh
    │   │   │   │   │   │   ├── finetune_slowfast_bnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_slowfast_dnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_slowfast_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── finetune_slowfast_rpl_ucf101.sh
    │   │   │   │   │   │   ├── run_draw_confmat.sh
    │   │   │   │   │   │   ├── run_get_threshold.sh
    │   │   │   │   │   │   ├── run_ood_detection.sh
    │   │   │   │   │   │   ├── run_openness.sh
    │   │   │   │   │   │   ├── run_openness_new.sh
    │   │   │   │   │   │   └── run_reliability_evaluation.sh
    │   │   │   │   │   ├── tpn_slowonly
    │   │   │   │   │   │   ├── analyze_features.sh
    │   │   │   │   │   │   ├── baseline_tpn_openmax.sh
    │   │   │   │   │   │   ├── baseline_tpn_rpl.sh
    │   │   │   │   │   │   ├── baseline_tpn_softmax.sh
    │   │   │   │   │   │   ├── evaluate_tpn_bnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tpn_celoss_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tpn_edlloss_avuc_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tpn_edlloss_nokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tpn_edlloss_nokl_avuc_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tpn_edlloss_nokl_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tpn_edlloss_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tpn_rpl_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_bnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_celoss_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_edlloss_avuc_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_edlloss_nokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_edlloss_nokl_avuc_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_edlloss_nokl_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_edlloss_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tpn_rpl_ucf101.sh
    │   │   │   │   │   │   ├── gradcam_demo.sh
    │   │   │   │   │   │   ├── run_draw_confmat.sh
    │   │   │   │   │   │   ├── run_get_threshold.sh
    │   │   │   │   │   │   ├── run_ood_detection.sh
    │   │   │   │   │   │   ├── run_openness.sh
    │   │   │   │   │   │   ├── run_openness_new.sh
    │   │   │   │   │   │   ├── run_reliability_evaluation.sh
    │   │   │   │   │   │   ├── searchw_evaluate.sh
    │   │   │   │   │   │   └── searchw_finetune.sh
    │   │   │   │   │   └── tsm
    │   │   │   │   │   │   ├── baseline_tsm_openmax.sh
    │   │   │   │   │   │   ├── baseline_tsm_rpl.sh
    │   │   │   │   │   │   ├── baseline_tsm_softmax.sh
    │   │   │   │   │   │   ├── demo_tsm_debias.sh
    │   │   │   │   │   │   ├── evaluate_tsm_DEAR.sh
    │   │   │   │   │   │   ├── evaluate_tsm_DEAR_noDebias.sh
    │   │   │   │   │   │   ├── evaluate_tsm_bnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tsm_dnn_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tsm_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── evaluate_tsm_rpl_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tsm_bnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tsm_dnn_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tsm_edlnokl_avuc_debias_ucf101.sh
    │   │   │   │   │   │   ├── finetune_tsm_rpl_ucf101.sh
    │   │   │   │   │   │   ├── run_draw_confmat.sh
    │   │   │   │   │   │   ├── run_get_threshold.sh
    │   │   │   │   │   │   ├── run_ood_detection.sh
    │   │   │   │   │   │   ├── run_openness.sh
    │   │   │   │   │   │   ├── run_openness_new.sh
    │   │   │   │   │   │   ├── run_reliability_evaluation.sh
    │   │   │   │   │   │   ├── train_tsm_DEAR_kinetics10.sh
    │   │   │   │   │   │   └── train_tsm_DEAR_noDebias_kinetics10.sh
    │   │   │   │   ├── mmaction
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── apis
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── inference.py
    │   │   │   │   │   │   ├── test.py
    │   │   │   │   │   │   └── train.py
    │   │   │   │   │   ├── core
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── evaluation
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── accuracy.py
    │   │   │   │   │   │   │   ├── eval_detection.py
    │   │   │   │   │   │   │   └── eval_hooks.py
    │   │   │   │   │   │   ├── lr
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   └── tin_lr_hook.py
    │   │   │   │   │   │   ├── optimizer
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── copy_of_sgd.py
    │   │   │   │   │   │   │   └── tsm_optimizer_constructor.py
    │   │   │   │   │   │   └── runner
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── annealing_runner.py
    │   │   │   │   │   │   │   └── omnisource_runner.py
    │   │   │   │   │   ├── datasets
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── activitynet_dataset.py
    │   │   │   │   │   │   ├── audio_dataset.py
    │   │   │   │   │   │   ├── audio_feature_dataset.py
    │   │   │   │   │   │   ├── audio_visual_dataset.py
    │   │   │   │   │   │   ├── ava_dataset.py
    │   │   │   │   │   │   ├── base.py
    │   │   │   │   │   │   ├── builder.py
    │   │   │   │   │   │   ├── dataset_wrappers.py
    │   │   │   │   │   │   ├── hvu_dataset.py
    │   │   │   │   │   │   ├── image_dataset.py
    │   │   │   │   │   │   ├── pipelines
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── augmentations.py
    │   │   │   │   │   │   │   ├── compose.py
    │   │   │   │   │   │   │   ├── formating.py
    │   │   │   │   │   │   │   └── loading.py
    │   │   │   │   │   │   ├── rawframe_dataset.py
    │   │   │   │   │   │   ├── rawvideo_dataset.py
    │   │   │   │   │   │   ├── registry.py
    │   │   │   │   │   │   ├── samplers
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   └── distributed_sampler.py
    │   │   │   │   │   │   ├── ssn_dataset.py
    │   │   │   │   │   │   └── video_dataset.py
    │   │   │   │   │   ├── localization
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── bsn_utils.py
    │   │   │   │   │   │   ├── proposal_utils.py
    │   │   │   │   │   │   └── ssn_utils.py
    │   │   │   │   │   ├── models
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── backbones
    │   │   │   │   │   │   │   ├── ViT3D.py
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── c3d.py
    │   │   │   │   │   │   │   ├── resnet.py
    │   │   │   │   │   │   │   ├── resnet2plus1d.py
    │   │   │   │   │   │   │   ├── resnet3d.py
    │   │   │   │   │   │   │   ├── resnet3d_csn.py
    │   │   │   │   │   │   │   ├── resnet3d_slowfast.py
    │   │   │   │   │   │   │   ├── resnet3d_slowonly.py
    │   │   │   │   │   │   │   ├── resnet_audio.py
    │   │   │   │   │   │   │   ├── resnet_tin.py
    │   │   │   │   │   │   │   ├── resnet_tsm.py
    │   │   │   │   │   │   │   └── x3d.py
    │   │   │   │   │   │   ├── builder.py
    │   │   │   │   │   │   ├── common
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── conv2plus1d.py
    │   │   │   │   │   │   │   └── conv_audio.py
    │   │   │   │   │   │   ├── heads
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── audio_tsn_head.py
    │   │   │   │   │   │   │   ├── aux_head.py
    │   │   │   │   │   │   │   ├── base.py
    │   │   │   │   │   │   │   ├── base_cls_head.py
    │   │   │   │   │   │   │   ├── bnn.py
    │   │   │   │   │   │   │   ├── debias_head.py
    │   │   │   │   │   │   │   ├── i3d_bnn_head.py
    │   │   │   │   │   │   │   ├── i3d_head.py
    │   │   │   │   │   │   │   ├── i3d_rpl_head.py
    │   │   │   │   │   │   │   ├── rebias_head.py
    │   │   │   │   │   │   │   ├── rpl_dist.py
    │   │   │   │   │   │   │   ├── slowfast_bnn_head.py
    │   │   │   │   │   │   │   ├── slowfast_head.py
    │   │   │   │   │   │   │   ├── slowfast_rpl_head.py
    │   │   │   │   │   │   │   ├── ssn_head.py
    │   │   │   │   │   │   │   ├── tpn_bnn_head.py
    │   │   │   │   │   │   │   ├── tpn_head.py
    │   │   │   │   │   │   │   ├── tpn_rpl_head.py
    │   │   │   │   │   │   │   ├── tsm_bnn_head.py
    │   │   │   │   │   │   │   ├── tsm_head.py
    │   │   │   │   │   │   │   ├── tsm_rpl_head.py
    │   │   │   │   │   │   │   ├── tsn_head.py
    │   │   │   │   │   │   │   └── x3d_head.py
    │   │   │   │   │   │   ├── localizers
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── base.py
    │   │   │   │   │   │   │   ├── bmn.py
    │   │   │   │   │   │   │   ├── bsn.py
    │   │   │   │   │   │   │   ├── ssn.py
    │   │   │   │   │   │   │   └── utils
    │   │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   │   └── post_processing.py
    │   │   │   │   │   │   ├── losses
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── base.py
    │   │   │   │   │   │   │   ├── binary_logistic_regression_loss.py
    │   │   │   │   │   │   │   ├── bmn_loss.py
    │   │   │   │   │   │   │   ├── bnn_loss.py
    │   │   │   │   │   │   │   ├── cross_entropy_loss.py
    │   │   │   │   │   │   │   ├── edl_loss.py
    │   │   │   │   │   │   │   ├── gcp_loss.py
    │   │   │   │   │   │   │   ├── hvu_loss.py
    │   │   │   │   │   │   │   ├── nll_loss.py
    │   │   │   │   │   │   │   ├── ohem_hinge_loss.py
    │   │   │   │   │   │   │   ├── rebias_loss.py
    │   │   │   │   │   │   │   ├── rpl_loss.py
    │   │   │   │   │   │   │   └── ssn_loss.py
    │   │   │   │   │   │   ├── necks
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   └── tpn.py
    │   │   │   │   │   │   ├── recognizers
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── audio_recognizer.py
    │   │   │   │   │   │   │   ├── base.py
    │   │   │   │   │   │   │   ├── recognizer2d.py
    │   │   │   │   │   │   │   ├── recognizer2d_bnn.py
    │   │   │   │   │   │   │   ├── recognizer2d_rpl.py
    │   │   │   │   │   │   │   ├── recognizer3d.py
    │   │   │   │   │   │   │   ├── recognizer3d_bnn.py
    │   │   │   │   │   │   │   └── recognizer3d_rpl.py
    │   │   │   │   │   │   └── registry.py
    │   │   │   │   │   ├── utils
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── collect_env.py
    │   │   │   │   │   │   ├── gradcam_utils.py
    │   │   │   │   │   │   ├── logger.py
    │   │   │   │   │   │   └── misc.py
    │   │   │   │   │   └── version.py
    │   │   │   │   ├── requirements.txt
    │   │   │   │   ├── requirements
    │   │   │   │   │   ├── build.txt
    │   │   │   │   │   ├── docs.txt
    │   │   │   │   │   ├── optional.txt
    │   │   │   │   │   ├── readthedocs.txt
    │   │   │   │   │   ├── runtime.txt
    │   │   │   │   │   └── tests.txt
    │   │   │   │   ├── setup.cfg
    │   │   │   │   ├── setup.py
    │   │   │   │   ├── tests
    │   │   │   │   │   ├── data
    │   │   │   │   │   │   ├── action_test_anno.json
    │   │   │   │   │   │   ├── audio_feature_test_list.txt
    │   │   │   │   │   │   ├── audio_test_list.txt
    │   │   │   │   │   │   ├── frame_test_list.txt
    │   │   │   │   │   │   ├── frame_test_list_multi_label.txt
    │   │   │   │   │   │   ├── frame_test_list_with_offset.txt
    │   │   │   │   │   │   ├── hvu_frame_test_anno.json
    │   │   │   │   │   │   ├── hvu_video_eval_test_anno.json
    │   │   │   │   │   │   ├── hvu_video_test_anno.json
    │   │   │   │   │   │   ├── proposal_normalized_list.txt
    │   │   │   │   │   │   ├── proposal_test_list.txt
    │   │   │   │   │   │   ├── rawvideo_test_anno.json
    │   │   │   │   │   │   ├── rawvideo_test_anno.txt
    │   │   │   │   │   │   ├── test.jpg
    │   │   │   │   │   │   ├── test.mp4
    │   │   │   │   │   │   ├── test.wav
    │   │   │   │   │   │   ├── test_activitynet_features
    │   │   │   │   │   │   │   ├── v_test1.csv
    │   │   │   │   │   │   │   └── v_test2.csv
    │   │   │   │   │   │   ├── test_ava_dataset
    │   │   │   │   │   │   │   ├── ava_excluded_timestamps_sample.csv
    │   │   │   │   │   │   │   └── ava_sample.csv
    │   │   │   │   │   │   ├── test_bsp_features
    │   │   │   │   │   │   │   └── v_test1.npy
    │   │   │   │   │   │   ├── test_eval_detection
    │   │   │   │   │   │   │   ├── gt.json
    │   │   │   │   │   │   │   └── result.json
    │   │   │   │   │   │   ├── test_imgs
    │   │   │   │   │   │   │   ├── img_00001.jpg
    │   │   │   │   │   │   │   ├── img_00002.jpg
    │   │   │   │   │   │   │   ├── img_00003.jpg
    │   │   │   │   │   │   │   ├── img_00004.jpg
    │   │   │   │   │   │   │   ├── img_00005.jpg
    │   │   │   │   │   │   │   ├── img_00006.jpg
    │   │   │   │   │   │   │   ├── img_00007.jpg
    │   │   │   │   │   │   │   ├── img_00008.jpg
    │   │   │   │   │   │   │   ├── img_00009.jpg
    │   │   │   │   │   │   │   ├── img_00010.jpg
    │   │   │   │   │   │   │   ├── x_00001.jpg
    │   │   │   │   │   │   │   ├── x_00002.jpg
    │   │   │   │   │   │   │   ├── x_00003.jpg
    │   │   │   │   │   │   │   ├── x_00004.jpg
    │   │   │   │   │   │   │   ├── x_00005.jpg
    │   │   │   │   │   │   │   ├── y_00001.jpg
    │   │   │   │   │   │   │   ├── y_00002.jpg
    │   │   │   │   │   │   │   ├── y_00003.jpg
    │   │   │   │   │   │   │   ├── y_00004.jpg
    │   │   │   │   │   │   │   └── y_00005.jpg
    │   │   │   │   │   │   ├── test_proposals
    │   │   │   │   │   │   │   ├── v_test1.csv
    │   │   │   │   │   │   │   └── v_test2.csv
    │   │   │   │   │   │   ├── test_rawvideo_dataset
    │   │   │   │   │   │   │   ├── part_0.mp4
    │   │   │   │   │   │   │   └── part_1.mp4
    │   │   │   │   │   │   ├── test_tem_results
    │   │   │   │   │   │   │   ├── v_test1.csv
    │   │   │   │   │   │   │   └── v_test2.csv
    │   │   │   │   │   │   └── video_test_list.txt
    │   │   │   │   │   ├── test_accuracy.py
    │   │   │   │   │   ├── test_data
    │   │   │   │   │   │   ├── test_augmentations.py
    │   │   │   │   │   │   ├── test_ava_dataset.py
    │   │   │   │   │   │   ├── test_compose.py
    │   │   │   │   │   │   ├── test_dataset.py
    │   │   │   │   │   │   ├── test_formating.py
    │   │   │   │   │   │   └── test_loading.py
    │   │   │   │   │   ├── test_gradcam.py
    │   │   │   │   │   ├── test_localization_utils.py
    │   │   │   │   │   ├── test_loss.py
    │   │   │   │   │   ├── test_models
    │   │   │   │   │   │   ├── test_backbone.py
    │   │   │   │   │   │   ├── test_common_modules.py
    │   │   │   │   │   │   ├── test_head.py
    │   │   │   │   │   │   ├── test_inference.py
    │   │   │   │   │   │   ├── test_localizers.py
    │   │   │   │   │   │   ├── test_neck.py
    │   │   │   │   │   │   └── test_recognizers.py
    │   │   │   │   │   └── test_runtime
    │   │   │   │   │   │   ├── test_apis_test.py
    │   │   │   │   │   │   ├── test_config.py
    │   │   │   │   │   │   ├── test_eval_hook.py
    │   │   │   │   │   │   ├── test_lr.py
    │   │   │   │   │   │   ├── test_onnx.py
    │   │   │   │   │   │   ├── test_optimizer.py
    │   │   │   │   │   │   └── test_train.py
    │   │   │   │   └── tools
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── analysis
    │   │   │   │   │       ├── analyze_logs.py
    │   │   │   │   │       ├── bench_processing.py
    │   │   │   │   │       ├── benchmark.py
    │   │   │   │   │       ├── eval_metric.py
    │   │   │   │   │       ├── get_flops.py
    │   │   │   │   │       ├── print_config.py
    │   │   │   │   │       ├── report_accuracy.py
    │   │   │   │   │       └── report_map.py
    │   │   │   │   │   ├── argparse.bash
    │   │   │   │   │   ├── bsn_proposal_generation.py
    │   │   │   │   │   ├── data
    │   │   │   │   │       ├── activitynet
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── action_name.csv
    │   │   │   │   │       │   ├── activitynet_feature_postprocessing.py
    │   │   │   │   │       │   ├── convert_proposal_format.py
    │   │   │   │   │       │   ├── download.py
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_features.sh
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── environment.yml
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.py
    │   │   │   │   │       │   ├── process_annotations.py
    │   │   │   │   │       │   └── tsn_feature_extraction.py
    │   │   │   │   │       ├── anno_txt2json.py
    │   │   │   │   │       ├── ava
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── cut_videos.sh
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── download_videos_gnu_parallel.sh
    │   │   │   │   │       │   ├── download_videos_parallel.py
    │   │   │   │   │       │   ├── download_videos_parallel.sh
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_ffmpeg.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   └── fetch_ava_proposals.sh
    │   │   │   │   │       ├── build_audio_features.py
    │   │   │   │   │       ├── build_file_list.py
    │   │   │   │   │       ├── build_rawframes.py
    │   │   │   │   │       ├── build_videos.py
    │   │   │   │   │       ├── data_check.py
    │   │   │   │   │       ├── denormalize_proposal_file.py
    │   │   │   │   │       ├── extract_audio.py
    │   │   │   │   │       ├── gym
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── download.py
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── download_ytdl.py
    │   │   │   │   │       │   ├── environment.yml
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── generate_file_list.py
    │   │   │   │   │       │   ├── trim_event.py
    │   │   │   │   │       │   └── trim_subaction.py
    │   │   │   │   │       ├── hmdb51
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   └── generate_videos_filelist.sh
    │   │   │   │   │       ├── hvu
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── download.py
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── environment.yml
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── generate_file_list.py
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   ├── generate_sub_file_list.py
    │   │   │   │   │       │   ├── generate_videos_filelist.sh
    │   │   │   │   │       │   └── parse_tag_list.py
    │   │   │   │   │       ├── jester
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── encode_videos.sh
    │   │   │   │   │       │   ├── extract_flow.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   └── generate_videos_filelist.sh
    │   │   │   │   │       ├── jhmdb
    │   │   │   │   │       │   └── README.md
    │   │   │   │   │       ├── kinetics
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── check_data.sh
    │   │   │   │   │       │   ├── download.py
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_subset.py
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── download_videos_subset.sh
    │   │   │   │   │       │   ├── environment.yml
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   ├── generate_videos_filelist.sh
    │   │   │   │   │       │   ├── rename_classnames.sh
    │   │   │   │   │       │   └── subset_list.txt
    │   │   │   │   │       ├── mimetics
    │   │   │   │   │       │   ├── build_file_list.py
    │   │   │   │   │       │   ├── check_data.sh
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_videos_subset.sh
    │   │   │   │   │       │   ├── generate_videos_filelist.sh
    │   │   │   │   │       │   └── rename_classnames.sh
    │   │   │   │   │       ├── mit
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── download_data.sh
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   ├── fix_video_filelist.py
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   └── generate_videos_filelist.sh
    │   │   │   │   │       ├── mmit
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── download_data.sh
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   └── generate_videos_filelist.sh
    │   │   │   │   │       ├── omnisource
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   └── trim_raw_video.py
    │   │   │   │   │       ├── parse_file_list.py
    │   │   │   │   │       ├── resize_video.py
    │   │   │   │   │       ├── sthv1
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── encode_videos.sh
    │   │   │   │   │       │   ├── extract_flow.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   └── generate_videos_filelist.sh
    │   │   │   │   │       ├── sthv2
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   └── generate_videos_filelist.sh
    │   │   │   │   │       ├── thumos14
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── denormalize_proposal_file.sh
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   └── fetch_tag_proposals.sh
    │   │   │   │   │       ├── ucf101
    │   │   │   │   │       │   ├── README.md
    │   │   │   │   │       │   ├── download_annotations.sh
    │   │   │   │   │       │   ├── download_videos.sh
    │   │   │   │   │       │   ├── extract_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames.sh
    │   │   │   │   │       │   ├── extract_rgb_frames_opencv.sh
    │   │   │   │   │       │   ├── generate_rawframes_filelist.sh
    │   │   │   │   │       │   └── generate_videos_filelist.sh
    │   │   │   │   │       └── ucf101_24
    │   │   │   │   │       │   └── README.md
    │   │   │   │   │   ├── dist_test.sh
    │   │   │   │   │   ├── dist_train.sh
    │   │   │   │   │   ├── flow_extraction.py
    │   │   │   │   │   ├── hypertune.py
    │   │   │   │   │   ├── publish_model.py
    │   │   │   │   │   ├── pytorch2onnx.py
    │   │   │   │   │   ├── slurm_test.sh
    │   │   │   │   │   ├── slurm_train
    │   │   │   │   │   ├── slurm_train.sh
    │   │   │   │   │   ├── test.py
    │   │   │   │   │   └── train.py
    │   │   │   ├── Spatial-Temporal-Action-Localization
    │   │   │   │   ├── .DS_Store
    │   │   │   │   ├── FINETUNE.md
    │   │   │   │   ├── README.md
    │   │   │   │   ├── akeval.sh
    │   │   │   │   ├── akfinetune.sh
    │   │   │   │   ├── alphaction
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── config
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── defaults.py
    │   │   │   │   │   │   └── paths_catalog.py
    │   │   │   │   │   ├── csrc
    │   │   │   │   │   │   ├── ROIAlign3d.h
    │   │   │   │   │   │   ├── ROIPool3d.h
    │   │   │   │   │   │   ├── SigmoidFocalLoss.h
    │   │   │   │   │   │   ├── SoftmaxFocalLoss.h
    │   │   │   │   │   │   ├── cpu
    │   │   │   │   │   │   │   └── vision.h
    │   │   │   │   │   │   ├── cuda
    │   │   │   │   │   │   │   ├── ROIAlign3d_cuda.cu
    │   │   │   │   │   │   │   ├── ROIPool3d_cuda.cu
    │   │   │   │   │   │   │   ├── SigmoidFocalLoss_cuda.cu
    │   │   │   │   │   │   │   ├── SoftmaxFocalLoss_cuda.cu
    │   │   │   │   │   │   │   └── vision.h
    │   │   │   │   │   │   └── vision.cpp
    │   │   │   │   │   ├── dataset
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── build.py
    │   │   │   │   │   │   ├── collate_batch.py
    │   │   │   │   │   │   ├── datasets
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── ava.py
    │   │   │   │   │   │   │   ├── concat_dataset.py
    │   │   │   │   │   │   │   └── evaluation
    │   │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   │   └── ava
    │   │   │   │   │   │   │   │       ├── README.md
    │   │   │   │   │   │   │   │       ├── __init__.py
    │   │   │   │   │   │   │   │       ├── ava_eval.py
    │   │   │   │   │   │   │   │       └── pascal_evaluation
    │   │   │   │   │   │   │   │           ├── __init__.py
    │   │   │   │   │   │   │   │           ├── label_map_util.py
    │   │   │   │   │   │   │   │           ├── metrics.py
    │   │   │   │   │   │   │   │           ├── np_box_list.py
    │   │   │   │   │   │   │   │           ├── np_box_list_ops.py
    │   │   │   │   │   │   │   │           ├── np_box_mask_list.py
    │   │   │   │   │   │   │   │           ├── np_box_mask_list_ops.py
    │   │   │   │   │   │   │   │           ├── np_box_ops.py
    │   │   │   │   │   │   │   │           ├── np_mask_ops.py
    │   │   │   │   │   │   │   │           ├── object_detection_evaluation.py
    │   │   │   │   │   │   │   │           ├── per_image_evaluation.py
    │   │   │   │   │   │   │   │           └── standard_fields.py
    │   │   │   │   │   │   ├── samplers
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── distributed.py
    │   │   │   │   │   │   │   ├── grouped_batch_sampler.py
    │   │   │   │   │   │   │   └── iteration_based_batch_sampler.py
    │   │   │   │   │   │   └── transforms
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── build.py
    │   │   │   │   │   │   │   ├── object_transforms.py
    │   │   │   │   │   │   │   └── video_transforms.py
    │   │   │   │   │   ├── engine
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── inference.py
    │   │   │   │   │   │   └── trainer.py
    │   │   │   │   │   ├── layers
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── batch_norm.py
    │   │   │   │   │   │   ├── roi_align_3d.py
    │   │   │   │   │   │   ├── roi_pool_3d.py
    │   │   │   │   │   │   ├── sigmoid_focal_loss.py
    │   │   │   │   │   │   └── softmax_focal_loss.py
    │   │   │   │   │   ├── modeling
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── backbone
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── backbone.py
    │   │   │   │   │   │   │   ├── i3d.py
    │   │   │   │   │   │   │   └── slowfast.py
    │   │   │   │   │   │   ├── common_blocks.py
    │   │   │   │   │   │   ├── detector
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   └── action_detector.py
    │   │   │   │   │   │   ├── nonlocal_block.py
    │   │   │   │   │   │   ├── poolers.py
    │   │   │   │   │   │   ├── registry.py
    │   │   │   │   │   │   ├── roi_heads
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── action_head
    │   │   │   │   │   │   │   │   ├── IA_structure.py
    │   │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   │   ├── action_head.py
    │   │   │   │   │   │   │   │   ├── inference.py
    │   │   │   │   │   │   │   │   ├── loss.py
    │   │   │   │   │   │   │   │   ├── metric.py
    │   │   │   │   │   │   │   │   ├── roi_action_feature_extractor.py
    │   │   │   │   │   │   │   │   └── roi_action_predictors.py
    │   │   │   │   │   │   │   └── roi_heads_3d.py
    │   │   │   │   │   │   └── utils.py
    │   │   │   │   │   ├── solver
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── build.py
    │   │   │   │   │   │   └── lr_scheduler.py
    │   │   │   │   │   ├── structures
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── bounding_box.py
    │   │   │   │   │   │   └── memory_pool.py
    │   │   │   │   │   └── utils
    │   │   │   │   │   │   ├── IA_helper.py
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── c2_model_loading.py
    │   │   │   │   │   │   ├── comm.py
    │   │   │   │   │   │   ├── logger.py
    │   │   │   │   │   │   ├── metric_logger.py
    │   │   │   │   │   │   ├── model_serialization.py
    │   │   │   │   │   │   ├── random_seed.py
    │   │   │   │   │   │   ├── registry.py
    │   │   │   │   │   │   └── video_decode.py
    │   │   │   │   ├── avaeval.sh
    │   │   │   │   ├── avalarge.sh
    │   │   │   │   ├── avalarge2.sh
    │   │   │   │   ├── avatest.sh
    │   │   │   │   ├── avatrain.sh
    │   │   │   │   ├── data
    │   │   │   │   │   ├── RandomAugmentBBox.py
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── augmentations.py
    │   │   │   │   │   ├── ava.py
    │   │   │   │   │   ├── ava_eval.py
    │   │   │   │   │   ├── movie_size.npy
    │   │   │   │   │   ├── rand_aug.py
    │   │   │   │   │   ├── spatial_transforms.py
    │   │   │   │   │   └── transforms.py
    │   │   │   │   ├── datasets.py
    │   │   │   │   ├── engine_for_finetuning.py
    │   │   │   │   ├── engine_for_pretraining.py
    │   │   │   │   ├── figs
    │   │   │   │   │   └── videomae.png
    │   │   │   │   ├── functional.py
    │   │   │   │   ├── kinetics.py
    │   │   │   │   ├── masking_generator.py
    │   │   │   │   ├── modeling_finetune.py
    │   │   │   │   ├── modeling_pretrain.py
    │   │   │   │   ├── optim_factory.py
    │   │   │   │   ├── rand_augment.py
    │   │   │   │   ├── random_erasing.py
    │   │   │   │   ├── run_class_finetuning.py
    │   │   │   │   ├── run_mae_pretraining.py
    │   │   │   │   ├── run_videomae_vis.py
    │   │   │   │   ├── start.sh
    │   │   │   │   ├── train.sh
    │   │   │   │   ├── transforms.py
    │   │   │   │   ├── utils.py
    │   │   │   │   ├── v100_config.json
    │   │   │   │   ├── video_transforms.py
    │   │   │   │   ├── vis.sh
    │   │   │   │   └── volume_transforms.py
    │   │   │   ├── Temporal-Action-Localization
    │   │   │   │   ├── INSTALL.md
    │   │   │   │   ├── LICENSE
    │   │   │   │   ├── README.md
    │   │   │   │   ├── anet_run.sh
    │   │   │   │   ├── configs
    │   │   │   │   │   ├── anet.yaml
    │   │   │   │   │   ├── fineaction.yaml
    │   │   │   │   │   ├── hacs.yaml
    │   │   │   │   │   └── thumos.yaml
    │   │   │   │   ├── fa_run.sh
    │   │   │   │   ├── hacs_run.sh
    │   │   │   │   ├── libs
    │   │   │   │   │   ├── core
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   └── config.py
    │   │   │   │   │   ├── datasets
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── anet.py
    │   │   │   │   │   │   ├── data_utils.py
    │   │   │   │   │   │   ├── datasets.py
    │   │   │   │   │   │   └── thumos14.py
    │   │   │   │   │   ├── modeling
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── backbones.py
    │   │   │   │   │   │   ├── blocks.py
    │   │   │   │   │   │   ├── loc_generators.py
    │   │   │   │   │   │   ├── losses.py
    │   │   │   │   │   │   ├── meta_archs.py
    │   │   │   │   │   │   ├── models.py
    │   │   │   │   │   │   ├── necks.py
    │   │   │   │   │   │   └── weight_init.py
    │   │   │   │   │   └── utils
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── csrc
    │   │   │   │   │   │       └── nms_cpu.cpp
    │   │   │   │   │   │   ├── lr_schedulers.py
    │   │   │   │   │   │   ├── metrics.py
    │   │   │   │   │   │   ├── nms.py
    │   │   │   │   │   │   ├── postprocessing.py
    │   │   │   │   │   │   ├── setup.py
    │   │   │   │   │   │   └── train_utils.py
    │   │   │   │   ├── th14_run.sh
    │   │   │   │   ├── tools
    │   │   │   │   │   └── run_all_exps.sh
    │   │   │   │   └── train_eval.py
    │   │   │   ├── Video-Text-Retrieval
    │   │   │   │   ├── LICENSE
    │   │   │   │   ├── README.md
    │   │   │   │   ├── dataloaders
    │   │   │   │   │   ├── data_dataloaders.py
    │   │   │   │   │   ├── dataloader_activitynet_retrieval.py
    │   │   │   │   │   ├── dataloader_didemo_retrieval.py
    │   │   │   │   │   ├── dataloader_lsmdc_retrieval.py
    │   │   │   │   │   ├── dataloader_msrvtt_retrieval.py
    │   │   │   │   │   ├── dataloader_msvd_retrieval.py
    │   │   │   │   │   ├── dataloader_vatex_retrieval.py
    │   │   │   │   │   └── rawvideo_util.py
    │   │   │   │   ├── eval_finetuned_scripts
    │   │   │   │   │   ├── run_kc4_activitynet_infer.sh
    │   │   │   │   │   ├── run_kc4_didemo_infer.sh
    │   │   │   │   │   ├── run_kc4_lsmdc_infer.sh
    │   │   │   │   │   ├── run_kc4_msrvtt_infer.sh
    │   │   │   │   │   ├── run_kc4_msvd_infer.sh
    │   │   │   │   │   └── run_kc4_vatex_infer.sh
    │   │   │   │   ├── finetune_scripts
    │   │   │   │   │   ├── ret_activity.sh
    │   │   │   │   │   ├── ret_didemo.sh
    │   │   │   │   │   ├── ret_lsmdc.sh
    │   │   │   │   │   ├── ret_msrvtt.sh
    │   │   │   │   │   ├── ret_msvd.sh
    │   │   │   │   │   ├── ret_vatex.sh
    │   │   │   │   │   ├── train_activitynet.sh
    │   │   │   │   │   ├── train_didemo.sh
    │   │   │   │   │   ├── train_lsmdc.sh
    │   │   │   │   │   ├── train_msrvtt.sh
    │   │   │   │   │   ├── train_msvd.sh
    │   │   │   │   │   └── train_vatex.sh
    │   │   │   │   ├── inference.py
    │   │   │   │   ├── main_task_retrieval.py
    │   │   │   │   ├── metrics.py
    │   │   │   │   ├── modules
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   ├── clip_evl
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   │   ├── clip.py
    │   │   │   │   │   │   ├── dog.png
    │   │   │   │   │   │   ├── evl_utils
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── attention.py
    │   │   │   │   │   │   │   ├── attention_bias.py
    │   │   │   │   │   │   │   ├── attention_module.py
    │   │   │   │   │   │   │   ├── attention_module_bias.py
    │   │   │   │   │   │   │   ├── clip_vit.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d_dw_bias.py
    │   │   │   │   │   │   │   ├── clip_vit_fusion.py
    │   │   │   │   │   │   │   ├── clip_vit_only_global.py
    │   │   │   │   │   │   │   ├── evl_module.py
    │   │   │   │   │   │   │   └── evl_module_uniformer_diff_conv_balance.py
    │   │   │   │   │   │   ├── model.py
    │   │   │   │   │   │   ├── model_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze_diff.py
    │   │   │   │   │   │   ├── model_no_freeze_only_global.py
    │   │   │   │   │   │   ├── model_no_freeze_uniformer.py
    │   │   │   │   │   │   └── simple_tokenizer.py
    │   │   │   │   │   ├── clip_kc
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   │   ├── clip.py
    │   │   │   │   │   │   ├── evl_utils
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── attention.py
    │   │   │   │   │   │   │   ├── attention_bias.py
    │   │   │   │   │   │   │   ├── attention_module.py
    │   │   │   │   │   │   │   ├── attention_module_bias.py
    │   │   │   │   │   │   │   ├── clip_vit.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d_dw_bias.py
    │   │   │   │   │   │   │   ├── evl_module.py
    │   │   │   │   │   │   │   └── evl_module_uniformer_diff_conv_balance.py
    │   │   │   │   │   │   ├── model.py
    │   │   │   │   │   │   ├── model_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze_diff.py
    │   │   │   │   │   │   └── simple_tokenizer.py
    │   │   │   │   │   ├── clip_kc2
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   │   ├── clip.py
    │   │   │   │   │   │   ├── clip_decoders.py
    │   │   │   │   │   │   ├── coca.py
    │   │   │   │   │   │   ├── evl_utils
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── attention.py
    │   │   │   │   │   │   │   ├── attention_bias.py
    │   │   │   │   │   │   │   ├── attention_module.py
    │   │   │   │   │   │   │   ├── attention_module_bias.py
    │   │   │   │   │   │   │   ├── clip_vit.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d_dw_bias.py
    │   │   │   │   │   │   │   ├── evl_module.py
    │   │   │   │   │   │   │   └── evl_module_uniformer_diff_conv_balance.py
    │   │   │   │   │   │   ├── model.py
    │   │   │   │   │   │   ├── model_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze_diff.py
    │   │   │   │   │   │   └── simple_tokenizer.py
    │   │   │   │   │   ├── clip_kc_new
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   │   ├── clip.py
    │   │   │   │   │   │   ├── evl_utils
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── attention.py
    │   │   │   │   │   │   │   ├── attention_bias.py
    │   │   │   │   │   │   │   ├── attention_module.py
    │   │   │   │   │   │   │   ├── attention_module_bias.py
    │   │   │   │   │   │   │   ├── clip_vit.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d.py
    │   │   │   │   │   │   │   ├── clip_vit_2plus1d_dw_bias.py
    │   │   │   │   │   │   │   ├── clip_vit_fusion.py
    │   │   │   │   │   │   │   ├── clip_vit_only_global.py
    │   │   │   │   │   │   │   ├── evl_module.py
    │   │   │   │   │   │   │   └── evl_module_uniformer_diff_conv_balance.py
    │   │   │   │   │   │   ├── model.py
    │   │   │   │   │   │   ├── model_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze.py
    │   │   │   │   │   │   ├── model_no_freeze_diff.py
    │   │   │   │   │   │   ├── model_no_freeze_only_global.py
    │   │   │   │   │   │   ├── model_no_freeze_uniformer.py
    │   │   │   │   │   │   └── simple_tokenizer.py
    │   │   │   │   │   ├── cross-base
    │   │   │   │   │   │   └── cross_config.json
    │   │   │   │   │   ├── file_utils.py
    │   │   │   │   │   ├── modeling.py
    │   │   │   │   │   ├── modeling_backup.py
    │   │   │   │   │   ├── modeling_raw.py
    │   │   │   │   │   ├── module_clip.py
    │   │   │   │   │   ├── module_cross.py
    │   │   │   │   │   ├── optimization.py
    │   │   │   │   │   ├── tokenization_clip.py
    │   │   │   │   │   ├── until_config.py
    │   │   │   │   │   └── until_module.py
    │   │   │   │   ├── preprocess
    │   │   │   │   │   └── compress_video.py
    │   │   │   │   ├── util.py
    │   │   │   │   └── zeroshot_scripts
    │   │   │   │   │   ├── eval_activitynet.sh
    │   │   │   │   │   ├── eval_didemo.sh
    │   │   │   │   │   ├── eval_lsmdc.sh
    │   │   │   │   │   ├── eval_msrvtt.sh
    │   │   │   │   │   ├── eval_msvd.sh
    │   │   │   │   │   └── eval_vatex.sh
    │   │   │   ├── Visual-Language-Navigation
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── README.md
    │   │   │   │   ├── eval_base.bash
    │   │   │   │   ├── eval_large.bash
    │   │   │   │   ├── exp
    │   │   │   │   │   ├── cma_r2r.yaml
    │   │   │   │   │   ├── cma_rxr_en.yaml
    │   │   │   │   │   ├── cma_rxr_hi.yaml
    │   │   │   │   │   ├── cma_rxr_te.yaml
    │   │   │   │   │   ├── vlnbert_r2r.yaml
    │   │   │   │   │   ├── vlnbert_r2r_da.yaml
    │   │   │   │   │   └── vlnbert_rxr_en.yaml
    │   │   │   │   ├── habitat_extensions
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── config
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── default.py
    │   │   │   │   │   │   ├── r2r_vlnce.yaml
    │   │   │   │   │   │   ├── rxr_vlnce_en.yaml
    │   │   │   │   │   │   ├── rxr_vlnce_hi.yaml
    │   │   │   │   │   │   └── rxr_vlnce_te.yaml
    │   │   │   │   │   ├── habitat_simulator.py
    │   │   │   │   │   ├── maps.py
    │   │   │   │   │   ├── measures.py
    │   │   │   │   │   ├── nav.py
    │   │   │   │   │   ├── obs_transformers.py
    │   │   │   │   │   ├── sensors.py
    │   │   │   │   │   ├── shortest_path_follower.py
    │   │   │   │   │   ├── task.py
    │   │   │   │   │   └── utils.py
    │   │   │   │   ├── iter_train.yaml
    │   │   │   │   ├── requirements.txt
    │   │   │   │   ├── run.py
    │   │   │   │   ├── run
    │   │   │   │   │   ├── batch_run.sh
    │   │   │   │   │   ├── cma_r2r.bash
    │   │   │   │   │   ├── cma_r2r_eval.bash
    │   │   │   │   │   ├── cma_rxr.bash
    │   │   │   │   │   ├── cma_rxr_eval.bash
    │   │   │   │   │   ├── vlnbert_r2r.bash
    │   │   │   │   │   ├── vlnbert_r2r_da.bash
    │   │   │   │   │   ├── vlnbert_r2r_da_eval.bash
    │   │   │   │   │   ├── vlnbert_r2r_eval.bash
    │   │   │   │   │   ├── vlnbert_rxr.bash
    │   │   │   │   │   └── vlnbert_rxr_eval.bash
    │   │   │   │   ├── train.bash
    │   │   │   │   └── vlnce_baselines
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── common
    │   │   │   │   │       ├── aux_losses.py
    │   │   │   │   │       ├── base_il_trainer.py
    │   │   │   │   │       ├── env_utils.py
    │   │   │   │   │       ├── environments.py
    │   │   │   │   │       ├── recollection_dataset.py
    │   │   │   │   │       └── utils.py
    │   │   │   │   │   ├── config
    │   │   │   │   │       ├── __init__.py
    │   │   │   │   │       ├── default.py
    │   │   │   │   │       ├── nonlearning.yaml
    │   │   │   │   │       └── r2r_configs
    │   │   │   │   │       │   ├── cma.yaml
    │   │   │   │   │       │   ├── cma_aug.yaml
    │   │   │   │   │       │   ├── cma_aug_tune.yaml
    │   │   │   │   │       │   ├── cma_da.yaml
    │   │   │   │   │       │   ├── cma_da_aug_tune.yaml
    │   │   │   │   │       │   ├── cma_pm.yaml
    │   │   │   │   │       │   ├── cma_pm_aug.yaml
    │   │   │   │   │       │   ├── cma_pm_aug_tune.yaml
    │   │   │   │   │       │   ├── cma_pm_da.yaml
    │   │   │   │   │       │   ├── cma_pm_da_aug_tune.yaml
    │   │   │   │   │       │   ├── cma_sf.yaml
    │   │   │   │   │       │   ├── cma_ss.yaml
    │   │   │   │   │       │   ├── seq2seq.yaml
    │   │   │   │   │       │   ├── seq2seq_aug.yaml
    │   │   │   │   │       │   ├── seq2seq_aug_tune.yaml
    │   │   │   │   │       │   ├── seq2seq_da.yaml
    │   │   │   │   │       │   ├── seq2seq_pm.yaml
    │   │   │   │   │       │   ├── seq2seq_pm_aug.yaml
    │   │   │   │   │       │   ├── seq2seq_pm_da_aug_tune.yaml
    │   │   │   │   │       │   └── test_set_inference.yaml
    │   │   │   │   │   ├── models
    │   │   │   │   │       ├── Policy_ViewSelection_CMA.py
    │   │   │   │   │       ├── Policy_ViewSelection_HAMT.py
    │   │   │   │   │       ├── __init__.py
    │   │   │   │   │       ├── encoders
    │   │   │   │   │       │   ├── image_encoders.py
    │   │   │   │   │       │   ├── instruction_encoder.py
    │   │   │   │   │       │   └── video_encoder.py
    │   │   │   │   │       ├── hamt
    │   │   │   │   │       │   ├── base_il_trainer.py
    │   │   │   │   │       │   ├── vilmodel_cmt.py
    │   │   │   │   │       │   └── vlnbert_init.py
    │   │   │   │   │       ├── policy.py
    │   │   │   │   │       ├── utils.py
    │   │   │   │   │       ├── videomae
    │   │   │   │   │       │   ├── functional.py
    │   │   │   │   │       │   ├── get_args.py
    │   │   │   │   │       │   ├── modeling_finetune.py
    │   │   │   │   │       │   ├── rand_augment.py
    │   │   │   │   │       │   ├── random_erasing.py
    │   │   │   │   │       │   ├── utils.py
    │   │   │   │   │       │   ├── video_transforms.py
    │   │   │   │   │       │   └── volume_transforms.py
    │   │   │   │   │       └── vlnbert
    │   │   │   │   │       │   ├── vlnbert_PREVALENT.py
    │   │   │   │   │       │   └── vlnbert_init.py
    │   │   │   │   │   ├── trainer_HAMT.py
    │   │   │   │   │   ├── utils.py
    │   │   │   │   │   └── waypoint_pred
    │   │   │   │   │       ├── TRM_net.py
    │   │   │   │   │       ├── transformer
    │   │   │   │   │           ├── pytorch_transformer
    │   │   │   │   │           │   ├── file_utils.py
    │   │   │   │   │           │   ├── modeling_bert.py
    │   │   │   │   │           │   └── modeling_utils.py
    │   │   │   │   │           └── waypoint_bert.py
    │   │   │   │   │       └── utils.py
    │   │   │   └── multi-modalities-downstream
    │   │   │   │   ├── CoTrain
    │   │   │   │       ├── __init__.py
    │   │   │   │       ├── config.py
    │   │   │   │       ├── datamodules
    │   │   │   │       │   ├── __init__.py
    │   │   │   │       │   ├── image
    │   │   │   │       │   │   ├── __init__.py
    │   │   │   │       │   │   ├── activitynet_datamodule.py
    │   │   │   │       │   │   ├── cc12m_datamodule.py
    │   │   │   │       │   │   ├── cc3m_datamodule.py
    │   │   │   │       │   │   ├── coco_caption_karpathy_datamodule.py
    │   │   │   │       │   │   ├── conceptual_caption_datamodule.py
    │   │   │   │       │   │   ├── datamodule_base.py
    │   │   │   │       │   │   ├── f30k_caption_karpathy_datamodule.py
    │   │   │   │       │   │   ├── laion400m_datamodule.py
    │   │   │   │       │   │   ├── mix100m_datamodule.py
    │   │   │   │       │   │   ├── nlvr2_datamodule.py
    │   │   │   │       │   │   ├── sbu_datamodule.py
    │   │   │   │       │   │   ├── vcr_datamodule.py
    │   │   │   │       │   │   ├── vg_caption_datamodule.py
    │   │   │   │       │   │   ├── vqav2_datamodule.py
    │   │   │   │       │   │   └── yfcc15m_datamodule.py
    │   │   │   │       │   └── video
    │   │   │   │       │   │   ├── __init__.py
    │   │   │   │       │   │   ├── didemo_datamodule.py
    │   │   │   │       │   │   ├── ego4d_choice_datamodule.py
    │   │   │   │       │   │   ├── ego4d_datamodule.py
    │   │   │   │       │   │   ├── hmdb51_datamodule.py
    │   │   │   │       │   │   ├── howto100m_datamodule.py
    │   │   │   │       │   │   ├── k400_datamodule.py
    │   │   │   │       │   │   ├── k400_video_datamodule.py
    │   │   │   │       │   │   ├── lsmdc_choice_datamodule.py
    │   │   │   │       │   │   ├── lsmdc_datamodule.py
    │   │   │   │       │   │   ├── msrvtt_choice_datamodule.py
    │   │   │   │       │   │   ├── msrvtt_datamodule.py
    │   │   │   │       │   │   ├── msrvttqa_datamodule.py
    │   │   │   │       │   │   ├── msvd_datamodule.py
    │   │   │   │       │   │   ├── msvdqa_datamodule.py
    │   │   │   │       │   │   ├── multitask_datamodule.py
    │   │   │   │       │   │   ├── tgif_datamodule.py
    │   │   │   │       │   │   ├── tgifqa_datamodule.py
    │   │   │   │       │   │   ├── tvqa_datamodule.py
    │   │   │   │       │   │   ├── ucf101_datamodule.py
    │   │   │   │       │   │   ├── webvid10m_datamodule.py
    │   │   │   │       │   │   ├── webvid_datamodule.py
    │   │   │   │       │   │   ├── youtube_datamodule.py
    │   │   │   │       │   │   └── yttemporal_datamodule.py
    │   │   │   │       ├── datasets
    │   │   │   │       │   ├── __init__.py
    │   │   │   │       │   ├── image
    │   │   │   │       │   │   ├── __init__.py
    │   │   │   │       │   │   ├── base_dataset.py
    │   │   │   │       │   │   ├── cc12m.py
    │   │   │   │       │   │   ├── cc3m.py
    │   │   │   │       │   │   ├── coco_caption_karpathy_dataset.py
    │   │   │   │       │   │   ├── conceptual_caption_dataset.py
    │   │   │   │       │   │   ├── f30k_caption_karpathy_dataset.py
    │   │   │   │       │   │   ├── laion400m.py
    │   │   │   │       │   │   ├── mix100m.py
    │   │   │   │       │   │   ├── nlvr2_dataset.py
    │   │   │   │       │   │   ├── sbu_caption_dataset.py
    │   │   │   │       │   │   ├── vcr.py
    │   │   │   │       │   │   ├── vg_caption_dataset.py
    │   │   │   │       │   │   ├── vqav2_dataset.py
    │   │   │   │       │   │   └── yfcc15m.py
    │   │   │   │       │   └── video
    │   │   │   │       │   │   ├── __init__.py
    │   │   │   │       │   │   ├── activitynet.py
    │   │   │   │       │   │   ├── activitynetqa.py
    │   │   │   │       │   │   ├── didemo.py
    │   │   │   │       │   │   ├── ego4d.py
    │   │   │   │       │   │   ├── ego4d_choice.py
    │   │   │   │       │   │   ├── ego4d_v2.py
    │   │   │   │       │   │   ├── hmdb51.py
    │   │   │   │       │   │   ├── howto100m.py
    │   │   │   │       │   │   ├── k400.py
    │   │   │   │       │   │   ├── k400_video.py
    │   │   │   │       │   │   ├── lsmdc_choice.py
    │   │   │   │       │   │   ├── lsmdc_dataset.py
    │   │   │   │       │   │   ├── msrvtt.py
    │   │   │   │       │   │   ├── msrvtt_choice.py
    │   │   │   │       │   │   ├── msrvttqa.py
    │   │   │   │       │   │   ├── msvd.py
    │   │   │   │       │   │   ├── msvdqa.py
    │   │   │   │       │   │   ├── pack_meta.py
    │   │   │   │       │   │   ├── tgif.py
    │   │   │   │       │   │   ├── tgifqa.py
    │   │   │   │       │   │   ├── tvqa.py
    │   │   │   │       │   │   ├── tvqaplus.py
    │   │   │   │       │   │   ├── ucf101.py
    │   │   │   │       │   │   ├── video_base_dataset.py
    │   │   │   │       │   │   ├── webvid.py
    │   │   │   │       │   │   ├── webvid10m.py
    │   │   │   │       │   │   ├── webvid_old.py
    │   │   │   │       │   │   ├── youtube.py
    │   │   │   │       │   │   └── yttemporal.py
    │   │   │   │       ├── gadgets
    │   │   │   │       │   ├── __init__.py
    │   │   │   │       │   └── my_metrics.py
    │   │   │   │       ├── modules
    │   │   │   │       │   ├── InternVideo
    │   │   │   │       │   │   ├── __init__.py
    │   │   │   │       │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │       │   │   ├── clip_utils
    │   │   │   │       │   │   │   ├── __init__.py
    │   │   │   │       │   │   │   ├── clip.py
    │   │   │   │       │   │   │   ├── model.py
    │   │   │   │       │   │   │   ├── simple_tokenizer.py
    │   │   │   │       │   │   │   └── utils
    │   │   │   │       │   │   │   │   ├── __init__.py
    │   │   │   │       │   │   │   │   ├── attention.py
    │   │   │   │       │   │   │   │   ├── attention_module.py
    │   │   │   │       │   │   │   │   ├── attention_module_bias.py
    │   │   │   │       │   │   │   │   └── clip_vit_only_global.py
    │   │   │   │       │   │   ├── internvideo.py
    │   │   │   │       │   │   ├── simple_tokenizer.py
    │   │   │   │       │   │   └── video_transform.py
    │   │   │   │       │   ├── __init__.py
    │   │   │   │       │   ├── base_vision_transformer.py
    │   │   │   │       │   ├── clip_decoders.py
    │   │   │   │       │   ├── clip_module.py
    │   │   │   │       │   ├── clip_param_keys.py
    │   │   │   │       │   ├── coca.py
    │   │   │   │       │   ├── cotrain_module.py
    │   │   │   │       │   ├── cotrain_utils.py
    │   │   │   │       │   ├── dist_utils.py
    │   │   │   │       │   ├── forzen_param.py
    │   │   │   │       │   ├── heads.py
    │   │   │   │       │   ├── objectives.py
    │   │   │   │       │   ├── retrieval_metrics.py
    │   │   │   │       │   ├── temporal_roll.py
    │   │   │   │       │   └── text_prompt.py
    │   │   │   │       ├── requirements.txt
    │   │   │   │       └── transforms
    │   │   │   │       │   ├── __init__.py
    │   │   │   │       │   ├── image
    │   │   │   │       │       ├── __init__.py
    │   │   │   │       │       ├── functional.py
    │   │   │   │       │       ├── imageaug.py
    │   │   │   │       │       ├── pixelbert.py
    │   │   │   │       │       ├── randaug.py
    │   │   │   │       │       └── utils.py
    │   │   │   │       │   └── video
    │   │   │   │       │       ├── __init__.py
    │   │   │   │       │       ├── video_transform.py
    │   │   │   │       │       └── videoaug.py
    │   │   │   │   ├── README.md
    │   │   │   │   ├── requirement.txt
    │   │   │   │   ├── run.py
    │   │   │   │   └── scripts
    │   │   │   │       ├── finetune_msrvttqa.sh
    │   │   │   │       ├── finetune_msvdqa.sh
    │   │   │   │       ├── finetune_tgifqa.sh
    │   │   │   │       ├── zs_choice_lsmdc.sh
    │   │   │   │       ├── zs_choice_msrvtt.sh
    │   │   │   │       └── zs_classify.sh
    │   │   ├── Media
    │   │   │   └── download.png
    │   │   ├── Pretrain
    │   │   │   ├── Multi-Modalities-Pretraining
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── InternVideo
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   ├── clip_utils
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── clip.py
    │   │   │   │   │   │   ├── model.py
    │   │   │   │   │   │   ├── simple_tokenizer.py
    │   │   │   │   │   │   └── utils
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── attention.py
    │   │   │   │   │   │   │   ├── attention_module.py
    │   │   │   │   │   │   │   ├── attention_module_bias.py
    │   │   │   │   │   │   │   └── clip_vit_only_global.py
    │   │   │   │   │   ├── internvideo.py
    │   │   │   │   │   ├── simple_tokenizer.py
    │   │   │   │   │   └── video_transform.py
    │   │   │   │   ├── README.md
    │   │   │   │   ├── data
    │   │   │   │   │   └── demo.mp4
    │   │   │   │   └── demo.py
    │   │   │   ├── UniFormerV2
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── DATASET.md
    │   │   │   │   ├── INSTALL.md
    │   │   │   │   ├── INSTRUCTIONS.md
    │   │   │   │   ├── LICENSE
    │   │   │   │   ├── MODEL_ZOO.md
    │   │   │   │   ├── README.md
    │   │   │   │   ├── exp
    │   │   │   │   │   ├── anet
    │   │   │   │   │   │   ├── anet_l14_16x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── anet_l14_32x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   ├── hacs
    │   │   │   │   │   │   ├── hacs_l14_16x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── hacs_l14_32x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   ├── k400
    │   │   │   │   │   │   ├── frozen_k400+k710_l14_f32x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── frozen_k400+k710_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── frozen_k400_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k400+k710_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k400+k710_l14_f16x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k400+k710_l14_f32x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k400+k710_l14_f32x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k400+k710_l14_f64x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k400+k710_l14_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── k400_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   ├── run.sh
    │   │   │   │   │   │   │   └── test.sh
    │   │   │   │   │   ├── k600
    │   │   │   │   │   │   ├── frozen_k600+k710_l14_f32x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── frozen_k600+k710_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── frozen_k600_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k600+k710_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k600+k710_l14_f16x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k600+k710_l14_f32x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k600+k710_l14_f32x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k600+k710_l14_f64x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k600+k710_l14_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── k600_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   ├── run.sh
    │   │   │   │   │   │   │   └── test.sh
    │   │   │   │   │   ├── k700
    │   │   │   │   │   │   ├── frozen_k700+k710_l14_f32x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── frozen_k700+k710_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── frozen_k700_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k700+k710_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k700+k710_l14_f16x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k700+k710_l14_f32x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k700+k710_l14_f32x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k700+k710_l14_f64x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k700+k710_l14_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── k700_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   ├── run.sh
    │   │   │   │   │   │   │   └── test.sh
    │   │   │   │   │   ├── k710
    │   │   │   │   │   │   ├── frozen_k710_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k710_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── k710_l14_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── k710_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   ├── mit
    │   │   │   │   │   │   ├── mit_b16_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── mit_l14_f8x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── mit_l14_f8x336
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   ├── sthv1
    │   │   │   │   │   │   ├── ssv1_b16_f16x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── ssv1_b16_f32x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   ├── ssv1_l14_f16x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   │   └── ssv1_l14_f32x224
    │   │   │   │   │   │   │   ├── config.yaml
    │   │   │   │   │   │   │   └── run.sh
    │   │   │   │   │   └── sthv2
    │   │   │   │   │   │   ├── ssv2_b16_f16x224
    │   │   │   │   │   │       ├── config.yaml
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   │   │   ├── ssv2_b16_f32x224
    │   │   │   │   │   │       ├── config.yaml
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   │   │   ├── ssv2_l14_f16x224
    │   │   │   │   │   │       ├── config.yaml
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   │   │   └── ssv2_l14_f32x224
    │   │   │   │   │   │       ├── config.yaml
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   ├── extract_clip
    │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   ├── clip.py
    │   │   │   │   │   ├── extract.ipynb
    │   │   │   │   │   ├── model.py
    │   │   │   │   │   └── simple_tokenizer.py
    │   │   │   │   ├── img
    │   │   │   │   │   └── framework.png
    │   │   │   │   ├── linter.sh
    │   │   │   │   ├── setup.cfg
    │   │   │   │   ├── setup.py
    │   │   │   │   ├── slowfast
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── config
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── custom_config.py
    │   │   │   │   │   │   └── defaults.py
    │   │   │   │   │   ├── datasets
    │   │   │   │   │   │   ├── DATASET.md
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── anet.py
    │   │   │   │   │   │   ├── ava_dataset.py
    │   │   │   │   │   │   ├── ava_helper.py
    │   │   │   │   │   │   ├── build.py
    │   │   │   │   │   │   ├── charades.py
    │   │   │   │   │   │   ├── cv2_transform.py
    │   │   │   │   │   │   ├── decoder.py
    │   │   │   │   │   │   ├── imagenet.py
    │   │   │   │   │   │   ├── kinetics.py
    │   │   │   │   │   │   ├── kinetics_sparse.py
    │   │   │   │   │   │   ├── loader.py
    │   │   │   │   │   │   ├── mit.py
    │   │   │   │   │   │   ├── mixup.py
    │   │   │   │   │   │   ├── multigrid_helper.py
    │   │   │   │   │   │   ├── ptv_datasets.py
    │   │   │   │   │   │   ├── rand_augment.py
    │   │   │   │   │   │   ├── random_erasing.py
    │   │   │   │   │   │   ├── sth.py
    │   │   │   │   │   │   ├── transform.py
    │   │   │   │   │   │   ├── utils.py
    │   │   │   │   │   │   └── video_container.py
    │   │   │   │   │   ├── models
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── attention.py
    │   │   │   │   │   │   ├── batchnorm_helper.py
    │   │   │   │   │   │   ├── build.py
    │   │   │   │   │   │   ├── common.py
    │   │   │   │   │   │   ├── custom_video_model_builder.py
    │   │   │   │   │   │   ├── head_helper.py
    │   │   │   │   │   │   ├── helpers.py
    │   │   │   │   │   │   ├── losses.py
    │   │   │   │   │   │   ├── nonlocal_helper.py
    │   │   │   │   │   │   ├── operators.py
    │   │   │   │   │   │   ├── optimizer.py
    │   │   │   │   │   │   ├── optimizer_backbone.py
    │   │   │   │   │   │   ├── optimizer_backbone_special.py
    │   │   │   │   │   │   ├── ptv_model_builder.py
    │   │   │   │   │   │   ├── resnet_helper.py
    │   │   │   │   │   │   ├── stem_helper.py
    │   │   │   │   │   │   ├── uniformer.py
    │   │   │   │   │   │   ├── uniformerv2.py
    │   │   │   │   │   │   ├── uniformerv2_model.py
    │   │   │   │   │   │   ├── utils.py
    │   │   │   │   │   │   └── video_model_builder.py
    │   │   │   │   │   ├── utils
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── ava_eval_helper.py
    │   │   │   │   │   │   ├── ava_evaluation
    │   │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── ava_action_list_v2.1_for_activitynet_2018.pbtxt.txt
    │   │   │   │   │   │   │   ├── label_map_util.py
    │   │   │   │   │   │   │   ├── metrics.py
    │   │   │   │   │   │   │   ├── np_box_list.py
    │   │   │   │   │   │   │   ├── np_box_list_ops.py
    │   │   │   │   │   │   │   ├── np_box_mask_list.py
    │   │   │   │   │   │   │   ├── np_box_mask_list_ops.py
    │   │   │   │   │   │   │   ├── np_box_ops.py
    │   │   │   │   │   │   │   ├── np_mask_ops.py
    │   │   │   │   │   │   │   ├── object_detection_evaluation.py
    │   │   │   │   │   │   │   ├── per_image_evaluation.py
    │   │   │   │   │   │   │   └── standard_fields.py
    │   │   │   │   │   │   ├── benchmark.py
    │   │   │   │   │   │   ├── bn_helper.py
    │   │   │   │   │   │   ├── c2_model_loading.py
    │   │   │   │   │   │   ├── distributed.py
    │   │   │   │   │   │   ├── ema.py
    │   │   │   │   │   │   ├── env.py
    │   │   │   │   │   │   ├── logging.py
    │   │   │   │   │   │   ├── lr_policy.py
    │   │   │   │   │   │   ├── meters.py
    │   │   │   │   │   │   ├── meters_co.py
    │   │   │   │   │   │   ├── metrics.py
    │   │   │   │   │   │   ├── misc.py
    │   │   │   │   │   │   ├── multigrid.py
    │   │   │   │   │   │   ├── multiprocessing.py
    │   │   │   │   │   │   ├── parser.py
    │   │   │   │   │   │   ├── parser_co.py
    │   │   │   │   │   │   └── weight_init_helper.py
    │   │   │   │   │   └── visualization
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── async_predictor.py
    │   │   │   │   │   │   ├── ava_demo_precomputed_boxes.py
    │   │   │   │   │   │   ├── demo_loader.py
    │   │   │   │   │   │   ├── gradcam_utils.py
    │   │   │   │   │   │   ├── prediction_vis.py
    │   │   │   │   │   │   ├── predictor.py
    │   │   │   │   │   │   ├── tensorboard_vis.py
    │   │   │   │   │   │   ├── utils.py
    │   │   │   │   │   │   └── video_visualizer.py
    │   │   │   │   └── tools
    │   │   │   │   │   ├── benchmark.py
    │   │   │   │   │   ├── demo_net.py
    │   │   │   │   │   ├── run_net.py
    │   │   │   │   │   ├── run_net_multi_node.py
    │   │   │   │   │   ├── test_net.py
    │   │   │   │   │   ├── train_net.py
    │   │   │   │   │   └── visualization.py
    │   │   │   ├── ViCLIP
    │   │   │   │   ├── README.md
    │   │   │   │   ├── configs
    │   │   │   │   │   ├── beit-base-patch16-224-pt22k-ft22k.json
    │   │   │   │   │   ├── config_bert.json
    │   │   │   │   │   ├── config_bert_large.json
    │   │   │   │   │   ├── data.py
    │   │   │   │   │   ├── model.py
    │   │   │   │   │   ├── pretrain.py
    │   │   │   │   │   ├── qa.py
    │   │   │   │   │   ├── qa_anet.py
    │   │   │   │   │   ├── qa_msrvtt.py
    │   │   │   │   │   ├── ret_anet.py
    │   │   │   │   │   ├── ret_coco.py
    │   │   │   │   │   ├── ret_didemo.py
    │   │   │   │   │   ├── ret_flickr.py
    │   │   │   │   │   ├── ret_msrvtt.py
    │   │   │   │   │   ├── ret_msrvtt_9k.py
    │   │   │   │   │   ├── ret_msrvtt_mc.py
    │   │   │   │   │   ├── ret_ssv2_label.py
    │   │   │   │   │   ├── ret_ssv2_template.py
    │   │   │   │   │   └── tvqa.py
    │   │   │   │   ├── dataset
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── base_dataset.py
    │   │   │   │   │   ├── caption_dataset.py
    │   │   │   │   │   ├── dataloader.py
    │   │   │   │   │   ├── qa_dataset.py
    │   │   │   │   │   ├── serialize.py
    │   │   │   │   │   ├── sqlite_dataset.py
    │   │   │   │   │   ├── text_prompt.py
    │   │   │   │   │   ├── utils.py
    │   │   │   │   │   └── video_utils.py
    │   │   │   │   ├── exp
    │   │   │   │   │   └── exp_pretrain_ViCLIP
    │   │   │   │   │   │   ├── our230522_10m_webvid_10m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       └── run_unmask.sh
    │   │   │   │   │   │   ├── our_0602_200m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       └── run_freeze_unmask.sh
    │   │   │   │   │   │   ├── our_0602_200m_10m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       └── run_unmask.sh
    │   │   │   │   │   │   ├── our_0613_10m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       └── run_freeze_unmask.sh
    │   │   │   │   │   │   ├── our_0613_filtered
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   │   │   ├── our_0613_filtered_30p_10m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       ├── run_freeze_cos.sh
    │   │   │   │   │   │       ├── run_freeze_cos_unmask.sh
    │   │   │   │   │   │       ├── run_freeze_do01.sh
    │   │   │   │   │   │       ├── run_freeze_do01_unmask.sh
    │   │   │   │   │   │       └── run_freeze_unmask.sh
    │   │   │   │   │   │   ├── our_230412_cc15m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   │   │   ├── our_230522
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run.sh
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       ├── run_freeze_unmask.sh
    │   │   │   │   │   │       └── run_unmask.sh
    │   │   │   │   │   │   ├── our_230522_10m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run.sh
    │   │   │   │   │   │       └── run_unmask.sh
    │   │   │   │   │   │   ├── our_230522_10m_cc15m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run.sh
    │   │   │   │   │   │       └── run_unmask.sh
    │   │   │   │   │   │   ├── our_230522_cc15m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   │   │   ├── our_230604
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       └── run_unmask.sh
    │   │   │   │   │   │   ├── our_230604_cc15m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       └── run_freeze.sh
    │   │   │   │   │   │   ├── simple_25m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run.sh
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       └── run_freeze_unmask.sh
    │   │   │   │   │   │   ├── viclip_base
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       └── run.sh
    │   │   │   │   │   │   ├── webvid_10m
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run.sh
    │   │   │   │   │   │       ├── run_freeze.sh
    │   │   │   │   │   │       ├── run_freeze_unmask.sh
    │   │   │   │   │   │       └── run_unmask.sh
    │   │   │   │   │   │   ├── zs_anet
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       ├── run_simple25m.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_simple25m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask_wiseft05.sh
    │   │   │   │   │   │       └── run_webvid_10m_wiseft05.sh
    │   │   │   │   │   │   ├── zs_didemo
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       ├── run_simple25m.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_simple25m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask_wiseft05.sh
    │   │   │   │   │   │       └── run_webvid_10m_wiseft05.sh
    │   │   │   │   │   │   ├── zs_k400
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       ├── run_simple25m.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_simple25m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask_wiseft05.sh
    │   │   │   │   │   │       └── run_webvid_10m_wiseft05.sh
    │   │   │   │   │   │   ├── zs_k600
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       └── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │   ├── zs_k700
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05 copy.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       └── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │   ├── zs_lsmdc
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       ├── run_simple25m.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_simple25m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask_wiseft05.sh
    │   │   │   │   │   │       └── run_webvid_10m_wiseft05.sh
    │   │   │   │   │   │   ├── zs_msrvtt_1k
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       ├── run_simple25m.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_simple25m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask_wiseft05.sh
    │   │   │   │   │   │       └── run_webvid_10m_wiseft05.sh
    │   │   │   │   │   │   ├── zs_msvd
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       ├── run_simple25m.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_simple25m_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_freeze_wiseft05.sh
    │   │   │   │   │   │       ├── run_webvid_10m_simple25m.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask.sh
    │   │   │   │   │   │       ├── run_webvid_10m_unmask_wiseft05.sh
    │   │   │   │   │   │       └── run_webvid_10m_wiseft05.sh
    │   │   │   │   │   │   ├── zs_sthsthv1
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       └── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │   └── zs_sthsthv2
    │   │   │   │   │   │       ├── config.py
    │   │   │   │   │   │       ├── run_clip_pretrained.sh
    │   │   │   │   │   │       ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230522_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_ensemble_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230604_resized_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   │   │       ├── run_pretrained.sh
    │   │   │   │   │   │       └── run_webvid_10m_freeze_unmask_wiseft05.sh
    │   │   │   │   ├── models
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── backbones
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── beit
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── builder.py
    │   │   │   │   │   │   │   └── st_beit.py
    │   │   │   │   │   │   ├── bert
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── builder.py
    │   │   │   │   │   │   │   ├── tokenization_bert.py
    │   │   │   │   │   │   │   └── xbert.py
    │   │   │   │   │   │   ├── blip_toremove
    │   │   │   │   │   │   │   ├── Qformer.py
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── builder.py
    │   │   │   │   │   │   │   └── modeling_t5.py
    │   │   │   │   │   │   ├── clip
    │   │   │   │   │   │   │   ├── bpe_simple_vocab_16e6.txt.gz
    │   │   │   │   │   │   │   ├── clip_text.py
    │   │   │   │   │   │   │   ├── clip_vision.py
    │   │   │   │   │   │   │   └── simple_tokenizer.py
    │   │   │   │   │   │   └── vit
    │   │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   │   ├── clip.py
    │   │   │   │   │   │   │   ├── clip_text.py
    │   │   │   │   │   │   │   ├── clip_vision.py
    │   │   │   │   │   │   │   ├── simple_tokenizer.py
    │   │   │   │   │   │   │   ├── vit.py
    │   │   │   │   │   │   │   └── vit_clean.py
    │   │   │   │   │   ├── criterions.py
    │   │   │   │   │   ├── mask.py
    │   │   │   │   │   ├── modules
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   └── temporal_model.py
    │   │   │   │   │   ├── prompts.py
    │   │   │   │   │   ├── utils.py
    │   │   │   │   │   └── viclip.py
    │   │   │   │   ├── preprocess
    │   │   │   │   │   ├── compress.py
    │   │   │   │   │   ├── create_sqlite_db.py
    │   │   │   │   │   ├── extract_hfclip.ipynb
    │   │   │   │   │   ├── gen_webvid10m_label.py
    │   │   │   │   │   └── utils.py
    │   │   │   │   ├── tasks
    │   │   │   │   │   ├── caption.py
    │   │   │   │   │   ├── caption_utils.py
    │   │   │   │   │   ├── compute_sim.py
    │   │   │   │   │   ├── pretrain.py
    │   │   │   │   │   ├── retrieval.py
    │   │   │   │   │   ├── retrieval_mc.py
    │   │   │   │   │   ├── retrieval_utils.py
    │   │   │   │   │   ├── shared_utils.py
    │   │   │   │   │   ├── trainer.py
    │   │   │   │   │   ├── tvqa.py
    │   │   │   │   │   ├── vqa.py
    │   │   │   │   │   └── vqa_utils.py
    │   │   │   │   ├── tests
    │   │   │   │   │   └── test_cfg.py
    │   │   │   │   ├── tools
    │   │   │   │   │   ├── run.py
    │   │   │   │   │   ├── submit.sh
    │   │   │   │   │   └── utils.py
    │   │   │   │   ├── utils
    │   │   │   │   │   ├── basic_utils.py
    │   │   │   │   │   ├── config.py
    │   │   │   │   │   ├── config_utils.py
    │   │   │   │   │   ├── distributed.py
    │   │   │   │   │   ├── easydict.py
    │   │   │   │   │   ├── logger.py
    │   │   │   │   │   ├── optimizer.py
    │   │   │   │   │   └── scheduler.py
    │   │   │   │   └── viclip.yaml
    │   │   │   └── VideoMAE
    │   │   │   │   ├── .gitignore
    │   │   │   │   ├── README.md
    │   │   │   │   ├── anet.py
    │   │   │   │   ├── datasets.py
    │   │   │   │   ├── engine_for_finetuning.py
    │   │   │   │   ├── engine_for_pretraining.py
    │   │   │   │   ├── ensemble.py
    │   │   │   │   ├── functional.py
    │   │   │   │   ├── kinetics.py
    │   │   │   │   ├── mae.py
    │   │   │   │   ├── masking_generator.py
    │   │   │   │   ├── modeling_finetune.py
    │   │   │   │   ├── modeling_pretrain.py
    │   │   │   │   ├── optim_factory.py
    │   │   │   │   ├── rand_augment.py
    │   │   │   │   ├── random_erasing.py
    │   │   │   │   ├── run_class_finetuning.py
    │   │   │   │   ├── run_class_linear.py
    │   │   │   │   ├── run_mae_pretraining.py
    │   │   │   │   ├── run_mae_vis.py
    │   │   │   │   ├── scripts
    │   │   │   │       ├── finetune
    │   │   │   │       │   ├── dist_train_vit_b_k400_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_b_anet_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_b_k400_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_b_k400_sparse_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_b_ssv2_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_k400_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_k400_sparse_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_k600_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_k600_it_k400_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_k700_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_mixk_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_mixk_it_k400_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_mixk_it_k600_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_mixk_it_k700_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_h_ssv2_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_l_k400_ft.sh
    │   │   │   │       │   ├── slurm_train_vit_l_k700_ft.sh
    │   │   │   │       │   └── slurm_train_vit_l_ssv2_ft.sh
    │   │   │   │       └── pretrain
    │   │   │   │       │   ├── dist_train_vit_b_k400_pt.sh
    │   │   │   │       │   ├── slurm_train_vit_b_hybrid_pt.sh
    │   │   │   │       │   ├── slurm_train_vit_b_k400_pt.sh
    │   │   │   │       │   ├── slurm_train_vit_b_ssv2_pt.sh
    │   │   │   │       │   ├── slurm_train_vit_h_hybrid_pt.sh
    │   │   │   │       │   ├── slurm_train_vit_l_hybrid_pt.sh
    │   │   │   │       │   └── slurm_train_vit_l_k700_pt.sh
    │   │   │   │   ├── ssv2.py
    │   │   │   │   ├── transforms.py
    │   │   │   │   ├── utils.py
    │   │   │   │   ├── video_transforms.py
    │   │   │   │   ├── vis.sh
    │   │   │   │   ├── vits.py
    │   │   │   │   └── volume_transforms.py
    │   │   ├── README.md
    │   │   └── README_cn.md
    │   ├── InternVideo2
    │   │   ├── README.md
    │   │   ├── figs
    │   │   │   ├── teaser-internvideo2.png
    │   │   │   └── wechatgrp.png
    │   │   ├── multi_modality
    │   │   │   ├── .gitignore
    │   │   │   ├── DATASET.md
    │   │   │   ├── INSTALL.md
    │   │   │   ├── MODEL_ZOO.md
    │   │   │   ├── README.md
    │   │   │   ├── configs
    │   │   │   │   ├── config_bert.json
    │   │   │   │   ├── config_bert_large.json
    │   │   │   │   ├── data.py
    │   │   │   │   ├── med_config.json
    │   │   │   │   ├── med_config_fusion.json
    │   │   │   │   ├── med_large_config.json
    │   │   │   │   └── model.py
    │   │   │   ├── dataset
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── av_utils.py
    │   │   │   │   ├── base_dataset.py
    │   │   │   │   ├── dataloader.py
    │   │   │   │   ├── pt_dataset.py
    │   │   │   │   ├── qa_dataset.py
    │   │   │   │   ├── resample_concat_dataset.py
    │   │   │   │   ├── ret_dataset.py
    │   │   │   │   ├── sampler.py
    │   │   │   │   ├── serialize.py
    │   │   │   │   ├── text_prompt.py
    │   │   │   │   ├── utils.py
    │   │   │   │   └── video_utils.py
    │   │   │   ├── demo
    │   │   │   │   ├── demo.ipynb
    │   │   │   │   ├── easydict.py
    │   │   │   │   ├── example1.mp4
    │   │   │   │   ├── internvideo2_stage2_config.py
    │   │   │   │   ├── small_config.py
    │   │   │   │   └── small_utils.py
    │   │   │   ├── miscs
    │   │   │   │   └── test_flops.py
    │   │   │   ├── models
    │   │   │   │   ├── __init__.py
    │   │   │   │   ├── backbones
    │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   ├── beats
    │   │   │   │   │   │   ├── BEATs.py
    │   │   │   │   │   │   ├── README.md
    │   │   │   │   │   │   ├── Tokenizers.py
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── backbone.py
    │   │   │   │   │   │   ├── modules.py
    │   │   │   │   │   │   └── quantizer.py
    │   │   │   │   │   ├── bert
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── builder.py
    │   │   │   │   │   │   ├── med.py
    │   │   │   │   │   │   ├── tokenization_bert.py
    │   │   │   │   │   │   └── xbert.py
    │   │   │   │   │   └── internvideo2
    │   │   │   │   │   │   ├── __init__.py
    │   │   │   │   │   │   ├── flash_attention_class.py
    │   │   │   │   │   │   ├── internvideo2.py
    │   │   │   │   │   │   ├── internvideo2_clip_text.py
    │   │   │   │   │   │   ├── internvideo2_clip_vision.py
    │   │   │   │   │   │   ├── internvl_clip_vision.py
    │   │   │   │   │   │   └── pos_embed.py
    │   │   │   │   ├── criterions.py
    │   │   │   │   ├── dist_utils.py
    │   │   │   │   ├── internvideo2_clip.py
    │   │   │   │   ├── internvideo2_stage2.py
    │   │   │   │   ├── mask.py
    │   │   │   │   └── utils.py
    │   │   │   ├── preprocess
    │   │   │   │   ├── compress.py
    │   │   │   │   ├── create_sqlite_db.py
    │   │   │   │   ├── gen_webvid10m_label.py
    │   │   │   │   └── utils.py
    │   │   │   ├── requirements.txt
    │   │   │   ├── scripts
    │   │   │   │   ├── evaluation
    │   │   │   │   │   ├── clip
    │   │   │   │   │   │   └── zero_shot
    │   │   │   │   │   │   │   ├── 1B
    │   │   │   │   │   │   │       ├── config_anet.py
    │   │   │   │   │   │   │       ├── config_charades_mc.py
    │   │   │   │   │   │   │       ├── config_didemo.py
    │   │   │   │   │   │   │       ├── config_hmdb51.py
    │   │   │   │   │   │   │       ├── config_k400.py
    │   │   │   │   │   │   │       ├── config_k600.py
    │   │   │   │   │   │   │       ├── config_k700.py
    │   │   │   │   │   │   │       ├── config_lsmdc.py
    │   │   │   │   │   │   │       ├── config_mit.py
    │   │   │   │   │   │   │       ├── config_msrvtt.py
    │   │   │   │   │   │   │       ├── config_ssv2_mc.py
    │   │   │   │   │   │   │       ├── config_ucf101.py
    │   │   │   │   │   │   │       ├── config_vatex_ch.py
    │   │   │   │   │   │   │       ├── config_vatex_en.py
    │   │   │   │   │   │   │       ├── eval_anet.sh
    │   │   │   │   │   │   │       ├── eval_charades_mc.sh
    │   │   │   │   │   │   │       ├── eval_hmdb51.sh
    │   │   │   │   │   │   │       ├── eval_k400.sh
    │   │   │   │   │   │   │       ├── eval_k600.sh
    │   │   │   │   │   │   │       ├── eval_k700.sh
    │   │   │   │   │   │   │       ├── eval_lsmdc.sh
    │   │   │   │   │   │   │       ├── eval_mit.sh
    │   │   │   │   │   │   │       ├── eval_msrvtt.sh
    │   │   │   │   │   │   │       ├── eval_ssv2_mc.sh
    │   │   │   │   │   │   │       ├── eval_ucf101.sh
    │   │   │   │   │   │   │       ├── eval_vatex_ch.sh
    │   │   │   │   │   │   │       └── eval_vatex_en.sh
    │   │   │   │   │   │   │   └── 6B
    │   │   │   │   │   │   │       ├── config_anet.py
    │   │   │   │   │   │   │       ├── config_charades_mc.py
    │   │   │   │   │   │   │       ├── config_didemo.py
    │   │   │   │   │   │   │       ├── config_hmdb51.py
    │   │   │   │   │   │   │       ├── config_k400.py
    │   │   │   │   │   │   │       ├── config_k600.py
    │   │   │   │   │   │   │       ├── config_k700.py
    │   │   │   │   │   │   │       ├── config_lsmdc.py
    │   │   │   │   │   │   │       ├── config_mit.py
    │   │   │   │   │   │   │       ├── config_msrvtt.py
    │   │   │   │   │   │   │       ├── config_ssv2_mc.py
    │   │   │   │   │   │   │       ├── config_ucf101.py
    │   │   │   │   │   │   │       ├── config_vatex_ch.py
    │   │   │   │   │   │   │       ├── config_vatex_en.py
    │   │   │   │   │   │   │       ├── eval_anet.sh
    │   │   │   │   │   │   │       ├── eval_charades_mc.sh
    │   │   │   │   │   │   │       ├── eval_hmdb51.sh
    │   │   │   │   │   │   │       ├── eval_k400.sh
    │   │   │   │   │   │   │       ├── eval_k600.sh
    │   │   │   │   │   │   │       ├── eval_k700.sh
    │   │   │   │   │   │   │       ├── eval_lsmdc.sh
    │   │   │   │   │   │   │       ├── eval_mit.sh
    │   │   │   │   │   │   │       ├── eval_msrvtt.sh
    │   │   │   │   │   │   │       ├── eval_ssv2_mc.sh
    │   │   │   │   │   │   │       ├── eval_ucf101.sh
    │   │   │   │   │   │   │       ├── eval_vatex_ch.sh
    │   │   │   │   │   │   │       └── eval_vatex_en.sh
    │   │   │   │   │   └── stage2
    │   │   │   │   │   │   └── zero_shot
    │   │   │   │   │   │       └── 1B
    │   │   │   │   │   │           ├── config_anet.py
    │   │   │   │   │   │           ├── config_didemo.py
    │   │   │   │   │   │           ├── config_lsmdc.py
    │   │   │   │   │   │           ├── config_msrvtt.py
    │   │   │   │   │   │           ├── config_msvd.py
    │   │   │   │   │   │           ├── config_vatex.py
    │   │   │   │   │   │           ├── eval_anet.sh
    │   │   │   │   │   │           ├── eval_didemo.sh
    │   │   │   │   │   │           ├── eval_lsmdc.sh
    │   │   │   │   │   │           ├── eval_msrvtt.sh
    │   │   │   │   │   │           ├── eval_msvd.sh
    │   │   │   │   │   │           └── eval_vatex.sh
    │   │   │   │   └── pretraining
    │   │   │   │   │   ├── clip
    │   │   │   │   │       ├── 1B
    │   │   │   │   │       │   ├── config.py
    │   │   │   │   │       │   └── run.sh
    │   │   │   │   │       └── 6B
    │   │   │   │   │       │   ├── config.py
    │   │   │   │   │       │   └── run.sh
    │   │   │   │   │   └── stage2
    │   │   │   │   │       ├── 1B
    │   │   │   │   │           ├── config.py
    │   │   │   │   │           └── run.sh
    │   │   │   │   │       └── 6B
    │   │   │   │   │           ├── config.py
    │   │   │   │   │           └── run.sh
    │   │   │   ├── tasks
    │   │   │   │   ├── pretrain.py
    │   │   │   │   ├── retrieval_utils.py
    │   │   │   │   └── shared_utils.py
    │   │   │   ├── tasks_clip
    │   │   │   │   ├── pretrain.py
    │   │   │   │   ├── retrieval.py
    │   │   │   │   ├── retrieval_mc.py
    │   │   │   │   ├── retrieval_mc2.py
    │   │   │   │   ├── retrieval_utils.py
    │   │   │   │   └── shared_utils.py
    │   │   │   ├── tests
    │   │   │   │   └── test_cfg.py
    │   │   │   ├── tools
    │   │   │   │   ├── run.py
    │   │   │   │   ├── submit.sh
    │   │   │   │   └── utils.py
    │   │   │   ├── torchrun.sh
    │   │   │   └── utils
    │   │   │   │   ├── basic_utils.py
    │   │   │   │   ├── config.py
    │   │   │   │   ├── config_utils.py
    │   │   │   │   ├── distributed.py
    │   │   │   │   ├── easydict.py
    │   │   │   │   ├── logger.py
    │   │   │   │   ├── optimizer.py
    │   │   │   │   └── scheduler.py
    │   │   └── single_modality
    │   │   │   ├── DATASET.md
    │   │   │   ├── INSTALL.md
    │   │   │   ├── MODEL_ZOO.md
    │   │   │   ├── README.md
    │   │   │   ├── datasets
    │   │   │       ├── __init__.py
    │   │   │       ├── anet.py
    │   │   │       ├── build.py
    │   │   │       ├── hmdb.py
    │   │   │       ├── kinetics.py
    │   │   │       ├── kinetics_sparse.py
    │   │   │       ├── mae.py
    │   │   │       ├── mae_multi.py
    │   │   │       ├── masking_generator.py
    │   │   │       ├── mixup.py
    │   │   │       ├── rand_augment.py
    │   │   │       ├── random_erasing.py
    │   │   │       ├── ssv2.py
    │   │   │       ├── transforms.py
    │   │   │       ├── video_transforms.py
    │   │   │       └── volume_transforms.py
    │   │   │   ├── engines
    │   │   │       ├── __init__.py
    │   │   │       ├── engine_for_finetuning.py
    │   │   │       └── engine_for_pretraining.py
    │   │   │   ├── functional.py
    │   │   │   ├── models
    │   │   │       ├── __init__.py
    │   │   │       ├── flash_attention_class.py
    │   │   │       ├── internvideo2.py
    │   │   │       ├── internvideo2_ap.py
    │   │   │       ├── internvideo2_cat.py
    │   │   │       ├── internvideo2_pretrain.py
    │   │   │       ├── internvl_clip_vision.py
    │   │   │       ├── pos_embed.py
    │   │   │       └── videomae.py
    │   │   │   ├── optim_factory.py
    │   │   │   ├── requirements.txt
    │   │   │   ├── run_finetuning.py
    │   │   │   ├── run_linear_probing.py
    │   │   │   ├── run_pretraining.py
    │   │   │   ├── scripts
    │   │   │       ├── finetuning
    │   │   │       │   ├── attentive_probing
    │   │   │       │   │   ├── k400
    │   │   │       │   │   │   ├── 1B_ap_k710_ap_k400_f16.sh
    │   │   │       │   │   │   └── 6B_ap_k710_ap_k400_f16.sh
    │   │   │       │   │   ├── k600
    │   │   │       │   │   │   ├── 1B_ap_k710_ap_k600_f16.sh
    │   │   │       │   │   │   └── 6B_ap_k710_ap_k600_f16.sh
    │   │   │       │   │   ├── k700
    │   │   │       │   │   │   ├── 1B_ap_k710_ap_k700_f16.sh
    │   │   │       │   │   │   └── 6B_ap_k710_ap_k700_f16.sh
    │   │   │       │   │   ├── k710
    │   │   │       │   │   │   ├── 1B_ap_k710_f16_loadStage2.sh
    │   │   │       │   │   │   └── 6B_ap_k710_f16_loadStage2.sh
    │   │   │       │   │   ├── mit
    │   │   │       │   │   │   ├── 1B_ap_k710_ap_k400_ap_mit_f16.sh
    │   │   │       │   │   │   └── 6B_ap_k710_ap_k400_ap_mit_f16.sh
    │   │   │       │   │   └── ssv2
    │   │   │       │   │   │   ├── 1B_ap_ssv2_f16_loadStage2.sh
    │   │   │       │   │   │   └── 6B_ap_ssv2_f16_loadStage2.sh
    │   │   │       │   ├── full_tuning
    │   │   │       │   │   ├── anet
    │   │   │       │   │   │   └── 6B_ft_k710_ft_k400_ap_anet_f8.sh
    │   │   │       │   │   ├── hacs
    │   │   │       │   │   │   └── 6B_ft_k710_ft_k400_ap_hacs_f8.sh
    │   │   │       │   │   ├── k400
    │   │   │       │   │   │   ├── 1B_ft_k710_ft_k400_f16.sh
    │   │   │       │   │   │   ├── 1B_ft_k710_ft_k400_f8.sh
    │   │   │       │   │   │   ├── 6B_ft_k710_ft_k400_f16.sh
    │   │   │       │   │   │   └── 6B_ft_k710_ft_k400_f8.sh
    │   │   │       │   │   ├── k600
    │   │   │       │   │   │   ├── 1B_ft_k710_ft_k600_f16.sh
    │   │   │       │   │   │   ├── 1B_ft_k710_ft_k600_f8.sh
    │   │   │       │   │   │   ├── 6B_ft_k710_ft_k600_f16.sh
    │   │   │       │   │   │   └── 6B_ft_k710_ft_k600_f8.sh
    │   │   │       │   │   ├── k700
    │   │   │       │   │   │   ├── 1B_ft_k710_ft_k700_f16.sh
    │   │   │       │   │   │   ├── 1B_ft_k710_ft_k700_f8.sh
    │   │   │       │   │   │   ├── 6B_ft_k710_ft_k700_f16.sh
    │   │   │       │   │   │   └── 6B_ft_k710_ft_k700_f8.sh
    │   │   │       │   │   ├── k710
    │   │   │       │   │   │   ├── 1B_ft_k710_f8.sh
    │   │   │       │   │   │   └── 6B_ft_k710_f8.sh
    │   │   │       │   │   ├── mit
    │   │   │       │   │   │   ├── 1B_ft_k710_ft_k400_ft_mit_f8.sh
    │   │   │       │   │   │   ├── 6B_ft_k710_ft_k400_ft_mit_f8.sh
    │   │   │       │   │   │   └── 6B_ft_k710_ft_k400_ft_mit_f8_res224to336.sh
    │   │   │       │   │   ├── ssv1
    │   │   │       │   │   │   ├── 1B_ft_ssv1_f8.sh
    │   │   │       │   │   │   └── 6B_ft_ssv1_f8.sh
    │   │   │       │   │   └── ssv2
    │   │   │       │   │   │   ├── 1B_ft_ssv2_f8.sh
    │   │   │       │   │   │   └── 6B_ft_ssv2_f8.sh
    │   │   │       │   └── linear_probing
    │   │   │       │   │   ├── hmdb51
    │   │   │       │   │       ├── 1B_lp_hmdb51_f16.sh
    │   │   │       │   │       ├── 6B_lp_hmdb51_f16.sh
    │   │   │       │   │       └── 6B_lp_hmdb51_f16_loadStage2.sh
    │   │   │       │   │   ├── k400
    │   │   │       │   │       ├── 1B_lp_k400_f16.sh
    │   │   │       │   │       ├── 6B_lp_k400_f16.sh
    │   │   │       │   │       └── 6B_lp_k400_f16_loadStage2.sh
    │   │   │       │   │   ├── ssv2
    │   │   │       │   │       ├── 1B_lp_ssv2_f16.sh
    │   │   │       │   │       ├── 6B_lp_ssv2_f16.sh
    │   │   │       │   │       └── 6B_lp_ssv2_f16_loadStage2.sh
    │   │   │       │   │   └── ucf101
    │   │   │       │   │       ├── 1B_lp_ucf101_f16.sh
    │   │   │       │   │       ├── 6B_lp_ssv2_f16_loadStage2.sh
    │   │   │       │   │       └── 6B_lp_ucf101_f16.sh
    │   │   │       └── pretraining
    │   │   │       │   ├── 1B_pt.sh
    │   │   │       │   └── 6B_pt.sh
    │   │   │   └── utils.py
    │   ├── LICENSE
    │   └── README.md
    └── relay-policy-learning
    │   ├── .gitattributes
    │   ├── CONTRIBUTING.md
    │   ├── LICENSE
    │   ├── NOTICE
    │   ├── README.md
    │   ├── adept_envs
    │       ├── .pylintrc
    │       ├── .style.yapf
    │       └── adept_envs
    │       │   ├── __init__.py
    │       │   ├── base_robot.py
    │       │   ├── franka
    │       │       ├── __init__.py
    │       │       ├── assets
    │       │       │   └── franka_kitchen_jntpos_act_ab.xml
    │       │       ├── kitchen_multitask_v0.py
    │       │       └── robot
    │       │       │   ├── franka_config.xml
    │       │       │   └── franka_robot.py
    │       │   ├── mujoco_env.py
    │       │   ├── robot_env.py
    │       │   ├── simulation
    │       │       ├── module.py
    │       │       ├── renderer.py
    │       │       └── sim_robot.py
    │       │   └── utils
    │       │       ├── config.py
    │       │       ├── configurable.py
    │       │       ├── constants.py
    │       │       ├── parse_demos.py
    │       │       └── quatmath.py
    │   ├── adept_models
    │       ├── .gitignore
    │       ├── CONTRIBUTING.public.md
    │       ├── LICENSE
    │       ├── README.public.md
    │       ├── __init__.py
    │       ├── kitchen
    │       │   ├── assets
    │       │   │   ├── backwall_asset.xml
    │       │   │   ├── backwall_chain.xml
    │       │   │   ├── counters_asset.xml
    │       │   │   ├── counters_chain.xml
    │       │   │   ├── hingecabinet_asset.xml
    │       │   │   ├── hingecabinet_chain.xml
    │       │   │   ├── kettle_asset.xml
    │       │   │   ├── kettle_chain.xml
    │       │   │   ├── microwave_asset.xml
    │       │   │   ├── microwave_chain.xml
    │       │   │   ├── oven_asset.xml
    │       │   │   ├── oven_chain.xml
    │       │   │   ├── slidecabinet_asset.xml
    │       │   │   └── slidecabinet_chain.xml
    │       │   ├── counters.xml
    │       │   ├── hingecabinet.xml
    │       │   ├── kettle.xml
    │       │   ├── kitchen.xml
    │       │   ├── meshes
    │       │   │   ├── burnerplate.stl
    │       │   │   ├── burnerplate_mesh.stl
    │       │   │   ├── cabinetbase.stl
    │       │   │   ├── cabinetdrawer.stl
    │       │   │   ├── cabinethandle.stl
    │       │   │   ├── countertop.stl
    │       │   │   ├── faucet.stl
    │       │   │   ├── handle2.stl
    │       │   │   ├── hingecabinet.stl
    │       │   │   ├── hingedoor.stl
    │       │   │   ├── hingehandle.stl
    │       │   │   ├── hood.stl
    │       │   │   ├── kettle.stl
    │       │   │   ├── kettlehandle.stl
    │       │   │   ├── knob.stl
    │       │   │   ├── lightswitch.stl
    │       │   │   ├── lightswitchbase.stl
    │       │   │   ├── micro.stl
    │       │   │   ├── microbutton.stl
    │       │   │   ├── microdoor.stl
    │       │   │   ├── microefeet.stl
    │       │   │   ├── microfeet.stl
    │       │   │   ├── microhandle.stl
    │       │   │   ├── microwindow.stl
    │       │   │   ├── oven.stl
    │       │   │   ├── ovenhandle.stl
    │       │   │   ├── oventop.stl
    │       │   │   ├── ovenwindow.stl
    │       │   │   ├── slidecabinet.stl
    │       │   │   ├── slidedoor.stl
    │       │   │   ├── stoverim.stl
    │       │   │   ├── tile.stl
    │       │   │   └── wall.stl
    │       │   ├── microwave.xml
    │       │   ├── oven.xml
    │       │   ├── slidecabinet.xml
    │       │   └── textures
    │       │   │   ├── marble1.png
    │       │   │   ├── metal1.png
    │       │   │   ├── tile1.png
    │       │   │   └── wood1.png
    │       └── scenes
    │       │   ├── basic_scene.xml
    │       │   └── textures
    │       │       ├── white_marble_tile.png
    │       │       └── white_marble_tile2.png
    │   └── third_party
    │       └── franka
    │           ├── LICENSE
    │           ├── README.md
    │           ├── assets
    │               ├── actuator0.xml
    │               ├── actuator1.xml
    │               ├── assets.xml
    │               ├── basic_scene.xml
    │               ├── chain0.xml
    │               ├── chain0_overlay.xml
    │               ├── chain1.xml
    │               └── teleop_actuator.xml
    │           ├── bi-franka_panda.xml
    │           ├── franka_panda.png
    │           ├── franka_panda.xml
    │           ├── franka_panda_teleop.xml
    │           └── meshes
    │               ├── collision
    │                   ├── finger.stl
    │                   ├── hand.stl
    │                   ├── link0.stl
    │                   ├── link1.stl
    │                   ├── link2.stl
    │                   ├── link3.stl
    │                   ├── link4.stl
    │                   ├── link5.stl
    │                   ├── link6.stl
    │                   └── link7.stl
    │               └── visual
    │                   ├── finger.stl
    │                   ├── hand.stl
    │                   ├── link0.stl
    │                   ├── link1.stl
    │                   ├── link2.stl
    │                   ├── link3.stl
    │                   ├── link4.stl
    │                   ├── link5.stl
    │                   ├── link6.stl
    │                   └── link7.stl
├── tools
    ├── __init__.py
    ├── genrl_utils.py
    ├── logger.py
    ├── replay.py
    ├── task_scores.py
    └── utils.py
├── train.py
└── train.yaml


/agent/dreamer.yaml:
--------------------------------------------------------------------------------
1 | # @package agent
2 | _target_: agent.dreamer.DreamerAgent
3 | name: dreamer
4 | cfg: ???
5 | obs_space: ???
6 | act_spec: ???
7 | grad_heads: [decoder, reward]
8 | reward_norm: {momentum: 1.0, scale: 1.0, eps: 1e-8}
9 | actor_ent: 3e-4


--------------------------------------------------------------------------------
/agent/plan2explore.yaml:
--------------------------------------------------------------------------------
1 | # @package agent
2 | _target_: agent.plan2explore.Plan2Explore
3 | name: plan2explore
4 | cfg: ???
5 | obs_space: ???
6 | act_spec: ???
7 | grad_heads: [decoder]
8 | reward_norm: {momentum: 0.95, scale: 1.0, eps: 1e-8}
9 | actor_ent: 0 


--------------------------------------------------------------------------------
/assets/GenRL_cover.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/GenRL_cover.gif


--------------------------------------------------------------------------------
/assets/dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/dashboard.png


--------------------------------------------------------------------------------
/assets/stickman_run.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/stickman_run.gif


--------------------------------------------------------------------------------
/assets/video_samples/a_spider_walking_on_the_floor.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/a_spider_walking_on_the_floor.mp4


--------------------------------------------------------------------------------
/assets/video_samples/backflip.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/backflip.mp4


--------------------------------------------------------------------------------
/assets/video_samples/dancing.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/dancing.mp4


--------------------------------------------------------------------------------
/assets/video_samples/dead_spider_white.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/dead_spider_white.gif


--------------------------------------------------------------------------------
/assets/video_samples/dog_running_seen_from_the_side.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/dog_running_seen_from_the_side.mp4


--------------------------------------------------------------------------------
/assets/video_samples/doing_splits.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/doing_splits.mp4


--------------------------------------------------------------------------------
/assets/video_samples/flex.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/flex.mp4


--------------------------------------------------------------------------------
/assets/video_samples/guy_walking.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/guy_walking.gif


--------------------------------------------------------------------------------
/assets/video_samples/headstand.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/headstand.mp4


--------------------------------------------------------------------------------
/assets/video_samples/karate_kick.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/karate_kick.mp4


--------------------------------------------------------------------------------
/assets/video_samples/lying_down_with_legs_up.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/lying_down_with_legs_up.mp4


--------------------------------------------------------------------------------
/assets/video_samples/open_microwave.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/open_microwave.gif


--------------------------------------------------------------------------------
/assets/video_samples/person_standing_up_with_hands_up_seen_from_the_side.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/person_standing_up_with_hands_up_seen_from_the_side.mp4


--------------------------------------------------------------------------------
/assets/video_samples/punching.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/punching.mp4


--------------------------------------------------------------------------------
/assets/video_samples/spider_draw.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/spider_draw.gif


--------------------------------------------------------------------------------
/conf/env/dmc_pixels.yaml:
--------------------------------------------------------------------------------
1 | # @package _global_
2 | obs_type: pixels
3 | action_repeat: 2 
4 | encoder: {mlp_keys: '$^', cnn_keys: 'observation', norm: layer, cnn_depth: 48, cnn_kernels: [4, 4, 4, 4], mlp_layers: [400, 400, 400, 400]} # act: elu
5 | decoder: {mlp_keys: '$^', cnn_keys: 'observation', norm: layer, cnn_depth: 48, cnn_kernels: [5, 5, 6, 6], mlp_layers: [400, 400, 400, 400], } # act: elu
6 | pred_discount: False
7 | imag_actor_grad: dynamics
8 | actor_grad: dynamics


--------------------------------------------------------------------------------
/conf/train_mode/train_behavior.yaml:
--------------------------------------------------------------------------------
1 | num_train_frames: 500_010
2 | batch_size: 32
3 | batch_length: 32
4 | agent.imag_reward_fn: video_text_reward
5 | eval_modality: task_imag


--------------------------------------------------------------------------------
/conf/train_mode/train_model.yaml:
--------------------------------------------------------------------------------
1 | num_train_frames: 5_000_010
2 | visual_every_frames: 250_000
3 | train_world_model: True
4 | train_connector: True
5 | reset_world_model: True
6 | reset_connector: True


--------------------------------------------------------------------------------
/data/stickman_example/1000-20240504T040956-d7ee0ea24b3e4863b1ef5e5bf1849924-501.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/data/stickman_example/1000-20240504T040956-d7ee0ea24b3e4863b1ef5e5bf1849924-501.npz


--------------------------------------------------------------------------------
/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/envs/__init__.py


--------------------------------------------------------------------------------
/envs/custom_dmc_tasks/__init__.py:
--------------------------------------------------------------------------------
 1 | from . import cheetah
 2 | from . import walker
 3 | from . import quadruped
 4 | from . import jaco
 5 | from . import stickman
 6 | from dm_control import suite
 7 | 
 8 | suite._DOMAINS['stickman'] = stickman
 9 | suite.ALL_TASKS = suite.ALL_TASKS + suite._get_tasks('custom')
10 | suite.TASKS_BY_DOMAIN = suite._get_tasks_by_domain(suite.ALL_TASKS)
11 |     
12 | def make_jaco(task, obs_type, seed, img_size, ):
13 |     return jaco.make(task, obs_type, seed, img_size, )


--------------------------------------------------------------------------------
/test/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | log_cli = 1
3 | log_cli_level = INFO
4 | log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
5 | log_cli_date_format=%Y-%m-%d %H:%M:%S


--------------------------------------------------------------------------------
/third_party/InternVideo/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "InternVideo1/Pretrain/UniFormerV2"]
2 | 	path = InternVideo1/Pretrain/UniFormerV2
3 | 	url = https://github.com/OpenGVLab/UniFormerV2.git
4 | [submodule "InternVideo1/Downstream/Ego-Tasks"]
5 | 	path = InternVideo1/Downstream/Ego-Tasks
6 | 	url = https://github.com/OpenGVLab/ego4d-eccv2022-solutions.git
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/example1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/example1.mp4


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/start_annotation_prototype.sh:
--------------------------------------------------------------------------------
 1 | unset http_proxy; unset https_proxy; unset HTTP_PROXY; unset HTTPS_PROXY
 2 | JOB_NAME='data-annotate_check'
 3 | OUTPUT_DIR="$(dirname $0)/$JOB_NAME"
 4 | LOG_DIR="$(dirname $0)/logs/${JOB_NAME}"
 5 | PARTITION='Video-aigc-general'
 6 | NNODE=1
 7 | NUM_GPUS=1
 8 | NUM_CPU=16
 9 | 
10 | srun -p ${PARTITION} \
11 |     --job-name=${JOB_NAME} \
12 |     -n${NNODE} \
13 |     --gres=gpu:${NUM_GPUS} \
14 |     --ntasks-per-node=1 \
15 |     --cpus-per-task=${NUM_CPU} \
16 |     jupyter lab --ip=0.0.0.0


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/utils/distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/utils/distributed.py


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/utils/logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/utils/logger.py


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/utils/scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/utils/scheduler.py


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/viclip/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/viclip/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/instruction_data/assert/conversation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/instruction_data/assert/conversation.png


--------------------------------------------------------------------------------
/third_party/InternVideo/Data/instruction_data/assert/detailed_description.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/instruction_data/assert/detailed_description.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/configs/recognition/omnisource/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/configs/recognition/omnisource/pipeline.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/acc_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/acc_curve.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/data_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/data_pipeline.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_logo.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_overview.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_overview.gif


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/evaluate_csn_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/csn/finetune_ucf101_csn_dnn.py \
 9 | 	work_dirs/csn/finetune_ucf101_csn_dnn/latest.pth \
10 | 	--videos_per_gpu 1 \
11 | 	--out work_dirs/csn/test_ucf101_csn_dnn.pkl \
12 | 	--eval top_k_accuracy mean_class_accuracy
13 | 
14 | cd $pwd_dir
15 | echo "Experiments finished!"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/evaluate_csn_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/csn/finetune_ucf101_csn_edlnokl_avuc_debias.py \
 9 | 	work_dirs/csn/finetune_ucf101_csn_edlnokl_avuc_debias/latest.pth \
10 | 	--videos_per_gpu 1 \
11 | 	--out work_dirs/csn/test_ucf101_csn_edlnokl_avuc_debias.pkl \
12 | 	--eval top_k_accuracy mean_class_accuracy
13 | 
14 | cd $pwd_dir
15 | echo "Experiments finished!"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/finetune_csn_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/csn/finetune_ucf101_csn_dnn.py \
10 | 	--work-dir work_dirs/csn/finetune_ucf101_csn_dnn \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/finetune_csn_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/csn/finetune_ucf101_csn_edlnokl_avuc_debias.py \
10 | 	--work-dir work_dirs/csn/finetune_ucf101_csn_edlnokl_avuc_debias \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_bnn.py \
 9 | 	work_dirs/i3d/finetune_ucf101_i3d_bnn/latest.pth \
10 | 	--out work_dirs/i3d/test_ucf101_i3d_bnn.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_dnn.py \
 9 | 	work_dirs/i3d/finetune_ucf101_i3d_dnn/latest.pth \
10 | 	--out work_dirs/i3d/test_ucf101_i3d_dnn.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias.py \
 9 | 	work_dirs/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias/latest.pth \
10 | 	--out work_dirs/i3d/test_ucf101_i3d_edlnokl_avuc_debias.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_edlnokl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl.py \
 9 | 	work_dirs/i3d/finetune_ucf101_i3d_edlnokl/latest.pth \
10 | 	--out work_dirs/i3d/test_ucf101_i3d_edlnokl.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_rpl.py \
 9 | 	work_dirs/i3d/finetune_ucf101_i3d_rpl/latest.pth \
10 | 	--out work_dirs/i3d/test_ucf101_i3d_rpl.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_bnn.py \
 9 | 	--work-dir work_dirs/i3d/finetune_ucf101_i3d_bnn \
10 | 	--validate \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0
14 | 
15 | cd $pwd_dir
16 | echo "Experiments finished!"
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_dnn.py \
 9 | 	--work-dir work_dirs/i3d/finetune_ucf101_i3d_dnn \
10 | 	--validate \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0
14 | 
15 | cd $pwd_dir
16 | echo "Experiments finished!"
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias.py \
10 | 	--work-dir work_dirs/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias \
11 | 	--validate \
12 | 	--seed 0 \
13 | 	--deterministic \
14 | 	--gpu-ids 0
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_edlnokl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl.py \
10 | 	--work-dir work_dirs/i3d/finetune_ucf101_i3d_edlnokl \
11 | 	--validate \
12 | 	--seed 0 \
13 | 	--deterministic \
14 | 	--gpu-ids 0
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_rpl.py \
 9 | 	--work-dir work_dirs/i3d/finetune_ucf101_i3d_rpl \
10 | 	--validate \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0
14 | 
15 | cd $pwd_dir
16 | echo "Experiments finished!"
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/run_draw_confmat.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | export CUDA_HOME='/usr/local/cuda'
 4 | 
 5 | pwd_dir=$pwd
 6 | cd ../../
 7 | 
 8 | source activate mmaction
 9 | 
10 | OOD_DATA=$1  # HMDB or MiT
11 | RESULT_PATH="experiments/i3d/results"
12 | 
13 | # Confusion Matrix comparison
14 | python experiments/draw_confusion_matrix.py \
15 |     --ood_result ${RESULT_PATH}/I3D_EDLNoKLAvUCDebias_EDL_${OOD_DATA}_result.npz \
16 |     --uncertain_thresh 0.004550 \
17 |     --save_file ${RESULT_PATH}/../results_confmat/I3D_DEAR_${OOD_DATA}_ConfMat.png
18 | 
19 | cd $pwd_dir
20 | echo "Experiments finished!"


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/train_i3d_DEAR_kinetics10.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/train_kinetics10_i3d_DEAR.py \
10 | 	--work-dir work_dirs/i3d/train_kinetics10_i3d_DEAR \
11 | 	--validate \
12 | 	--seed 0 \
13 | 	--deterministic \
14 | 	--gpu-ids 0
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/train_i3d_DEAR_noDebias_kinetics10.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/train_kinetics10_i3d_DEAR_noDebias.py \
10 | 	--work-dir work_dirs/i3d/train_kinetics10_i3d_DEAR_noDebias \
11 | 	--validate \
12 | 	--seed 0 \
13 | 	--deterministic \
14 | 	--gpu-ids 0
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/Makefile:
--------------------------------------------------------------------------------
1 | CXX= g++
2 | SRC= MetaRecognition.cpp weibull.c
3 | 
4 | libmr: $(SRC) weibull.h malloc.h MetaRecognition.h
5 | 	$(CXX) -o libmr $(SRC) -I.
6 | 
7 | clean:
8 | 	rm -f *~ *.o libmr


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/compile.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | echo "----- Removing previously compiled libmr.so -----\n"
 4 | rm -r build
 5 | rm *.model
 6 | rm libmr.so
 7 | rm *.dump
 8 | rm ../libmr.so
 9 | 
10 | echo "----- Building and compiling libmr ------- \n"
11 | python setup.py build_ext -i
12 | # cp libmr.so ../
13 | 
14 | # echo "----- Completed Compiling libmr -------- \n"
15 | # echo "Now trying python -c \"import libmr\""
16 | # python test_libmr.py
17 | # echo "----- Compiling Done. Now import *.so file in your application -----\n"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/libmr.c:
--------------------------------------------------------------------------------
1 | #error Do not use this file, it is the result of a failed Cython compilation.
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/libmr.pxd:
--------------------------------------------------------------------------------
1 | cdef extern from "MetaRecognition.h":
2 |     cdef struct svm_node_libsvm:
3 |         int index
4 |         double value


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/mae/finetune_mae_edlnokl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | pwd_dir=$pwd
 3 | cd ../../
 4 | GPUS=$1
 5 | PORT=${PORT:-29498}
 6 | 
 7 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT tools/train.py configs/recognition/mae/finetune_ucf101_mae_edlnokl.py \
 8 | 	--work-dir work_dirs/mae/ky \
 9 | 	--validate \
10 | 	--seed 0 \
11 | 	--deterministic \
12 | 	--launcher pytorch \
13 | 
14 | cd $pwd_dir
15 | echo "Experiments finished!"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_bnn.py \
 9 | 	work_dirs/slowfast/finetune_ucf101_slowfast_bnn/latest.pth \
10 | 	--out work_dirs/slowfast/test_ucf101_slowfast_bnn.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_dnn.py \
 9 | 	work_dirs/slowfast/finetune_ucf101_slowfast_dnn/latest.pth \
10 | 	--out work_dirs/slowfast/test_ucf101_slowfast_dnn.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias.py \
 9 | 	work_dirs/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias/latest.pth \
10 | 	--out work_dirs/slowfast/test_ucf101_slowfast_edlnokl_avuc_debias.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_rpl.py \
 9 | 	work_dirs/slowfast/finetune_ucf101_slowfast_rpl/latest.pth \
10 | 	--out work_dirs/slowfast/test_ucf101_slowfast_rpl.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_bnn.py \
 9 | 	--work-dir work_dirs/slowfast/finetune_ucf101_slowfast_bnn \
10 | 	--validate \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0
14 | 
15 | cd $pwd_dir
16 | echo "Experiments finished!"
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_dnn.py \
10 | 	--work-dir work_dirs/slowfast/finetune_ucf101_slowfast_dnn \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias.py \
10 | 	--work-dir work_dirs/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_rpl.py \
10 | 	--work-dir work_dirs/slowfast/finetune_ucf101_slowfast_rpl \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_bnn_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_bnn/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_bnn.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_celoss_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_celoss_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_celoss/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_celoss.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_avuc_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_avuc_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_avuc/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_avuc.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_nokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_nokl_avuc_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_nokl_avuc.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_nokl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_nokl.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/finetune_ucf101_tpn_slowonly_rpl.py \
 9 | 	work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_rpl/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_rpl.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_bnn_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_bnn \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_celoss_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_celoss_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_celoss \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_avuc_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_avuc_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_avuc \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_nokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_nokl_avuc_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_nokl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/finetune_ucf101_tpn_slowonly_rpl.py \
10 | 	--work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_rpl \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/searchw_evaluate.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \
 9 | 	work_dirs/tpn_slowonly/search_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias/search_$2/latest.pth \
10 | 	--out work_dirs/tpn_slowonly/search_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias/test_$2.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/searchw_finetune.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/hypertune.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \
10 | 	--work-dir work_dirs/tpn_slowonly/search_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias \
11 | 	--weight_factor $2 $3 \
12 | 	--seed 0 \
13 | 	--deterministic \
14 | 	--gpu-ids 0 \
15 | 	--validate
16 | 
17 | cd $pwd_dir
18 | echo "Experiments finished!"
19 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_bnn.py \
 9 | 	work_dirs/tsm/finetune_ucf101_tsm_bnn/latest.pth \
10 | 	--out work_dirs/tsm/test_ucf101_tsm_bnn.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_dnn.py \
 9 | 	work_dirs/tsm/finetune_ucf101_tsm_dnn/latest.pth \
10 | 	--out work_dirs/tsm/test_ucf101_tsm_dnn.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias.py \
 9 | 	work_dirs/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias/latest.pth \
10 | 	--out work_dirs/tsm/test_ucf101_tsm_edlnokl_avuc_debias.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_rpl.py \
 9 | 	work_dirs/tsm/finetune_ucf101_tsm_rpl/latest.pth \
10 | 	--out work_dirs/tsm/test_ucf101_tsm_rpl.pkl \
11 | 	--eval top_k_accuracy mean_class_accuracy
12 | 
13 | cd $pwd_dir
14 | echo "Experiments finished!"
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_bnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_bnn.py \
 9 | 	--work-dir work_dirs/tsm/finetune_ucf101_tsm_bnn \
10 | 	--validate \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0
14 | 
15 | cd $pwd_dir
16 | echo "Experiments finished!"
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_dnn_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_dnn.py \
10 | 	--work-dir work_dirs/tsm/finetune_ucf101_tsm_dnn \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias.py \
10 | 	--work-dir work_dirs/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_rpl_ucf101.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_rpl.py \
10 | 	--work-dir work_dirs/tsm/finetune_ucf101_tsm_rpl \
11 | 	--seed 0 \
12 | 	--deterministic \
13 | 	--gpu-ids 0 \
14 | 	--validate
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/run_draw_confmat.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | export CUDA_HOME='/usr/local/cuda'
 4 | 
 5 | pwd_dir=$pwd
 6 | cd ../../
 7 | 
 8 | source activate mmaction
 9 | 
10 | OOD_DATA=$1  # HMDB or MiT
11 | RESULT_PATH="experiments/tsm/results"
12 | 
13 | # Confusion Matrix comparison
14 | python experiments/draw_confusion_matrix.py \
15 |     --ood_result ${RESULT_PATH}/TSM_EDLNoKLAvUCDebias_EDL_${OOD_DATA}_result.npz \
16 |     --uncertain_thresh 0.004549 \
17 |     --save_file ${RESULT_PATH}/../results_confmat/TSM_DEAR_${OOD_DATA}_ConfMat.png
18 | 
19 | cd $pwd_dir
20 | echo "Experiments finished!"


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/train_tsm_DEAR_kinetics10.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/train_kinetics10_tsm_DEAR.py \
10 | 	--work-dir work_dirs/tsm/train_kinetics10_tsm_DEAR \
11 | 	--validate \
12 | 	--seed 0 \
13 | 	--deterministic \
14 | 	--gpu-ids 0
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/train_tsm_DEAR_noDebias_kinetics10.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | pwd_dir=$pwd
 4 | cd ../../
 5 | 
 6 | source activate mmaction
 7 | 
 8 | # --validate
 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/train_kinetics10_tsm_DEAR_noDebias.py \
10 | 	--work-dir work_dirs/tsm/train_kinetics10_tsm_DEAR_noDebias \
11 | 	--validate \
12 | 	--seed 0 \
13 | 	--deterministic \
14 | 	--gpu-ids 0
15 | 
16 | cd $pwd_dir
17 | echo "Experiments finished!"
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/__init__.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | from mmcv import digit_version
 3 | 
 4 | from .version import __version__
 5 | 
 6 | mmcv_minimum_version = '1.1.1'
 7 | mmcv_maximum_version = '1.3'
 8 | mmcv_version = digit_version(mmcv.__version__)
 9 | 
10 | assert (digit_version(mmcv_minimum_version) <= mmcv_version
11 |         <= digit_version(mmcv_maximum_version)), \
12 |     f'MMCV=={mmcv.__version__} is used but incompatible. ' \
13 |     f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.'
14 | 
15 | __all__ = ['__version__']
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/apis/__init__.py:
--------------------------------------------------------------------------------
1 | from .inference import inference_recognizer, init_recognizer
2 | from .test import multi_gpu_test, single_gpu_test, collect_results_cpu
3 | from .train import train_model
4 | 
5 | __all__ = [
6 |     'train_model', 'init_recognizer', 'inference_recognizer', 'multi_gpu_test',
7 |     'single_gpu_test', 'collect_results_cpu'
8 | ]
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .evaluation import *  # noqa: F401, F403
2 | from .lr import *  # noqa: F401, F403
3 | from .optimizer import *  # noqa: F401, F403
4 | from .runner import *  # noqa: F401, F403
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/lr/__init__.py:
--------------------------------------------------------------------------------
1 | from .tin_lr_hook import TINLrUpdaterHook
2 | 
3 | __all__ = ['TINLrUpdaterHook']
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/optimizer/__init__.py:
--------------------------------------------------------------------------------
1 | from .copy_of_sgd import CopyOfSGD
2 | from .tsm_optimizer_constructor import TSMOptimizerConstructor
3 | 
4 | __all__ = ['CopyOfSGD', 'TSMOptimizerConstructor']
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/optimizer/copy_of_sgd.py:
--------------------------------------------------------------------------------
 1 | from mmcv.runner import OPTIMIZERS
 2 | from torch.optim import SGD
 3 | 
 4 | 
 5 | @OPTIMIZERS.register_module()
 6 | class CopyOfSGD(SGD):
 7 |     """A clone of torch.optim.SGD.
 8 | 
 9 |     A customized optimizer could be defined like CopyOfSGD. You may derive from
10 |     built-in optimizers in torch.optim, or directly implement a new optimizer.
11 |     """
12 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/runner/__init__.py:
--------------------------------------------------------------------------------
1 | from .omnisource_runner import OmniSourceDistSamplerSeedHook, OmniSourceRunner
2 | from .annealing_runner import AnnealingRunner
3 | 
4 | __all__ = ['OmniSourceRunner', 'OmniSourceDistSamplerSeedHook', 'AnnealingRunner']
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/datasets/registry.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry
2 | 
3 | DATASETS = Registry('dataset')
4 | PIPELINES = Registry('pipeline')
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/datasets/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed_sampler import DistributedPowerSampler, DistributedSampler
2 | 
3 | __all__ = ['DistributedSampler', 'DistributedPowerSampler']
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/localization/__init__.py:
--------------------------------------------------------------------------------
 1 | from .bsn_utils import generate_bsp_feature, generate_candidate_proposals
 2 | from .proposal_utils import soft_nms, temporal_iop, temporal_iou
 3 | from .ssn_utils import (eval_ap, load_localize_proposal_file,
 4 |                         perform_regression, temporal_nms)
 5 | 
 6 | __all__ = [
 7 |     'generate_candidate_proposals', 'generate_bsp_feature', 'temporal_iop',
 8 |     'temporal_iou', 'soft_nms', 'load_localize_proposal_file',
 9 |     'perform_regression', 'temporal_nms', 'eval_ap'
10 | ]
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/common/__init__.py:
--------------------------------------------------------------------------------
1 | from .conv2plus1d import Conv2plus1d
2 | from .conv_audio import ConvAudio
3 | 
4 | __all__ = ['Conv2plus1d', 'ConvAudio']
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/localizers/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import BaseLocalizer
2 | from .bmn import BMN
3 | from .bsn import PEM, TEM
4 | from .ssn import SSN
5 | 
6 | __all__ = ['PEM', 'TEM', 'BMN', 'SSN', 'BaseLocalizer']
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/localizers/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .post_processing import post_processing
2 | 
3 | __all__ = ['post_processing']
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/necks/__init__.py:
--------------------------------------------------------------------------------
1 | from .tpn import TPN
2 | 
3 | __all__ = ['TPN']
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/recognizers/__init__.py:
--------------------------------------------------------------------------------
 1 | from .audio_recognizer import AudioRecognizer
 2 | from .base import BaseRecognizer
 3 | from .recognizer2d import Recognizer2D
 4 | from .recognizer3d import Recognizer3D
 5 | from .recognizer2d_bnn import Recognizer2DBNN
 6 | from .recognizer3d_bnn import Recognizer3DBNN
 7 | from .recognizer2d_rpl import Recognizer2DRPL
 8 | from .recognizer3d_rpl import Recognizer3DRPL
 9 | 
10 | __all__ = ['BaseRecognizer', 'Recognizer2D', 'Recognizer3D', 'Recognizer2DBNN', 'Recognizer3DBNN', 'Recognizer2DRPL', 'Recognizer3DRPL', 'AudioRecognizer']
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/registry.py:
--------------------------------------------------------------------------------
1 | from mmcv.utils import Registry
2 | 
3 | BACKBONES = Registry('backbone')
4 | NECKS = Registry('neck')
5 | HEADS = Registry('head')
6 | RECOGNIZERS = Registry('recognizer')
7 | LOSSES = Registry('loss')
8 | LOCALIZERS = Registry('localizer')
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/utils/__init__.py:
--------------------------------------------------------------------------------
 1 | from .collect_env import collect_env
 2 | from .gradcam_utils import GradCAM
 3 | from .logger import get_root_logger
 4 | from .misc import get_random_string, get_shm_dir, get_thread_id
 5 | 
 6 | __all__ = [
 7 |     'get_root_logger', 'collect_env', 'get_random_string', 'get_thread_id',
 8 |     'get_shm_dir', 'GradCAM'
 9 | ]
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/utils/collect_env.py:
--------------------------------------------------------------------------------
 1 | from mmcv.utils import collect_env as collect_basic_env
 2 | from mmcv.utils import get_git_hash
 3 | 
 4 | import mmaction
 5 | 
 6 | 
 7 | def collect_env():
 8 |     env_info = collect_basic_env()
 9 |     env_info['MMAction2'] = (
10 |         mmaction.__version__ + '+' + get_git_hash(digits=7))
11 |     return env_info
12 | 
13 | 
14 | if __name__ == '__main__':
15 |     for name, val in collect_env().items():
16 |         print(f'{name}: {val}')
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/version.py:
--------------------------------------------------------------------------------
 1 | # Copyright (c) Open-MMLab. All rights reserved.
 2 | 
 3 | __version__ = '0.9.0'
 4 | 
 5 | 
 6 | def parse_version_info(version_str):
 7 |     version_info = []
 8 |     for x in version_str.split('.'):
 9 |         if x.isdigit():
10 |             version_info.append(int(x))
11 |         elif x.find('rc') != -1:
12 |             patch_version = x.split('rc')
13 |             version_info.append(int(patch_version[0]))
14 |             version_info.append(f'rc{patch_version[1]}')
15 |     return tuple(version_info)
16 | 
17 | 
18 | version_info = parse_version_info(__version__)
19 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements.txt:
--------------------------------------------------------------------------------
1 | -r requirements/build.txt
2 | -r requirements/optional.txt
3 | -r requirements/runtime.txt
4 | -r requirements/tests.txt
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/build.txt:
--------------------------------------------------------------------------------
1 | # These must be installed before building mmaction2
2 | numpy
3 | torch>=1.3
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/docs.txt:
--------------------------------------------------------------------------------
1 | recommonmark
2 | sphinx
3 | sphinx_markdown_tables
4 | sphinx_rtd_theme
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/optional.txt:
--------------------------------------------------------------------------------
1 | av
2 | decord >= 0.4.1
3 | moviepy
4 | onnx
5 | onnxruntime
6 | PyTurboJPEG
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/readthedocs.txt:
--------------------------------------------------------------------------------
1 | mmcv
2 | torch
3 | torchvision
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/runtime.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | numpy
3 | opencv-contrib-python
4 | Pillow
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/tests.txt:
--------------------------------------------------------------------------------
1 | coverage
2 | flake8
3 | interrogate
4 | isort==4.3.21
5 | pytest
6 | pytest-runner
7 | xdoctest >= 0.10.0
8 | yapf
9 | terminaltables==3.1.0


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/audio_feature_test_list.txt:
--------------------------------------------------------------------------------
1 | test 100 127
2 | test 100 127
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/audio_test_list.txt:
--------------------------------------------------------------------------------
1 | test.wav 100 127
2 | test.wav 100 127
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/frame_test_list.txt:
--------------------------------------------------------------------------------
1 | test_imgs 5 127
2 | test_imgs 5 127
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/frame_test_list_multi_label.txt:
--------------------------------------------------------------------------------
1 | test_imgs 5 1
2 | test_imgs 5 3 5
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/frame_test_list_with_offset.txt:
--------------------------------------------------------------------------------
1 | test_imgs 2 5 127
2 | test_imgs 2 5 127
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/hvu_video_eval_test_anno.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "filename":"test.mp4",
 4 |     "label":{
 5 |         "action": [2],
 6 |         "scene": [2],
 7 |         "object": [1]
 8 |     }
 9 |   },
10 |   {
11 |     "filename":"test.avi",
12 |     "label":{
13 |         "action": [1],
14 |         "scene": [1],
15 |         "object": [2]
16 |     }
17 |   }
18 | ]
19 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/hvu_video_test_anno.json:
--------------------------------------------------------------------------------
 1 | [
 2 |   {
 3 |     "filename":"tmp.mp4",
 4 |     "label":{
 5 |         "concept":[250, 131, 42, 51, 57, 155, 122],
 6 |         "object":[1570, 508],
 7 |         "event":[16],
 8 |         "action":[180],
 9 |         "scene":[206]
10 |     }
11 |   },
12 |   {
13 |     "filename":"tmp.mp4",
14 |     "label":{
15 |         "concept":[250, 131, 42, 51, 57, 155, 122],
16 |         "object":[1570, 508],
17 |         "event":[16],
18 |         "action":[180],
19 |         "scene":[206]
20 |     }
21 |   }
22 | ]
23 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/proposal_normalized_list.txt:
--------------------------------------------------------------------------------
 1 | # 0
 2 | test_imgs
 3 | 5
 4 | 1
 5 | 2
 6 | 3 0.2000 0.4000
 7 | 3 0.6000 1.0000
 8 | 10
 9 | 3 1.0000 1.0000 0.2000 0.4000
10 | 3 0.5000 0.5000 0.2000 0.6000
11 | 3 0.3333 0.3333 0.2000 0.8000
12 | 3 0.5000 0.5000 0.2000 1.0000
13 | 3 0.0000 0.0000 0.4000 0.6000
14 | 3 0.3333 0.5000 0.4000 0.8000
15 | 3 0.6666 0.6666 0.4000 1.0000
16 | 3 0.5000 1.0000 0.6000 0.8000
17 | 3 1.0000 1.0000 0.6000 1.0000
18 | 3 0.5000 1.0000 0.8000 1.0000
19 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/proposal_test_list.txt:
--------------------------------------------------------------------------------
 1 | # 0
 2 | test_imgs
 3 | 5
 4 | 1
 5 | 2
 6 | 3 1 2
 7 | 3 3 5
 8 | 10
 9 | 3 1.0000 1.0000 1 2
10 | 3 0.5000 0.5000 1 3
11 | 3 0.3333 0.3333 1 4
12 | 3 0.5000 0.5000 1 5
13 | 3 0.0000 0.0000 2 3
14 | 3 0.3333 0.5000 2 4
15 | 3 0.6666 0.6666 2 5
16 | 3 0.5000 1.0000 3 4
17 | 3 1.0000 1.0000 3 5
18 | 3 0.5000 1.0000 4 5
19 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/rawvideo_test_anno.json:
--------------------------------------------------------------------------------
1 | [
2 |   {
3 |     "video_dir":"test_rawvideo_dataset",
4 |     "label":1,
5 |     "num_clips":2,
6 |     "positive_clip_inds":[0]
7 |   }
8 | ]
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/rawvideo_test_anno.txt:
--------------------------------------------------------------------------------
1 | test_rawvideo_dataset 1 2 0
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.mp4


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.wav


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_ava_dataset/ava_excluded_timestamps_sample.csv:
--------------------------------------------------------------------------------
1 | 0f39OWEqJ24,0903
2 | _-Z6wFjXtGQ,0902
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_ava_dataset/ava_sample.csv:
--------------------------------------------------------------------------------
1 | 0f39OWEqJ24,0902,0.031,0.162,0.670,0.995,12,0
2 | 0f39OWEqJ24,0902,0.031,0.162,0.670,0.995,17,0
3 | 0f39OWEqJ24,0902,0.031,0.162,0.670,0.995,79,0
4 | 0f39OWEqJ24,0903,0.034,0.189,0.669,0.980,12,0
5 | 0f39OWEqJ24,0903,0.034,0.189,0.669,0.980,17,0
6 | _-Z6wFjXtGQ,0902,0.063,0.049,0.524,0.996,12,0
7 | _-Z6wFjXtGQ,0902,0.063,0.049,0.524,0.996,74,0
8 | _-Z6wFjXtGQ,0902,0.063,0.049,0.524,0.996,80,0
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_bsp_features/v_test1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_bsp_features/v_test1.npy


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00001.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00002.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00003.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00004.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00005.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00006.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00006.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00007.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00007.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00008.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00008.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00009.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00009.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00010.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00010.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00001.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00002.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00003.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00004.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00005.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00001.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00002.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00002.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00003.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00003.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00004.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00004.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00005.jpg


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_proposals/v_test1.csv:
--------------------------------------------------------------------------------
 1 | tmin,tmax,tmin_score,tmax_score,score,match_iou,match_ioa
 2 | 0.1,0.2,0.95,0.96,0.97,0.85,0.84
 3 | 0.2,0.3,0.94,0.95,0.96,0.84,0.83
 4 | 0.3,0.4,0.93,0.94,0.95,0.83,0.82
 5 | 0.4,0.5,0.92,0.93,0.94,0.82,0.81
 6 | 0.5,0.6,0.91,0.92,0.93,0.81,0.80
 7 | 0.6,0.7,0.90,0.91,0.92,0.80,0.79
 8 | 0.5,0.7,0.90,0.91,0.92,0.80,0.79
 9 | 0.6,0.8,0.90,0.91,0.92,0.80,0.79
10 | 0.4,0.7,0.90,0.91,0.92,0.80,0.79
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_proposals/v_test2.csv:
--------------------------------------------------------------------------------
1 | tmin,tmax,tmin_score,tmax_score,score,match_iou,match_ioa
2 | 0.1,0.2,0.95,0.96,0.97,0.75,0.74
3 | 0.2,0.3,0.94,0.95,0.96,0.74,0.73
4 | 0.3,0.4,0.93,0.94,0.95,0.73,0.72
5 | 0.4,0.5,0.92,0.93,0.94,0.72,0.71
6 | 0.5,0.6,0.91,0.92,0.93,0.71,0.70
7 | 0.6,0.7,0.90,0.91,0.92,0.70,0.79
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_0.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_0.mp4


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_1.mp4


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/video_test_list.txt:
--------------------------------------------------------------------------------
1 | test.mp4 0
2 | test.mp4 0
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/activitynet/download_videos.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # set up environment
 4 | conda env create -f environment.yml
 5 | source activate activitynet
 6 | pip install --upgrade youtube-dl
 7 | 
 8 | DATA_DIR="../../../data/ActivityNet"
 9 | python download.py
10 | 
11 | source deactivate activitynet
12 | conda remove -n activitynet --all
13 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/activitynet/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | cd ../
3 | python build_rawframes.py ../../data/ActivityNet/videos/ ../../data/ActivityNet/rawframes/ --level 1 --flow-type tvl1 --ext mp4 --task both  --new-width 340 --new-height 256
4 | echo "Raw frames (RGB and tv-l1) Generated for train set"
5 | 
6 | cd activitynet/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/download_annotations.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e
 4 | 
 5 | VERSION=${VERSION:-"2.1"}
 6 | DATA_DIR="../../../data/ava/annotations"
 7 | 
 8 | if [[ ! -d "${DATA_DIR}" ]]; then
 9 |   echo "${DATA_DIR} does not exist. Creating";
10 |   mkdir -p ${DATA_DIR}
11 | fi
12 | 
13 | wget https://research.google.com/ava/download/ava_v${VERSION}.zip
14 | unzip -j ava_v${VERSION}.zip -d ${DATA_DIR}/
15 | rm ava_v${VERSION}.zip
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/download_videos_parallel.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e
 4 | 
 5 | DATA_DIR="../../../data/ava/videos"
 6 | ANNO_DIR="../../../data/ava/annotations"
 7 | 
 8 | if [[ ! -d "${DATA_DIR}" ]]; then
 9 |   echo "${DATA_DIR} does not exist. Creating";
10 |   mkdir -p ${DATA_DIR}
11 | fi
12 | 
13 | wget https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt -P ${ANNO_DIR}
14 | 
15 | python download_videos_parallel.py ${ANNO_DIR}/ava_file_names_trainval_v2.1.txt ${DATA_DIR}
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/ava/videos_15min/ ../../data/ava/rawframes/ --task both --level 1 --flow-type tvl1 --mixed-ext
5 | echo "Raw frames (RGB and Flow) Generated"
6 | cd ava/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/ava/videos_15min/ ../../data/ava/rawframes/ --task rgb --level 1 --mixed-ext
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd ava/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/extract_rgb_frames_opencv.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/ava/videos_15min/ ../../data/ava/rawframes/ --task rgb --level 1 --use-opencv --mixed-ext
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd ava/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/fetch_ava_proposals.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | set -e
 4 | 
 5 | DATA_DIR="../../../data/ava/"
 6 | 
 7 | wget https://download.openmmlab.com/mmaction/dataset/ava/ava_dense_proposals_train.FAIR.recall_93.9.pkl -P ${DATA_DIR}
 8 | wget https://download.openmmlab.com/mmaction/dataset/ava/ava_dense_proposals_val.FAIR.recall_93.9.pkl -P ${DATA_DIR}
 9 | wget https://download.openmmlab.com/mmaction/dataset/ava/ava_dense_proposals_test.FAIR.recall_93.9.pkl -P ${DATA_DIR}
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/gym/download_videos.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # set up environment
 4 | conda env create -f environment.yml
 5 | source activate gym
 6 | pip install --upgrade youtube-dl
 7 | 
 8 | DATA_DIR="../../../data/gym"
 9 | ANNO_DIR="../../../data/gym/annotations"
10 | python download_ytdl.py ${ANNO_DIR}/annotation.json ${DATA_DIR}/videos
11 | 
12 | source deactivate gym
13 | conda remove -n gym --all
14 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/gym/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/gym/subactions/ ../../data/gym/subaction_frames/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256
5 | echo "Raw frames (RGB and tv-l1) Generated"
6 | 
7 | cd gym/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/hmdb51/videos/ ../../data/hmdb51/rawframes/ --task both --level 2 --flow-type tvl1
5 | echo "Raw frames (RGB and Flow) Generated"
6 | cd hmdb51/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/hmdb51/videos/ ../../data/hmdb51/rawframes/ --task rgb --level 2  --ext avi
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd hmdb51/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/extract_rgb_frames_opencv.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/hmdb51/videos/ ../../data/hmdb51/rawframes/ --task rgb --level 2 --ext avi --use-opencv
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd hmdb51/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | 
5 | PYTHONPATH=. python tools/data/build_file_list.py hmdb51 data/hmdb51/rawframes/ --level 2 --format rawframes --shuffle
6 | echo "Filelist for rawframes generated."
7 | 
8 | cd tools/data/hmdb51/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | 
5 | PYTHONPATH=. python tools/data/build_file_list.py hmdb51 data/hmdb51/videos/ --level 2 --format videos --shuffle
6 | echo "Filelist for videos generated."
7 | 
8 | cd tools/data/hmdb51/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/download_videos.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # set up environment
 4 | conda env create -f environment.yml
 5 | source activate hvu
 6 | pip install --upgrade youtube-dl
 7 | 
 8 | DATA_DIR="../../../data/hvu"
 9 | ANNO_DIR="../../../data/hvu/annotations"
10 | python download.py ${ANNO_DIR}/hvu_train.csv ${DATA_DIR}/videos_train
11 | python download.py ${ANNO_DIR}/hvu_val.csv ${DATA_DIR}/videos_val
12 | 
13 | source deactivate hvu
14 | conda remove -n hvu --all
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/extract_frames.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../
 4 | python build_rawframes.py ../../data/hvu/videos_train/ ../../data/hvu/rawframes_train/ --level 1 --flow-type tvl1 --ext mp4 --task both  --new-short 256
 5 | echo "Raw frames (RGB and tv-l1) Generated for train set"
 6 | 
 7 | python build_rawframes.py ../../data/hvu/videos_val/ ../../data/hvu/rawframes_val/ --level 1 --flow-type tvl1 --ext mp4 --task both  --new-short 256
 8 | echo "Raw frames (RGB and tv-l1) Generated for val set"
 9 | 
10 | cd hvu/
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
1 | # to generate file list of frames
2 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_train.csv --src_dir ../../../data/hvu/rawframes_train \
3 |     --output ../../../data/hvu/hvu_train.json --mode frames
4 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_val.csv --src_dir ../../../data/hvu/rawframes_val \
5 |     --output ../../../data/hvu/hvu_val.json --mode frames
6 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
1 | # to generate file lists of videos
2 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_train.csv --src_dir ../../../data/hvu/videos_train \
3 |     --output ../../../data/hvu/hvu_train_video.json --mode videos
4 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_val.csv --src_dir ../../../data/hvu/videos_val \
5 |     --output ../../../data/hvu/hvu_val_video.json --mode videos
6 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/parse_tag_list.py:
--------------------------------------------------------------------------------
 1 | import mmcv
 2 | 
 3 | tag_list = '../../../data/hvu/annotations/hvu_categories.csv'
 4 | 
 5 | lines = open(tag_list).readlines()
 6 | lines = [x.strip().split(',') for x in lines[1:]]
 7 | tag_categories = {}
 8 | for line in lines:
 9 |     tag, category = line
10 |     tag_categories.setdefault(category, []).append(tag)
11 | 
12 | for k in tag_categories:
13 |     tag_categories[k].sort()
14 | 
15 | mmcv.dump(tag_categories, 'hvu_tags.json')
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/encode_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_videos.py ../../data/jester/rawframes/ ../../data/jester/videos/ --fps 12 --level 1 --start-idx 1 --filename-tmpl '%05d'
5 | echo "Encode videos"
6 | 
7 | cd jester/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/extract_flow.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/jester/rawframes/ ../../data/jester/rawframes/ --task flow --level 1 --flow-type tvl1 --input-frames
5 | echo "Flow (tv-l1) Generated"
6 | cd jester/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset train --format rawframes --shuffle
5 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset val --format rawframes --shuffle
6 | echo "Filelist for rawframes generated."
7 | 
8 | cd tools/data/jester/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle
5 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle
6 | echo "Filelist for videos generated."
7 | 
8 | cd tools/data/jester/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/kinetics/subset_list.txt:
--------------------------------------------------------------------------------
 1 | canoeing or kayaking
 2 | climbing a rope
 3 | driving car
 4 | golf driving
 5 | opening bottle
 6 | playing piano
 7 | playing volleyball
 8 | shooting goal (soccer)
 9 | surfing water
10 | writing


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/check_data.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | DATASET=$1
 4 | if [ "$DATASET" == "mimetics10" ] || [ "$DATASET" == "mimetics" ]; then
 5 |         echo "We are processing $DATASET"
 6 | else
 7 |         echo "Bad Argument, we only support mimetics10 or mimetics"
 8 |         exit 0
 9 | fi
10 | 
11 | pwd_dir=$pwd
12 | 
13 | cd ../../../
14 | PYTHONPATH=. python tools/data/data_check.py data/${DATASET}/videos data/${DATASET}/${DATASET}_test_list_videos.txt test
15 | echo "Test filelist for video passed checking."
16 | 
17 | cd ${pwd_dir}
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/download_annotations.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | DATASET='mimetics'
 4 | DATA_DIR="../../../data/${DATASET}/annotations"
 5 | 
 6 | if [[ ! -d "${DATA_DIR}" ]]; then
 7 |   echo "${DATA_DIR} does not exist. Creating";
 8 |   mkdir -p ${DATA_DIR}
 9 | fi
10 | 
11 | wget https://europe.naverlabs.com/wp-content/uploads/2019/12/Mimetics_release_v1.0.zip
12 | 
13 | unzip Mimetics_release_v1.0.zip -d ${DATA_DIR}/
14 | 
15 | rm Mimetics_release_v1.0.zip
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/download_videos_subset.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | # set up environment
 4 | conda env create -f ../kinetics/environment.yml
 5 | source activate kinetics
 6 | pip install --upgrade youtube-dl
 7 | 
 8 | DATA_DIR="../../../data/mimetics10"
 9 | ANNO_DIR="../../../data/mimetics/annotations"
10 | SUBSET="../kinetics/subset_list.txt"
11 | python ../kinetics/download_subset.py ${ANNO_DIR}/mimetics_v1.0.csv ${DATA_DIR}/videos --subset_file ${SUBSET} -t /ssd/data/tmp/mimetics10 -n 1 
12 | 
13 | source deactivate kinetics
14 | # conda remove -n kinetics --all
15 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | DATASET=$1
 4 | if [ "$DATASET" == "mimetics10" ] || [ "$DATASET" == "mimetics" ]; then
 5 |         echo "We are processing $DATASET"
 6 | else
 7 |         echo "Bad Argument, we only support mimetics10, mimetics"
 8 |         exit 0
 9 | fi
10 | 
11 | pwd_dir=$pwd
12 | cd ../../../
13 | 
14 | PYTHONPATH=. python tools/data/mimetics/build_file_list.py ${DATASET} data/${DATASET}/videos/ data/${DATASET}/${DATASET}_test_list_videos.txt
15 | echo "test filelist for video generated."
16 | 
17 | cd ${pwd_dir}


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/extract_frames.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../
 4 | python build_rawframes.py ../../data/mit/videos/training ../../data/mit/rawframes/training/ --level 2 --flow-type tvl1 --ext mp4 --task both
 5 | echo "Raw frames (RGB and tv-l1) Generated for train set"
 6 | 
 7 | python build_rawframes.py ../../data/mit/vides/validation/ ../../data/mit/rawframes/validation/ --level 2 --flow-type tvl1 --ext mp4 --task both
 8 | echo "Raw frames (RGB and tv-l1) Generated for val set"
 9 | 
10 | cd mit/
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../
 4 | python build_rawframes.py ../../data/mit/videos/training ../../data/mit/rawframes/training/ --level 2 --ext mp4 --task rgb
 5 | echo "Raw frames (RGB only) generated for train set"
 6 | 
 7 | python build_rawframes.py ../../data/mit/videos/validation ../../data/mit/rawframes/validation/ --level 2 --ext mp4 --task rgb
 8 | echo "Raw frames (RGB only) generated for val set"
 9 | 
10 | cd mit/
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/extract_rgb_frames_opencv.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../
 4 | python build_rawframes.py ../../data/mit/videos/training ../../data/mit/rawframes/training/ --level 2 --ext mp4 --task rgb --use-opencv
 5 | echo "Raw frames (RGB only) generated for train set"
 6 | 
 7 | python build_rawframes.py ../../data/mit/videos/validation ../../data/mit/rawframes/validation/ --level 2 --ext mp4 --task rgb --use-opencv
 8 | echo "Raw frames (RGB only) generated for val set"
 9 | 
10 | cd mit/
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../../../
 4 | PYTHONPATH=. python tools/data/build_file_list.py mit data/mit/rawframes/training/ --level 2 --format rawframes --num-split 1 --subset train --shuffle
 5 | echo "Train filelist for rawframes generated."
 6 | 
 7 | PYTHONPATH=. python tools/data/build_file_list.py mit data/mit/rawframes/validation/ --level 2 --format rawframes --num-split 1 --subset val --shuffle
 8 | echo "Val filelist for rawframes generated."
 9 | cd tools/data/mit/
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../../../
 4 | PYTHONPATH=. python tools/data/build_file_list.py mit data/mit/videos/training/ --level 2 --format videos --num-split 1 --subset train --shuffle
 5 | echo "Train filelist for videos generated."
 6 | 
 7 | PYTHONPATH=. python tools/data/build_file_list.py mit data/mit/videos/validation/ --level 2 --format videos --num-split 1 --subset val --shuffle
 8 | echo "Val filelist for videos generated."
 9 | cd tools/data/mit/
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/download_data.sh:
--------------------------------------------------------------------------------
 1 | DATA_DIR="../../../data/mmit/"
 2 | 
 3 | if [[ ! -d "${DATA_DIR}" ]]; then
 4 |   echo "${DATA_DIR} does not exist. Creating";
 5 |   mkdir -p ${DATA_DIR}
 6 | fi
 7 | 
 8 | cd ${DATA_DIR}
 9 | 
10 | wget -c https://www.dropbox.com/s/sz3yd1o0gf09amh/Multi_Moments_in_Time.zip?dl=0
11 | 
12 | unzip Multi_Moments_in_Time_Raw.zip
13 | rm Multi_Moments_in_Time.zip
14 | 
15 | if [ ! -d "./annotations" ]; then
16 |   mkdir ./annotations
17 | fi
18 | 
19 | mv *.txt annotations && mv *.csv annotations
20 | 
21 | cd -
22 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/mmit/videos/ ../../../data/mmit/rawframes/ --task both --level 2 --flow-type tvl1 --ext mp4
5 | echo "Raw frames (RGB and Flow) Generated"
6 | cd mmit/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/mmit/videos/ ../../data/mmit/rawframes/ --task rgb --level 2 --ext mp4
5 | 
6 | echo "Genearte raw frames (RGB only)"
7 | 
8 | cd mmit/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/extract_rgb_frames_opencv.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/mmit/videos/ ../../data/mmit/rawframes/ --task rgb --level 2 --ext mp4 --use-opencv
5 | 
6 | echo "Genearte raw frames (RGB only)"
7 | 
8 | cd mmit/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../../../
 4 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/rawframes/ --level 2 --format rawframes --num-split 1 --subset train --shuffle
 5 | echo "Train filelist for rawframes generated."
 6 | 
 7 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/rawframes/ --level 2 --format rawframes --num-split 1 --subset val --shuffle
 8 | echo "Val filelist for rawframes generated."
 9 | cd tools/data/mmit/
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../../../
 4 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/videos/ --level 2 --format videos --num-split 1 --subset train --shuffle
 5 | echo "Train filelist for videos generated."
 6 | 
 7 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/videos/ --level 2 --format videos --num-split 1 --subset val --shuffle
 8 | echo "Val filelist for videos generated."
 9 | cd tools/data/mmit/
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/encode_videos.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_videos.py ../../data/sthv1/rawframes/ ../../data/sthv1/videos/ --fps 12 --level 1 --start-idx 1 --filename-tmpl '%05d'
5 | echo "Encode videos"
6 | 
7 | cd sthv1/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/extract_flow.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/sthv1/rawframes/ ../../data/sthv1/rawframes/ --task flow --level 1 --flow-type tvl1 --input-frames
5 | echo "Flow (tv-l1) Generated"
6 | cd sthv1/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset train --format rawframes --shuffle
5 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset val --format rawframes --shuffle
6 | echo "Filelist for rawframes generated."
7 | 
8 | cd tools/data/sthv1/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle
5 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle
6 | echo "Filelist for videos generated."
7 | 
8 | cd tools/data/sthv1/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/sthv2/videos/ ../../data/sthv2/rawframes/ --task both --level 1 --flow-type tvl1 --ext webm
5 | echo "Raw frames (RGB and tv-l1) Generated"
6 | cd sthv2/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/sthv2/videos/ ../../data/sthv2/rawframes/ --task rgb --level 1  --ext webm
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd sthv2/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/extract_rgb_frames_opencv.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/sthv2/videos/ ../../data/sthv2/rawframes/ --task rgb --level 1 --ext webm --use-opencv
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd sthv2/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/rawframes/ --num-split 1 --level 1 --subset train --format rawframes --shuffle
5 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/rawframes/ --num-split 1 --level 1 --subset val --format rawframes --shuffle
6 | echo "Filelist for rawframes generated."
7 | 
8 | cd tools/data/sthv2/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle
5 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle
6 | echo "Filelist for videos generated."
7 | 
8 | cd tools/data/sthv2/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/extract_frames.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../
 4 | python build_rawframes.py ../../data/thumos14/videos/val/ ../../data/thumos14/rawframes/val/ --level 1 --flow-type tvl1 --ext mp4 --task both
 5 | echo "Raw frames (RGB and tv-l1) Generated for val set"
 6 | 
 7 | python build_rawframes.py ../../data/thumos14/videos/test/ ../../data/thumos14/rawframes/test/ --level 1 --flow-type tvl1 --ext mp4 --task both
 8 | echo "Raw frames (RGB and tv-l1) Generated for test set"
 9 | 
10 | cd thumos14/
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../
 4 | python build_rawframes.py ../../data/thumos14/videos/val/ ../../data/thumos14/rawframes/val/ --level 1 --ext mp4 --task rgb
 5 | echo "Raw frames (RGB only) generated for val set"
 6 | 
 7 | python build_rawframes.py ../../data/thumos14/videos/test/ ../../data/thumos14/rawframes/test/ --level 1 --ext mp4 --task rgb
 8 | echo "Raw frames (RGB only) generated for test set"
 9 | 
10 | cd thumos14/
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/extract_rgb_frames_opencv.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | cd ../
 4 | python build_rawframes.py ../../data/thumos14/videos/val/ ../../data/thumos14/rawframes/val/ --level 1 --ext mp4 --task rgb --use-opencv
 5 | echo "Raw frames (RGB only) generated for val set"
 6 | 
 7 | python build_rawframes.py ../../data/thumos14/videos/test/ ../../data/thumos14/rawframes/test/ --level 1 --ext mp4 --task rgb --use-opencv
 8 | echo "Raw frames (RGB only) generated for test set"
 9 | 
10 | cd thumos14/
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/fetch_tag_proposals.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | PROP_DIR="../../../data/thumos14/proposals"
 4 | 
 5 | if [[ ! -d "${PROP_DIR}" ]]; then
 6 |   echo "${PROP_DIR} does not exist. Creating";
 7 |   mkdir -p ${PROP_DIR}
 8 | fi
 9 | 
10 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_val_normalized_proposal_list.txt -P ${PROP_DIR}
11 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_test_normalized_proposal_list.txt -P ${PROP_DIR}
12 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/download_annotations.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | DATA_DIR="../../../data/ucf101/annotations"
 4 | 
 5 | if [[ ! -d "${DATA_DIR}" ]]; then
 6 |   echo "${DATA_DIR} does not exist. Creating";
 7 |   mkdir -p ${DATA_DIR}
 8 | fi
 9 | 
10 | wget https://www.crcv.ucf.edu/wp-content/uploads/2019/03/UCF101TrainTestSplits-RecognitionTask.zip --no-check-certificate
11 | 
12 | unzip -j UCF101TrainTestSplits-RecognitionTask.zip -d ${DATA_DIR}/
13 | rm UCF101TrainTestSplits-RecognitionTask.zip
14 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/download_videos.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | DATA_DIR="../../../data/ucf101/"
 4 | 
 5 | if [[ ! -d "${DATA_DIR}" ]]; then
 6 |   echo "${DATA_DIR} does not exist. Creating";
 7 |   mkdir -p ${DATA_DIR}
 8 | fi
 9 | 
10 | cd ${DATA_DIR}
11 | 
12 | wget https://www.crcv.ucf.edu/datasets/human-actions/ucf101/UCF101.rar --no-check-certificate
13 | unrar x UCF101.rar
14 | mv ./UCF-101 ./videos
15 | 
16 | cd "../../tools/data/ucf101"
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/extract_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/ucf101/videos/ ../../data/ucf101/rawframes/ --task both --level 2 --flow-type tvl1
5 | echo "Raw frames (RGB and Flow) Generated"
6 | cd ucf101/
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/extract_rgb_frames.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/ucf101/videos/ ../../data/ucf101/rawframes/ --task rgb --level 2  --ext avi
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd ucf101/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/extract_rgb_frames_opencv.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../
4 | python build_rawframes.py ../../data/ucf101/videos/ ../../data/ucf101/rawframes/ --task rgb --level 2 --ext avi --use-opencv
5 | echo "Genearte raw frames (RGB only)"
6 | 
7 | cd ucf101/
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/generate_rawframes_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | 
5 | PYTHONPATH=. python tools/data/build_file_list.py ucf101 data/ucf101/rawframes/ --level 2 --format rawframes --shuffle
6 | echo "Filelist for rawframes generated."
7 | 
8 | cd tools/data/ucf101/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/generate_videos_filelist.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | cd ../../../
4 | 
5 | PYTHONPATH=. python tools/data/build_file_list.py ucf101 data/ucf101/videos/ --level 2 --format videos --shuffle
6 | echo "Filelist for videos generated."
7 | 
8 | cd tools/data/ucf101/
9 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/dist_test.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CONFIG=$1
 4 | CHECKPOINT=$2
 5 | GPUS=$3
 6 | PORT=${PORT:-29500}
 7 | 
 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
10 |     $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4}
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/dist_train.sh:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env bash
 2 | 
 3 | CONFIG=$1
 4 | GPUS=$2
 5 | PORT=${PORT:-29500}
 6 | 
 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \
 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \
 9 |     $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3}
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/.DS_Store


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/config/__init__.py:
--------------------------------------------------------------------------------
1 | from .defaults import _C as cfg
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/csrc/cpu/vision.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <torch/extension.h>
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | from .build import make_data_loader
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .concat_dataset import ConcatDataset
2 | from .ava import AVAVideoDataset
3 | 
4 | __all__ = ["ConcatDataset", "AVAVideoDataset"]


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/README.md:
--------------------------------------------------------------------------------
1 | The evaluation code of AVA is modified from [https://github.com/activitynet/ActivityNet](https://github.com/activitynet/ActivityNet).


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/__init__.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from .ava_eval import do_ava_evaluation
 3 | 
 4 | 
 5 | def ava_evaluation(dataset, predictions, output_folder, **_):
 6 |     logger = logging.getLogger("alphaction.inference")
 7 |     logger.info("performing ava evaluation.")
 8 |     return do_ava_evaluation(
 9 |         dataset=dataset,
10 |         predictions=predictions,
11 |         output_folder=output_folder,
12 |         logger=logger,
13 |     )
14 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/pascal_evaluation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/pascal_evaluation/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/samplers/__init__.py:
--------------------------------------------------------------------------------
1 | from .distributed import DistributedSampler
2 | from .grouped_batch_sampler import GroupedBatchSampler
3 | from .iteration_based_batch_sampler import IterationBasedBatchSampler
4 | 
5 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"]
6 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/transforms/__init__.py:
--------------------------------------------------------------------------------
1 | from .build import build_transforms, build_object_transforms


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/engine/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/engine/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/backbone/__init__.py:
--------------------------------------------------------------------------------
1 | from .backbone import build_backbone


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/detector/__init__.py:
--------------------------------------------------------------------------------
1 | from .action_detector import build_detection_model


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/registry.py:
--------------------------------------------------------------------------------
1 | from alphaction.utils.registry import Registry
2 | 
3 | BACKBONES = Registry()
4 | ROI_ACTION_FEATURE_EXTRACTORS = Registry()
5 | ROI_ACTION_PREDICTORS = Registry()
6 | INTERACTION_AGGREGATION_STRUCTURES = Registry()


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/action_head/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/action_head/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/solver/__init__.py:
--------------------------------------------------------------------------------
1 | from .build import make_optimizer
2 | from .build import make_lr_scheduler
3 | from .lr_scheduler import WarmupMultiStepLR
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/structures/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/structures/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/random_seed.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import random
 3 | import numpy as np
 4 | 
 5 | def set_seed(seed, rank, world_size):
 6 |     rng = random.Random(seed)
 7 |     seed_per_rank = [rng.randint(0, 2**32-1) for _ in range(world_size)]
 8 |     cur_seed = seed_per_rank[rank]
 9 |     random.seed(cur_seed)
10 |     torch.manual_seed(cur_seed)
11 |     torch.cuda.manual_seed(cur_seed)
12 |     np.random.seed(cur_seed)


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/video_decode.py:
--------------------------------------------------------------------------------
1 | import av
2 | import io
3 | import decord
4 | def av_decode_video(video_path):
5 |     with av.open(video_path) as container:
6 |         frames = []
7 |         for frame in container.decode(video=0):
8 |             frames.append(frame.to_rgb().to_ndarray())
9 |     return frames


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/data/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/data/movie_size.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/data/movie_size.npy


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/figs/videomae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/figs/videomae.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/start.sh:
--------------------------------------------------------------------------------
1 | sbatch -N 1 --gres=gpu:1 --qos=gpugpu akeval.sh


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/vis.sh:
--------------------------------------------------------------------------------
 1 | # Set the path to save video
 2 | OUTPUT_DIR='TODO/VideoMAE/demo/vis_k400_1_0.9'
 3 | # path to video for visualization
 4 | VIDEO_PATH='TODO/TODO.mp4'
 5 | # path to pretrain model
 6 | MODEL_PATH='TODO/videomae_pretrain_base_patch16_224_frame_16x4_tube_mask_ratio_0.9_e1600/checkpoint-1599.pth'
 7 | 
 8 | python3 run_videomae_vis.py \
 9 |     --mask_ratio 0.9 \
10 |     --mask_type tube \
11 |     --decoder_depth 4 \
12 |     --model pretrain_videomae_base_patch16_224 \
13 |     ${VIDEO_PATH} ${OUTPUT_DIR} ${MODEL_PATH}


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/INSTALL.md:
--------------------------------------------------------------------------------
 1 | # Requirements
 2 | 
 3 | - Linux
 4 | - Python 3.5+
 5 | - PyTorch 1.10+
 6 | - TensorBoard
 7 | - CUDA 11.0+
 8 | - GCC 4.9+
 9 | - NumPy 1.11+
10 | - PyYaml
11 | - Pandas
12 | - h5py
13 | - joblib
14 | 
15 | # Compilation
16 | 
17 | Part of NMS is implemented in C++. The code can be compiled by
18 | 
19 | ```shell
20 | cd ./libs/utils
21 | python setup.py install --user
22 | cd ../..
23 | ```
24 | 
25 | The code should be recompiled every time you update PyTorch.
26 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/anet_run.sh:
--------------------------------------------------------------------------------
1 | python -u ./train_eval.py ./configs/anet.yaml --output anet_mae_h 2>&1 | tee anet_mae_h.log


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/fa_run.sh:
--------------------------------------------------------------------------------
1 | python -u ./train_eval.py ./configs/fineaction.yaml --output fa_mae_h 2>&1 | tee fa_mae_h.log


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/hacs_run.sh:
--------------------------------------------------------------------------------
1 | python -u ./train_eval.py ./configs/hacs.yaml --output hacs_mae_h 2>&1 | tee hacs_mae_h.log


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/libs/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .config import load_default_config, load_config
2 | 
3 | __all__ = ['load_default_config', 'load_config']
4 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/libs/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .data_utils import worker_init_reset_seed, truncate_feats
2 | from .datasets import make_dataset, make_data_loader
3 | from . import  thumos14, anet # other datasets go here
4 | 
5 | __all__ = ['worker_init_reset_seed', 'truncate_feats',
6 |            'make_dataset', 'make_data_loader']
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/libs/utils/setup.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | from setuptools import setup, Extension
 4 | from torch.utils.cpp_extension import BuildExtension, CppExtension
 5 | 
 6 | 
 7 | setup(
 8 |     name='nms_1d_cpu',
 9 |     ext_modules=[
10 |         CppExtension(
11 |             name = 'nms_1d_cpu',
12 |             sources = ['./csrc/nms_cpu.cpp'],
13 |             extra_compile_args=['-fopenmp']
14 |         )
15 |     ],
16 |     cmdclass={
17 |         'build_ext': BuildExtension
18 |     }
19 | )
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/th14_run.sh:
--------------------------------------------------------------------------------
1 | python -u ./train_eval.py ./configs/thumos.yaml --output th14_mae_h 2>&1 | tee th14_mae_h.log


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/__init__.py:
--------------------------------------------------------------------------------
1 | from .clip import *
2 | from .evl_utils import * 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
1 | ../bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/dog.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/dog.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc/__init__.py:
--------------------------------------------------------------------------------
1 | from .clip import *
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
1 | ../bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc/evl_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .evl_module import TransformerDecoder
2 | from .evl_module_uniformer_diff_conv_balance import TransformerDecoder_uniformer_diff_conv_balance
3 | from .clip_vit import vit_b32, vit_b16, vit_l14, vit_l14_336
4 | from .clip_vit_2plus1d import vit_2plus1d_b32, vit_2plus1d_b16, vit_2plus1d_l14, vit_2plus1d_l14_336
5 | from .clip_vit_2plus1d_dw_bias import vit_2plus1d_dw_bias_b32, vit_2plus1d_dw_bias_b16, vit_2plus1d_dw_bias_l14, vit_2plus1d_dw_bias_l14_336


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/__init__.py:
--------------------------------------------------------------------------------
1 | from .clip import *
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/evl_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .evl_module import TransformerDecoder
2 | from .evl_module_uniformer_diff_conv_balance import TransformerDecoder_uniformer_diff_conv_balance
3 | from .clip_vit import vit_b32, vit_b16, vit_l14, vit_l14_336
4 | from .clip_vit_2plus1d import vit_2plus1d_b32, vit_2plus1d_b16, vit_2plus1d_l14, vit_2plus1d_l14_336
5 | from .clip_vit_2plus1d_dw_bias import vit_2plus1d_dw_bias_b32, vit_2plus1d_dw_bias_b16, vit_2plus1d_dw_bias_l14, vit_2plus1d_dw_bias_l14_336


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/__init__.py:
--------------------------------------------------------------------------------
1 | from .clip import *
2 | from .evl_utils import * 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/cross-base/cross_config.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "attention_probs_dropout_prob": 0.1,
 3 |   "hidden_act": "gelu",
 4 |   "hidden_dropout_prob": 0.1,
 5 |   "hidden_size": 512,
 6 |   "initializer_range": 0.02,
 7 |   "intermediate_size": 2048,
 8 |   "max_position_embeddings": 128,
 9 |   "num_attention_heads": 8,
10 |   "num_hidden_layers": 4,
11 |   "vocab_size": 512
12 | }


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/eval_base.bash:
--------------------------------------------------------------------------------
 1 | # srun -p GVT -n1 -c5 --gres=gpu:1 -x SH-IDC1-10-198-8-[61,62] bash iter_eval.bash
 2 | 
 3 | ngpus=1
 4 | flag="--exp_name eval_base
 5 |       --run-type eval
 6 |       --exp-config iter_train.yaml
 7 | 
 8 |       SIMULATOR_GPU_IDS [0]
 9 |       TORCH_GPU_IDS [0]
10 |       GPU_NUMBERS $ngpus
11 |       NUM_ENVIRONMENTS 11
12 | 
13 |       EVAL.SAVE_RESULTS False
14 |       EVAL.CKPT_PATH_DIR pretrained/pretrained_models/base_ckpt.iter6900.pth
15 |       TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.ALLOW_SLIDING True
16 |       "
17 | python run.py $flag
18 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/habitat_extensions/__init__.py:
--------------------------------------------------------------------------------
1 | from habitat_extensions import measures, obs_transformers, sensors, nav
2 | from habitat_extensions.config.default import get_extended_config
3 | from habitat_extensions.task import VLNCEDatasetV1
4 | from habitat_extensions.habitat_simulator import Simulator
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/habitat_extensions/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/habitat_extensions/config/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/requirements.txt:
--------------------------------------------------------------------------------
1 | torch==1.8.0
2 | torchvision==0.9.0
3 | boto3
4 | pytorch_transformers==1.2.0
5 | timm==0.4.9
6 | transformers==4.12.3


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/run/vlnbert_r2r.bash:
--------------------------------------------------------------------------------
 1 | # srun -p GVT -n1 -c10 --gres=gpu:3 -x SH-IDC1-10-198-8-[61,62] bash run/vlnbert_r2r.bash
 2 | 
 3 | flag="--exp_name r2r_vlnbert_slide1
 4 |       --run-type train
 5 |       --exp-config exp/vlnbert_r2r.yaml
 6 | 
 7 |       SIMULATOR_GPU_IDS [0,1,2]
 8 |       TORCH_GPU_IDS [0,1,2]
 9 |       GPU_NUMBERS 3
10 | 
11 |       IL.batch_size 20
12 |       IL.lr 3.5e-5
13 |       IL.schedule_ratio 0.50
14 |       IL.max_traj_len 20
15 |       "
16 | python -m torch.distributed.launch --nproc_per_node=3 run.py $flag


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/run/vlnbert_r2r_da_eval.bash:
--------------------------------------------------------------------------------
 1 | # srun -p GVT -n1 -c5 --gres=gpu:1 -x SH-IDC1-10-198-8-[61,62,79] bash run/vlnbert_r2r_da_eval.bash
 2 | 
 3 | p=0.5
 4 | bs=32
 5 | diter=$1
 6 | epoch=$2
 7 | ngpus=4
 8 | flag="--exp_name r2r_vlnbert_da.p${p}.bs${bs}.di${diter}.ep${epoch}
 9 |       --run-type eval
10 |       --exp-config exp/vlnbert_r2r_da.yaml
11 | 
12 |       SIMULATOR_GPU_IDS [0]
13 |       TORCH_GPU_IDS [0]
14 |       GPU_NUMBERS $ngpus
15 |       NUM_ENVIRONMENTS 11
16 | 
17 |       IL.lr 3.5e-5
18 |       IL.batch_size 11
19 |       IL.max_traj_len 20
20 |       "
21 | python run.py $flag


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/run/vlnbert_r2r_eval.bash:
--------------------------------------------------------------------------------
 1 | # eval
 2 | # srun -p GVT -n1 -c5 --gres=gpu:1 -x SH-IDC1-10-198-8-[61,62] bash run/vlnbert_r2r_eval.bash
 3 | 
 4 | flag="--exp_name r2r_vlnbert_slide1
 5 |       --run-type eval
 6 |       --exp-config exp/vlnbert_r2r.yaml
 7 | 
 8 |       SIMULATOR_GPU_IDS [0]
 9 |       TORCH_GPU_IDS [0]
10 |       TORCH_GPU_ID 0
11 |       GPU_NUMBERS 1
12 | 
13 |       IL.batch_size 11
14 |       IL.lr 3.5e-5
15 |       IL.schedule_ratio 0.50
16 |       IL.max_traj_len 20
17 |       "
18 | python run.py $flag


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/__init__.py:
--------------------------------------------------------------------------------
1 | from vlnce_baselines import trainer_HAMT
2 | from vlnce_baselines.common import environments
3 | 
4 | from vlnce_baselines.models import (
5 |     Policy_ViewSelection_CMA,
6 |     Policy_ViewSelection_HAMT,
7 | )
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/config/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/config/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/config/r2r_configs/test_set_inference.yaml:
--------------------------------------------------------------------------------
 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml
 2 | SIMULATOR_GPU_ID: 0
 3 | TORCH_GPU_ID: 0
 4 | NUM_PROCESSES: 1
 5 | 
 6 | INFERENCE:
 7 |   SPLIT: test
 8 |   USE_CKPT_CONFIG: False
 9 |   SAMPLE: False
10 |   CKPT_PATH: data/checkpoints/CMA_PM_DA_Aug.pth
11 |   PREDICTIONS_FILE: predictions.json
12 | 
13 | MODEL:
14 |   policy_name: CMAPolicy
15 | 
16 |   INSTRUCTION_ENCODER:
17 |     bidirectional: True
18 | 
19 |   CMA:
20 |     use: True
21 | 
22 |   PROGRESS_MONITOR:
23 |     use: True
24 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/models/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/activitynet_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import ActivityNetDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class ActivityNetDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return ActivityNetDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "activitynet"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/cc12m_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import CC12MDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class CC12MDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return CC12MDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "cc3m"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/cc3m_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import CC3MDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class CC3MDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return CC3MDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "cc3m"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/conceptual_caption_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import ConceptualCaptionDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class ConceptualCaptionDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return ConceptualCaptionDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "gcc"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/laion400m_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import LAION400MDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class LAION400MDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return LAION400MDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "laion400m"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/mix100m_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import MIX100MDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class MIX100MDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return MIX100MDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "mix100m"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/nlvr2_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import NLVR2Dataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class NLVR2DataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return NLVR2Dataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "nlvr2"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/sbu_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import SBUCaptionDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class SBUCaptionDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return SBUCaptionDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "sbu"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/vcr_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import VCRDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class VCRDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return VCRDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "vcr"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/vg_caption_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import VisualGenomeCaptionDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class VisualGenomeCaptionDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return VisualGenomeCaptionDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "vg"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/yfcc15m_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import YFCC15MDataset
 2 | from .datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class YFCC15MDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return YFCC15MDataset
12 | 
13 |     @property
14 |     def dataset_name(self):
15 |         return "yfcc15m"
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/didemo_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import DIDEMODataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class DIDEMODataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return DIDEMODataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return DIDEMODataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "didemo"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/ego4d_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import Ego4DDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class Ego4DDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return Ego4DDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return Ego4DDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "ego4d"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/hmdb51_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import HMDB51Dataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class HMDB51DataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return HMDB51Dataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return HMDB51Dataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "hmdb51"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/howto100m_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import HT100MDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class HT100MDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return HT100MDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return HT100MDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "howto100m"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/k400_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import K400Dataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class K400DataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return K400Dataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return K400Dataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "k400"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/k400_video_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import K400VideoDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class K400VideoDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return K400VideoDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return K400VideoDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "k400_video"


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/lsmdc_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import LSMDCDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class LSMDCDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return LSMDCDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return LSMDCDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "lsmdc"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/msrvtt_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import MSRVTTDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class MSRVTTDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return MSRVTTDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return MSRVTTDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "msrvtt"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/msvd_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import MSVDDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class MSVDDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return MSVDDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return MSVDDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "msvd"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/tgif_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import TGIFDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class TGIFDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return TGIFDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return TGIFDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "tgif"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/tgifqa_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import TGIFQADataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class TGIFQADataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return TGIFQADataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return TGIFQADataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "tgifqa"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/tvqa_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import TVQADataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class TVQADataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return TVQADataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return TVQADataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "tvqa"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/ucf101_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import UCF101Dataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class UCF101DataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return UCF101Dataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return UCF101Dataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "ucf101"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/webvid_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import WEBVIDDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class WEBVIDDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return WEBVIDDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return WEBVIDDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "webvid"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/youtube_datamodule.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.datasets import YOUTUBEDataset
 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule
 3 | 
 4 | 
 5 | class YOUTUBEDataModule(BaseDataModule):
 6 |     def __init__(self, *args, **kwargs):
 7 |         super().__init__(*args, **kwargs)
 8 | 
 9 |     @property
10 |     def dataset_cls(self):
11 |         return YOUTUBEDataset
12 | 
13 |     @property
14 |     def dataset_cls_no_false(self):
15 |         return YOUTUBEDataset
16 | 
17 |     @property
18 |     def dataset_name(self):
19 |         return "youtube"
20 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/image/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/image/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/video/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/video/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/gadgets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/gadgets/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/__init__.py:
--------------------------------------------------------------------------------
1 | from .internvideo import *


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/clip_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .clip import *
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/clip_utils/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # from .evl_module import TransformerDecoder
2 | from .clip_vit_only_global import vit_only_global_b32, vit_only_global_b16, vit_only_global_l14, vit_only_global_l14_336


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/__init__.py:
--------------------------------------------------------------------------------
1 | # from CoTrain.modules.cotrain_dino_module_v2 import CoTrainTransformerSS
2 | from CoTrain.modules.cotrain_module import CoTrainTransformerSS
3 | # from CoTrain.modules.cotrain_dino_module_v3 import CoTrainTransformerSS
4 | from CoTrain.modules.clip_module import CLIP


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/forzen_param.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | # def forzen_param(model):
 5 | #     for name, param in model.named_parameters():
 6 | #         if 'mlm_score' in name or 'vtm_score' in name or 'mpp_score' in name:
 7 | #             param.requires_grad = True
 8 | #         else:
 9 | #             param.requires_grad = False
10 | #     return True
11 | 
12 | 
13 | def forzen_param(model):
14 |     flag = False
15 |     for name, param in model.named_parameters():
16 |         if '10' in name:
17 |             flag = True
18 |         param.requires_grad = flag
19 |     return True


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy==1.21.6
2 | setuptools==61.2.0
3 | torch==1.9.0+cu111
4 | torchvision==0.10.0+cu111
5 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/__init__.py:
--------------------------------------------------------------------------------
 1 | from CoTrain.transforms.image.pixelbert import (
 2 |     pixelbert_transform,
 3 |     pixelbert_transform_randaug,
 4 |     open_clip_transform,
 5 | )
 6 | 
 7 | _transforms = {
 8 |     "pixelbert": pixelbert_transform,
 9 |     "pixelbert_randaug": pixelbert_transform_randaug,
10 |     "open_clip": open_clip_transform,
11 | }
12 | 
13 | 
14 | def keys_to_transforms(keys: list, size=224, mode="train"):
15 |     return [_transforms[key](size=size, mode=mode) for key in keys]
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/image/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/image/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/video/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/video/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Media/download.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Media/download.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/__init__.py:
--------------------------------------------------------------------------------
1 | from .internvideo import *


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/clip_utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .clip import *
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/clip_utils/utils/__init__.py:
--------------------------------------------------------------------------------
1 | # from .evl_module import TransformerDecoder
2 | from .clip_vit_only_global import vit_only_global_b32, vit_only_global_b16, vit_only_global_l14, vit_only_global_l14_336


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/data/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/data/demo.mp4


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/extract_clip/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/extract_clip/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/img/framework.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/img/framework.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/linter.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash -e
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 3 | # Run this script at project root by ".linter.sh" before you commit.
 4 | echo "Running isort..."
 5 | isort -y -sp .
 6 | 
 7 | echo "Running black..."
 8 | black -l 80 .
 9 | 
10 | echo "Running flake..."
11 | flake8 .
12 | 
13 | command -v arc > /dev/null && {
14 |   echo "Running arc lint ..."
15 |   arc lint
16 | }
17 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | from slowfast.utils.env import setup_environment
5 | 
6 | setup_environment()
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/config/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/config/custom_config.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 3 | 
 4 | """Add custom configs and default values"""
 5 | 
 6 | 
 7 | def add_custom_config(_C):
 8 |     # Add your own customized configs.
 9 |     pass
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/models/__init__.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 3 | 
 4 | from .build import MODEL_REGISTRY, build_model  # noqa
 5 | from .custom_video_model_builder import *  # noqa
 6 | from .ptv_model_builder import (
 7 |     PTVCSN,
 8 |     PTVX3D,
 9 |     PTVR2plus1D,
10 |     PTVResNet,
11 |     PTVSlowFast,
12 | )  # noqa
13 | from .video_model_builder import ResNet, SlowFast  # noqa
14 | from .uniformer import Uniformer  # noqa
15 | from .uniformerv2 import Uniformerv2 # noqa


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/models/custom_video_model_builder.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 
4 | 
5 | """A More Flexible Video models."""
6 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/ava_evaluation/README.md:
--------------------------------------------------------------------------------
1 | The code under this folder is from the official [ActivityNet repo](https://github.com/activitynet/ActivityNet).
2 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/ava_evaluation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/ava_evaluation/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/env.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
 3 | 
 4 | """Set up Environment."""
 5 | 
 6 | import slowfast.utils.logging as logging
 7 | 
 8 | _ENV_SETUP_DONE = False
 9 | 
10 | 
11 | def setup_environment():
12 |     global _ENV_SETUP_DONE
13 |     if _ENV_SETUP_DONE:
14 |         return
15 |     _ENV_SETUP_DONE = True
16 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/visualization/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/qa.py:
--------------------------------------------------------------------------------
 1 | from .pretrain import *
 2 | 
 3 | del available_corpus
 4 | 
 5 | criterion["loss_weight"]["mlm"] = 0.0
 6 | scheduler["warmup_epochs"] = 0.5
 7 | 
 8 | max_txt_l = 32
 9 | batch_size = 32
10 | num_frames = 12
11 | 
12 | optimizer["lr"] = 1e-5
13 | log_freq = 100
14 | 
15 | # =========additional args for VQA ============
16 | eos = "[SEP]"
17 | max_q_len = 25
18 | max_a_len = 5
19 | # =========end ================================
20 | 
21 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/ret_msrvtt_9k.py:
--------------------------------------------------------------------------------
1 | from .ret_msrvtt import *
2 | 
3 | train_file = [
4 |     f"{anno_root_downstream}/msrvtt_ret_train9k.json",
5 |     f"{data_root}/msrvtt_2fps_224",
6 |     "video",
7 | ]
8 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/ret_ssv2_label.py:
--------------------------------------------------------------------------------
 1 | from .ret_msrvtt import *
 2 | 
 3 | train_file = [
 4 |     f"{anno_root_downstream}/ssv2_ret_label_train.json",
 5 |     f"{data_root}/ssv2",
 6 |     "video",
 7 | ]
 8 | test_file = dict(
 9 |     val=[
10 |         f"{anno_root_downstream}/ssv2_ret_label_val_small.json",
11 |         f"{data_root}/ssv2",
12 |         "video",
13 |     ],
14 | )
15 | 
16 | test_types = ["val"]
17 | stop_key = None  # used to choose the best ckpt. If None, save the last.
18 | 
19 | has_multi_vision_gt = True
20 | 
21 | scheduler["epochs"] = 10
22 | optimizer["lr"] = 1e-4
23 | 
24 | max_txt_l = 25
25 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/ret_ssv2_template.py:
--------------------------------------------------------------------------------
 1 | from .ret_msrvtt import *
 2 | 
 3 | train_file = [
 4 |     f"{anno_root_downstream}/ssv2_ret_template_train.json",
 5 |     f"{data_root}/ssv2",
 6 |     "video",
 7 | ]
 8 | test_file = dict(
 9 |     val=[
10 |         f"{anno_root_downstream}/ssv2_ret_template_val_small.json",
11 |         f"{data_root}/ssv2",
12 |         "video",
13 |     ],
14 | )
15 | 
16 | test_types = ["val"]
17 | stop_key = None  # used to choose the best ckpt. If None, save the last.
18 | 
19 | has_multi_vision_gt = True
20 | 
21 | scheduler["epochs"] = 10
22 | optimizer["lr"] = 1e-4
23 | 
24 | max_txt_l = 22
25 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/exp/exp_pretrain_ViCLIP/viclip_base/run.sh:
--------------------------------------------------------------------------------
 1 | torchrun --rdzv_endpoint=${MASTER_NODE}:${MASTER_PORT} \
 2 |     --nnodes=${NNODE} \
 3 |     --nproc_per_node=${NUM_GPUS} \
 4 |     --rdzv_backend=c10d \
 5 |     tasks/pretrain.py \
 6 |     $(dirname $0)/config.py \
 7 |     wandb.enable False \
 8 |     model.vision_encoder.pretrained 'CLIP-ViT-B/16' \
 9 |     model.text_encoder.pretrained 'CLIP-ViT-B/16' \
10 |     output_dir ${OUTPUT_DIR}
11 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/beit/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/beit/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/bert/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/bert/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/blip_toremove/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/blip_toremove/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/clip/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/clip/bpe_simple_vocab_16e6.txt.gz


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/modules/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/modules/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/preprocess/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import subprocess
 3 | 
 4 | 
 5 | def get_video_duration(filename):
 6 | 
 7 |     result = subprocess.check_output(
 8 |         f'ffprobe -v quiet -show_streams -select_streams v:0 -of json "{filename}"', shell=True
 9 |     ).decode()
10 |     fields = json.loads(result)["streams"][0]
11 | 
12 |     duration = float(fields["duration"])
13 |     return duration
14 | 
15 | if __name__ == "__main__":
16 |     import os
17 |     fp = os.path.join(os.environ["SL_DATA_DIR"], "videos_images/webvid_10m_2fps_224/22920757.mp4")
18 |     print(get_video_duration(fp))
19 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/tests/test_cfg.py:
--------------------------------------------------------------------------------
1 | from utils.config import Config
2 | 
3 | cfg = Config.get_config()
4 | 
5 | cfg_text = Config.pretty_text(cfg)
6 | print(cfg_text)
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/tools/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import socket
 4 | 
 5 | 
 6 | def has_slurm():
 7 |     """determine the system has slurm or not
 8 |     Returns: True if has else False.
 9 | 
10 |     """
11 |     return shutil.which("sbatch") is not None
12 | 
13 | def random_port():
14 |     """random a unused port
15 |     Returns: str
16 | 
17 |     """
18 |     with socket.socket() as s:
19 |         s.bind(("", 0))
20 |         return s.getsockname()[1]
21 | 
22 | def runcmd(cmd):
23 |     """run command
24 | 
25 |     Args:
26 |         cmd (str): The command to run
27 | 
28 |     """
29 |     os.system(cmd)
30 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/figs/teaser-internvideo2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/figs/teaser-internvideo2.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/figs/wechatgrp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/figs/wechatgrp.png


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/demo/example1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/demo/example1.mp4


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/models/__init__.py:
--------------------------------------------------------------------------------
 1 | from .internvideo2_clip import InternVideo2_CLIP
 2 | from .internvideo2_stage2 import InternVideo2_Stage2
 3 | # from .internvideo2_stage2_audio import InternVideo2_Stage2_audio
 4 | 
 5 | __all__ = [
 6 |     'InternVideo2_CLIP',
 7 |     'InternVideo2_Stage2', 
 8 |     # 'InternVideo2_Stage2_audio'
 9 | ]
10 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/beats/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/beats/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/bert/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/bert/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/internvideo2/__init__.py:
--------------------------------------------------------------------------------
1 | from .internvl_clip_vision import internvl_clip_6b
2 | from .internvideo2 import pretrain_internvideo2_1b_patch14_224, pretrain_internvideo2_6b_patch14_224
3 | from .internvideo2_clip_vision import InternVideo2
4 | from .internvideo2_clip_text import LLaMA, Tokenizer


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/preprocess/utils.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import subprocess
 3 | 
 4 | 
 5 | def get_video_duration(filename):
 6 | 
 7 |     result = subprocess.check_output(
 8 |         f'ffprobe -v quiet -show_streams -select_streams v:0 -of json "{filename}"', shell=True
 9 |     ).decode()
10 |     fields = json.loads(result)["streams"][0]
11 | 
12 |     duration = float(fields["duration"])
13 |     return duration
14 | 
15 | if __name__ == "__main__":
16 |     import os
17 |     fp = os.path.join(os.environ["SL_DATA_DIR"], "videos_images/webvid_10m_2fps_224/22920757.mp4")
18 |     print(get_video_duration(fp))
19 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/tests/test_cfg.py:
--------------------------------------------------------------------------------
1 | from utils.config import Config
2 | 
3 | cfg = Config.get_config()
4 | 
5 | cfg_text = Config.pretty_text(cfg)
6 | print(cfg_text)
7 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/tools/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import shutil
 3 | import socket
 4 | 
 5 | 
 6 | def has_slurm():
 7 |     """determine the system has slurm or not
 8 |     Returns: True if has else False.
 9 | 
10 |     """
11 |     return shutil.which("sbatch") is not None
12 | 
13 | def random_port():
14 |     """random a unused port
15 |     Returns: str
16 | 
17 |     """
18 |     with socket.socket() as s:
19 |         s.bind(("", 0))
20 |         return s.getsockname()[1]
21 | 
22 | def runcmd(cmd):
23 |     """run command
24 | 
25 |     Args:
26 |         cmd (str): The command to run
27 | 
28 |     """
29 |     os.system(cmd)
30 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/multi_modality/torchrun.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | MASTER_NODE=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1)
 3 | ALL_NODES=$(scontrol show hostnames "$SLURM_JOB_NODELIST")
 4 | MASTER_PORT=$((10660 + $RANDOM % 10))
 5 | 
 6 | echo "All nodes used:"
 7 | echo ${ALL_NODES}
 8 | echo "Master node:"
 9 | echo ${MASTER_NODE}
10 | echo "Args:"
11 | echo $@
12 | 
13 | torchrun --rdzv_endpoint=${MASTER_NODE}:10069 $@
14 | 


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/single_modality/datasets/__init__.py:
--------------------------------------------------------------------------------
1 | from .build import build_dataset, build_pretraining_dataset, build_multi_pretraining_dataset


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/single_modality/engines/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/single_modality/engines/__init__.py


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/single_modality/models/__init__.py:
--------------------------------------------------------------------------------
1 | from .internvl_clip_vision import internvl_clip_6b
2 | from .videomae import mae_g14_hybrid
3 | from .internvideo2 import internvideo2_1B_patch14_224, internvideo2_6B_patch14_224
4 | from .internvideo2_cat import internvideo2_cat_1B_patch14_224, internvideo2_cat_6B_patch14_224
5 | from .internvideo2_ap import internvideo2_ap_1B_patch14_224, internvideo2_ap_6B_patch14_224
6 | from .internvideo2_pretrain import pretrain_internvideo2_1B_patch14_224, pretrain_internvideo2_6B_patch14_224


--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo2/single_modality/requirements.txt:
--------------------------------------------------------------------------------
 1 | apex==0.9.10dev
 2 | auto_augment==1.0.0
 3 | decord==0.6.0
 4 | deepspeed==0.10.1
 5 | einops==0.7.0
 6 | flash_attn==2.0.8
 7 | fvcore==0.1.5.post20221221
 8 | numpy==1.24.4
 9 | opencv_python==4.8.0.76
10 | pandas==2.0.3
11 | Pillow==10.0.0
12 | scipy==1.13.0
13 | skimage==0.0
14 | tensorboardX==2.6.2
15 | timm==0.5.4
16 | torch==1.13.1+cu117
17 | torchvision==0.14.1+cu117
18 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/.gitattributes:
--------------------------------------------------------------------------------
1 | *.zip filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/NOTICE:
--------------------------------------------------------------------------------
1 | Apache Relay Policy Learning
2 | Copyright Google LLC The Apache Software Foundation
3 | 
4 | This product includes software developed at
5 | The Apache Software Foundation (http://www.apache.org/).


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/.gitignore:
--------------------------------------------------------------------------------
1 | # General
2 | .DS_Store
3 | *.swp
4 | *.profraw
5 | 
6 | # Editors
7 | .vscode
8 | .idea
9 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/README.public.md:
--------------------------------------------------------------------------------
 1 | # D'Suite Scenes
 2 | 
 3 | This repository is based on a collection of [MuJoCo](http://www.mujoco.org/) simulation
 4 | scenes and common assets for D'Suite environments. Based on code in the ROBEL suite 
 5 | https://github.com/google-research/robel
 6 | 
 7 | ## Disclaimer
 8 | 
 9 | This is not an official Google product.
10 | 
11 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/__init__.py


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/counters.xml:
--------------------------------------------------------------------------------
 1 | <mujoco model="counters">
 2 |     <compiler angle="radian" meshdir="" texturedir=""/>
 3 |     <include file='../scenes/basic_scene.xml'/>
 4 |     <include file="../kitchen/assets/counters_asset.xml"/>
 5 | 
 6 |     <worldbody>
 7 | 
 8 |         <body>
 9 | 		    <include file="../kitchen/assets/counters_chain.xml"/>
10 |         </body>
11 | 
12 |     </worldbody>
13 | 
14 | </mujoco>
15 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/hingecabinet.xml:
--------------------------------------------------------------------------------
 1 | <mujoco model="hinge cabinet">
 2 |     <compiler angle="radian"/>
 3 |     <include file='../scenes/basic_scene.xml'/>
 4 |     <include file="../kitchen/assets/hingecabinet_asset.xml"/>
 5 | 
 6 |     <worldbody>
 7 | 
 8 |         <body pos="0 0 .25">
 9 | 		    <include file="../kitchen/assets/hingecabinet_chain.xml"/>
10 |         </body>
11 | 
12 |     </worldbody>
13 | 
14 | </mujoco>
15 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/kettle.xml:
--------------------------------------------------------------------------------
 1 | <mujoco model="kettle">
 2 |     <compiler angle="radian" meshdir="" texturedir=""/>
 3 |     <include file='../scenes/basic_scene.xml'/>
 4 |     <include file="../kitchen/assets/kettle_asset.xml"/>
 5 | 
 6 |     <worldbody>
 7 | 
 8 |         <body>
 9 | 		    <include file="../kitchen/assets/kettle_chain.xml"/>
10 |         </body>
11 | 
12 |     </worldbody>
13 | 
14 | </mujoco>
15 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate_mesh.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate_mesh.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetbase.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetbase.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetdrawer.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetdrawer.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinethandle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinethandle.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/countertop.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/countertop.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/faucet.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/faucet.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/handle2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/handle2.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingecabinet.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingecabinet.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingedoor.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingedoor.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingehandle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingehandle.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/hood.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hood.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/kettle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/kettle.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/kettlehandle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/kettlehandle.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/knob.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/knob.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitch.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitch.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitchbase.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitchbase.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/micro.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/micro.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/microbutton.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microbutton.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/microdoor.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microdoor.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/microefeet.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microefeet.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/microfeet.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microfeet.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/microhandle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microhandle.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/microwindow.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microwindow.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/oven.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/oven.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenhandle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenhandle.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/oventop.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/oventop.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenwindow.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenwindow.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/slidecabinet.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/slidecabinet.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/slidedoor.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/slidedoor.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/stoverim.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/stoverim.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/tile.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/tile.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/meshes/wall.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/wall.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/microwave.xml:
--------------------------------------------------------------------------------
 1 | <mujoco model="microwave">
 2 |     <compiler angle="radian" />
 3 |     <include file='../scenes/basic_scene.xml'/>
 4 |     <include file="../kitchen/assets/microwave_asset.xml"/>
 5 | 
 6 |     <worldbody>
 7 | 
 8 |         <body>
 9 | 		    <include file="../kitchen/assets/microwave_chain.xml"/>
10 |         </body>
11 | 
12 |     </worldbody>
13 | 
14 | </mujoco>
15 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/oven.xml:
--------------------------------------------------------------------------------
 1 | <mujoco model="Oven">
 2 |     <compiler angle="radian" meshdir="" texturedir=""/>
 3 |     <include file='../scenes/basic_scene.xml'/>
 4 |     <include file="../kitchen/assets/oven_asset.xml"/>
 5 | 
 6 |     <worldbody>
 7 | 
 8 |         <body>
 9 | 		    <include file="../kitchen/assets/oven_chain.xml"/>
10 |         </body>
11 | 
12 |     </worldbody>
13 | 
14 | </mujoco>
15 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/slidecabinet.xml:
--------------------------------------------------------------------------------
 1 | <mujoco model="slide">
 2 |     <compiler meshdir="" texturedir=""/>
 3 |     <include file='../scenes/basic_scene.xml'/>
 4 |     <include file="../kitchen/assets/slidecabinet_asset.xml"/>
 5 | 
 6 |     <worldbody>
 7 | 
 8 |         <body pos="0 0 .25">
 9 | 		    <include file="../kitchen/assets/slidecabinet_chain.xml"/>
10 |         </body>
11 | 
12 |     </worldbody>
13 | 
14 | </mujoco>
15 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/textures/marble1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/marble1.png


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/textures/metal1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/metal1.png


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/textures/tile1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/tile1.png


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/kitchen/textures/wood1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/wood1.png


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile.png


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile2.png


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/README.md:
--------------------------------------------------------------------------------
 1 | # franka
 2 | Franka panda mujoco models
 3 | 
 4 | 
 5 | # Environment
 6 | 
 7 | franka_panda.xml           |  comming soon
 8 | :-------------------------:|:-------------------------:
 9 | ![Alt text](franka_panda.png?raw=false "sawyer") |  comming soon
10 | 


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/franka_panda.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/franka_panda.png


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/finger.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/finger.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/hand.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/hand.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link0.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link1.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link2.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link3.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link4.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link5.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link5.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link6.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link6.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/collision/link7.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link7.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/finger.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/finger.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/hand.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/hand.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link0.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link1.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link2.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link3.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link4.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link5.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link5.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link6.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link6.stl


--------------------------------------------------------------------------------
/third_party/relay-policy-learning/third_party/franka/meshes/visual/link7.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link7.stl


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/tools/__init__.py


--------------------------------------------------------------------------------