├── LICENSE
├── README.md
├── agent
│   ├── dreamer.py
│   ├── dreamer.yaml
│   ├── dreamer_utils.py
│   ├── genrl.py
│   ├── genrl.yaml
│   ├── plan2explore.py
│   ├── plan2explore.yaml
│   └── video_utils.py
├── assets
│   ├── GenRL_cover.gif
│   ├── dashboard.png
│   ├── stickman_run.gif
│   └── video_samples
│       ├── a_spider_walking_on_the_floor.mp4
│       ├── backflip.mp4
│       ├── dancing.mp4
│       ├── dead_spider_white.gif
│       ├── dog_running_seen_from_the_side.mp4
│       ├── doing_splits.mp4
│       ├── flex.mp4
│       ├── guy_walking.gif
│       ├── headstand.mp4
│       ├── karate_kick.mp4
│       ├── lying_down_with_legs_up.mp4
│       ├── open_microwave.gif
│       ├── person_standing_up_with_hands_up_seen_from_the_side.mp4
│       ├── punching.mp4
│       └── spider_draw.gif
├── collect_data.py
├── collect_data.yaml
├── conf
│   ├── defaults
│   │   ├── dreamer_v2.yaml
│   │   ├── dreamer_v3.yaml
│   │   └── genrl.yaml
│   ├── env
│   │   └── dmc_pixels.yaml
│   └── train_mode
│       ├── train_behavior.yaml
│       └── train_model.yaml
├── data
│   └── stickman_example
│       └── 1000-20240504T040956-d7ee0ea24b3e4863b1ef5e5bf1849924-501.npz
├── demo
│   ├── app.py
│   ├── demo_test.py
│   └── t2v.py
├── envs
│   ├── __init__.py
│   ├── custom_dmc_tasks
│   │   ├── __init__.py
│   │   ├── cheetah.py
│   │   ├── cheetah.xml
│   │   ├── jaco.py
│   │   ├── quadruped.py
│   │   ├── quadruped.xml
│   │   ├── stickman.py
│   │   ├── stickman.xml
│   │   ├── walker.py
│   │   └── walker.xml
│   ├── kitchen_extra.py
│   └── main.py
├── notebooks
│   ├── demo_videoclip.ipynb
│   ├── text2video.ipynb
│   ├── video2video.ipynb
│   ├── visualize_dataset_episodes.ipynb
│   └── visualize_env.ipynb
├── process_dataset.py
├── process_dataset.yaml
├── requirements.txt
├── test
│   ├── pytest.ini
│   └── test_env.py
└── third_party
    └── InternVideo
        ├── .gitignore
        ├── .gitmodules
        ├── Data
        │   ├── InternVid
        │   │   ├── README.md
        │   │   ├── README_CN.md
        │   │   ├── demo.ipynb
        │   │   ├── div_sampling.py
        │   │   ├── example1.mp4
        │   │   ├── start_annotation_prototype.sh
        │   │   ├── utils
        │   │   │   ├── basic_utils.py
        │   │   │   ├── config.py
        │   │   │   ├── config_utils.py
        │   │   │   ├── distributed.py
        │   │   │   ├── easydict.py
        │   │   │   ├── logger.py
        │   │   │   ├── optimizer.py
        │   │   │   └── scheduler.py
        │   │   └── viclip
        │   │       ├── __init__.py
        │   │       ├── bpe_simple_vocab_16e6.txt.gz
        │   │       ├── simple_tokenizer.py
        │   │       ├── viclip.py
        │   │       ├── viclip_text.py
        │   │       └── viclip_vision.py
        │   └── instruction_data
        │       ├── README.md
        │       └── assert
        │           ├── conversation.png
        │           └── detailed_description.png
        ├── InternVideo1
        │   └── Downstream
        │       ├── Open-Set-Action-Recognition
        │       │   └── …
        │       ├── Spatial-Temporal-Action-Localization
        │       │   └── …
        │       ├── Temporal-Action-Localization
        │       │   └── …
        │       ├── Video-Text-Retrieval
        │       │   └── …
        │       ├── Visual-Language-Navigation
        │       │   └── …
        │       └── multi-modalities-downstream
        │           └── …
        ├── Media
        │   └── download.png
        ├── Pretrain
        │   ├── Multi-Modalities-Pretraining
        │   │   └── …
        │   ├── UniFormerV2
        │   │   └── …
        │   └── ViCLIP
        │       └── …
        └── …
run_our_230522_resized_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ ├── run_simple25m.sh │ │ │ │ │ │ ├── run_simple25m_freeze.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_simple25m.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask_wiseft05.sh │ │ │ │ │ │ └── run_webvid_10m_wiseft05.sh │ │ │ │ │ │ ├── zs_didemo │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ ├── run_simple25m.sh │ │ │ │ │ │ ├── run_simple25m_freeze.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── 
run_webvid_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_simple25m.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask_wiseft05.sh │ │ │ │ │ │ └── run_webvid_10m_wiseft05.sh │ │ │ │ │ │ ├── zs_k400 │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ ├── run_simple25m.sh │ │ │ │ │ │ ├── run_simple25m_freeze.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_simple25m.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask_wiseft05.sh │ │ │ │ │ │ └── run_webvid_10m_wiseft05.sh │ │ │ │ │ │ ├── zs_k600 │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ └── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── zs_k700 │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ 
├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05 copy.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ └── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── zs_lsmdc │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ ├── run_simple25m.sh │ │ │ │ │ │ ├── run_simple25m_freeze.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_simple25m.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask_wiseft05.sh │ │ │ │ │ │ └── run_webvid_10m_wiseft05.sh │ │ │ │ │ │ ├── zs_msrvtt_1k │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized.sh │ │ │ │ │ │ ├── 
run_our_230522_resized_10m_cc15m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ ├── run_simple25m.sh │ │ │ │ │ │ ├── run_simple25m_freeze.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_simple25m.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask_wiseft05.sh │ │ │ │ │ │ └── run_webvid_10m_wiseft05.sh │ │ │ │ │ │ ├── zs_msvd │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_cc15m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask.sh │ │ │ │ │ │ ├── run_our_230522_resized_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh 
│ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ ├── run_simple25m.sh │ │ │ │ │ │ ├── run_simple25m_freeze.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask.sh │ │ │ │ │ │ ├── run_simple25m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_simple25m_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_freeze_wiseft05.sh │ │ │ │ │ │ ├── run_webvid_10m_simple25m.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask.sh │ │ │ │ │ │ ├── run_webvid_10m_unmask_wiseft05.sh │ │ │ │ │ │ └── run_webvid_10m_wiseft05.sh │ │ │ │ │ │ ├── zs_sthsthv1 │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ └── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ └── zs_sthsthv2 │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ ├── run_clip_pretrained.sh │ │ │ │ │ │ ├── run_our_230522_10m_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230522_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230602_200m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_ensemble_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230604_resized_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_10m_resized_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_do01_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_our_230613_filtered_30p_10m_freeze_unmask_wiseft05.sh │ │ │ │ │ │ ├── run_pretrained.sh │ │ │ │ │ │ └── run_webvid_10m_freeze_unmask_wiseft05.sh │ │ │ │ ├── models │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── backbones │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── beit │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── builder.py │ │ │ │ │ │ │ └── st_beit.py │ │ │ │ │ │ ├── bert │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── builder.py │ │ │ │ │ │ │ ├── tokenization_bert.py │ │ │ │ │ │ │ └── xbert.py │ │ │ │ │ │ ├── blip_toremove │ │ │ │ │ │ │ ├── Qformer.py │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── builder.py │ │ │ │ │ │ │ └── modeling_t5.py │ │ │ │ │ │ ├── clip │ │ │ │ │ │ │ ├── bpe_simple_vocab_16e6.txt.gz │ │ │ │ │ │ │ ├── clip_text.py │ │ │ │ │ │ │ ├── clip_vision.py │ │ │ │ │ │ │ └── simple_tokenizer.py │ │ │ │ │ │ └── vit │ │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ │ ├── clip.py │ │ │ │ │ │ │ ├── clip_text.py │ │ │ │ │ │ │ ├── clip_vision.py │ │ │ │ │ │ │ 
├── simple_tokenizer.py │ │ │ │ │ │ │ ├── vit.py │ │ │ │ │ │ │ └── vit_clean.py │ │ │ │ │ ├── criterions.py │ │ │ │ │ ├── mask.py │ │ │ │ │ ├── modules │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ └── temporal_model.py │ │ │ │ │ ├── prompts.py │ │ │ │ │ ├── utils.py │ │ │ │ │ └── viclip.py │ │ │ │ ├── preprocess │ │ │ │ │ ├── compress.py │ │ │ │ │ ├── create_sqlite_db.py │ │ │ │ │ ├── extract_hfclip.ipynb │ │ │ │ │ ├── gen_webvid10m_label.py │ │ │ │ │ └── utils.py │ │ │ │ ├── tasks │ │ │ │ │ ├── caption.py │ │ │ │ │ ├── caption_utils.py │ │ │ │ │ ├── compute_sim.py │ │ │ │ │ ├── pretrain.py │ │ │ │ │ ├── retrieval.py │ │ │ │ │ ├── retrieval_mc.py │ │ │ │ │ ├── retrieval_utils.py │ │ │ │ │ ├── shared_utils.py │ │ │ │ │ ├── trainer.py │ │ │ │ │ ├── tvqa.py │ │ │ │ │ ├── vqa.py │ │ │ │ │ └── vqa_utils.py │ │ │ │ ├── tests │ │ │ │ │ └── test_cfg.py │ │ │ │ ├── tools │ │ │ │ │ ├── run.py │ │ │ │ │ ├── submit.sh │ │ │ │ │ └── utils.py │ │ │ │ ├── utils │ │ │ │ │ ├── basic_utils.py │ │ │ │ │ ├── config.py │ │ │ │ │ ├── config_utils.py │ │ │ │ │ ├── distributed.py │ │ │ │ │ ├── easydict.py │ │ │ │ │ ├── logger.py │ │ │ │ │ ├── optimizer.py │ │ │ │ │ └── scheduler.py │ │ │ │ └── viclip.yaml │ │ │ └── VideoMAE │ │ │ │ ├── .gitignore │ │ │ │ ├── README.md │ │ │ │ ├── anet.py │ │ │ │ ├── datasets.py │ │ │ │ ├── engine_for_finetuning.py │ │ │ │ ├── engine_for_pretraining.py │ │ │ │ ├── ensemble.py │ │ │ │ ├── functional.py │ │ │ │ ├── kinetics.py │ │ │ │ ├── mae.py │ │ │ │ ├── masking_generator.py │ │ │ │ ├── modeling_finetune.py │ │ │ │ ├── modeling_pretrain.py │ │ │ │ ├── optim_factory.py │ │ │ │ ├── rand_augment.py │ │ │ │ ├── random_erasing.py │ │ │ │ ├── run_class_finetuning.py │ │ │ │ ├── run_class_linear.py │ │ │ │ ├── run_mae_pretraining.py │ │ │ │ ├── run_mae_vis.py │ │ │ │ ├── scripts │ │ │ │ ├── finetune │ │ │ │ │ ├── dist_train_vit_b_k400_ft.sh │ │ │ │ │ ├── slurm_train_vit_b_anet_ft.sh │ │ │ │ │ ├── slurm_train_vit_b_k400_ft.sh │ │ │ │ │ ├── slurm_train_vit_b_k400_sparse_ft.sh │ │ │ │ │ ├── slurm_train_vit_b_ssv2_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_k400_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_k400_sparse_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_k600_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_k600_it_k400_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_k700_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_mixk_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_mixk_it_k400_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_mixk_it_k600_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_mixk_it_k700_ft.sh │ │ │ │ │ ├── slurm_train_vit_h_ssv2_ft.sh │ │ │ │ │ ├── slurm_train_vit_l_k400_ft.sh │ │ │ │ │ ├── slurm_train_vit_l_k700_ft.sh │ │ │ │ │ └── slurm_train_vit_l_ssv2_ft.sh │ │ │ │ └── pretrain │ │ │ │ │ ├── dist_train_vit_b_k400_pt.sh │ │ │ │ │ ├── slurm_train_vit_b_hybrid_pt.sh │ │ │ │ │ ├── slurm_train_vit_b_k400_pt.sh │ │ │ │ │ ├── slurm_train_vit_b_ssv2_pt.sh │ │ │ │ │ ├── slurm_train_vit_h_hybrid_pt.sh │ │ │ │ │ ├── slurm_train_vit_l_hybrid_pt.sh │ │ │ │ │ └── slurm_train_vit_l_k700_pt.sh │ │ │ │ ├── ssv2.py │ │ │ │ ├── transforms.py │ │ │ │ ├── utils.py │ │ │ │ ├── video_transforms.py │ │ │ │ ├── vis.sh │ │ │ │ ├── vits.py │ │ │ │ └── volume_transforms.py │ │ ├── README.md │ │ └── README_cn.md │ ├── InternVideo2 │ │ ├── README.md │ │ ├── figs │ │ │ ├── teaser-internvideo2.png │ │ │ └── wechatgrp.png │ │ ├── multi_modality │ │ │ ├── .gitignore │ │ │ ├── DATASET.md │ │ │ ├── INSTALL.md │ │ │ ├── MODEL_ZOO.md │ │ │ ├── README.md │ │ │ ├── configs │ │ │ │ ├── config_bert.json │ │ │ │ ├── config_bert_large.json │ │ │ │ ├── data.py │ │ │ │ ├── med_config.json │ 
│ │ │ ├── med_config_fusion.json │ │ │ │ ├── med_large_config.json │ │ │ │ └── model.py │ │ │ ├── dataset │ │ │ │ ├── __init__.py │ │ │ │ ├── av_utils.py │ │ │ │ ├── base_dataset.py │ │ │ │ ├── dataloader.py │ │ │ │ ├── pt_dataset.py │ │ │ │ ├── qa_dataset.py │ │ │ │ ├── resample_concat_dataset.py │ │ │ │ ├── ret_dataset.py │ │ │ │ ├── sampler.py │ │ │ │ ├── serialize.py │ │ │ │ ├── text_prompt.py │ │ │ │ ├── utils.py │ │ │ │ └── video_utils.py │ │ │ ├── demo │ │ │ │ ├── demo.ipynb │ │ │ │ ├── easydict.py │ │ │ │ ├── example1.mp4 │ │ │ │ ├── internvideo2_stage2_config.py │ │ │ │ ├── small_config.py │ │ │ │ └── small_utils.py │ │ │ ├── miscs │ │ │ │ └── test_flops.py │ │ │ ├── models │ │ │ │ ├── __init__.py │ │ │ │ ├── backbones │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── beats │ │ │ │ │ │ ├── BEATs.py │ │ │ │ │ │ ├── README.md │ │ │ │ │ │ ├── Tokenizers.py │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── backbone.py │ │ │ │ │ │ ├── modules.py │ │ │ │ │ │ └── quantizer.py │ │ │ │ │ ├── bert │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── builder.py │ │ │ │ │ │ ├── med.py │ │ │ │ │ │ ├── tokenization_bert.py │ │ │ │ │ │ └── xbert.py │ │ │ │ │ └── internvideo2 │ │ │ │ │ │ ├── __init__.py │ │ │ │ │ │ ├── flash_attention_class.py │ │ │ │ │ │ ├── internvideo2.py │ │ │ │ │ │ ├── internvideo2_clip_text.py │ │ │ │ │ │ ├── internvideo2_clip_vision.py │ │ │ │ │ │ ├── internvl_clip_vision.py │ │ │ │ │ │ └── pos_embed.py │ │ │ │ ├── criterions.py │ │ │ │ ├── dist_utils.py │ │ │ │ ├── internvideo2_clip.py │ │ │ │ ├── internvideo2_stage2.py │ │ │ │ ├── mask.py │ │ │ │ └── utils.py │ │ │ ├── preprocess │ │ │ │ ├── compress.py │ │ │ │ ├── create_sqlite_db.py │ │ │ │ ├── gen_webvid10m_label.py │ │ │ │ └── utils.py │ │ │ ├── requirements.txt │ │ │ ├── scripts │ │ │ │ ├── evaluation │ │ │ │ │ ├── clip │ │ │ │ │ │ └── zero_shot │ │ │ │ │ │ │ ├── 1B │ │ │ │ │ │ │ ├── config_anet.py │ │ │ │ │ │ │ ├── config_charades_mc.py │ │ │ │ │ │ │ ├── config_didemo.py │ │ │ │ │ │ │ ├── config_hmdb51.py │ │ │ │ │ │ │ ├── config_k400.py │ │ │ │ │ │ │ ├── config_k600.py │ │ │ │ │ │ │ ├── config_k700.py │ │ │ │ │ │ │ ├── config_lsmdc.py │ │ │ │ │ │ │ ├── config_mit.py │ │ │ │ │ │ │ ├── config_msrvtt.py │ │ │ │ │ │ │ ├── config_ssv2_mc.py │ │ │ │ │ │ │ ├── config_ucf101.py │ │ │ │ │ │ │ ├── config_vatex_ch.py │ │ │ │ │ │ │ ├── config_vatex_en.py │ │ │ │ │ │ │ ├── eval_anet.sh │ │ │ │ │ │ │ ├── eval_charades_mc.sh │ │ │ │ │ │ │ ├── eval_hmdb51.sh │ │ │ │ │ │ │ ├── eval_k400.sh │ │ │ │ │ │ │ ├── eval_k600.sh │ │ │ │ │ │ │ ├── eval_k700.sh │ │ │ │ │ │ │ ├── eval_lsmdc.sh │ │ │ │ │ │ │ ├── eval_mit.sh │ │ │ │ │ │ │ ├── eval_msrvtt.sh │ │ │ │ │ │ │ ├── eval_ssv2_mc.sh │ │ │ │ │ │ │ ├── eval_ucf101.sh │ │ │ │ │ │ │ ├── eval_vatex_ch.sh │ │ │ │ │ │ │ └── eval_vatex_en.sh │ │ │ │ │ │ │ └── 6B │ │ │ │ │ │ │ ├── config_anet.py │ │ │ │ │ │ │ ├── config_charades_mc.py │ │ │ │ │ │ │ ├── config_didemo.py │ │ │ │ │ │ │ ├── config_hmdb51.py │ │ │ │ │ │ │ ├── config_k400.py │ │ │ │ │ │ │ ├── config_k600.py │ │ │ │ │ │ │ ├── config_k700.py │ │ │ │ │ │ │ ├── config_lsmdc.py │ │ │ │ │ │ │ ├── config_mit.py │ │ │ │ │ │ │ ├── config_msrvtt.py │ │ │ │ │ │ │ ├── config_ssv2_mc.py │ │ │ │ │ │ │ ├── config_ucf101.py │ │ │ │ │ │ │ ├── config_vatex_ch.py │ │ │ │ │ │ │ ├── config_vatex_en.py │ │ │ │ │ │ │ ├── eval_anet.sh │ │ │ │ │ │ │ ├── eval_charades_mc.sh │ │ │ │ │ │ │ ├── eval_hmdb51.sh │ │ │ │ │ │ │ ├── eval_k400.sh │ │ │ │ │ │ │ ├── eval_k600.sh │ │ │ │ │ │ │ ├── eval_k700.sh │ │ │ │ │ │ │ ├── eval_lsmdc.sh │ │ │ │ │ │ │ ├── eval_mit.sh │ │ │ │ │ │ │ ├── 
eval_msrvtt.sh │ │ │ │ │ │ │ ├── eval_ssv2_mc.sh │ │ │ │ │ │ │ ├── eval_ucf101.sh │ │ │ │ │ │ │ ├── eval_vatex_ch.sh │ │ │ │ │ │ │ └── eval_vatex_en.sh │ │ │ │ │ └── stage2 │ │ │ │ │ │ └── zero_shot │ │ │ │ │ │ └── 1B │ │ │ │ │ │ ├── config_anet.py │ │ │ │ │ │ ├── config_didemo.py │ │ │ │ │ │ ├── config_lsmdc.py │ │ │ │ │ │ ├── config_msrvtt.py │ │ │ │ │ │ ├── config_msvd.py │ │ │ │ │ │ ├── config_vatex.py │ │ │ │ │ │ ├── eval_anet.sh │ │ │ │ │ │ ├── eval_didemo.sh │ │ │ │ │ │ ├── eval_lsmdc.sh │ │ │ │ │ │ ├── eval_msrvtt.sh │ │ │ │ │ │ ├── eval_msvd.sh │ │ │ │ │ │ └── eval_vatex.sh │ │ │ │ └── pretraining │ │ │ │ │ ├── clip │ │ │ │ │ ├── 1B │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ └── run.sh │ │ │ │ │ └── 6B │ │ │ │ │ │ ├── config.py │ │ │ │ │ │ └── run.sh │ │ │ │ │ └── stage2 │ │ │ │ │ ├── 1B │ │ │ │ │ ├── config.py │ │ │ │ │ └── run.sh │ │ │ │ │ └── 6B │ │ │ │ │ ├── config.py │ │ │ │ │ └── run.sh │ │ │ ├── tasks │ │ │ │ ├── pretrain.py │ │ │ │ ├── retrieval_utils.py │ │ │ │ └── shared_utils.py │ │ │ ├── tasks_clip │ │ │ │ ├── pretrain.py │ │ │ │ ├── retrieval.py │ │ │ │ ├── retrieval_mc.py │ │ │ │ ├── retrieval_mc2.py │ │ │ │ ├── retrieval_utils.py │ │ │ │ └── shared_utils.py │ │ │ ├── tests │ │ │ │ └── test_cfg.py │ │ │ ├── tools │ │ │ │ ├── run.py │ │ │ │ ├── submit.sh │ │ │ │ └── utils.py │ │ │ ├── torchrun.sh │ │ │ └── utils │ │ │ │ ├── basic_utils.py │ │ │ │ ├── config.py │ │ │ │ ├── config_utils.py │ │ │ │ ├── distributed.py │ │ │ │ ├── easydict.py │ │ │ │ ├── logger.py │ │ │ │ ├── optimizer.py │ │ │ │ └── scheduler.py │ │ └── single_modality │ │ │ ├── DATASET.md │ │ │ ├── INSTALL.md │ │ │ ├── MODEL_ZOO.md │ │ │ ├── README.md │ │ │ ├── datasets │ │ │ ├── __init__.py │ │ │ ├── anet.py │ │ │ ├── build.py │ │ │ ├── hmdb.py │ │ │ ├── kinetics.py │ │ │ ├── kinetics_sparse.py │ │ │ ├── mae.py │ │ │ ├── mae_multi.py │ │ │ ├── masking_generator.py │ │ │ ├── mixup.py │ │ │ ├── rand_augment.py │ │ │ ├── random_erasing.py │ │ │ ├── ssv2.py │ │ │ ├── transforms.py │ │ │ ├── video_transforms.py │ │ │ └── volume_transforms.py │ │ │ ├── engines │ │ │ ├── __init__.py │ │ │ ├── engine_for_finetuning.py │ │ │ └── engine_for_pretraining.py │ │ │ ├── functional.py │ │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── flash_attention_class.py │ │ │ ├── internvideo2.py │ │ │ ├── internvideo2_ap.py │ │ │ ├── internvideo2_cat.py │ │ │ ├── internvideo2_pretrain.py │ │ │ ├── internvl_clip_vision.py │ │ │ ├── pos_embed.py │ │ │ └── videomae.py │ │ │ ├── optim_factory.py │ │ │ ├── requirements.txt │ │ │ ├── run_finetuning.py │ │ │ ├── run_linear_probing.py │ │ │ ├── run_pretraining.py │ │ │ ├── scripts │ │ │ ├── finetuning │ │ │ │ ├── attentive_probing │ │ │ │ │ ├── k400 │ │ │ │ │ │ ├── 1B_ap_k710_ap_k400_f16.sh │ │ │ │ │ │ └── 6B_ap_k710_ap_k400_f16.sh │ │ │ │ │ ├── k600 │ │ │ │ │ │ ├── 1B_ap_k710_ap_k600_f16.sh │ │ │ │ │ │ └── 6B_ap_k710_ap_k600_f16.sh │ │ │ │ │ ├── k700 │ │ │ │ │ │ ├── 1B_ap_k710_ap_k700_f16.sh │ │ │ │ │ │ └── 6B_ap_k710_ap_k700_f16.sh │ │ │ │ │ ├── k710 │ │ │ │ │ │ ├── 1B_ap_k710_f16_loadStage2.sh │ │ │ │ │ │ └── 6B_ap_k710_f16_loadStage2.sh │ │ │ │ │ ├── mit │ │ │ │ │ │ ├── 1B_ap_k710_ap_k400_ap_mit_f16.sh │ │ │ │ │ │ └── 6B_ap_k710_ap_k400_ap_mit_f16.sh │ │ │ │ │ └── ssv2 │ │ │ │ │ │ ├── 1B_ap_ssv2_f16_loadStage2.sh │ │ │ │ │ │ └── 6B_ap_ssv2_f16_loadStage2.sh │ │ │ │ ├── full_tuning │ │ │ │ │ ├── anet │ │ │ │ │ │ └── 6B_ft_k710_ft_k400_ap_anet_f8.sh │ │ │ │ │ ├── hacs │ │ │ │ │ │ └── 6B_ft_k710_ft_k400_ap_hacs_f8.sh │ │ │ │ │ ├── k400 │ │ │ │ │ │ ├── 1B_ft_k710_ft_k400_f16.sh │ │ │ │ │ │ ├── 
1B_ft_k710_ft_k400_f8.sh │ │ │ │ │ │ ├── 6B_ft_k710_ft_k400_f16.sh │ │ │ │ │ │ └── 6B_ft_k710_ft_k400_f8.sh │ │ │ │ │ ├── k600 │ │ │ │ │ │ ├── 1B_ft_k710_ft_k600_f16.sh │ │ │ │ │ │ ├── 1B_ft_k710_ft_k600_f8.sh │ │ │ │ │ │ ├── 6B_ft_k710_ft_k600_f16.sh │ │ │ │ │ │ └── 6B_ft_k710_ft_k600_f8.sh │ │ │ │ │ ├── k700 │ │ │ │ │ │ ├── 1B_ft_k710_ft_k700_f16.sh │ │ │ │ │ │ ├── 1B_ft_k710_ft_k700_f8.sh │ │ │ │ │ │ ├── 6B_ft_k710_ft_k700_f16.sh │ │ │ │ │ │ └── 6B_ft_k710_ft_k700_f8.sh │ │ │ │ │ ├── k710 │ │ │ │ │ │ ├── 1B_ft_k710_f8.sh │ │ │ │ │ │ └── 6B_ft_k710_f8.sh │ │ │ │ │ ├── mit │ │ │ │ │ │ ├── 1B_ft_k710_ft_k400_ft_mit_f8.sh │ │ │ │ │ │ ├── 6B_ft_k710_ft_k400_ft_mit_f8.sh │ │ │ │ │ │ └── 6B_ft_k710_ft_k400_ft_mit_f8_res224to336.sh │ │ │ │ │ ├── ssv1 │ │ │ │ │ │ ├── 1B_ft_ssv1_f8.sh │ │ │ │ │ │ └── 6B_ft_ssv1_f8.sh │ │ │ │ │ └── ssv2 │ │ │ │ │ │ ├── 1B_ft_ssv2_f8.sh │ │ │ │ │ │ └── 6B_ft_ssv2_f8.sh │ │ │ │ └── linear_probing │ │ │ │ │ ├── hmdb51 │ │ │ │ │ ├── 1B_lp_hmdb51_f16.sh │ │ │ │ │ ├── 6B_lp_hmdb51_f16.sh │ │ │ │ │ └── 6B_lp_hmdb51_f16_loadStage2.sh │ │ │ │ │ ├── k400 │ │ │ │ │ ├── 1B_lp_k400_f16.sh │ │ │ │ │ ├── 6B_lp_k400_f16.sh │ │ │ │ │ └── 6B_lp_k400_f16_loadStage2.sh │ │ │ │ │ ├── ssv2 │ │ │ │ │ ├── 1B_lp_ssv2_f16.sh │ │ │ │ │ ├── 6B_lp_ssv2_f16.sh │ │ │ │ │ └── 6B_lp_ssv2_f16_loadStage2.sh │ │ │ │ │ └── ucf101 │ │ │ │ │ ├── 1B_lp_ucf101_f16.sh │ │ │ │ │ ├── 6B_lp_ssv2_f16_loadStage2.sh │ │ │ │ │ └── 6B_lp_ucf101_f16.sh │ │ │ └── pretraining │ │ │ │ ├── 1B_pt.sh │ │ │ │ └── 6B_pt.sh │ │ │ └── utils.py │ ├── LICENSE │ └── README.md └── relay-policy-learning │ ├── .gitattributes │ ├── CONTRIBUTING.md │ ├── LICENSE │ ├── NOTICE │ ├── README.md │ ├── adept_envs │ ├── .pylintrc │ ├── .style.yapf │ └── adept_envs │ │ ├── __init__.py │ │ ├── base_robot.py │ │ ├── franka │ │ ├── __init__.py │ │ ├── assets │ │ │ └── franka_kitchen_jntpos_act_ab.xml │ │ ├── kitchen_multitask_v0.py │ │ └── robot │ │ │ ├── franka_config.xml │ │ │ └── franka_robot.py │ │ ├── mujoco_env.py │ │ ├── robot_env.py │ │ ├── simulation │ │ ├── module.py │ │ ├── renderer.py │ │ └── sim_robot.py │ │ └── utils │ │ ├── config.py │ │ ├── configurable.py │ │ ├── constants.py │ │ ├── parse_demos.py │ │ └── quatmath.py │ ├── adept_models │ ├── .gitignore │ ├── CONTRIBUTING.public.md │ ├── LICENSE │ ├── README.public.md │ ├── __init__.py │ ├── kitchen │ │ ├── assets │ │ │ ├── backwall_asset.xml │ │ │ ├── backwall_chain.xml │ │ │ ├── counters_asset.xml │ │ │ ├── counters_chain.xml │ │ │ ├── hingecabinet_asset.xml │ │ │ ├── hingecabinet_chain.xml │ │ │ ├── kettle_asset.xml │ │ │ ├── kettle_chain.xml │ │ │ ├── microwave_asset.xml │ │ │ ├── microwave_chain.xml │ │ │ ├── oven_asset.xml │ │ │ ├── oven_chain.xml │ │ │ ├── slidecabinet_asset.xml │ │ │ └── slidecabinet_chain.xml │ │ ├── counters.xml │ │ ├── hingecabinet.xml │ │ ├── kettle.xml │ │ ├── kitchen.xml │ │ ├── meshes │ │ │ ├── burnerplate.stl │ │ │ ├── burnerplate_mesh.stl │ │ │ ├── cabinetbase.stl │ │ │ ├── cabinetdrawer.stl │ │ │ ├── cabinethandle.stl │ │ │ ├── countertop.stl │ │ │ ├── faucet.stl │ │ │ ├── handle2.stl │ │ │ ├── hingecabinet.stl │ │ │ ├── hingedoor.stl │ │ │ ├── hingehandle.stl │ │ │ ├── hood.stl │ │ │ ├── kettle.stl │ │ │ ├── kettlehandle.stl │ │ │ ├── knob.stl │ │ │ ├── lightswitch.stl │ │ │ ├── lightswitchbase.stl │ │ │ ├── micro.stl │ │ │ ├── microbutton.stl │ │ │ ├── microdoor.stl │ │ │ ├── microefeet.stl │ │ │ ├── microfeet.stl │ │ │ ├── microhandle.stl │ │ │ ├── microwindow.stl │ │ │ ├── oven.stl │ │ │ ├── ovenhandle.stl │ │ │ ├── oventop.stl │ │ │ ├── 
ovenwindow.stl │ │ │ ├── slidecabinet.stl │ │ │ ├── slidedoor.stl │ │ │ ├── stoverim.stl │ │ │ ├── tile.stl │ │ │ └── wall.stl │ │ ├── microwave.xml │ │ ├── oven.xml │ │ ├── slidecabinet.xml │ │ └── textures │ │ │ ├── marble1.png │ │ │ ├── metal1.png │ │ │ ├── tile1.png │ │ │ └── wood1.png │ └── scenes │ │ ├── basic_scene.xml │ │ └── textures │ │ ├── white_marble_tile.png │ │ └── white_marble_tile2.png │ └── third_party │ └── franka │ ├── LICENSE │ ├── README.md │ ├── assets │ ├── actuator0.xml │ ├── actuator1.xml │ ├── assets.xml │ ├── basic_scene.xml │ ├── chain0.xml │ ├── chain0_overlay.xml │ ├── chain1.xml │ └── teleop_actuator.xml │ ├── bi-franka_panda.xml │ ├── franka_panda.png │ ├── franka_panda.xml │ ├── franka_panda_teleop.xml │ └── meshes │ ├── collision │ ├── finger.stl │ ├── hand.stl │ ├── link0.stl │ ├── link1.stl │ ├── link2.stl │ ├── link3.stl │ ├── link4.stl │ ├── link5.stl │ ├── link6.stl │ └── link7.stl │ └── visual │ ├── finger.stl │ ├── hand.stl │ ├── link0.stl │ ├── link1.stl │ ├── link2.stl │ ├── link3.stl │ ├── link4.stl │ ├── link5.stl │ ├── link6.stl │ └── link7.stl ├── tools ├── __init__.py ├── genrl_utils.py ├── logger.py ├── replay.py ├── task_scores.py └── utils.py ├── train.py └── train.yaml
/agent/dreamer.yaml:
--------------------------------------------------------------------------------
# @package agent
_target_: agent.dreamer.DreamerAgent
name: dreamer
cfg: ???
obs_space: ???
act_spec: ???
grad_heads: [decoder, reward]
reward_norm: {momentum: 1.0, scale: 1.0, eps: 1e-8}
actor_ent: 3e-4
--------------------------------------------------------------------------------
/agent/plan2explore.yaml:
--------------------------------------------------------------------------------
# @package agent
_target_: agent.plan2explore.Plan2Explore
name: plan2explore
cfg: ???
obs_space: ???
act_spec: ???
grad_heads: [decoder]
reward_norm: {momentum: 0.95, scale: 1.0, eps: 1e-8}
actor_ent: 0
--------------------------------------------------------------------------------
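Both agent YAMLs above are Hydra instantiation configs: _target_ names the class to build, and ??? marks mandatory values that must be supplied at run time. Below is a minimal, self-contained sketch of that mechanism; the Counter target is purely illustrative, and the commented agent call uses assumed names (env, cfg.agent), since the actual wiring lives in train.py.

# Sketch only (not repo code): how hydra.utils.instantiate resolves a
# _target_ config. Keys other than _target_ are passed as constructor kwargs.
import hydra.utils
from omegaconf import OmegaConf

conf = OmegaConf.create({"_target_": "collections.Counter", "red": 2})
obj = hydra.utils.instantiate(conf)  # -> Counter({'red': 2})
print(obj)

# For the agent configs, the ??? fields would be filled the same way via
# keyword arguments, e.g. (names assumed):
#   agent = hydra.utils.instantiate(cfg.agent, cfg=cfg,
#                                   obs_space=env.obs_space,
#                                   act_spec=env.act_spec)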
/assets/GenRL_cover.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/GenRL_cover.gif
--------------------------------------------------------------------------------
/assets/dashboard.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/dashboard.png
--------------------------------------------------------------------------------
/assets/stickman_run.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/stickman_run.gif
--------------------------------------------------------------------------------
/assets/video_samples/a_spider_walking_on_the_floor.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/a_spider_walking_on_the_floor.mp4
--------------------------------------------------------------------------------
/assets/video_samples/backflip.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/backflip.mp4
--------------------------------------------------------------------------------
/assets/video_samples/dancing.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/dancing.mp4
--------------------------------------------------------------------------------
/assets/video_samples/dead_spider_white.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/dead_spider_white.gif
--------------------------------------------------------------------------------
/assets/video_samples/dog_running_seen_from_the_side.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/dog_running_seen_from_the_side.mp4
--------------------------------------------------------------------------------
/assets/video_samples/doing_splits.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/doing_splits.mp4
--------------------------------------------------------------------------------
/assets/video_samples/flex.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/flex.mp4
--------------------------------------------------------------------------------
/assets/video_samples/guy_walking.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/guy_walking.gif
--------------------------------------------------------------------------------
/assets/video_samples/headstand.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/headstand.mp4
--------------------------------------------------------------------------------
/assets/video_samples/karate_kick.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/karate_kick.mp4
--------------------------------------------------------------------------------
/assets/video_samples/lying_down_with_legs_up.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/lying_down_with_legs_up.mp4
--------------------------------------------------------------------------------
/assets/video_samples/open_microwave.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/open_microwave.gif
--------------------------------------------------------------------------------
/assets/video_samples/person_standing_up_with_hands_up_seen_from_the_side.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/person_standing_up_with_hands_up_seen_from_the_side.mp4
--------------------------------------------------------------------------------
/assets/video_samples/punching.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/punching.mp4
--------------------------------------------------------------------------------
/assets/video_samples/spider_draw.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/assets/video_samples/spider_draw.gif
--------------------------------------------------------------------------------
/conf/env/dmc_pixels.yaml:
--------------------------------------------------------------------------------
# @package _global_
obs_type: pixels
action_repeat: 2
encoder: {mlp_keys: '$^', cnn_keys: 'observation', norm: layer, cnn_depth: 48, cnn_kernels: [4, 4, 4, 4], mlp_layers: [400, 400, 400, 400]} # act: elu
decoder: {mlp_keys: '$^', cnn_keys: 'observation', norm: layer, cnn_depth: 48, cnn_kernels: [5, 5, 6, 6], mlp_layers: [400, 400, 400, 400], } # act: elu
pred_discount: False
imag_actor_grad: dynamics
actor_grad: dynamics
--------------------------------------------------------------------------------
/conf/train_mode/train_behavior.yaml:
--------------------------------------------------------------------------------
num_train_frames: 500_010
batch_size: 32
batch_length: 32
agent.imag_reward_fn: video_text_reward
eval_modality: task_imag
--------------------------------------------------------------------------------
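In dmc_pixels.yaml, the mlp_keys/cnn_keys entries read as regular expressions that route observation keys to the MLP or CNN branch of the encoder and decoder; '$^' is a pattern that matches no key, so only the pixel key 'observation' is used. The sketch below assumes this DreamerV2-style convention; the repo's actual key filtering lives in agent/dreamer_utils.py and may differ in detail.

# Sketch (assumed convention, not repo code): selecting observation keys
# with the regex patterns from the encoder config above.
import re

obs = {"observation": "(64, 64, 3) pixels", "reward": "()", "is_first": "()"}
mlp_keys, cnn_keys = r"$^", r"observation"

cnn_inputs = [k for k in obs if re.match(cnn_keys, k)]  # ['observation']
mlp_inputs = [k for k in obs if re.match(mlp_keys, k)]  # [] -- '$^' matches nothing
print(cnn_inputs, mlp_inputs)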
/conf/train_mode/train_model.yaml:
--------------------------------------------------------------------------------
num_train_frames: 5_000_010
visual_every_frames: 250_000
train_world_model: True
train_connector: True
reset_world_model: True
reset_connector: True
--------------------------------------------------------------------------------
/data/stickman_example/1000-20240504T040956-d7ee0ea24b3e4863b1ef5e5bf1849924-501.npz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/data/stickman_example/1000-20240504T040956-d7ee0ea24b3e4863b1ef5e5bf1849924-501.npz
--------------------------------------------------------------------------------
/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/envs/__init__.py
--------------------------------------------------------------------------------
/envs/custom_dmc_tasks/__init__.py:
--------------------------------------------------------------------------------
from . import cheetah
from . import walker
from . import quadruped
from . import jaco
from . import stickman
from dm_control import suite

suite._DOMAINS['stickman'] = stickman
suite.ALL_TASKS = suite.ALL_TASKS + suite._get_tasks('custom')
suite.TASKS_BY_DOMAIN = suite._get_tasks_by_domain(suite.ALL_TASKS)

def make_jaco(task, obs_type, seed, img_size, ):
    return jaco.make(task, obs_type, seed, img_size, )
--------------------------------------------------------------------------------
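envs/custom_dmc_tasks/__init__.py registers the custom domains by mutating dm_control's suite at import time, so importing the package once makes e.g. the stickman domain loadable through the standard suite API. A hypothetical usage sketch follows (not a repo file; the task name "walk" is an assumption -- see stickman.py for the actual task list):

# Sketch: the import below runs the registration above as a side effect.
import envs.custom_dmc_tasks  # noqa: F401  (registers stickman/custom tasks)
from dm_control import suite

env = suite.load(domain_name="stickman", task_name="walk")  # task name assumed
time_step = env.reset()
print(sorted(time_step.observation.keys()))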
/test/pytest.ini:
--------------------------------------------------------------------------------
[pytest]
log_cli = 1
log_cli_level = INFO
log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s)
log_cli_date_format=%Y-%m-%d %H:%M:%S
--------------------------------------------------------------------------------
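The log_cli options above enable pytest's live logging: records at INFO or above are streamed to the terminal while each test runs, using the given format and date format. A self-contained test illustrating the effect (hypothetical example, not part of test/test_env.py):

# Run with pytest from a directory that picks up the ini above; the info
# line below is printed live, formatted like
# "2024-01-01 12:00:00 [    INFO] stepping the environment (demo.py:8)".
import logging

logger = logging.getLogger(__name__)

def test_live_logging_visible():
    logger.info("stepping the environment")
    assert True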
/third_party/InternVideo/.gitmodules:
--------------------------------------------------------------------------------
[submodule "InternVideo1/Pretrain/UniFormerV2"]
    path = InternVideo1/Pretrain/UniFormerV2
    url = https://github.com/OpenGVLab/UniFormerV2.git
[submodule "InternVideo1/Downstream/Ego-Tasks"]
    path = InternVideo1/Downstream/Ego-Tasks
    url = https://github.com/OpenGVLab/ego4d-eccv2022-solutions.git
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/example1.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/example1.mp4
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/start_annotation_prototype.sh:
--------------------------------------------------------------------------------
unset http_proxy; unset https_proxy; unset HTTP_PROXY; unset HTTPS_PROXY
JOB_NAME='data-annotate_check'
OUTPUT_DIR="$(dirname $0)/$JOB_NAME"
LOG_DIR="$(dirname $0)/logs/${JOB_NAME}"
PARTITION='Video-aigc-general'
NNODE=1
NUM_GPUS=1
NUM_CPU=16

srun -p ${PARTITION} \
    --job-name=${JOB_NAME} \
    -n${NNODE} \
    --gres=gpu:${NUM_GPUS} \
    --ntasks-per-node=1 \
    --cpus-per-task=${NUM_CPU} \
    jupyter lab --ip=0.0.0.0
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/utils/distributed.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/utils/distributed.py
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/utils/logger.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/utils/logger.py
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/utils/scheduler.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/utils/scheduler.py
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/InternVid/viclip/bpe_simple_vocab_16e6.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/InternVid/viclip/bpe_simple_vocab_16e6.txt.gz
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/instruction_data/assert/conversation.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/instruction_data/assert/conversation.png
--------------------------------------------------------------------------------
/third_party/InternVideo/Data/instruction_data/assert/detailed_description.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/Data/instruction_data/assert/detailed_description.png
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/configs/recognition/omnisource/pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/configs/recognition/omnisource/pipeline.png
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/acc_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/acc_curve.png
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/data_pipeline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/data_pipeline.png
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_logo.png
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_overview.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/docs/imgs/mmaction2_overview.gif
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/evaluate_csn_dnn_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/csn/finetune_ucf101_csn_dnn.py \
    work_dirs/csn/finetune_ucf101_csn_dnn/latest.pth \
    --videos_per_gpu 1 \
    --out work_dirs/csn/test_ucf101_csn_dnn.pkl \
    --eval top_k_accuracy mean_class_accuracy

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/evaluate_csn_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/csn/finetune_ucf101_csn_edlnokl_avuc_debias.py \
    work_dirs/csn/finetune_ucf101_csn_edlnokl_avuc_debias/latest.pth \
    --videos_per_gpu 1 \
    --out work_dirs/csn/test_ucf101_csn_edlnokl_avuc_debias.pkl \
    --eval top_k_accuracy mean_class_accuracy

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/finetune_csn_dnn_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

# --validate
CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/csn/finetune_ucf101_csn_dnn.py \
    --work-dir work_dirs/csn/finetune_ucf101_csn_dnn \
    --seed 0 \
    --deterministic \
    --gpu-ids 0 \
    --validate

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/csn/finetune_csn_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

# --validate
CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/csn/finetune_ucf101_csn_edlnokl_avuc_debias.py \
    --work-dir work_dirs/csn/finetune_ucf101_csn_edlnokl_avuc_debias \
    --seed 0 \
    --deterministic \
    --gpu-ids 0 \
    --validate

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_bnn_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_bnn.py \
    work_dirs/i3d/finetune_ucf101_i3d_bnn/latest.pth \
    --out work_dirs/i3d/test_ucf101_i3d_bnn.pkl \
    --eval top_k_accuracy mean_class_accuracy

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_dnn_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_dnn.py \
    work_dirs/i3d/finetune_ucf101_i3d_dnn/latest.pth \
    --out work_dirs/i3d/test_ucf101_i3d_dnn.pkl \
    --eval top_k_accuracy mean_class_accuracy

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias.py \
    work_dirs/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias/latest.pth \
    --out work_dirs/i3d/test_ucf101_i3d_edlnokl_avuc_debias.pkl \
    --eval top_k_accuracy mean_class_accuracy

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_edlnokl_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl.py \
    work_dirs/i3d/finetune_ucf101_i3d_edlnokl/latest.pth \
    --out work_dirs/i3d/test_ucf101_i3d_edlnokl.pkl \
    --eval top_k_accuracy mean_class_accuracy

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/evaluate_i3d_rpl_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/i3d/finetune_ucf101_i3d_rpl.py \
    work_dirs/i3d/finetune_ucf101_i3d_rpl/latest.pth \
    --out work_dirs/i3d/test_ucf101_i3d_rpl.pkl \
    --eval top_k_accuracy mean_class_accuracy

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_bnn_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_bnn.py \
    --work-dir work_dirs/i3d/finetune_ucf101_i3d_bnn \
    --validate \
    --seed 0 \
    --deterministic \
    --gpu-ids 0

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_dnn_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_dnn.py \
    --work-dir work_dirs/i3d/finetune_ucf101_i3d_dnn \
    --validate \
    --seed 0 \
    --deterministic \
    --gpu-ids 0

cd $pwd_dir
echo "Experiments finished!"
--------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_edlnokl_avuc_debias_ucf101.sh:
--------------------------------------------------------------------------------
#!/bin/bash

pwd_dir=$(pwd)
cd ../../

source activate mmaction

# --validate
CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias.py \
    --work-dir work_dirs/i3d/finetune_ucf101_i3d_edlnokl_avuc_debias \
    --validate \
    --seed 0 \
    --deterministic \
    --gpu-ids 0

cd $pwd_dir
echo "Experiments finished!"
18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_edlnokl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_edlnokl.py \ 10 | --work-dir work_dirs/i3d/finetune_ucf101_i3d_edlnokl \ 11 | --validate \ 12 | --seed 0 \ 13 | --deterministic \ 14 | --gpu-ids 0 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/finetune_i3d_rpl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/finetune_ucf101_i3d_rpl.py \ 9 | --work-dir work_dirs/i3d/finetune_ucf101_i3d_rpl \ 10 | --validate \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 14 | 15 | cd $pwd_dir 16 | echo "Experiments finished!" 17 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/run_draw_confmat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export CUDA_HOME='/usr/local/cuda' 4 | 5 | pwd_dir=$pwd 6 | cd ../../ 7 | 8 | source activate mmaction 9 | 10 | OOD_DATA=$1 # HMDB or MiT 11 | RESULT_PATH="experiments/i3d/results" 12 | 13 | # Confusion Matrix comparison 14 | python experiments/draw_confusion_matrix.py \ 15 | --ood_result ${RESULT_PATH}/I3D_EDLNoKLAvUCDebias_EDL_${OOD_DATA}_result.npz \ 16 | --uncertain_thresh 0.004550 \ 17 | --save_file ${RESULT_PATH}/../results_confmat/I3D_DEAR_${OOD_DATA}_ConfMat.png 18 | 19 | cd $pwd_dir 20 | echo "Experiments finished!" -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/train_i3d_DEAR_kinetics10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/train_kinetics10_i3d_DEAR.py \ 10 | --work-dir work_dirs/i3d/train_kinetics10_i3d_DEAR \ 11 | --validate \ 12 | --seed 0 \ 13 | --deterministic \ 14 | --gpu-ids 0 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 
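# --- Editor's comments, not part of the upstream script ---
# Judging by the config and result-file names elsewhere in this tree
# (e.g. I3D_EDLNoKLAvUCDebias_* results saved as I3D_DEAR_* in
# run_draw_confmat.sh), this script trains the full DEAR model (evidential
# head with no-KL EDL loss, AvU calibration, and debiasing) on the
# Kinetics-10 subset, while the *_noDebias companion that follows ablates
# only the debiasing module. As in the other scripts, the GPU id is the
# sole positional argument:
#   bash train_i3d_DEAR_kinetics10.sh 0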
18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/i3d/train_i3d_DEAR_noDebias_kinetics10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/i3d/train_kinetics10_i3d_DEAR_noDebias.py \ 10 | --work-dir work_dirs/i3d/train_kinetics10_i3d_DEAR_noDebias \ 11 | --validate \ 12 | --seed 0 \ 13 | --deterministic \ 14 | --gpu-ids 0 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/Makefile: -------------------------------------------------------------------------------- 1 | CXX= g++ 2 | SRC= MetaRecognition.cpp weibull.c 3 | 4 | libmr: $(SRC) weibull.h malloc.h MetaRecognition.h 5 | $(CXX) -o libmr $(SRC) -I. 6 | 7 | clean: 8 | rm -f *~ *.o libmr -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/compile.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "----- Removing previously compiled libmr.so -----\n" 4 | rm -r build 5 | rm *.model 6 | rm libmr.so 7 | rm *.dump 8 | rm ../libmr.so 9 | 10 | echo "----- Building and compiling libmr ------- \n" 11 | python setup.py build_ext -i 12 | # cp libmr.so ../ 13 | 14 | # echo "----- Completed Compiling libmr -------- \n" 15 | # echo "Now trying python -c \"import libmr\"" 16 | # python test_libmr.py 17 | # echo "----- Compiling Done. Now import *.so file in your application -----\n" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/libmr.c: -------------------------------------------------------------------------------- 1 | #error Do not use this file, it is the result of a failed Cython compilation. 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/libMR/libmr.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "MetaRecognition.h": 2 | cdef struct svm_node_libsvm: 3 | int index 4 | double value -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/mae/finetune_mae_edlnokl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | pwd_dir=$pwd 3 | cd ../../ 4 | GPUS=$1 5 | PORT=${PORT:-29498} 6 | 7 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT tools/train.py configs/recognition/mae/finetune_ucf101_mae_edlnokl.py \ 8 | --work-dir work_dirs/mae/ky \ 9 | --validate \ 10 | --seed 0 \ 11 | --deterministic \ 12 | --launcher pytorch \ 13 | 14 | cd $pwd_dir 15 | echo "Experiments finished!" 
16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_bnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_bnn.py \ 9 | work_dirs/slowfast/finetune_ucf101_slowfast_bnn/latest.pth \ 10 | --out work_dirs/slowfast/test_ucf101_slowfast_bnn.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_dnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_dnn.py \ 9 | work_dirs/slowfast/finetune_ucf101_slowfast_dnn/latest.pth \ 10 | --out work_dirs/slowfast/test_ucf101_slowfast_dnn.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_edlnokl_avuc_debias_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias.py \ 9 | work_dirs/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias/latest.pth \ 10 | --out work_dirs/slowfast/test_ucf101_slowfast_edlnokl_avuc_debias.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/evaluate_slowfast_rpl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/slowfast/finetune_ucf101_slowfast_rpl.py \ 9 | work_dirs/slowfast/finetune_ucf101_slowfast_rpl/latest.pth \ 10 | --out work_dirs/slowfast/test_ucf101_slowfast_rpl.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 
15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_bnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_bnn.py \ 9 | --work-dir work_dirs/slowfast/finetune_ucf101_slowfast_bnn \ 10 | --validate \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 14 | 15 | cd $pwd_dir 16 | echo "Experiments finished!" 17 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_dnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_dnn.py \ 10 | --work-dir work_dirs/slowfast/finetune_ucf101_slowfast_dnn \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_edlnokl_avuc_debias_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias.py \ 10 | --work-dir work_dirs/slowfast/finetune_ucf101_slowfast_edlnokl_avuc_debias \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/slowfast/finetune_slowfast_rpl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/slowfast/finetune_ucf101_slowfast_rpl.py \ 10 | --work-dir work_dirs/slowfast/finetune_ucf101_slowfast_rpl \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 
18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_bnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_bnn_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_bnn/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_bnn.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_celoss_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_celoss_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_celoss/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_celoss.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_avuc_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_avuc_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_avuc/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_avuc.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_nokl_avuc_debias_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 
15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_nokl_avuc_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_nokl_avuc.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_nokl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss_nokl.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_edlloss_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_edlloss.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/evaluate_tpn_rpl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/finetune_ucf101_tpn_slowonly_rpl.py \ 9 | work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_rpl/latest.pth \ 10 | --out work_dirs/tpn_slowonly/test_ucf101_tpn_slowonly_rpl.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 
15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_bnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_bnn_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_bnn \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_celoss_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_celoss_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_celoss \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_avuc_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_avuc_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_avuc \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_nokl_avuc_debias_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 
18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_nokl_avuc_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl_avuc \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_nokl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss_nokl \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_edlloss_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/tpn_slowonly_edlloss_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_edlloss \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/finetune_tpn_rpl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tpn/finetune_ucf101_tpn_slowonly_rpl.py \ 10 | --work-dir work_dirs/tpn_slowonly/finetune_ucf101_tpn_slowonly_rpl \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 
18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/searchw_evaluate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \ 9 | work_dirs/tpn_slowonly/search_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias/search_$2/latest.pth \ 10 | --out work_dirs/tpn_slowonly/search_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias/test_$2.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tpn_slowonly/searchw_finetune.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/hypertune.py configs/recognition/tpn/tpn_slowonly_edlloss_nokl_avuc_debias_r50_8x8x1_150e_kinetics_rgb.py \ 10 | --work-dir work_dirs/tpn_slowonly/search_ucf101_tpn_slowonly_edlloss_nokl_avuc_debias \ 11 | --weight_factor $2 $3 \ 12 | --seed 0 \ 13 | --deterministic \ 14 | --gpu-ids 0 \ 15 | --validate 16 | 17 | cd $pwd_dir 18 | echo "Experiments finished!" 19 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_bnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_bnn.py \ 9 | work_dirs/tsm/finetune_ucf101_tsm_bnn/latest.pth \ 10 | --out work_dirs/tsm/test_ucf101_tsm_bnn.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_dnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_dnn.py \ 9 | work_dirs/tsm/finetune_ucf101_tsm_dnn/latest.pth \ 10 | --out work_dirs/tsm/test_ucf101_tsm_dnn.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 
15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_edlnokl_avuc_debias_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias.py \ 9 | work_dirs/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias/latest.pth \ 10 | --out work_dirs/tsm/test_ucf101_tsm_edlnokl_avuc_debias.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/evaluate_tsm_rpl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/test.py configs/recognition/tsm/finetune_ucf101_tsm_rpl.py \ 9 | work_dirs/tsm/finetune_ucf101_tsm_rpl/latest.pth \ 10 | --out work_dirs/tsm/test_ucf101_tsm_rpl.pkl \ 11 | --eval top_k_accuracy mean_class_accuracy 12 | 13 | cd $pwd_dir 14 | echo "Experiments finished!" 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_bnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_bnn.py \ 9 | --work-dir work_dirs/tsm/finetune_ucf101_tsm_bnn \ 10 | --validate \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 14 | 15 | cd $pwd_dir 16 | echo "Experiments finished!" 17 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_dnn_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_dnn.py \ 10 | --work-dir work_dirs/tsm/finetune_ucf101_tsm_dnn \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_edlnokl_avuc_debias_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias.py \ 10 | --work-dir work_dirs/tsm/finetune_ucf101_tsm_edlnokl_avuc_debias \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 
18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/finetune_tsm_rpl_ucf101.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/finetune_ucf101_tsm_rpl.py \ 10 | --work-dir work_dirs/tsm/finetune_ucf101_tsm_rpl \ 11 | --seed 0 \ 12 | --deterministic \ 13 | --gpu-ids 0 \ 14 | --validate 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/run_draw_confmat.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | export CUDA_HOME='/usr/local/cuda' 4 | 5 | pwd_dir=$pwd 6 | cd ../../ 7 | 8 | source activate mmaction 9 | 10 | OOD_DATA=$1 # HMDB or MiT 11 | RESULT_PATH="experiments/tsm/results" 12 | 13 | # Confusion Matrix comparison 14 | python experiments/draw_confusion_matrix.py \ 15 | --ood_result ${RESULT_PATH}/TSM_EDLNoKLAvUCDebias_EDL_${OOD_DATA}_result.npz \ 16 | --uncertain_thresh 0.004549 \ 17 | --save_file ${RESULT_PATH}/../results_confmat/TSM_DEAR_${OOD_DATA}_ConfMat.png 18 | 19 | cd $pwd_dir 20 | echo "Experiments finished!" -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/train_tsm_DEAR_kinetics10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/train_kinetics10_tsm_DEAR.py \ 10 | --work-dir work_dirs/tsm/train_kinetics10_tsm_DEAR \ 11 | --validate \ 12 | --seed 0 \ 13 | --deterministic \ 14 | --gpu-ids 0 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/experiments/tsm/train_tsm_DEAR_noDebias_kinetics10.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pwd_dir=$pwd 4 | cd ../../ 5 | 6 | source activate mmaction 7 | 8 | # --validate 9 | CUDA_VISIBLE_DEVICES=$1 python tools/train.py configs/recognition/tsm/train_kinetics10_tsm_DEAR_noDebias.py \ 10 | --work-dir work_dirs/tsm/train_kinetics10_tsm_DEAR_noDebias \ 11 | --validate \ 12 | --seed 0 \ 13 | --deterministic \ 14 | --gpu-ids 0 15 | 16 | cd $pwd_dir 17 | echo "Experiments finished!" 
18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/__init__.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | from mmcv import digit_version 3 | 4 | from .version import __version__ 5 | 6 | mmcv_minimum_version = '1.1.1' 7 | mmcv_maximum_version = '1.3' 8 | mmcv_version = digit_version(mmcv.__version__) 9 | 10 | assert (digit_version(mmcv_minimum_version) <= mmcv_version 11 | <= digit_version(mmcv_maximum_version)), \ 12 | f'MMCV=={mmcv.__version__} is used but incompatible. ' \ 13 | f'Please install mmcv>={mmcv_minimum_version}, <={mmcv_maximum_version}.' 14 | 15 | __all__ = ['__version__'] 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/apis/__init__.py: -------------------------------------------------------------------------------- 1 | from .inference import inference_recognizer, init_recognizer 2 | from .test import multi_gpu_test, single_gpu_test, collect_results_cpu 3 | from .train import train_model 4 | 5 | __all__ = [ 6 | 'train_model', 'init_recognizer', 'inference_recognizer', 'multi_gpu_test', 7 | 'single_gpu_test', 'collect_results_cpu' 8 | ] 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .evaluation import * # noqa: F401, F403 2 | from .lr import * # noqa: F401, F403 3 | from .optimizer import * # noqa: F401, F403 4 | from .runner import * # noqa: F401, F403 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/lr/__init__.py: -------------------------------------------------------------------------------- 1 | from .tin_lr_hook import TINLrUpdaterHook 2 | 3 | __all__ = ['TINLrUpdaterHook'] 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/optimizer/__init__.py: -------------------------------------------------------------------------------- 1 | from .copy_of_sgd import CopyOfSGD 2 | from .tsm_optimizer_constructor import TSMOptimizerConstructor 3 | 4 | __all__ = ['CopyOfSGD', 'TSMOptimizerConstructor'] 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/optimizer/copy_of_sgd.py: -------------------------------------------------------------------------------- 1 | from mmcv.runner import OPTIMIZERS 2 | from torch.optim import SGD 3 | 4 | 5 | @OPTIMIZERS.register_module() 6 | class CopyOfSGD(SGD): 7 | """A clone of torch.optim.SGD. 8 | 9 | A customized optimizer could be defined like CopyOfSGD. You may derive from 10 | built-in optimizers in torch.optim, or directly implement a new optimizer. 
11 | """ 12 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/core/runner/__init__.py: -------------------------------------------------------------------------------- 1 | from .omnisource_runner import OmniSourceDistSamplerSeedHook, OmniSourceRunner 2 | from .annealing_runner import AnnealingRunner 3 | 4 | __all__ = ['OmniSourceRunner', 'OmniSourceDistSamplerSeedHook', 'AnnealingRunner'] 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/datasets/registry.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry 2 | 3 | DATASETS = Registry('dataset') 4 | PIPELINES = Registry('pipeline') 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/datasets/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed_sampler import DistributedPowerSampler, DistributedSampler 2 | 3 | __all__ = ['DistributedSampler', 'DistributedPowerSampler'] 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/localization/__init__.py: -------------------------------------------------------------------------------- 1 | from .bsn_utils import generate_bsp_feature, generate_candidate_proposals 2 | from .proposal_utils import soft_nms, temporal_iop, temporal_iou 3 | from .ssn_utils import (eval_ap, load_localize_proposal_file, 4 | perform_regression, temporal_nms) 5 | 6 | __all__ = [ 7 | 'generate_candidate_proposals', 'generate_bsp_feature', 'temporal_iop', 8 | 'temporal_iou', 'soft_nms', 'load_localize_proposal_file', 9 | 'perform_regression', 'temporal_nms', 'eval_ap' 10 | ] 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/common/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv2plus1d import Conv2plus1d 2 | from .conv_audio import ConvAudio 3 | 4 | __all__ = ['Conv2plus1d', 'ConvAudio'] 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/localizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import BaseLocalizer 2 | from .bmn import BMN 3 | from .bsn import PEM, TEM 4 | from .ssn import SSN 5 | 6 | __all__ = ['PEM', 'TEM', 'BMN', 'SSN', 'BaseLocalizer'] 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/localizers/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .post_processing import post_processing 2 | 3 | __all__ = ['post_processing'] 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/necks/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .tpn import TPN 2 | 3 | __all__ = ['TPN'] 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/recognizers/__init__.py: -------------------------------------------------------------------------------- 1 | from .audio_recognizer import AudioRecognizer 2 | from .base import BaseRecognizer 3 | from .recognizer2d import Recognizer2D 4 | from .recognizer3d import Recognizer3D 5 | from .recognizer2d_bnn import Recognizer2DBNN 6 | from .recognizer3d_bnn import Recognizer3DBNN 7 | from .recognizer2d_rpl import Recognizer2DRPL 8 | from .recognizer3d_rpl import Recognizer3DRPL 9 | 10 | __all__ = ['BaseRecognizer', 'Recognizer2D', 'Recognizer3D', 'Recognizer2DBNN', 'Recognizer3DBNN', 'Recognizer2DRPL', 'Recognizer3DRPL', 'AudioRecognizer'] 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/models/registry.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import Registry 2 | 3 | BACKBONES = Registry('backbone') 4 | NECKS = Registry('neck') 5 | HEADS = Registry('head') 6 | RECOGNIZERS = Registry('recognizer') 7 | LOSSES = Registry('loss') 8 | LOCALIZERS = Registry('localizer') 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .collect_env import collect_env 2 | from .gradcam_utils import GradCAM 3 | from .logger import get_root_logger 4 | from .misc import get_random_string, get_shm_dir, get_thread_id 5 | 6 | __all__ = [ 7 | 'get_root_logger', 'collect_env', 'get_random_string', 'get_thread_id', 8 | 'get_shm_dir', 'GradCAM' 9 | ] 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/utils/collect_env.py: -------------------------------------------------------------------------------- 1 | from mmcv.utils import collect_env as collect_basic_env 2 | from mmcv.utils import get_git_hash 3 | 4 | import mmaction 5 | 6 | 7 | def collect_env(): 8 | env_info = collect_basic_env() 9 | env_info['MMAction2'] = ( 10 | mmaction.__version__ + '+' + get_git_hash(digits=7)) 11 | return env_info 12 | 13 | 14 | if __name__ == '__main__': 15 | for name, val in collect_env().items(): 16 | print(f'{name}: {val}') 17 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/mmaction/version.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Open-MMLab. All rights reserved. 
2 | 3 | __version__ = '0.9.0' 4 | 5 | 6 | def parse_version_info(version_str): 7 | version_info = [] 8 | for x in version_str.split('.'): 9 | if x.isdigit(): 10 | version_info.append(int(x)) 11 | elif x.find('rc') != -1: 12 | patch_version = x.split('rc') 13 | version_info.append(int(patch_version[0])) 14 | version_info.append(f'rc{patch_version[1]}') 15 | return tuple(version_info) 16 | 17 | 18 | version_info = parse_version_info(__version__) 19 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements.txt: -------------------------------------------------------------------------------- 1 | -r requirements/build.txt 2 | -r requirements/optional.txt 3 | -r requirements/runtime.txt 4 | -r requirements/tests.txt 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/build.txt: -------------------------------------------------------------------------------- 1 | # These must be installed before building mmaction2 2 | numpy 3 | torch>=1.3 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/docs.txt: -------------------------------------------------------------------------------- 1 | recommonmark 2 | sphinx 3 | sphinx_markdown_tables 4 | sphinx_rtd_theme 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/optional.txt: -------------------------------------------------------------------------------- 1 | av 2 | decord >= 0.4.1 3 | moviepy 4 | onnx 5 | onnxruntime 6 | PyTurboJPEG 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/readthedocs.txt: -------------------------------------------------------------------------------- 1 | mmcv 2 | torch 3 | torchvision 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/runtime.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | numpy 3 | opencv-contrib-python 4 | Pillow 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/requirements/tests.txt: -------------------------------------------------------------------------------- 1 | coverage 2 | flake8 3 | interrogate 4 | isort==4.3.21 5 | pytest 6 | pytest-runner 7 | xdoctest >= 0.10.0 8 | yapf 9 | terminaltables==3.1.0 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/audio_feature_test_list.txt: -------------------------------------------------------------------------------- 1 | test 100 127 2 | test 100 127 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/audio_test_list.txt: -------------------------------------------------------------------------------- 1 | test.wav 100 127 2 | test.wav 100 
127 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/frame_test_list.txt: -------------------------------------------------------------------------------- 1 | test_imgs 5 127 2 | test_imgs 5 127 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/frame_test_list_multi_label.txt: -------------------------------------------------------------------------------- 1 | test_imgs 5 1 2 | test_imgs 5 3 5 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/frame_test_list_with_offset.txt: -------------------------------------------------------------------------------- 1 | test_imgs 2 5 127 2 | test_imgs 2 5 127 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/hvu_video_eval_test_anno.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "filename":"test.mp4", 4 | "label":{ 5 | "action": [2], 6 | "scene": [2], 7 | "object": [1] 8 | } 9 | }, 10 | { 11 | "filename":"test.avi", 12 | "label":{ 13 | "action": [1], 14 | "scene": [1], 15 | "object": [2] 16 | } 17 | } 18 | ] 19 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/hvu_video_test_anno.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "filename":"tmp.mp4", 4 | "label":{ 5 | "concept":[250, 131, 42, 51, 57, 155, 122], 6 | "object":[1570, 508], 7 | "event":[16], 8 | "action":[180], 9 | "scene":[206] 10 | } 11 | }, 12 | { 13 | "filename":"tmp.mp4", 14 | "label":{ 15 | "concept":[250, 131, 42, 51, 57, 155, 122], 16 | "object":[1570, 508], 17 | "event":[16], 18 | "action":[180], 19 | "scene":[206] 20 | } 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/proposal_normalized_list.txt: -------------------------------------------------------------------------------- 1 | # 0 2 | test_imgs 3 | 5 4 | 1 5 | 2 6 | 3 0.2000 0.4000 7 | 3 0.6000 1.0000 8 | 10 9 | 3 1.0000 1.0000 0.2000 0.4000 10 | 3 0.5000 0.5000 0.2000 0.6000 11 | 3 0.3333 0.3333 0.2000 0.8000 12 | 3 0.5000 0.5000 0.2000 1.0000 13 | 3 0.0000 0.0000 0.4000 0.6000 14 | 3 0.3333 0.5000 0.4000 0.8000 15 | 3 0.6666 0.6666 0.4000 1.0000 16 | 3 0.5000 1.0000 0.6000 0.8000 17 | 3 1.0000 1.0000 0.6000 1.0000 18 | 3 0.5000 1.0000 0.8000 1.0000 19 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/proposal_test_list.txt: -------------------------------------------------------------------------------- 1 | # 0 2 | test_imgs 3 | 5 4 | 1 5 | 2 6 | 3 1 2 7 | 3 3 5 8 | 10 9 | 3 1.0000 1.0000 1 2 10 | 3 0.5000 0.5000 1 3 11 | 3 0.3333 0.3333 1 4 12 | 3 0.5000 0.5000 1 5 13 | 3 0.0000 0.0000 2 3 14 | 3 0.3333 0.5000 2 4 15 | 3 0.6666 0.6666 2 5 16 | 3 0.5000 1.0000 3 4 17 | 3 1.0000 1.0000 3 5 18 | 3 0.5000 1.0000 4 5 19 | 
-------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/rawvideo_test_anno.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "video_dir":"test_rawvideo_dataset", 4 | "label":1, 5 | "num_clips":2, 6 | "positive_clip_inds":[0] 7 | } 8 | ] 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/rawvideo_test_anno.txt: -------------------------------------------------------------------------------- 1 | test_rawvideo_dataset 1 2 0 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.mp4 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test.wav -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_ava_dataset/ava_excluded_timestamps_sample.csv: -------------------------------------------------------------------------------- 1 | 0f39OWEqJ24,0903 2 | _-Z6wFjXtGQ,0902 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_ava_dataset/ava_sample.csv: -------------------------------------------------------------------------------- 1 | 0f39OWEqJ24,0902,0.031,0.162,0.670,0.995,12,0 2 | 0f39OWEqJ24,0902,0.031,0.162,0.670,0.995,17,0 3 | 0f39OWEqJ24,0902,0.031,0.162,0.670,0.995,79,0 4 | 0f39OWEqJ24,0903,0.034,0.189,0.669,0.980,12,0 5 | 0f39OWEqJ24,0903,0.034,0.189,0.669,0.980,17,0 6 | _-Z6wFjXtGQ,0902,0.063,0.049,0.524,0.996,12,0 7 | _-Z6wFjXtGQ,0902,0.063,0.049,0.524,0.996,74,0 8 | _-Z6wFjXtGQ,0902,0.063,0.049,0.524,0.996,80,0 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_bsp_features/v_test1.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_bsp_features/v_test1.npy -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00001.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00002.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00003.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00004.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00005.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00006.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00006.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00007.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00007.jpg 
-------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00008.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00008.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00009.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00009.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00010.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/img_00010.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00001.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00002.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00003.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00004.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00005.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/x_00005.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00001.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00001.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00002.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00003.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00004.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00004.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00005.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_imgs/y_00005.jpg -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_proposals/v_test1.csv: -------------------------------------------------------------------------------- 1 | tmin,tmax,tmin_score,tmax_score,score,match_iou,match_ioa 2 | 0.1,0.2,0.95,0.96,0.97,0.85,0.84 3 | 0.2,0.3,0.94,0.95,0.96,0.84,0.83 4 | 0.3,0.4,0.93,0.94,0.95,0.83,0.82 5 | 0.4,0.5,0.92,0.93,0.94,0.82,0.81 6 | 0.5,0.6,0.91,0.92,0.93,0.81,0.80 7 | 0.6,0.7,0.90,0.91,0.92,0.80,0.79 8 | 0.5,0.7,0.90,0.91,0.92,0.80,0.79 9 | 0.6,0.8,0.90,0.91,0.92,0.80,0.79 10 | 0.4,0.7,0.90,0.91,0.92,0.80,0.79 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_proposals/v_test2.csv: 
-------------------------------------------------------------------------------- 1 | tmin,tmax,tmin_score,tmax_score,score,match_iou,match_ioa 2 | 0.1,0.2,0.95,0.96,0.97,0.75,0.74 3 | 0.2,0.3,0.94,0.95,0.96,0.74,0.73 4 | 0.3,0.4,0.93,0.94,0.95,0.73,0.72 5 | 0.4,0.5,0.92,0.93,0.94,0.72,0.71 6 | 0.5,0.6,0.91,0.92,0.93,0.71,0.70 7 | 0.6,0.7,0.90,0.91,0.92,0.70,0.79 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_0.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_0.mp4 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/test_rawvideo_dataset/part_1.mp4 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tests/data/video_test_list.txt: -------------------------------------------------------------------------------- 1 | test.mp4 0 2 | test.mp4 0 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/activitynet/download_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # set up environment 4 | conda env create -f environment.yml 5 | source activate activitynet 6 | pip install --upgrade youtube-dl 7 | 8 | DATA_DIR="../../../data/ActivityNet" 9 | python download.py 10 | 11 | source deactivate activitynet 12 | conda remove -n activitynet --all 13 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/activitynet/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | cd ../ 3 | python build_rawframes.py ../../data/ActivityNet/videos/ ../../data/ActivityNet/rawframes/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-width 340 --new-height 256 4 | echo "Raw frames (RGB and tv-l1) Generated for train set" 5 | 6 | cd activitynet/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/download_annotations.sh: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | VERSION=${VERSION:-"2.1"} 6 | DATA_DIR="../../../data/ava/annotations" 7 | 8 | if [[ ! -d "${DATA_DIR}" ]]; then 9 | echo "${DATA_DIR} does not exist. Creating"; 10 | mkdir -p ${DATA_DIR} 11 | fi 12 | 13 | wget https://research.google.com/ava/download/ava_v${VERSION}.zip 14 | unzip -j ava_v${VERSION}.zip -d ${DATA_DIR}/ 15 | rm ava_v${VERSION}.zip 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/download_videos_parallel.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | DATA_DIR="../../../data/ava/videos" 6 | ANNO_DIR="../../../data/ava/annotations" 7 | 8 | if [[ ! -d "${DATA_DIR}" ]]; then 9 | echo "${DATA_DIR} does not exist. Creating"; 10 | mkdir -p ${DATA_DIR} 11 | fi 12 | 13 | wget https://s3.amazonaws.com/ava-dataset/annotations/ava_file_names_trainval_v2.1.txt -P ${ANNO_DIR} 14 | 15 | python download_videos_parallel.py ${ANNO_DIR}/ava_file_names_trainval_v2.1.txt ${DATA_DIR} 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/ava/videos_15min/ ../../data/ava/rawframes/ --task both --level 1 --flow-type tvl1 --mixed-ext 5 | echo "Raw frames (RGB and Flow) Generated" 6 | cd ava/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/ava/videos_15min/ ../../data/ava/rawframes/ --task rgb --level 1 --mixed-ext 5 | echo "Generate raw frames (RGB only)" 6 | 7 | cd ava/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/extract_rgb_frames_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/ava/videos_15min/ ../../data/ava/rawframes/ --task rgb --level 1 --use-opencv --mixed-ext 5 | echo "Generate raw frames (RGB only)" 6 | 7 | cd ava/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ava/fetch_ava_proposals.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | DATA_DIR="../../../data/ava/" 6 | 7 | wget https://download.openmmlab.com/mmaction/dataset/ava/ava_dense_proposals_train.FAIR.recall_93.9.pkl -P ${DATA_DIR} 8 | wget https://download.openmmlab.com/mmaction/dataset/ava/ava_dense_proposals_val.FAIR.recall_93.9.pkl -P ${DATA_DIR} 9 | wget https://download.openmmlab.com/mmaction/dataset/ava/ava_dense_proposals_test.FAIR.recall_93.9.pkl -P ${DATA_DIR} 10 |
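The AVA annotations these scripts download (see ava_sample.csv earlier in this dump) are flat CSV rows of the form video_id, keyframe timestamp, normalized person box (x1, y1, x2, y2), action_id, person_id, with one row per box-action pair. A small reading sketch that groups rows by keyframe; the column order follows the public AVA v2.1 format, and the helper itself is illustrative rather than part of this repo:

import csv
from collections import defaultdict

# Sketch: group AVA-style rows by (video_id, timestamp). Box coordinates
# are normalized to [0, 1]; one row exists per box-action pair.
def load_ava_csv(path):
    anns = defaultdict(list)
    with open(path) as f:
        for vid, ts, x1, y1, x2, y2, action, person in csv.reader(f):
            box = tuple(map(float, (x1, y1, x2, y2)))
            anns[(vid, int(ts))].append(
                dict(box=box, action_id=int(action), person_id=int(person)))
    return anns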
-------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/gym/download_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # set up environment 4 | conda env create -f environment.yml 5 | source activate gym 6 | pip install --upgrade youtube-dl 7 | 8 | DATA_DIR="../../../data/gym" 9 | ANNO_DIR="../../../data/gym/annotations" 10 | python download_ytdl.py ${ANNO_DIR}/annotation.json ${DATA_DIR}/videos 11 | 12 | source deactivate gym 13 | conda remove -n gym --all 14 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/gym/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/gym/subactions/ ../../data/gym/subaction_frames/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256 5 | echo "Raw frames (RGB and tv-l1) Generated" 6 | 7 | cd gym/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/hmdb51/videos/ ../../data/hmdb51/rawframes/ --task both --level 2 --flow-type tvl1 5 | echo "Raw frames (RGB and Flow) Generated" 6 | cd hmdb51/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/hmdb51/videos/ ../../data/hmdb51/rawframes/ --task rgb --level 2 --ext avi 5 | echo "Generate raw frames (RGB only)" 6 | 7 | cd hmdb51/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/extract_rgb_frames_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/hmdb51/videos/ ../../data/hmdb51/rawframes/ --task rgb --level 2 --ext avi --use-opencv 5 | echo "Generate raw frames (RGB only)" 6 | 7 | cd hmdb51/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | 5 | PYTHONPATH=. python tools/data/build_file_list.py hmdb51 data/hmdb51/rawframes/ --level 2 --format rawframes --shuffle 6 | echo "Filelist for rawframes generated."
7 | 8 | cd tools/data/hmdb51/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hmdb51/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | 5 | PYTHONPATH=. python tools/data/build_file_list.py hmdb51 data/hmdb51/videos/ --level 2 --format videos --shuffle 6 | echo "Filelist for videos generated." 7 | 8 | cd tools/data/hmdb51/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/download_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # set up environment 4 | conda env create -f environment.yml 5 | source activate hvu 6 | pip install --upgrade youtube-dl 7 | 8 | DATA_DIR="../../../data/hvu" 9 | ANNO_DIR="../../../data/hvu/annotations" 10 | python download.py ${ANNO_DIR}/hvu_train.csv ${DATA_DIR}/videos_train 11 | python download.py ${ANNO_DIR}/hvu_val.csv ${DATA_DIR}/videos_val 12 | 13 | source deactivate hvu 14 | conda remove -n hvu --all 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/hvu/videos_train/ ../../data/hvu/rawframes_train/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256 5 | echo "Raw frames (RGB and tv-l1) Generated for train set" 6 | 7 | python build_rawframes.py ../../data/hvu/videos_val/ ../../data/hvu/rawframes_val/ --level 1 --flow-type tvl1 --ext mp4 --task both --new-short 256 8 | echo "Raw frames (RGB and tv-l1) Generated for val set" 9 | 10 | cd hvu/ 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | # to generate file list of frames 2 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_train.csv --src_dir ../../../data/hvu/rawframes_train \ 3 | --output ../../../data/hvu/hvu_train.json --mode frames 4 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_val.csv --src_dir ../../../data/hvu/rawframes_val \ 5 | --output ../../../data/hvu/hvu_val.json --mode frames 6 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | # to generate file lists of videos 2 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_train.csv --src_dir ../../../data/hvu/videos_train \ 3 | --output ../../../data/hvu/hvu_train_video.json --mode videos 4 | python generate_file_list.py --input_csv ../../../data/hvu/annotations/hvu_val.csv --src_dir ../../../data/hvu/videos_val \ 5 | --output ../../../data/hvu/hvu_val_video.json --mode videos 6 | 
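The --level flag that recurs in these scripts encodes how deep the media sit under the source directory: level 1 means a flat folder of videos (as for gym or hvu above), level 2 means one class-folder layer between the root and the videos (as for hmdb51). A rough Python illustration of that convention; this is my own sketch, not the actual build_rawframes.py / build_file_list.py logic:

import glob
import os.path as osp

# Sketch: enumerate source videos for --level 1 (flat) vs --level 2
# (class/video). Illustrates the directory convention only.
def list_videos(src_dir, level, ext='mp4'):
    if level == 1:
        pattern = osp.join(src_dir, '*.' + ext)       # src_dir/video.ext
    elif level == 2:
        pattern = osp.join(src_dir, '*', '*.' + ext)  # src_dir/class/video.ext
    else:
        raise ValueError('these scripts only use --level 1 or 2')
    return sorted(glob.glob(pattern))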
-------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/hvu/parse_tag_list.py: -------------------------------------------------------------------------------- 1 | import mmcv 2 | 3 | tag_list = '../../../data/hvu/annotations/hvu_categories.csv' 4 | 5 | lines = open(tag_list).readlines() 6 | lines = [x.strip().split(',') for x in lines[1:]] 7 | tag_categories = {} 8 | for line in lines: 9 | tag, category = line 10 | tag_categories.setdefault(category, []).append(tag) 11 | 12 | for k in tag_categories: 13 | tag_categories[k].sort() 14 | 15 | mmcv.dump(tag_categories, 'hvu_tags.json') 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/encode_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_videos.py ../../data/jester/rawframes/ ../../data/jester/videos/ --fps 12 --level 1 --start-idx 1 --filename-tmpl '%05d' 5 | echo "Encode videos" 6 | 7 | cd jester/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/extract_flow.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/jester/rawframes/ ../../data/jester/rawframes/ --task flow --level 1 --flow-type tvl1 --input-frames 5 | echo "Flow (tv-l1) Generated" 6 | cd jester/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset train --format rawframes --shuffle 5 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset val --format rawframes --shuffle 6 | echo "Filelist for rawframes generated." 7 | 8 | cd tools/data/jester/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/jester/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle 5 | PYTHONPATH=. python tools/data/build_file_list.py jester data/jester/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle 6 | echo "Filelist for videos generated." 
7 | 8 | cd tools/data/jester/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/kinetics/subset_list.txt: -------------------------------------------------------------------------------- 1 | canoeing or kayaking 2 | climbing a rope 3 | driving car 4 | golf driving 5 | opening bottle 6 | playing piano 7 | playing volleyball 8 | shooting goal (soccer) 9 | surfing water 10 | writing -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/check_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DATASET=$1 4 | if [ "$DATASET" == "mimetics10" ] || [ "$DATASET" == "mimetics" ]; then 5 | echo "We are processing $DATASET" 6 | else 7 | echo "Bad Argument, we only support mimetics10 or mimetics" 8 | exit 0 9 | fi 10 | 11 | pwd_dir=$PWD 12 | 13 | cd ../../../ 14 | PYTHONPATH=. python tools/data/data_check.py data/${DATASET}/videos data/${DATASET}/${DATASET}_test_list_videos.txt test 15 | echo "Test filelist for video passed checking." 16 | 17 | cd ${pwd_dir} 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/download_annotations.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DATASET='mimetics' 4 | DATA_DIR="../../../data/${DATASET}/annotations" 5 | 6 | if [[ ! -d "${DATA_DIR}" ]]; then 7 | echo "${DATA_DIR} does not exist. Creating"; 8 | mkdir -p ${DATA_DIR} 9 | fi 10 | 11 | wget https://europe.naverlabs.com/wp-content/uploads/2019/12/Mimetics_release_v1.0.zip 12 | 13 | unzip Mimetics_release_v1.0.zip -d ${DATA_DIR}/ 14 | 15 | rm Mimetics_release_v1.0.zip 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/download_videos_subset.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # set up environment 4 | conda env create -f ../kinetics/environment.yml 5 | source activate kinetics 6 | pip install --upgrade youtube-dl 7 | 8 | DATA_DIR="../../../data/mimetics10" 9 | ANNO_DIR="../../../data/mimetics/annotations" 10 | SUBSET="../kinetics/subset_list.txt" 11 | python ../kinetics/download_subset.py ${ANNO_DIR}/mimetics_v1.0.csv ${DATA_DIR}/videos --subset_file ${SUBSET} -t /ssd/data/tmp/mimetics10 -n 1 12 | 13 | source deactivate kinetics 14 | # conda remove -n kinetics --all 15 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mimetics/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DATASET=$1 4 | if [ "$DATASET" == "mimetics10" ] || [ "$DATASET" == "mimetics" ]; then 5 | echo "We are processing $DATASET" 6 | else 7 | echo "Bad Argument, we only support mimetics10, mimetics" 8 | exit 0 9 | fi 10 | 11 | pwd_dir=$PWD 12 | cd ../../../ 13 | 14 | PYTHONPATH=.
python tools/data/mimetics/build_file_list.py ${DATASET} data/${DATASET}/videos/ data/${DATASET}/${DATASET}_test_list_videos.txt 15 | echo "test filelist for video generated." 16 | 17 | cd ${pwd_dir} -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/mit/videos/training ../../data/mit/rawframes/training/ --level 2 --flow-type tvl1 --ext mp4 --task both 5 | echo "Raw frames (RGB and tv-l1) Generated for train set" 6 | 7 | python build_rawframes.py ../../data/mit/videos/validation/ ../../data/mit/rawframes/validation/ --level 2 --flow-type tvl1 --ext mp4 --task both 8 | echo "Raw frames (RGB and tv-l1) Generated for val set" 9 | 10 | cd mit/ 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/mit/videos/training ../../data/mit/rawframes/training/ --level 2 --ext mp4 --task rgb 5 | echo "Raw frames (RGB only) generated for train set" 6 | 7 | python build_rawframes.py ../../data/mit/videos/validation ../../data/mit/rawframes/validation/ --level 2 --ext mp4 --task rgb 8 | echo "Raw frames (RGB only) generated for val set" 9 | 10 | cd mit/ 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/extract_rgb_frames_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/mit/videos/training ../../data/mit/rawframes/training/ --level 2 --ext mp4 --task rgb --use-opencv 5 | echo "Raw frames (RGB only) generated for train set" 6 | 7 | python build_rawframes.py ../../data/mit/videos/validation ../../data/mit/rawframes/validation/ --level 2 --ext mp4 --task rgb --use-opencv 8 | echo "Raw frames (RGB only) generated for val set" 9 | 10 | cd mit/ 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py mit data/mit/rawframes/training/ --level 2 --format rawframes --num-split 1 --subset train --shuffle 5 | echo "Train filelist for rawframes generated." 6 | 7 | PYTHONPATH=. python tools/data/build_file_list.py mit data/mit/rawframes/validation/ --level 2 --format rawframes --num-split 1 --subset val --shuffle 8 | echo "Val filelist for rawframes generated." 9 | cd tools/data/mit/ 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mit/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=.
python tools/data/build_file_list.py mit data/mit/videos/training/ --level 2 --format videos --num-split 1 --subset train --shuffle 5 | echo "Train filelist for videos generated." 6 | 7 | PYTHONPATH=. python tools/data/build_file_list.py mit data/mit/videos/validation/ --level 2 --format videos --num-split 1 --subset val --shuffle 8 | echo "Val filelist for videos generated." 9 | cd tools/data/mit/ 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/download_data.sh: -------------------------------------------------------------------------------- 1 | DATA_DIR="../../../data/mmit/" 2 | 3 | if [[ ! -d "${DATA_DIR}" ]]; then 4 | echo "${DATA_DIR} does not exist. Creating"; 5 | mkdir -p ${DATA_DIR} 6 | fi 7 | 8 | cd ${DATA_DIR} 9 | 10 | wget -c https://www.dropbox.com/s/sz3yd1o0gf09amh/Multi_Moments_in_Time.zip?dl=0 -O Multi_Moments_in_Time.zip 11 | 12 | unzip Multi_Moments_in_Time.zip 13 | rm Multi_Moments_in_Time.zip 14 | 15 | if [ ! -d "./annotations" ]; then 16 | mkdir ./annotations 17 | fi 18 | 19 | mv *.txt annotations && mv *.csv annotations 20 | 21 | cd - 22 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/mmit/videos/ ../../data/mmit/rawframes/ --task both --level 2 --flow-type tvl1 --ext mp4 5 | echo "Raw frames (RGB and Flow) Generated" 6 | cd mmit/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/mmit/videos/ ../../data/mmit/rawframes/ --task rgb --level 2 --ext mp4 5 | 6 | echo "Generate raw frames (RGB only)" 7 | 8 | cd mmit/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/extract_rgb_frames_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/mmit/videos/ ../../data/mmit/rawframes/ --task rgb --level 2 --ext mp4 --use-opencv 5 | 6 | echo "Generate raw frames (RGB only)" 7 | 8 | cd mmit/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/rawframes/ --level 2 --format rawframes --num-split 1 --subset train --shuffle 5 | echo "Train filelist for rawframes generated." 6 | 7 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/rawframes/ --level 2 --format rawframes --num-split 1 --subset val --shuffle 8 | echo "Val filelist for rawframes generated."
9 | cd tools/data/mmit/ 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/mmit/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/videos/ --level 2 --format videos --num-split 1 --subset train --shuffle 5 | echo "Train filelist for videos generated." 6 | 7 | PYTHONPATH=. python tools/data/build_file_list.py mmit data/mmit/videos/ --level 2 --format videos --num-split 1 --subset val --shuffle 8 | echo "Val filelist for videos generated." 9 | cd tools/data/mmit/ 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/encode_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_videos.py ../../data/sthv1/rawframes/ ../../data/sthv1/videos/ --fps 12 --level 1 --start-idx 1 --filename-tmpl '%05d' 5 | echo "Encode videos" 6 | 7 | cd sthv1/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/extract_flow.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/sthv1/rawframes/ ../../data/sthv1/rawframes/ --task flow --level 1 --flow-type tvl1 --input-frames 5 | echo "Flow (tv-l1) Generated" 6 | cd sthv1/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset train --format rawframes --shuffle 5 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/rawframes/ --rgb-prefix '0' --num-split 1 --level 1 --subset val --format rawframes --shuffle 6 | echo "Filelist for rawframes generated." 7 | 8 | cd tools/data/sthv1/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv1/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle 5 | PYTHONPATH=. python tools/data/build_file_list.py sthv1 data/sthv1/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle 6 | echo "Filelist for videos generated." 
7 | 8 | cd tools/data/sthv1/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/sthv2/videos/ ../../data/sthv2/rawframes/ --task both --level 1 --flow-type tvl1 --ext webm 5 | echo "Raw frames (RGB and tv-l1) Generated" 6 | cd sthv2/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/sthv2/videos/ ../../data/sthv2/rawframes/ --task rgb --level 1 --ext webm 5 | echo "Generate raw frames (RGB only)" 6 | 7 | cd sthv2/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/extract_rgb_frames_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/sthv2/videos/ ../../data/sthv2/rawframes/ --task rgb --level 1 --ext webm --use-opencv 5 | echo "Generate raw frames (RGB only)" 6 | 7 | cd sthv2/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/rawframes/ --num-split 1 --level 1 --subset train --format rawframes --shuffle 5 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/rawframes/ --num-split 1 --level 1 --subset val --format rawframes --shuffle 6 | echo "Filelist for rawframes generated." 7 | 8 | cd tools/data/sthv2/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/sthv2/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/videos/ --num-split 1 --level 1 --subset train --format videos --shuffle 5 | PYTHONPATH=. python tools/data/build_file_list.py sthv2 data/sthv2/videos/ --num-split 1 --level 1 --subset val --format videos --shuffle 6 | echo "Filelist for videos generated."
7 | 8 | cd tools/data/sthv2/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/thumos14/videos/val/ ../../data/thumos14/rawframes/val/ --level 1 --flow-type tvl1 --ext mp4 --task both 5 | echo "Raw frames (RGB and tv-l1) Generated for val set" 6 | 7 | python build_rawframes.py ../../data/thumos14/videos/test/ ../../data/thumos14/rawframes/test/ --level 1 --flow-type tvl1 --ext mp4 --task both 8 | echo "Raw frames (RGB and tv-l1) Generated for test set" 9 | 10 | cd thumos14/ 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/thumos14/videos/val/ ../../data/thumos14/rawframes/val/ --level 1 --ext mp4 --task rgb 5 | echo "Raw frames (RGB only) generated for val set" 6 | 7 | python build_rawframes.py ../../data/thumos14/videos/test/ ../../data/thumos14/rawframes/test/ --level 1 --ext mp4 --task rgb 8 | echo "Raw frames (RGB only) generated for test set" 9 | 10 | cd thumos14/ 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/extract_rgb_frames_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/thumos14/videos/val/ ../../data/thumos14/rawframes/val/ --level 1 --ext mp4 --task rgb --use-opencv 5 | echo "Raw frames (RGB only) generated for val set" 6 | 7 | python build_rawframes.py ../../data/thumos14/videos/test/ ../../data/thumos14/rawframes/test/ --level 1 --ext mp4 --task rgb --use-opencv 8 | echo "Raw frames (RGB only) generated for test set" 9 | 10 | cd thumos14/ 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/thumos14/fetch_tag_proposals.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PROP_DIR="../../../data/thumos14/proposals" 4 | 5 | if [[ ! -d "${PROP_DIR}" ]]; then 6 | echo "${PROP_DIR} does not exist. Creating"; 7 | mkdir -p ${PROP_DIR} 8 | fi 9 | 10 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_val_normalized_proposal_list.txt -P ${PROP_DIR} 11 | wget https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/filelist/thumos14_tag_test_normalized_proposal_list.txt -P ${PROP_DIR} 12 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/download_annotations.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DATA_DIR="../../../data/ucf101/annotations" 4 | 5 | if [[ ! -d "${DATA_DIR}" ]]; then 6 | echo "${DATA_DIR} does not exist. 
Creating"; 7 | mkdir -p ${DATA_DIR} 8 | fi 9 | 10 | wget https://www.crcv.ucf.edu/wp-content/uploads/2019/03/UCF101TrainTestSplits-RecognitionTask.zip --no-check-certificate 11 | 12 | unzip -j UCF101TrainTestSplits-RecognitionTask.zip -d ${DATA_DIR}/ 13 | rm UCF101TrainTestSplits-RecognitionTask.zip 14 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/download_videos.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | DATA_DIR="../../../data/ucf101/" 4 | 5 | if [[ ! -d "${DATA_DIR}" ]]; then 6 | echo "${DATA_DIR} does not exist. Creating"; 7 | mkdir -p ${DATA_DIR} 8 | fi 9 | 10 | cd ${DATA_DIR} 11 | 12 | wget https://www.crcv.ucf.edu/datasets/human-actions/ucf101/UCF101.rar --no-check-certificate 13 | unrar x UCF101.rar 14 | mv ./UCF-101 ./videos 15 | 16 | cd "../../tools/data/ucf101" 17 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/extract_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/ucf101/videos/ ../../data/ucf101/rawframes/ --task both --level 2 --flow-type tvl1 5 | echo "Raw frames (RGB and Flow) Generated" 6 | cd ucf101/ 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/extract_rgb_frames.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/ucf101/videos/ ../../data/ucf101/rawframes/ --task rgb --level 2 --ext avi 5 | echo "Genearte raw frames (RGB only)" 6 | 7 | cd ucf101/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/extract_rgb_frames_opencv.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../ 4 | python build_rawframes.py ../../data/ucf101/videos/ ../../data/ucf101/rawframes/ --task rgb --level 2 --ext avi --use-opencv 5 | echo "Genearte raw frames (RGB only)" 6 | 7 | cd ucf101/ 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/generate_rawframes_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | 5 | PYTHONPATH=. python tools/data/build_file_list.py ucf101 data/ucf101/rawframes/ --level 2 --format rawframes --shuffle 6 | echo "Filelist for rawframes generated." 7 | 8 | cd tools/data/ucf101/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/data/ucf101/generate_videos_filelist.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | cd ../../../ 4 | 5 | PYTHONPATH=. python tools/data/build_file_list.py ucf101 data/ucf101/videos/ --level 2 --format videos --shuffle 6 | echo "Filelist for videos generated." 
7 | 8 | cd tools/data/ucf101/ 9 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/dist_test.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | CHECKPOINT=$2 5 | GPUS=$3 6 | PORT=${PORT:-29500} 7 | 8 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 9 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 10 | $(dirname "$0")/test.py $CONFIG $CHECKPOINT --launcher pytorch ${@:4} 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Open-Set-Action-Recognition/tools/dist_train.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | CONFIG=$1 4 | GPUS=$2 5 | PORT=${PORT:-29500} 6 | 7 | PYTHONPATH="$(dirname $0)/..":$PYTHONPATH \ 8 | python -m torch.distributed.launch --nproc_per_node=$GPUS --master_port=$PORT \ 9 | $(dirname "$0")/train.py $CONFIG --launcher pytorch ${@:3} 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/.DS_Store -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/config/__init__.py: -------------------------------------------------------------------------------- 1 | from .defaults import _C as cfg 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/csrc/cpu/vision.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include <torch/extension.h> 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import make_data_loader 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .concat_dataset import ConcatDataset 2 | from .ava import AVAVideoDataset 3 | 4 | __all__ = ["ConcatDataset", "AVAVideoDataset"] --------------------------------------------------------------------------------
/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/README.md: -------------------------------------------------------------------------------- 1 | The evaluation code of AVA is modified from [https://github.com/activitynet/ActivityNet](https://github.com/activitynet/ActivityNet). -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from .ava_eval import do_ava_evaluation 3 | 4 | 5 | def ava_evaluation(dataset, predictions, output_folder, **_): 6 | logger = logging.getLogger("alphaction.inference") 7 | logger.info("performing ava evaluation.") 8 | return do_ava_evaluation( 9 | dataset=dataset, 10 | predictions=predictions, 11 | output_folder=output_folder, 12 | logger=logger, 13 | ) 14 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/pascal_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/datasets/evaluation/ava/pascal_evaluation/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/samplers/__init__.py: -------------------------------------------------------------------------------- 1 | from .distributed import DistributedSampler 2 | from .grouped_batch_sampler import GroupedBatchSampler 3 | from .iteration_based_batch_sampler import IterationBasedBatchSampler 4 | 5 | __all__ = ["DistributedSampler", "GroupedBatchSampler", "IterationBasedBatchSampler"] 6 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/dataset/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_transforms, build_object_transforms -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/engine/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/engine/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/__init__.py 
-------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/backbone/__init__.py: -------------------------------------------------------------------------------- 1 | from .backbone import build_backbone -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/detector/__init__.py: -------------------------------------------------------------------------------- 1 | from .action_detector import build_detection_model -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/registry.py: -------------------------------------------------------------------------------- 1 | from alphaction.utils.registry import Registry 2 | 3 | BACKBONES = Registry() 4 | ROI_ACTION_FEATURE_EXTRACTORS = Registry() 5 | ROI_ACTION_PREDICTORS = Registry() 6 | INTERACTION_AGGREGATION_STRUCTURES = Registry() -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/action_head/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/modeling/roi_heads/action_head/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/solver/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import make_optimizer 2 | from .build import make_lr_scheduler 3 | from .lr_scheduler import WarmupMultiStepLR 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/structures/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/structures/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/random_seed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import numpy as np 4 | 5 | def set_seed(seed, rank, world_size): 6 | rng = random.Random(seed) 7 | seed_per_rank = [rng.randint(0, 2**32-1) for _ in range(world_size)] 8 | cur_seed = seed_per_rank[rank] 9 | random.seed(cur_seed) 10 | torch.manual_seed(cur_seed) 11 | torch.cuda.manual_seed(cur_seed) 12 | np.random.seed(cur_seed) -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/alphaction/utils/video_decode.py: -------------------------------------------------------------------------------- 1 | import av 2 | import io 3 | import decord 4 | def av_decode_video(video_path): 5 | with av.open(video_path) as container: 6 | frames = [] 7 | for frame in container.decode(video=0): 8 | frames.append(frame.to_rgb().to_ndarray()) 9 | return frames -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/data/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/data/movie_size.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/data/movie_size.npy -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/figs/videomae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/figs/videomae.png -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/start.sh: -------------------------------------------------------------------------------- 1 | sbatch -N 1 --gres=gpu:1 --qos=gpugpu akeval.sh -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Spatial-Temporal-Action-Localization/vis.sh: -------------------------------------------------------------------------------- 1 | # Set the path to save video 2 | OUTPUT_DIR='TODO/VideoMAE/demo/vis_k400_1_0.9' 3 | # path to video for visualization 4 | VIDEO_PATH='TODO/TODO.mp4' 5 | # path to pretrain model 6 | MODEL_PATH='TODO/videomae_pretrain_base_patch16_224_frame_16x4_tube_mask_ratio_0.9_e1600/checkpoint-1599.pth' 7 | 8 | python3 run_videomae_vis.py \ 9 | --mask_ratio 0.9 \ 10 | --mask_type tube \ 11 | --decoder_depth 4 \ 12 
| --model pretrain_videomae_base_patch16_224 \ 13 | ${VIDEO_PATH} ${OUTPUT_DIR} ${MODEL_PATH} -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Requirements 2 | 3 | - Linux 4 | - Python 3.5+ 5 | - PyTorch 1.10+ 6 | - TensorBoard 7 | - CUDA 11.0+ 8 | - GCC 4.9+ 9 | - NumPy 1.11+ 10 | - PyYaml 11 | - Pandas 12 | - h5py 13 | - joblib 14 | 15 | # Compilation 16 | 17 | Part of NMS is implemented in C++. The code can be compiled by 18 | 19 | ```shell 20 | cd ./libs/utils 21 | python setup.py install --user 22 | cd ../.. 23 | ``` 24 | 25 | The code should be recompiled every time you update PyTorch. 26 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/anet_run.sh: -------------------------------------------------------------------------------- 1 | python -u ./train_eval.py ./configs/anet.yaml --output anet_mae_h 2>&1 | tee anet_mae_h.log -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/fa_run.sh: -------------------------------------------------------------------------------- 1 | python -u ./train_eval.py ./configs/fineaction.yaml --output fa_mae_h 2>&1 | tee fa_mae_h.log -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/hacs_run.sh: -------------------------------------------------------------------------------- 1 | python -u ./train_eval.py ./configs/hacs.yaml --output hacs_mae_h 2>&1 | tee hacs_mae_h.log -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/libs/core/__init__.py: -------------------------------------------------------------------------------- 1 | from .config import load_default_config, load_config 2 | 3 | __all__ = ['load_default_config', 'load_config'] 4 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/libs/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_utils import worker_init_reset_seed, truncate_feats 2 | from .datasets import make_dataset, make_data_loader 3 | from . 
import thumos14, anet # other datasets go here 4 | 5 | __all__ = ['worker_init_reset_seed', 'truncate_feats', 6 | 'make_dataset', 'make_data_loader'] 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/libs/utils/setup.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from setuptools import setup, Extension 4 | from torch.utils.cpp_extension import BuildExtension, CppExtension 5 | 6 | 7 | setup( 8 | name='nms_1d_cpu', 9 | ext_modules=[ 10 | CppExtension( 11 | name = 'nms_1d_cpu', 12 | sources = ['./csrc/nms_cpu.cpp'], 13 | extra_compile_args=['-fopenmp'] 14 | ) 15 | ], 16 | cmdclass={ 17 | 'build_ext': BuildExtension 18 | } 19 | ) 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Temporal-Action-Localization/th14_run.sh: -------------------------------------------------------------------------------- 1 | python -u ./train_eval.py ./configs/thumos.yaml --output th14_mae_h 2>&1 | tee th14_mae_h.log -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | from .evl_utils import * -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- 1 | ../bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/dog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_evl/dog.png -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- 
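The `libs/utils/setup.py` above builds the C++ NMS extension named `nms_1d_cpu` (see the Temporal-Action-Localization INSTALL.md earlier). A quick post-build sanity check, as a sketch; only the module name is taken from that `setup.py`:

```python
# torch must be imported first so the extension's linked libtorch symbols resolve.
import torch
import nms_1d_cpu  # extension name declared in libs/utils/setup.py

print("compiled NMS extension loaded from:", nms_1d_cpu.__file__)
```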
/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- 1 | ../bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc/evl_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .evl_module import TransformerDecoder 2 | from .evl_module_uniformer_diff_conv_balance import TransformerDecoder_uniformer_diff_conv_balance 3 | from .clip_vit import vit_b32, vit_b16, vit_l14, vit_l14_336 4 | from .clip_vit_2plus1d import vit_2plus1d_b32, vit_2plus1d_b16, vit_2plus1d_l14, vit_2plus1d_l14_336 5 | from .clip_vit_2plus1d_dw_bias import vit_2plus1d_dw_bias_b32, vit_2plus1d_dw_bias_b16, vit_2plus1d_dw_bias_l14, vit_2plus1d_dw_bias_l14_336 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc2/evl_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .evl_module import TransformerDecoder 2 | from .evl_module_uniformer_diff_conv_balance import TransformerDecoder_uniformer_diff_conv_balance 3 | from .clip_vit import vit_b32, vit_b16, vit_l14, vit_l14_336 4 | from .clip_vit_2plus1d import vit_2plus1d_b32, vit_2plus1d_b16, vit_2plus1d_l14, vit_2plus1d_l14_336 5 | from .clip_vit_2plus1d_dw_bias import vit_2plus1d_dw_bias_b32, vit_2plus1d_dw_bias_b16, vit_2plus1d_dw_bias_l14, vit_2plus1d_dw_bias_l14_336 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | from .evl_utils import * -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/clip_kc_new/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Video-Text-Retrieval/modules/cross-base/cross_config.json: -------------------------------------------------------------------------------- 
1 | { 2 | "attention_probs_dropout_prob": 0.1, 3 | "hidden_act": "gelu", 4 | "hidden_dropout_prob": 0.1, 5 | "hidden_size": 512, 6 | "initializer_range": 0.02, 7 | "intermediate_size": 2048, 8 | "max_position_embeddings": 128, 9 | "num_attention_heads": 8, 10 | "num_hidden_layers": 4, 11 | "vocab_size": 512 12 | } -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/eval_base.bash: -------------------------------------------------------------------------------- 1 | # srun -p GVT -n1 -c5 --gres=gpu:1 -x SH-IDC1-10-198-8-[61,62] bash iter_eval.bash 2 | 3 | ngpus=1 4 | flag="--exp_name eval_base 5 | --run-type eval 6 | --exp-config iter_train.yaml 7 | 8 | SIMULATOR_GPU_IDS [0] 9 | TORCH_GPU_IDS [0] 10 | GPU_NUMBERS $ngpus 11 | NUM_ENVIRONMENTS 11 12 | 13 | EVAL.SAVE_RESULTS False 14 | EVAL.CKPT_PATH_DIR pretrained/pretrained_models/base_ckpt.iter6900.pth 15 | TASK_CONFIG.SIMULATOR.HABITAT_SIM_V0.ALLOW_SLIDING True 16 | " 17 | python run.py $flag 18 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/habitat_extensions/__init__.py: -------------------------------------------------------------------------------- 1 | from habitat_extensions import measures, obs_transformers, sensors, nav 2 | from habitat_extensions.config.default import get_extended_config 3 | from habitat_extensions.task import VLNCEDatasetV1 4 | from habitat_extensions.habitat_simulator import Simulator 5 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/habitat_extensions/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/habitat_extensions/config/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==1.8.0 2 | torchvision==0.9.0 3 | boto3 4 | pytorch_transformers==1.2.0 5 | timm==0.4.9 6 | transformers==4.12.3 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/run/vlnbert_r2r.bash: -------------------------------------------------------------------------------- 1 | # srun -p GVT -n1 -c10 --gres=gpu:3 -x SH-IDC1-10-198-8-[61,62] bash run/vlnbert_r2r.bash 2 | 3 | flag="--exp_name r2r_vlnbert_slide1 4 | --run-type train 5 | --exp-config exp/vlnbert_r2r.yaml 6 | 7 | SIMULATOR_GPU_IDS [0,1,2] 8 | TORCH_GPU_IDS [0,1,2] 9 | GPU_NUMBERS 3 10 | 11 | IL.batch_size 20 12 | IL.lr 3.5e-5 13 | IL.schedule_ratio 0.50 14 | IL.max_traj_len 20 15 | " 16 | python -m torch.distributed.launch --nproc_per_node=3 run.py $flag -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/run/vlnbert_r2r_da_eval.bash: -------------------------------------------------------------------------------- 1 | # srun -p GVT -n1 -c5 --gres=gpu:1 -x SH-IDC1-10-198-8-[61,62,79] bash run/vlnbert_r2r_da_eval.bash 2 | 3 | 
p=0.5 4 | bs=32 5 | diter=$1 6 | epoch=$2 7 | ngpus=4 8 | flag="--exp_name r2r_vlnbert_da.p${p}.bs${bs}.di${diter}.ep${epoch} 9 | --run-type eval 10 | --exp-config exp/vlnbert_r2r_da.yaml 11 | 12 | SIMULATOR_GPU_IDS [0] 13 | TORCH_GPU_IDS [0] 14 | GPU_NUMBERS $ngpus 15 | NUM_ENVIRONMENTS 11 16 | 17 | IL.lr 3.5e-5 18 | IL.batch_size 11 19 | IL.max_traj_len 20 20 | " 21 | python run.py $flag -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/run/vlnbert_r2r_eval.bash: -------------------------------------------------------------------------------- 1 | # eval 2 | # srun -p GVT -n1 -c5 --gres=gpu:1 -x SH-IDC1-10-198-8-[61,62] bash run/vlnbert_r2r_eval.bash 3 | 4 | flag="--exp_name r2r_vlnbert_slide1 5 | --run-type eval 6 | --exp-config exp/vlnbert_r2r.yaml 7 | 8 | SIMULATOR_GPU_IDS [0] 9 | TORCH_GPU_IDS [0] 10 | TORCH_GPU_ID 0 11 | GPU_NUMBERS 1 12 | 13 | IL.batch_size 11 14 | IL.lr 3.5e-5 15 | IL.schedule_ratio 0.50 16 | IL.max_traj_len 20 17 | " 18 | python run.py $flag -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/__init__.py: -------------------------------------------------------------------------------- 1 | from vlnce_baselines import trainer_HAMT 2 | from vlnce_baselines.common import environments 3 | 4 | from vlnce_baselines.models import ( 5 | Policy_ViewSelection_CMA, 6 | Policy_ViewSelection_HAMT, 7 | ) 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/config/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/config/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/config/r2r_configs/test_set_inference.yaml: -------------------------------------------------------------------------------- 1 | BASE_TASK_CONFIG_PATH: habitat_extensions/config/vlnce_task.yaml 2 | SIMULATOR_GPU_ID: 0 3 | TORCH_GPU_ID: 0 4 | NUM_PROCESSES: 1 5 | 6 | INFERENCE: 7 | SPLIT: test 8 | USE_CKPT_CONFIG: False 9 | SAMPLE: False 10 | CKPT_PATH: data/checkpoints/CMA_PM_DA_Aug.pth 11 | PREDICTIONS_FILE: predictions.json 12 | 13 | MODEL: 14 | policy_name: CMAPolicy 15 | 16 | INSTRUCTION_ENCODER: 17 | bidirectional: True 18 | 19 | CMA: 20 | use: True 21 | 22 | PROGRESS_MONITOR: 23 | use: True 24 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/Visual-Language-Navigation/vlnce_baselines/models/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/activitynet_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import ActivityNetDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class ActivityNetDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return ActivityNetDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "activitynet" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/cc12m_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import CC12MDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class CC12MDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return CC12MDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "cc12m" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/cc3m_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import CC3MDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class CC3MDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return CC3MDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "cc3m" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/conceptual_caption_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import ConceptualCaptionDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class ConceptualCaptionDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return ConceptualCaptionDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "gcc" 16 | --------------------------------------------------------------------------------
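The image datamodules above (and the video ones that follow) are near-identical specializations of `BaseDataModule`: each binds a dataset class via `dataset_cls` and a lookup key via `dataset_name`. A hypothetical new datamodule following the same pattern would look like this; sketch only, `MyImageDataModule` and its key are not in the repo, and an existing dataset class is reused as a stand-in:

```python
from CoTrain.datasets import ActivityNetDataset  # stand-in for a real dataset class
from CoTrain.datamodules.image.datamodule_base import BaseDataModule


class MyImageDataModule(BaseDataModule):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    @property
    def dataset_cls(self):
        # The dataset class this datamodule instantiates for each split.
        return ActivityNetDataset

    @property
    def dataset_name(self):
        # Key under which the datamodule is looked up in configuration.
        return "my_image_dataset"
```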
/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/laion400m_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import LAION400MDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class LAION400MDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return LAION400MDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "laion400m" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/mix100m_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import MIX100MDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class MIX100MDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return MIX100MDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "mix100m" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/nlvr2_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import NLVR2Dataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class NLVR2DataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return NLVR2Dataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "nlvr2" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/sbu_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import SBUCaptionDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class SBUCaptionDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return SBUCaptionDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "sbu" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/vcr_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import VCRDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class VCRDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return VCRDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "vcr" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/vg_caption_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import 
VisualGenomeCaptionDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class VisualGenomeCaptionDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return VisualGenomeCaptionDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "vg" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/image/yfcc15m_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import YFCC15MDataset 2 | from .datamodule_base import BaseDataModule 3 | 4 | 5 | class YFCC15MDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return YFCC15MDataset 12 | 13 | @property 14 | def dataset_name(self): 15 | return "yfcc15m" 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/didemo_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import DIDEMODataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class DIDEMODataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return DIDEMODataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return DIDEMODataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "didemo" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/ego4d_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import Ego4DDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class Ego4DDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return Ego4DDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return Ego4DDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "ego4d" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/hmdb51_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import HMDB51Dataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class HMDB51DataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | 
super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return HMDB51Dataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return HMDB51Dataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "hmdb51" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/howto100m_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import HT100MDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class HT100MDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return HT100MDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return HT100MDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "howto100m" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/k400_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import K400Dataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class K400DataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return K400Dataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return K400Dataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "k400" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/k400_video_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import K400VideoDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class K400VideoDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return K400VideoDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return K400VideoDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "k400_video" -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/lsmdc_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import LSMDCDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class LSMDCDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return LSMDCDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return LSMDCDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "lsmdc" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/msrvtt_datamodule.py: 
-------------------------------------------------------------------------------- 1 | from CoTrain.datasets import MSRVTTDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class MSRVTTDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return MSRVTTDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return MSRVTTDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "msrvtt" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/msvd_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import MSVDDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class MSVDDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return MSVDDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return MSVDDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "msvd" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/tgif_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import TGIFDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class TGIFDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return TGIFDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return TGIFDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "tgif" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/tgifqa_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import TGIFQADataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class TGIFQADataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return TGIFQADataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return TGIFQADataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "tgifqa" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/tvqa_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import TVQADataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class TVQADataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return TVQADataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return TVQADataset 16 | 17 | @property 18 | def 
dataset_name(self): 19 | return "tvqa" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/ucf101_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import UCF101Dataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class UCF101DataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return UCF101Dataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return UCF101Dataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "ucf101" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/webvid_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import WEBVIDDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class WEBVIDDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return WEBVIDDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return WEBVIDDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "webvid" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datamodules/video/youtube_datamodule.py: -------------------------------------------------------------------------------- 1 | from CoTrain.datasets import YOUTUBEDataset 2 | from CoTrain.datamodules.image.datamodule_base import BaseDataModule 3 | 4 | 5 | class YOUTUBEDataModule(BaseDataModule): 6 | def __init__(self, *args, **kwargs): 7 | super().__init__(*args, **kwargs) 8 | 9 | @property 10 | def dataset_cls(self): 11 | return YOUTUBEDataset 12 | 13 | @property 14 | def dataset_cls_no_false(self): 15 | return YOUTUBEDataset 16 | 17 | @property 18 | def dataset_name(self): 19 | return "youtube" 20 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/image/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/image/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/video/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/datasets/video/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/gadgets/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/gadgets/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/__init__.py: -------------------------------------------------------------------------------- 1 | from .internvideo import * -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/clip_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/InternVideo/clip_utils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from .evl_module import TransformerDecoder 2 | from .clip_vit_only_global import vit_only_global_b32, vit_only_global_b16, vit_only_global_l14, vit_only_global_l14_336 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/__init__.py: -------------------------------------------------------------------------------- 1 | # from CoTrain.modules.cotrain_dino_module_v2 import CoTrainTransformerSS 2 | from CoTrain.modules.cotrain_module import CoTrainTransformerSS 3 | # from CoTrain.modules.cotrain_dino_module_v3 import CoTrainTransformerSS 4 | from CoTrain.modules.clip_module import CLIP -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/modules/forzen_param.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | # def forzen_param(model): 5 | # for name, param in model.named_parameters(): 6 | # if 'mlm_score' in name or 'vtm_score' in name or 'mpp_score' in name: 7 | # param.requires_grad = True 8 | # else: 9 | # param.requires_grad = False 10 | # return True 11 | 12 | 13 | def forzen_param(model): 14 | flag = False 15 | for name, param in model.named_parameters(): 16 | if '10' in name: 17 | flag = True 18 | param.requires_grad = flag 19 | return True -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.21.6 2 | setuptools==61.2.0 3 | torch==1.9.0+cu111 4 | torchvision==0.10.0+cu111 5 | 
-------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | from CoTrain.transforms.image.pixelbert import ( 2 | pixelbert_transform, 3 | pixelbert_transform_randaug, 4 | open_clip_transform, 5 | ) 6 | 7 | _transforms = { 8 | "pixelbert": pixelbert_transform, 9 | "pixelbert_randaug": pixelbert_transform_randaug, 10 | "open_clip": open_clip_transform, 11 | } 12 | 13 | 14 | def keys_to_transforms(keys: list, size=224, mode="train"): 15 | return [_transforms[key](size=size, mode=mode) for key in keys] 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/image/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/image/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/video/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Downstream/multi-modalities-downstream/CoTrain/transforms/video/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Media/download.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Media/download.png -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/__init__.py: -------------------------------------------------------------------------------- 1 | from .internvideo import * -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/clip_utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/InternVideo/clip_utils/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # from .evl_module import TransformerDecoder 2 | from .clip_vit_only_global import vit_only_global_b32, vit_only_global_b16, vit_only_global_l14, 
vit_only_global_l14_336 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/data/demo.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/Multi-Modalities-Pretraining/data/demo.mp4 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/extract_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/extract_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/img/framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/img/framework.png -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/linter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | # Run this script at project root by "./linter.sh" before you commit. 4 | echo "Running isort..." 5 | isort -y -sp . 6 | 7 | echo "Running black..." 8 | black -l 80 . 9 | 10 | echo "Running flake..." 11 | flake8 . 12 | 13 | command -v arc > /dev/null && { 14 | echo "Running arc lint ..." 15 | arc lint 16 | } 17 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from slowfast.utils.env import setup_environment 5 | 6 | setup_environment() 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/config/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/config/custom_config.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Add custom configs and default values""" 5 | 6 | 7 | def add_custom_config(_C): 8 | # Add your own customized configs.
9 | pass 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/models/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | from .build import MODEL_REGISTRY, build_model # noqa 5 | from .custom_video_model_builder import * # noqa 6 | from .ptv_model_builder import ( 7 | PTVCSN, 8 | PTVX3D, 9 | PTVR2plus1D, 10 | PTVResNet, 11 | PTVSlowFast, 12 | ) # noqa 13 | from .video_model_builder import ResNet, SlowFast # noqa 14 | from .uniformer import Uniformer # noqa 15 | from .uniformerv2 import Uniformerv2 # noqa -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/models/custom_video_model_builder.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | 5 | """More flexible video models.""" 6 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/ava_evaluation/README.md: -------------------------------------------------------------------------------- 1 | The code under this folder is from the official [ActivityNet repo](https://github.com/activitynet/ActivityNet). 2 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/ava_evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/ava_evaluation/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/utils/env.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved. 3 | 4 | """Set up Environment.""" 5 | 6 | import slowfast.utils.logging as logging 7 | 8 | _ENV_SETUP_DONE = False 9 | 10 | 11 | def setup_environment(): 12 | global _ENV_SETUP_DONE 13 | if _ENV_SETUP_DONE: 14 | return 15 | _ENV_SETUP_DONE = True 16 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/UniFormerV2/slowfast/visualization/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
3 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/qa.py: -------------------------------------------------------------------------------- 1 | from .pretrain import * 2 | 3 | del available_corpus 4 | 5 | criterion["loss_weight"]["mlm"] = 0.0 6 | scheduler["warmup_epochs"] = 0.5 7 | 8 | max_txt_l = 32 9 | batch_size = 32 10 | num_frames = 12 11 | 12 | optimizer["lr"] = 1e-5 13 | log_freq = 100 14 | 15 | # =========additional args for VQA ============ 16 | eos = "[SEP]" 17 | max_q_len = 25 18 | max_a_len = 5 19 | # =========end ================================ 20 | 21 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/ret_msrvtt_9k.py: -------------------------------------------------------------------------------- 1 | from .ret_msrvtt import * 2 | 3 | train_file = [ 4 | f"{anno_root_downstream}/msrvtt_ret_train9k.json", 5 | f"{data_root}/msrvtt_2fps_224", 6 | "video", 7 | ] 8 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/ret_ssv2_label.py: -------------------------------------------------------------------------------- 1 | from .ret_msrvtt import * 2 | 3 | train_file = [ 4 | f"{anno_root_downstream}/ssv2_ret_label_train.json", 5 | f"{data_root}/ssv2", 6 | "video", 7 | ] 8 | test_file = dict( 9 | val=[ 10 | f"{anno_root_downstream}/ssv2_ret_label_val_small.json", 11 | f"{data_root}/ssv2", 12 | "video", 13 | ], 14 | ) 15 | 16 | test_types = ["val"] 17 | stop_key = None # used to choose the best ckpt. If None, save the last. 18 | 19 | has_multi_vision_gt = True 20 | 21 | scheduler["epochs"] = 10 22 | optimizer["lr"] = 1e-4 23 | 24 | max_txt_l = 25 25 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/configs/ret_ssv2_template.py: -------------------------------------------------------------------------------- 1 | from .ret_msrvtt import * 2 | 3 | train_file = [ 4 | f"{anno_root_downstream}/ssv2_ret_template_train.json", 5 | f"{data_root}/ssv2", 6 | "video", 7 | ] 8 | test_file = dict( 9 | val=[ 10 | f"{anno_root_downstream}/ssv2_ret_template_val_small.json", 11 | f"{data_root}/ssv2", 12 | "video", 13 | ], 14 | ) 15 | 16 | test_types = ["val"] 17 | stop_key = None # used to choose the best ckpt. If None, save the last. 
18 | 19 | has_multi_vision_gt = True 20 | 21 | scheduler["epochs"] = 10 22 | optimizer["lr"] = 1e-4 23 | 24 | max_txt_l = 22 25 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/exp/exp_pretrain_ViCLIP/viclip_base/run.sh: -------------------------------------------------------------------------------- 1 | torchrun --rdzv_endpoint=${MASTER_NODE}:${MASTER_PORT} \ 2 | --nnodes=${NNODE} \ 3 | --nproc_per_node=${NUM_GPUS} \ 4 | --rdzv_backend=c10d \ 5 | tasks/pretrain.py \ 6 | $(dirname $0)/config.py \ 7 | wandb.enable False \ 8 | model.vision_encoder.pretrained 'CLIP-ViT-B/16' \ 9 | model.text_encoder.pretrained 'CLIP-ViT-B/16' \ 10 | output_dir ${OUTPUT_DIR} 11 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/beit/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/beit/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/bert/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/blip_toremove/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/blip_toremove/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/backbones/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/modules/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/models/modules/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/preprocess/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | 4 | 5 | def get_video_duration(filename): 6 | 7 | result = subprocess.check_output( 8 | f'ffprobe -v quiet -show_streams -select_streams v:0 -of json "{filename}"', shell=True 9 | ).decode() 10 | fields = json.loads(result)["streams"][0] 11 | 12 | duration = float(fields["duration"]) 13 | return duration 14 | 15 | if __name__ == "__main__": 16 | import os 17 | fp = os.path.join(os.environ["SL_DATA_DIR"], "videos_images/webvid_10m_2fps_224/22920757.mp4") 18 | print(get_video_duration(fp)) 19 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/tests/test_cfg.py: -------------------------------------------------------------------------------- 1 | from utils.config import Config 2 | 3 | cfg = Config.get_config() 4 | 5 | cfg_text = Config.pretty_text(cfg) 6 | print(cfg_text) 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo1/Pretrain/ViCLIP/tools/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import socket 4 | 5 | 6 | def has_slurm(): 7 | """Determine whether the system has slurm. 8 | Returns: True if it does, else False.
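9 | 10 | """ 11 | return shutil.which("sbatch") is not None 12 | 13 | def random_port(): 14 | """Pick a random unused port. 15 | Returns: int 16 | 17 | """ 18 | with socket.socket() as s: 19 | s.bind(("", 0)) 20 | return s.getsockname()[1] 21 | 22 | def runcmd(cmd): 23 | """Run a shell command. 24 | 25 | Args: 26 | cmd (str): The command to run 27 | 28 | """ 29 | os.system(cmd) 30 | --------------------------------------------------------------------------------

The `get_video_duration` helper in `ViCLIP/preprocess/utils.py` above builds a shell string for ffprobe; a hedged sketch of the same query in the safer argument-list form (requires ffprobe on PATH; the sample path is illustrative, not repo code):

# Sketch only: the same ffprobe query as ViCLIP/preprocess/utils.py,
# but passed as an argument list so that unusual filenames cannot
# break (or inject into) the shell command.
import json
import subprocess

def get_video_duration_safe(filename: str) -> float:
    out = subprocess.check_output(
        ["ffprobe", "-v", "quiet", "-show_streams",
         "-select_streams", "v:0", "-of", "json", filename]
    ).decode()
    return float(json.loads(out)["streams"][0]["duration"])

# print(get_video_duration_safe("example1.mp4"))  # illustrative path

--------------------------------------------------------------------------------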
/third_party/InternVideo/InternVideo2/figs/teaser-internvideo2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/figs/teaser-internvideo2.png -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/figs/wechatgrp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/figs/wechatgrp.png -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/demo/example1.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/demo/example1.mp4 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .internvideo2_clip import InternVideo2_CLIP 2 | from .internvideo2_stage2 import InternVideo2_Stage2 3 | # from .internvideo2_stage2_audio import InternVideo2_Stage2_audio 4 | 5 | __all__ = [ 6 | 'InternVideo2_CLIP', 7 | 'InternVideo2_Stage2', 8 | # 'InternVideo2_Stage2_audio' 9 | ] 10 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/models/backbones/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/models/backbones/beats/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/beats/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/models/backbones/bert/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/multi_modality/models/backbones/bert/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/models/backbones/internvideo2/__init__.py:
-------------------------------------------------------------------------------- 1 | from .internvl_clip_vision import internvl_clip_6b 2 | from .internvideo2 import pretrain_internvideo2_1b_patch14_224, pretrain_internvideo2_6b_patch14_224 3 | from .internvideo2_clip_vision import InternVideo2 4 | from .internvideo2_clip_text import LLaMA, Tokenizer -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/preprocess/utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | import subprocess 3 | 4 | 5 | def get_video_duration(filename): 6 | 7 | result = subprocess.check_output( 8 | f'ffprobe -v quiet -show_streams -select_streams v:0 -of json "{filename}"', shell=True 9 | ).decode() 10 | fields = json.loads(result)["streams"][0] 11 | 12 | duration = float(fields["duration"]) 13 | return duration 14 | 15 | if __name__ == "__main__": 16 | import os 17 | fp = os.path.join(os.environ["SL_DATA_DIR"], "videos_images/webvid_10m_2fps_224/22920757.mp4") 18 | print(get_video_duration(fp)) 19 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/tests/test_cfg.py: -------------------------------------------------------------------------------- 1 | from utils.config import Config 2 | 3 | cfg = Config.get_config() 4 | 5 | cfg_text = Config.pretty_text(cfg) 6 | print(cfg_text) 7 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/multi_modality/tools/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import socket 4 | 5 | 6 | def has_slurm(): 7 | """Determine whether the system has slurm. 8 | Returns: True if it does, else False.
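9 | 10 | """ 11 | return shutil.which("sbatch") is not None 12 | 13 | def random_port(): 14 | """Pick a random unused port. 15 | Returns: int 16 | 17 | """ 18 | with socket.socket() as s: 19 | s.bind(("", 0)) 20 | return s.getsockname()[1] 21 | 22 | def runcmd(cmd): 23 | """Run a shell command. 24 | 25 | Args: 26 | cmd (str): The command to run 27 | 28 | """ 29 | os.system(cmd) 30 | --------------------------------------------------------------------------------

A hedged sketch of how `has_slurm` and `random_port` above are typically combined when choosing a rendezvous port for a distributed launch; the import path and the launch command are illustrative assumptions, not repo code (compare `torchrun.sh` in the next entry):

# Sketch only: pick a free rendezvous port with the helpers above and
# decide how to launch based on scheduler availability. Note the port
# is bound, read, and released, so a rare race with another process
# grabbing it afterwards is possible.
from tools.utils import has_slurm, random_port  # assumed import path

port = random_port()  # int port number
scheduler = "slurm" if has_slurm() else "local"
print(f"launching via {scheduler}, rendezvous port {port}")
# runcmd(f"torchrun --rdzv_endpoint=localhost:{port} tasks/pretrain.py")

--------------------------------------------------------------------------------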
/third_party/InternVideo/InternVideo2/multi_modality/torchrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | MASTER_NODE=$(scontrol show hostnames "$SLURM_JOB_NODELIST" | head -n 1) 3 | ALL_NODES=$(scontrol show hostnames "$SLURM_JOB_NODELIST") 4 | MASTER_PORT=$((10660 + $RANDOM % 10)) 5 | 6 | echo "All nodes used:" 7 | echo ${ALL_NODES} 8 | echo "Master node:" 9 | echo ${MASTER_NODE} 10 | echo "Args:" 11 | echo $@ 12 | 13 | torchrun --rdzv_endpoint=${MASTER_NODE}:10069 $@ 14 | -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/single_modality/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .build import build_dataset, build_pretraining_dataset, build_multi_pretraining_dataset -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/single_modality/engines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/InternVideo/InternVideo2/single_modality/engines/__init__.py -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/single_modality/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .internvl_clip_vision import internvl_clip_6b 2 | from .videomae import mae_g14_hybrid 3 | from .internvideo2 import internvideo2_1B_patch14_224, internvideo2_6B_patch14_224 4 | from .internvideo2_cat import internvideo2_cat_1B_patch14_224, internvideo2_cat_6B_patch14_224 5 | from .internvideo2_ap import internvideo2_ap_1B_patch14_224, internvideo2_ap_6B_patch14_224 6 | from .internvideo2_pretrain import pretrain_internvideo2_1B_patch14_224, pretrain_internvideo2_6B_patch14_224 -------------------------------------------------------------------------------- /third_party/InternVideo/InternVideo2/single_modality/requirements.txt: -------------------------------------------------------------------------------- 1 | apex==0.9.10dev 2 | auto_augment==1.0.0 3 | decord==0.6.0 4 | deepspeed==0.10.1 5 | einops==0.7.0 6 | flash_attn==2.0.8 7 | fvcore==0.1.5.post20221221 8 | numpy==1.24.4 9 | opencv_python==4.8.0.76 10 | pandas==2.0.3 11 | Pillow==10.0.0 12 | scipy==1.13.0 13 | skimage==0.0 14 | tensorboardX==2.6.2 15 | timm==0.5.4 16 | torch==1.13.1+cu117 17 | torchvision==0.14.1+cu117 18 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/.gitattributes: -------------------------------------------------------------------------------- 1 | *.zip filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/NOTICE: -------------------------------------------------------------------------------- 1 | Apache Relay
Policy Learning 2 | Copyright Google LLC The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/.gitignore: -------------------------------------------------------------------------------- 1 | # General 2 | .DS_Store 3 | *.swp 4 | *.profraw 5 | 6 | # Editors 7 | .vscode 8 | .idea 9 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/README.public.md: -------------------------------------------------------------------------------- 1 | # D'Suite Scenes 2 | 3 | This repository is based on a collection of [MuJoCo](http://www.mujoco.org/) simulation 4 | scenes and common assets for D'Suite environments. Based on code in the ROBEL suite 5 | https://github.com/google-research/robel 6 | 7 | ## Disclaimer 8 | 9 | This is not an official Google product. 10 | 11 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/__init__.py -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/counters.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/hingecabinet.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/kettle.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate_mesh.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/burnerplate_mesh.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetbase.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetbase.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetdrawer.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinetdrawer.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinethandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/cabinethandle.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/countertop.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/countertop.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/faucet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/faucet.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/handle2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/handle2.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/hingecabinet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingecabinet.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/hingedoor.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingedoor.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/hingehandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hingehandle.stl -------------------------------------------------------------------------------- 
/third_party/relay-policy-learning/adept_models/kitchen/meshes/hood.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/hood.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/kettle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/kettle.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/kettlehandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/kettlehandle.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/knob.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/knob.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitch.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitch.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitchbase.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/lightswitchbase.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/micro.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/micro.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/microbutton.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microbutton.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/microdoor.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microdoor.stl 
-------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/microefeet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microefeet.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/microfeet.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microfeet.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/microhandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microhandle.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/microwindow.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/microwindow.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/oven.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/oven.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenhandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenhandle.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/oventop.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/oventop.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenwindow.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/ovenwindow.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/slidecabinet.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/slidecabinet.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/slidedoor.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/slidedoor.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/stoverim.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/stoverim.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/tile.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/tile.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/meshes/wall.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/meshes/wall.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/microwave.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/oven.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/slidecabinet.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/textures/marble1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/marble1.png -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/textures/metal1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/metal1.png -------------------------------------------------------------------------------- 
/third_party/relay-policy-learning/adept_models/kitchen/textures/tile1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/tile1.png -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/kitchen/textures/wood1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/kitchen/textures/wood1.png -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile.png -------------------------------------------------------------------------------- /third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/adept_models/scenes/textures/white_marble_tile2.png -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/README.md: -------------------------------------------------------------------------------- 1 | # franka 2 | Franka Panda MuJoCo models 3 | 4 | 5 | # Environment 6 | 7 | franka_panda.xml | coming soon 8 | :-------------------------:|:-------------------------: 9 | ![Alt text](franka_panda.png?raw=false "sawyer") | coming soon 10 | -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/franka_panda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/franka_panda.png -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/finger.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/hand.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link0.stl: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link0.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link1.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link2.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link3.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link4.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link5.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link6.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/collision/link7.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/collision/link7.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/finger.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/finger.stl -------------------------------------------------------------------------------- 
/third_party/relay-policy-learning/third_party/franka/meshes/visual/hand.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/hand.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link0.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link1.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link2.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link3.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link4.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link5.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link6.stl -------------------------------------------------------------------------------- /third_party/relay-policy-learning/third_party/franka/meshes/visual/link7.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/third_party/relay-policy-learning/third_party/franka/meshes/visual/link7.stl -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mazpie/genrl/4cd839470a30fbf23841b6930f900846d213c312/tools/__init__.py --------------------------------------------------------------------------------