├── GAVS ├── avs_model.py ├── segment_anything │ ├── build_sam.py │ ├── checkpoint │ │ └── sam_vit_b_01ec64.pth │ ├── config.py │ ├── dataset │ │ ├── avs_bench.py │ │ ├── avs_bench_zsfs.py │ │ └── v3 │ │ │ ├── meta_v3_seen.csv │ │ │ ├── meta_v3_seen_train.csv │ │ │ ├── meta_v3_seen_val.csv │ │ │ ├── meta_v3_unseen.csv │ │ │ ├── metadata.csv │ │ │ ├── v3_1_shot │ │ │ ├── test.csv │ │ │ └── train.csv │ │ │ ├── v3_3_shot │ │ │ ├── test.csv │ │ │ └── train.csv │ │ │ └── v3_5_shot │ │ │ ├── test.csv │ │ │ └── train.csv │ ├── feature_extract │ │ ├── v2_img_embed │ │ │ ├── --iSerV5DbY_119000_129000_f0.pth │ │ │ ├── --iSerV5DbY_119000_129000_f9.pth │ │ │ └── --iSerV5DbY_68000_78000_f0.pth │ │ └── v2_vggish_embs │ │ │ ├── --iSerV5DbY_119000_129000.npy │ │ │ └── --iSerV5DbY_68000_78000.npy │ ├── loss │ │ └── loss.py │ ├── modeling │ │ ├── Audio_Proj.py │ │ ├── Visual_Proj.py │ │ ├── __init__.py │ │ ├── common.py │ │ ├── image_encoder.py │ │ ├── mask_decoder.py │ │ ├── prompt_encoder.py │ │ ├── sam.py │ │ └── transformer.py │ ├── run_v1m.sh │ ├── run_v1m_x.py │ ├── run_v3.sh │ ├── run_v3_x.py │ └── utils │ │ ├── __init__.py │ │ ├── amg.py │ │ ├── mask_to_bbox.py │ │ ├── onnx.py │ │ ├── transforms.py │ │ ├── utils.py │ │ ├── v1m │ │ ├── __pycache__ │ │ │ ├── pyutils.cpython-38.pyc │ │ │ └── utility.cpython-38.pyc │ │ ├── loss.py │ │ ├── pyutils.py │ │ ├── system.py │ │ └── utility.py │ │ ├── v1s │ │ ├── loss.py │ │ ├── pyutils.py │ │ ├── system.py │ │ └── utility.py │ │ └── v2 │ │ ├── __pycache__ │ │ ├── compute_color_metrics.cpython-38.pyc │ │ ├── loss.cpython-38.pyc │ │ ├── pyutils.cpython-38.pyc │ │ └── utility.cpython-38.pyc │ │ ├── compute_color_metrics.py │ │ ├── loss.py │ │ ├── pyutils.py │ │ └── utility.py └── test.py ├── LICENSE ├── README.md ├── assets └── README │ ├── image-1.png │ ├── image-2.png │ ├── image-3.png │ ├── image-4.png │ ├── image-5.png │ ├── image-6.png │ └── image.png └── v3.zip /GAVS/avs_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/avs_model.py -------------------------------------------------------------------------------- /GAVS/segment_anything/build_sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/build_sam.py -------------------------------------------------------------------------------- /GAVS/segment_anything/checkpoint/sam_vit_b_01ec64.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/checkpoint/sam_vit_b_01ec64.pth -------------------------------------------------------------------------------- /GAVS/segment_anything/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/config.py -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/avs_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/avs_bench.py -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/avs_bench_zsfs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/avs_bench_zsfs.py -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/meta_v3_seen.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/meta_v3_seen.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/meta_v3_seen_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/meta_v3_seen_train.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/meta_v3_seen_val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/meta_v3_seen_val.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/meta_v3_unseen.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/meta_v3_unseen.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/metadata.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/metadata.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/v3_1_shot/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/v3_1_shot/test.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/v3_1_shot/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/v3_1_shot/train.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/v3_3_shot/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/v3_3_shot/test.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/v3_3_shot/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/v3_3_shot/train.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/v3_5_shot/test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/v3_5_shot/test.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/dataset/v3/v3_5_shot/train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/dataset/v3/v3_5_shot/train.csv -------------------------------------------------------------------------------- /GAVS/segment_anything/feature_extract/v2_img_embed/--iSerV5DbY_119000_129000_f0.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/feature_extract/v2_img_embed/--iSerV5DbY_119000_129000_f0.pth -------------------------------------------------------------------------------- /GAVS/segment_anything/feature_extract/v2_img_embed/--iSerV5DbY_119000_129000_f9.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/feature_extract/v2_img_embed/--iSerV5DbY_119000_129000_f9.pth -------------------------------------------------------------------------------- /GAVS/segment_anything/feature_extract/v2_img_embed/--iSerV5DbY_68000_78000_f0.pth: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/feature_extract/v2_img_embed/--iSerV5DbY_68000_78000_f0.pth -------------------------------------------------------------------------------- /GAVS/segment_anything/feature_extract/v2_vggish_embs/--iSerV5DbY_119000_129000.npy: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /GAVS/segment_anything/feature_extract/v2_vggish_embs/--iSerV5DbY_68000_78000.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/feature_extract/v2_vggish_embs/--iSerV5DbY_68000_78000.npy -------------------------------------------------------------------------------- /GAVS/segment_anything/loss/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/loss/loss.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/Audio_Proj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/Audio_Proj.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/Visual_Proj.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/Visual_Proj.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/__init__.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/common.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/image_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/image_encoder.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/mask_decoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/mask_decoder.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/prompt_encoder.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/prompt_encoder.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/sam.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/sam.py -------------------------------------------------------------------------------- /GAVS/segment_anything/modeling/transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/modeling/transformer.py -------------------------------------------------------------------------------- /GAVS/segment_anything/run_v1m.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/run_v1m.sh -------------------------------------------------------------------------------- /GAVS/segment_anything/run_v1m_x.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/run_v1m_x.py -------------------------------------------------------------------------------- /GAVS/segment_anything/run_v3.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/run_v3.sh -------------------------------------------------------------------------------- /GAVS/segment_anything/run_v3_x.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/run_v3_x.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/__init__.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/amg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/amg.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/mask_to_bbox.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/mask_to_bbox.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/onnx.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/onnx.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/transforms.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/transforms.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/utils.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1m/__pycache__/pyutils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1m/__pycache__/pyutils.cpython-38.pyc -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1m/__pycache__/utility.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1m/__pycache__/utility.cpython-38.pyc -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1m/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1m/loss.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1m/pyutils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1m/pyutils.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1m/system.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1m/system.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1m/utility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1m/utility.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1s/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1s/loss.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1s/pyutils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1s/pyutils.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1s/system.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1s/system.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v1s/utility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v1s/utility.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/__pycache__/compute_color_metrics.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/__pycache__/compute_color_metrics.cpython-38.pyc -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/__pycache__/loss.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/__pycache__/loss.cpython-38.pyc -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/__pycache__/pyutils.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/__pycache__/pyutils.cpython-38.pyc -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/__pycache__/utility.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/__pycache__/utility.cpython-38.pyc -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/compute_color_metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/compute_color_metrics.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/loss.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/pyutils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/pyutils.py -------------------------------------------------------------------------------- /GAVS/segment_anything/utils/v2/utility.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/GAVS/segment_anything/utils/v2/utility.py -------------------------------------------------------------------------------- /GAVS/test.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/README.md -------------------------------------------------------------------------------- /assets/README/image-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/assets/README/image-1.png -------------------------------------------------------------------------------- /assets/README/image-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/assets/README/image-2.png -------------------------------------------------------------------------------- /assets/README/image-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/assets/README/image-3.png -------------------------------------------------------------------------------- /assets/README/image-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/assets/README/image-4.png -------------------------------------------------------------------------------- /assets/README/image-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/assets/README/image-5.png -------------------------------------------------------------------------------- /assets/README/image-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/assets/README/image-6.png -------------------------------------------------------------------------------- /assets/README/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/assets/README/image.png -------------------------------------------------------------------------------- /v3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GeWu-Lab/Generalizable-Audio-Visual-Segmentation/HEAD/v3.zip --------------------------------------------------------------------------------