├── .gitignore ├── LICENSE ├── MANIFEST.in ├── NOTICE.txt ├── README.md ├── batch_inference.py ├── datasets ├── hico │ ├── hico_600_annots.txt │ └── hico_600_taglist.txt ├── imagenet_multi │ ├── imagenet_multi_1000_annots.txt │ └── imagenet_multi_1000_taglist.txt ├── openimages_common_214 │ ├── imgs │ │ └── .gitkeep │ ├── openimages_common_214_ram_annots.txt │ ├── openimages_common_214_ram_taglist.txt │ ├── openimages_common_214_tag2text_idannots.txt │ └── openimages_common_214_tag2text_tagidlist.txt └── openimages_rare_200 │ ├── imgs │ └── .gitkeep │ ├── openimages_rare_200_llm_tag_descriptions.json │ ├── openimages_rare_200_ram_annots.txt │ └── openimages_rare_200_ram_taglist.txt ├── finetune.py ├── generate_tag_des_llm.py ├── gui_demo.ipynb ├── images ├── 1641173_2291260800.jpg ├── demo │ ├── demo1.jpg │ ├── demo2.jpg │ ├── demo3.jpg │ └── demo4.jpg ├── experiment_comparison.png ├── localization_and_recognition.jpg ├── openset_example.jpg ├── ram_grounded_sam.jpg ├── ram_plus_compare.jpg ├── ram_plus_experiment.png ├── ram_plus_framework.jpg ├── ram_plus_visualization.jpg ├── tag2text_framework.png ├── tag2text_grounded_sam.jpg ├── tag2text_retrieval_visualization.png ├── tag2text_visualization.png └── tagging_results.jpg ├── inference_ram.py ├── inference_ram_openset.py ├── inference_ram_plus.py ├── inference_ram_plus_openset.py ├── inference_tag2text.py ├── pretrain.py ├── ram ├── __init__.py ├── configs │ ├── finetune.yaml │ ├── finetune_tag2text.yaml │ ├── med_config.json │ ├── pretrain.yaml │ ├── pretrain_tag2text.yaml │ ├── q2l_config.json │ └── swin │ │ ├── config_swinB_224.json │ │ ├── config_swinB_384.json │ │ ├── config_swinL_224.json │ │ └── config_swinL_384.json ├── data │ ├── __init__.py │ ├── dataset.py │ ├── ram_tag_list.txt │ ├── ram_tag_list_chinese.txt │ ├── ram_tag_list_threshold.txt │ ├── randaugment.py │ ├── tag2text_ori_tag_list.txt │ ├── tag_list.txt │ └── utils.py ├── inference.py ├── models │ ├── __init__.py │ ├── bert.py │ ├── ram.py │ ├── ram_plus.py │ ├── swin_transformer.py │ ├── tag2text.py │ ├── utils.py │ └── vit.py ├── transform.py └── utils │ ├── __init__.py │ ├── metrics.py │ └── openset_utils.py ├── recognize_anything_demo.ipynb ├── requirements.txt ├── setup.cfg ├── setup.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/LICENSE -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/MANIFEST.in -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/NOTICE.txt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/README.md -------------------------------------------------------------------------------- /batch_inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/batch_inference.py -------------------------------------------------------------------------------- /datasets/hico/hico_600_annots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/hico/hico_600_annots.txt -------------------------------------------------------------------------------- /datasets/hico/hico_600_taglist.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/hico/hico_600_taglist.txt -------------------------------------------------------------------------------- /datasets/imagenet_multi/imagenet_multi_1000_annots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/imagenet_multi/imagenet_multi_1000_annots.txt -------------------------------------------------------------------------------- /datasets/imagenet_multi/imagenet_multi_1000_taglist.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/imagenet_multi/imagenet_multi_1000_taglist.txt -------------------------------------------------------------------------------- /datasets/openimages_common_214/imgs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasets/openimages_common_214/openimages_common_214_ram_annots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/openimages_common_214/openimages_common_214_ram_annots.txt -------------------------------------------------------------------------------- /datasets/openimages_common_214/openimages_common_214_ram_taglist.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/openimages_common_214/openimages_common_214_ram_taglist.txt -------------------------------------------------------------------------------- /datasets/openimages_common_214/openimages_common_214_tag2text_idannots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/openimages_common_214/openimages_common_214_tag2text_idannots.txt -------------------------------------------------------------------------------- /datasets/openimages_common_214/openimages_common_214_tag2text_tagidlist.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/openimages_common_214/openimages_common_214_tag2text_tagidlist.txt -------------------------------------------------------------------------------- /datasets/openimages_rare_200/imgs/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/openimages_rare_200/openimages_rare_200_llm_tag_descriptions.json -------------------------------------------------------------------------------- /datasets/openimages_rare_200/openimages_rare_200_ram_annots.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/openimages_rare_200/openimages_rare_200_ram_annots.txt -------------------------------------------------------------------------------- /datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/datasets/openimages_rare_200/openimages_rare_200_ram_taglist.txt -------------------------------------------------------------------------------- /finetune.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/finetune.py -------------------------------------------------------------------------------- /generate_tag_des_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/generate_tag_des_llm.py -------------------------------------------------------------------------------- /gui_demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/gui_demo.ipynb -------------------------------------------------------------------------------- /images/1641173_2291260800.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/1641173_2291260800.jpg -------------------------------------------------------------------------------- /images/demo/demo1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/demo/demo1.jpg -------------------------------------------------------------------------------- /images/demo/demo2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/demo/demo2.jpg -------------------------------------------------------------------------------- /images/demo/demo3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/demo/demo3.jpg -------------------------------------------------------------------------------- /images/demo/demo4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/demo/demo4.jpg -------------------------------------------------------------------------------- /images/experiment_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/experiment_comparison.png -------------------------------------------------------------------------------- /images/localization_and_recognition.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/localization_and_recognition.jpg -------------------------------------------------------------------------------- /images/openset_example.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/openset_example.jpg -------------------------------------------------------------------------------- /images/ram_grounded_sam.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/ram_grounded_sam.jpg -------------------------------------------------------------------------------- /images/ram_plus_compare.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/ram_plus_compare.jpg -------------------------------------------------------------------------------- /images/ram_plus_experiment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/ram_plus_experiment.png -------------------------------------------------------------------------------- /images/ram_plus_framework.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/ram_plus_framework.jpg -------------------------------------------------------------------------------- /images/ram_plus_visualization.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/ram_plus_visualization.jpg -------------------------------------------------------------------------------- /images/tag2text_framework.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/tag2text_framework.png -------------------------------------------------------------------------------- /images/tag2text_grounded_sam.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/tag2text_grounded_sam.jpg -------------------------------------------------------------------------------- /images/tag2text_retrieval_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/tag2text_retrieval_visualization.png -------------------------------------------------------------------------------- /images/tag2text_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/tag2text_visualization.png -------------------------------------------------------------------------------- /images/tagging_results.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/images/tagging_results.jpg -------------------------------------------------------------------------------- /inference_ram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/inference_ram.py -------------------------------------------------------------------------------- /inference_ram_openset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/inference_ram_openset.py -------------------------------------------------------------------------------- /inference_ram_plus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/inference_ram_plus.py -------------------------------------------------------------------------------- /inference_ram_plus_openset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/inference_ram_plus_openset.py -------------------------------------------------------------------------------- /inference_tag2text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/inference_tag2text.py -------------------------------------------------------------------------------- /pretrain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/pretrain.py -------------------------------------------------------------------------------- /ram/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/__init__.py -------------------------------------------------------------------------------- /ram/configs/finetune.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/finetune.yaml -------------------------------------------------------------------------------- /ram/configs/finetune_tag2text.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/finetune_tag2text.yaml -------------------------------------------------------------------------------- /ram/configs/med_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/med_config.json -------------------------------------------------------------------------------- /ram/configs/pretrain.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/pretrain.yaml -------------------------------------------------------------------------------- /ram/configs/pretrain_tag2text.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/pretrain_tag2text.yaml -------------------------------------------------------------------------------- /ram/configs/q2l_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/q2l_config.json -------------------------------------------------------------------------------- /ram/configs/swin/config_swinB_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/swin/config_swinB_224.json -------------------------------------------------------------------------------- /ram/configs/swin/config_swinB_384.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/swin/config_swinB_384.json -------------------------------------------------------------------------------- /ram/configs/swin/config_swinL_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/swin/config_swinL_224.json -------------------------------------------------------------------------------- /ram/configs/swin/config_swinL_384.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/configs/swin/config_swinL_384.json -------------------------------------------------------------------------------- /ram/data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/__init__.py -------------------------------------------------------------------------------- /ram/data/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/dataset.py -------------------------------------------------------------------------------- /ram/data/ram_tag_list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/ram_tag_list.txt -------------------------------------------------------------------------------- /ram/data/ram_tag_list_chinese.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/ram_tag_list_chinese.txt -------------------------------------------------------------------------------- /ram/data/ram_tag_list_threshold.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/ram_tag_list_threshold.txt -------------------------------------------------------------------------------- /ram/data/randaugment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/randaugment.py -------------------------------------------------------------------------------- /ram/data/tag2text_ori_tag_list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/tag2text_ori_tag_list.txt -------------------------------------------------------------------------------- /ram/data/tag_list.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/tag_list.txt -------------------------------------------------------------------------------- /ram/data/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/data/utils.py -------------------------------------------------------------------------------- /ram/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/inference.py -------------------------------------------------------------------------------- /ram/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/__init__.py -------------------------------------------------------------------------------- /ram/models/bert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/bert.py -------------------------------------------------------------------------------- /ram/models/ram.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/ram.py -------------------------------------------------------------------------------- /ram/models/ram_plus.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/ram_plus.py -------------------------------------------------------------------------------- /ram/models/swin_transformer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/swin_transformer.py -------------------------------------------------------------------------------- /ram/models/tag2text.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/tag2text.py -------------------------------------------------------------------------------- /ram/models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/utils.py -------------------------------------------------------------------------------- /ram/models/vit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/models/vit.py -------------------------------------------------------------------------------- /ram/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/transform.py -------------------------------------------------------------------------------- /ram/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/utils/__init__.py -------------------------------------------------------------------------------- /ram/utils/metrics.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/utils/metrics.py -------------------------------------------------------------------------------- /ram/utils/openset_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/ram/utils/openset_utils.py -------------------------------------------------------------------------------- /recognize_anything_demo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/recognize_anything_demo.ipynb -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/requirements.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/setup.cfg -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/setup.py -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xinyu1205/recognize-anything/HEAD/utils.py --------------------------------------------------------------------------------