├── README.md ├── compute_all_metric.py ├── config └── model_paths.json ├── evaluation ├── __init__.py ├── pac_score │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-310.pyc │ │ ├── __init__.cpython-39.pyc │ │ ├── pac_score.cpython-310.pyc │ │ └── pac_score.cpython-39.pyc │ └── pac_score.py └── tokenizer.py ├── images ├── qualitatives.png ├── result.jpg └── taxonomy.png ├── metrics ├── __init__.py ├── base_metric.py ├── bert_score.py ├── clip_score.py ├── polos.py └── standard.py ├── models ├── __init__.py ├── clip │ ├── __init__.py │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── clip.py │ ├── model.py │ └── simple_tokenizer.py ├── clip_lora │ ├── __init__.py │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── clip_lora.py │ ├── model.py │ └── simple_tokenizer.py ├── custom_loralib │ ├── __init__.py │ ├── layers.py │ └── utils.py ├── open_clip │ ├── __init__.py │ ├── bpe_simple_vocab_16e6.txt.gz │ ├── constants.py │ ├── factory.py │ ├── loss.py │ ├── model.py │ ├── model_configs │ │ ├── RN101-quickgelu.json │ │ ├── RN101.json │ │ ├── RN50-quickgelu.json │ │ ├── RN50.json │ │ ├── RN50x16.json │ │ ├── RN50x4.json │ │ ├── ViT-B-16-plus-240.json │ │ ├── ViT-B-16-plus.json │ │ ├── ViT-B-16.json │ │ ├── ViT-B-32-plus-256.json │ │ ├── ViT-B-32-quickgelu.json │ │ ├── ViT-B-32.json │ │ ├── ViT-H-14.json │ │ ├── ViT-H-16.json │ │ ├── ViT-L-14-280.json │ │ ├── ViT-L-14-336.json │ │ ├── ViT-L-14.json │ │ ├── ViT-L-16-320.json │ │ ├── ViT-L-16.json │ │ ├── ViT-g-14.json │ │ ├── timm-efficientnetv2_rw_s.json │ │ ├── timm-resnet50d.json │ │ ├── timm-resnetaa50d.json │ │ ├── timm-resnetblur50.json │ │ ├── timm-swin_base_patch4_window7_224.json │ │ ├── timm-vit_base_patch16_224.json │ │ ├── timm-vit_base_patch32_224.json │ │ └── timm-vit_small_patch16_224.json │ ├── openai.py │ ├── pretrained.py │ ├── timm_model.py │ ├── tokenizer.py │ ├── transform.py │ ├── utils.py │ └── version.py └── utils.py ├── requirements.txt ├── test_captions ├── blip2_results_xe.json ├── llava-1.5-7b_briefly.json ├── llava-1.5-7b_default.json ├── m2_transformer.json └── reference_captions.json └── utils ├── __init__.py ├── config.py └── utils.py /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/README.md -------------------------------------------------------------------------------- /compute_all_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/compute_all_metric.py -------------------------------------------------------------------------------- /config/model_paths.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/config/model_paths.json -------------------------------------------------------------------------------- /evaluation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/__init__.py -------------------------------------------------------------------------------- /evaluation/pac_score/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/pac_score/__init__.py -------------------------------------------------------------------------------- /evaluation/pac_score/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/pac_score/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /evaluation/pac_score/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/pac_score/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /evaluation/pac_score/__pycache__/pac_score.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/pac_score/__pycache__/pac_score.cpython-310.pyc -------------------------------------------------------------------------------- /evaluation/pac_score/__pycache__/pac_score.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/pac_score/__pycache__/pac_score.cpython-39.pyc -------------------------------------------------------------------------------- /evaluation/pac_score/pac_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/pac_score/pac_score.py -------------------------------------------------------------------------------- /evaluation/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/evaluation/tokenizer.py -------------------------------------------------------------------------------- /images/qualitatives.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/images/qualitatives.png -------------------------------------------------------------------------------- /images/result.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/images/result.jpg -------------------------------------------------------------------------------- /images/taxonomy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/images/taxonomy.png -------------------------------------------------------------------------------- /metrics/__init__.py: -------------------------------------------------------------------------------- 1 | from utils import * -------------------------------------------------------------------------------- /metrics/base_metric.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/metrics/base_metric.py -------------------------------------------------------------------------------- /metrics/bert_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/metrics/bert_score.py -------------------------------------------------------------------------------- /metrics/clip_score.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/metrics/clip_score.py -------------------------------------------------------------------------------- /metrics/polos.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/metrics/polos.py -------------------------------------------------------------------------------- /metrics/standard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/metrics/standard.py -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /models/clip/__init__.py: -------------------------------------------------------------------------------- 1 | from .clip import * 2 | -------------------------------------------------------------------------------- /models/clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /models/clip/clip.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip/clip.py -------------------------------------------------------------------------------- /models/clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip/model.py -------------------------------------------------------------------------------- /models/clip/simple_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip/simple_tokenizer.py -------------------------------------------------------------------------------- /models/clip_lora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip_lora/__init__.py -------------------------------------------------------------------------------- /models/clip_lora/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip_lora/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /models/clip_lora/clip_lora.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip_lora/clip_lora.py -------------------------------------------------------------------------------- /models/clip_lora/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip_lora/model.py -------------------------------------------------------------------------------- /models/clip_lora/simple_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/clip_lora/simple_tokenizer.py -------------------------------------------------------------------------------- /models/custom_loralib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/custom_loralib/__init__.py -------------------------------------------------------------------------------- /models/custom_loralib/layers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/custom_loralib/layers.py -------------------------------------------------------------------------------- /models/custom_loralib/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/custom_loralib/utils.py -------------------------------------------------------------------------------- /models/open_clip/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/__init__.py -------------------------------------------------------------------------------- /models/open_clip/bpe_simple_vocab_16e6.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/bpe_simple_vocab_16e6.txt.gz -------------------------------------------------------------------------------- /models/open_clip/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/constants.py -------------------------------------------------------------------------------- /models/open_clip/factory.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/factory.py -------------------------------------------------------------------------------- /models/open_clip/loss.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/loss.py -------------------------------------------------------------------------------- /models/open_clip/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model.py -------------------------------------------------------------------------------- /models/open_clip/model_configs/RN101-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/RN101-quickgelu.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/RN101.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/RN101.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/RN50-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/RN50-quickgelu.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/RN50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/RN50.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/RN50x16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/RN50x16.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/RN50x4.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/RN50x4.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-B-16-plus-240.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-B-16-plus-240.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-B-16-plus.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-B-16-plus.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-B-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-B-16.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-B-32-plus-256.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-B-32-plus-256.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-B-32-quickgelu.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-B-32-quickgelu.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-B-32.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-B-32.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-H-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-H-14.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-H-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-H-16.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-L-14-280.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-L-14-280.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-L-14-336.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-L-14-336.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-L-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-L-14.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-L-16-320.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-L-16-320.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-L-16.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-L-16.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/ViT-g-14.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/ViT-g-14.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-efficientnetv2_rw_s.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-efficientnetv2_rw_s.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-resnet50d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-resnet50d.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-resnetaa50d.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-resnetaa50d.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-resnetblur50.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-resnetblur50.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-swin_base_patch4_window7_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-swin_base_patch4_window7_224.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-vit_base_patch16_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-vit_base_patch16_224.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-vit_base_patch32_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-vit_base_patch32_224.json -------------------------------------------------------------------------------- /models/open_clip/model_configs/timm-vit_small_patch16_224.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/model_configs/timm-vit_small_patch16_224.json -------------------------------------------------------------------------------- /models/open_clip/openai.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/openai.py -------------------------------------------------------------------------------- /models/open_clip/pretrained.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/pretrained.py -------------------------------------------------------------------------------- /models/open_clip/timm_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/timm_model.py -------------------------------------------------------------------------------- /models/open_clip/tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/tokenizer.py -------------------------------------------------------------------------------- /models/open_clip/transform.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/transform.py -------------------------------------------------------------------------------- /models/open_clip/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/open_clip/utils.py -------------------------------------------------------------------------------- /models/open_clip/version.py: -------------------------------------------------------------------------------- 1 | __version__ = '2.0.2' 2 | -------------------------------------------------------------------------------- /models/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/models/utils.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/requirements.txt -------------------------------------------------------------------------------- /test_captions/blip2_results_xe.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/test_captions/blip2_results_xe.json -------------------------------------------------------------------------------- /test_captions/llava-1.5-7b_briefly.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/test_captions/llava-1.5-7b_briefly.json -------------------------------------------------------------------------------- /test_captions/llava-1.5-7b_default.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/test_captions/llava-1.5-7b_default.json -------------------------------------------------------------------------------- /test_captions/m2_transformer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/test_captions/m2_transformer.json -------------------------------------------------------------------------------- /test_captions/reference_captions.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/test_captions/reference_captions.json -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/utils/config.py -------------------------------------------------------------------------------- /utils/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aimagelab/awesome-captioning-evaluation/HEAD/utils/utils.py --------------------------------------------------------------------------------