├── images └── geneval_figure_1.png ├── evaluation ├── download_models.sh ├── object_names.txt ├── summary_scores.py └── evaluate_images.py ├── prompts ├── object_names.txt ├── create_prompts.py ├── generation_prompts.txt └── evaluation_metadata.jsonl ├── LICENSE ├── .gitignore ├── README.md ├── generation └── diffusers_generate.py └── environment.yml /images/geneval_figure_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/djghosh13/geneval/HEAD/images/geneval_figure_1.png -------------------------------------------------------------------------------- /evaluation/download_models.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Download Mask2Former object detection config and weights 4 | 5 | if [ ! -z "$1" ] 6 | then 7 | mkdir -p "$1" 8 | wget https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth -O "$1/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.pth" 9 | fi 10 | -------------------------------------------------------------------------------- /prompts/object_names.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | computer mouse 66 | tv remote 67 | computer keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /evaluation/object_names.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorcycle 5 | airplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | couch 59 | potted plant 60 | bed 61 | dining table 62 | toilet 63 | tv 64 | laptop 65 | computer mouse 66 | tv remote 67 | computer keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush 81 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Dhruba Ghosh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /evaluation/summary_scores.py: -------------------------------------------------------------------------------- 1 | # Get results of evaluation 2 | 3 | import argparse 4 | import os 5 | 6 | import numpy as np 7 | import pandas as pd 8 | 9 | 10 | parser = argparse.ArgumentParser() 11 | parser.add_argument("filename", type=str) 12 | args = parser.parse_args() 13 | 14 | # Load classnames 15 | 16 | with open(os.path.join(os.path.dirname(__file__), "object_names.txt")) as cls_file: 17 | classnames = [line.strip() for line in cls_file] 18 | cls_to_idx = {"_".join(cls.split()):idx for idx, cls in enumerate(classnames)} 19 | 20 | # Load results 21 | 22 | df = pd.read_json(args.filename, orient="records", lines=True) 23 | 24 | # Measure overall success 25 | 26 | print("Summary") 27 | print("=======") 28 | print(f"Total images: {len(df)}") 29 | print(f"Total prompts: {len(df.groupby('metadata'))}") 30 | print(f"% correct images: {df['correct'].mean():.2%}") 31 | print(f"% correct prompts: {df.groupby('metadata')['correct'].any().mean():.2%}") 32 | print() 33 | 34 | # By group 35 | 36 | task_scores = [] 37 | 38 | print("Task breakdown") 39 | print("==============") 40 | for tag, task_df in df.groupby('tag', sort=False): 41 | task_scores.append(task_df['correct'].mean()) 42 | print(f"{tag:<16} = {task_df['correct'].mean():.2%} ({task_df['correct'].sum()} / {len(task_df)})") 43 | print() 44 | 45 | print(f"Overall score (avg. over tasks): {np.mean(task_scores):.5f}") -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GenEval: An Object-Focused Framework for Evaluating Text-to-Image Alignment 2 | 3 | This repository contains code for the paper [GenEval: An Object-Focused Framework for Evaluating Text-to-Image Alignment](https://arxiv.org/abs/2310.11513) by Dhruba Ghosh, Hanna Hajishirzi, and Ludwig Schmidt. 4 | 5 | TLDR: We demonstrate the advantages of evaluating text-to-image models using existing object detection methods, to produce a fine-grained instance-level analysis of compositional capabilities. 6 | 7 | ### Abstract 8 | *Recent breakthroughs in diffusion models, multimodal pretraining, and efficient finetuning have led to an explosion of text-to-image generative models. 9 | Given human evaluation is expensive and difficult to scale, automated methods are critical for evaluating the increasingly large number of new models. 10 | However, most current automated evaluation metrics like FID or CLIPScore only offer a holistic measure of image quality or image-text alignment, and are unsuited for fine-grained or instance-level analysis. 11 | In this paper, we introduce GenEval, an object-focused framework to evaluate compositional image properties such as object co-occurrence, position, count, and color. 12 | We show that current object detection models can be leveraged to evaluate text-to-image models on a variety of generation tasks with strong human agreement, and that other discriminative vision models can be linked to this pipeline to further verify properties like object color. 13 | We then evaluate several open-source text-to-image models and analyze their relative generative capabilities on our benchmark. 14 | We find that recent models demonstrate significant improvement on these tasks, though they are still lacking in complex capabilities such as spatial relations and attribute binding. 15 | Finally, we demonstrate how GenEval might be used to help discover existing failure modes, in order to inform development of the next generation of text-to-image models.* 16 | 17 | ### Summary figure 18 | 19 |

20 | figure1 21 |

22 | 23 | ### Main results 24 | 25 | | Model | Overall | Single object | Two object | Counting | Colors | Position | Color attribution | 26 | | ----- | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | 27 | | CLIP retrieval (baseline) | **0.35** | 0.89 | 0.22 | 0.37 | 0.62 | 0.03 | 0.00 | 28 | minDALL-E | **0.23** | 0.73 | 0.11 | 0.12 | 0.37 | 0.02 | 0.01 | 29 | Stable Diffusion v1.5 | **0.43** | 0.97 | 0.38 | 0.35 | 0.76 | 0.04 | 0.06 | 30 | Stable Diffusion v2.1 | **0.50** | 0.98 | 0.51 | 0.44 | 0.85 | 0.07 | 0.17 | 31 | Stable Diffusion XL | **0.55** | 0.98 | 0.74 | 0.39 | 0.85 | 0.15 | 0.23 | 32 | IF-XL | **0.61** | 0.97 | 0.74 | 0.66 | 0.81 | 0.13 | 0.35 | 33 | 34 | ## Code 35 | 36 | ### Setup 37 | 38 | Install the dependencies, including `mmdet`, and download the Mask2Former object detector: 39 | ```bash 40 | git clone https://github.com/djghosh13/geneval.git 41 | cd geneval 42 | conda env create -f environment.yml 43 | conda activate geneval 44 | ./evaluation/download_models.sh "/" 45 | 46 | git clone https://github.com/open-mmlab/mmdetection.git 47 | cd mmdetection; git checkout 2.x 48 | pip install -v -e . 49 | ``` 50 | 51 | The original GenEval prompts from the paper are already in `prompts/`, but you can sample new prompts with different random seeds using 52 | ```bash 53 | python prompts/create_prompts.py --seed -n -o "/" 54 | ``` 55 | 56 | ### Image generation 57 | 58 | Sample image generation code for Stable Diffusion models is given in `generation/diffusers_generate.py`. Run 59 | ```bash 60 | python generation/diffusers_generate.py \ 61 | "/evaluation_metadata.jsonl" \ 62 | --model "stable-diffusion-v1-5/stable-diffusion-v1-5" \ 63 | --outdir "" 64 | ``` 65 | to generate 4 images per prompt using Stable Diffusion v1.5 and save in ``. 66 | 67 | The generated format should be 68 | ``` 69 | / 70 | 00000/ 71 | metadata.jsonl 72 | grid.png 73 | samples/ 74 | 0000.png 75 | 0001.png 76 | 0002.png 77 | 0003.png 78 | 00001/ 79 | ... 80 | ``` 81 | where `metadata.jsonl` contains the `N`-th line from `evaluation_metadata.jsonl`. `grid.png` is optional here. 82 | 83 | ### Evaluation 84 | 85 | ```bash 86 | python evaluation/evaluate_images.py \ 87 | "" \ 88 | --outfile "/results.jsonl" \ 89 | --model-path "" 90 | ``` 91 | 92 | This will result in a JSONL file with each line corresponding to an image. In particular, each line has a `correct` key and a `reason` key specifying whether the generated image was deemed correct and, if applicable, why it was marked incorrect. You can run 93 | 94 | ```bash 95 | python evaluation/summary_scores.py "/results.jsonl" 96 | ``` 97 | 98 | to get the score across each task, and the overall GenEval score. 99 | -------------------------------------------------------------------------------- /generation/diffusers_generate.py: -------------------------------------------------------------------------------- 1 | """Adapted from TODO""" 2 | 3 | import argparse 4 | import json 5 | import os 6 | 7 | import torch 8 | import numpy as np 9 | from PIL import Image 10 | from tqdm import tqdm, trange 11 | from einops import rearrange 12 | from torchvision.utils import make_grid 13 | from torchvision.transforms import ToTensor 14 | from pytorch_lightning import seed_everything 15 | from diffusers import DiffusionPipeline, StableDiffusionPipeline 16 | 17 | 18 | torch.set_grad_enabled(False) 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser() 23 | parser.add_argument( 24 | "metadata_file", 25 | type=str, 26 | help="JSONL file containing lines of metadata for each prompt" 27 | ) 28 | parser.add_argument( 29 | "--model", 30 | type=str, 31 | default="runwayml/stable-diffusion-v1-5", 32 | help="Huggingface model name" 33 | ) 34 | parser.add_argument( 35 | "--outdir", 36 | type=str, 37 | nargs="?", 38 | help="dir to write results to", 39 | default="outputs" 40 | ) 41 | parser.add_argument( 42 | "--n_samples", 43 | type=int, 44 | default=4, 45 | help="number of samples", 46 | ) 47 | parser.add_argument( 48 | "--steps", 49 | type=int, 50 | default=50, 51 | help="number of ddim sampling steps", 52 | ) 53 | parser.add_argument( 54 | "--negative-prompt", 55 | type=str, 56 | nargs="?", 57 | const="ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face", 58 | default=None, 59 | help="negative prompt for guidance" 60 | ) 61 | parser.add_argument( 62 | "--H", 63 | type=int, 64 | default=None, 65 | help="image height, in pixel space", 66 | ) 67 | parser.add_argument( 68 | "--W", 69 | type=int, 70 | default=None, 71 | help="image width, in pixel space", 72 | ) 73 | parser.add_argument( 74 | "--scale", 75 | type=float, 76 | default=9.0, 77 | help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))", 78 | ) 79 | parser.add_argument( 80 | "--seed", 81 | type=int, 82 | default=42, 83 | help="the seed (for reproducible sampling)", 84 | ) 85 | parser.add_argument( 86 | "--batch_size", 87 | type=int, 88 | default=1, 89 | help="how many samples can be produced simultaneously", 90 | ) 91 | parser.add_argument( 92 | "--skip_grid", 93 | action="store_true", 94 | help="skip saving grid", 95 | ) 96 | opt = parser.parse_args() 97 | return opt 98 | 99 | 100 | def main(opt): 101 | # Load prompts 102 | with open(opt.metadata_file) as fp: 103 | metadatas = [json.loads(line) for line in fp] 104 | 105 | # Load model 106 | if opt.model == "stabilityai/stable-diffusion-xl-base-1.0": 107 | model = DiffusionPipeline.from_pretrained(opt.model, torch_dtype=torch.float16, use_safetensors=True, variant="fp16") 108 | model.enable_xformers_memory_efficient_attention() 109 | else: 110 | model = StableDiffusionPipeline.from_pretrained(opt.model, torch_dtype=torch.float16) 111 | device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 112 | model = model.to(device) 113 | model.enable_attention_slicing() 114 | 115 | for index, metadata in enumerate(metadatas): 116 | seed_everything(opt.seed) 117 | 118 | outpath = os.path.join(opt.outdir, f"{index:0>5}") 119 | os.makedirs(outpath, exist_ok=True) 120 | 121 | prompt = metadata['prompt'] 122 | n_rows = batch_size = opt.batch_size 123 | print(f"Prompt ({index: >3}/{len(metadatas)}): '{prompt}'") 124 | 125 | sample_path = os.path.join(outpath, "samples") 126 | os.makedirs(sample_path, exist_ok=True) 127 | with open(os.path.join(outpath, "metadata.jsonl"), "w") as fp: 128 | json.dump(metadata, fp) 129 | 130 | sample_count = 0 131 | 132 | with torch.no_grad(): 133 | all_samples = list() 134 | for n in trange((opt.n_samples + batch_size - 1) // batch_size, desc="Sampling"): 135 | # Generate images 136 | samples = model( 137 | prompt, 138 | height=opt.H, 139 | width=opt.W, 140 | num_inference_steps=opt.steps, 141 | guidance_scale=opt.scale, 142 | num_images_per_prompt=min(batch_size, opt.n_samples - sample_count), 143 | negative_prompt=opt.negative_prompt or None 144 | ).images 145 | for sample in samples: 146 | sample.save(os.path.join(sample_path, f"{sample_count:05}.png")) 147 | sample_count += 1 148 | if not opt.skip_grid: 149 | all_samples.append(torch.stack([ToTensor()(sample) for sample in samples], 0)) 150 | 151 | if not opt.skip_grid: 152 | # additionally, save as grid 153 | grid = torch.stack(all_samples, 0) 154 | grid = rearrange(grid, 'n b c h w -> (n b) c h w') 155 | grid = make_grid(grid, nrow=n_rows) 156 | 157 | # to image 158 | grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy() 159 | grid = Image.fromarray(grid.astype(np.uint8)) 160 | grid.save(os.path.join(outpath, f'grid.png')) 161 | del grid 162 | del all_samples 163 | 164 | print("Done.") 165 | 166 | 167 | if __name__ == "__main__": 168 | opt = parse_args() 169 | main(opt) 170 | -------------------------------------------------------------------------------- /prompts/create_prompts.py: -------------------------------------------------------------------------------- 1 | """ 2 | Generate prompts for evaluation 3 | """ 4 | 5 | import argparse 6 | import json 7 | import os 8 | import yaml 9 | 10 | import numpy as np 11 | 12 | # Load classnames 13 | 14 | with open("object_names.txt") as cls_file: 15 | classnames = [line.strip() for line in cls_file] 16 | 17 | # Proper a vs an 18 | 19 | def with_article(name: str): 20 | if name[0] in "aeiou": 21 | return f"an {name}" 22 | return f"a {name}" 23 | 24 | # Proper plural 25 | 26 | def make_plural(name: str): 27 | if name[-1] in "s": 28 | return f"{name}es" 29 | return f"{name}s" 30 | 31 | # Generates single object samples 32 | 33 | def generate_single_object_sample(rng: np.random.Generator, size: int = None): 34 | TAG = "single_object" 35 | if size > len(classnames): 36 | size = len(classnames) 37 | print(f"Not enough distinct classes, generating only {size} samples") 38 | return_scalar = size is None 39 | size = size or 1 40 | idxs = rng.choice(len(classnames), size=size, replace=False) 41 | samples = [dict( 42 | tag=TAG, 43 | include=[ 44 | {"class": classnames[idx], "count": 1} 45 | ], 46 | prompt=f"a photo of {with_article(classnames[idx])}" 47 | ) for idx in idxs] 48 | if return_scalar: 49 | return samples[0] 50 | return samples 51 | 52 | # Generate two object samples 53 | 54 | def generate_two_object_sample(rng: np.random.Generator): 55 | TAG = "two_object" 56 | idx_a, idx_b = rng.choice(len(classnames), size=2, replace=False) 57 | return dict( 58 | tag=TAG, 59 | include=[ 60 | {"class": classnames[idx_a], "count": 1}, 61 | {"class": classnames[idx_b], "count": 1} 62 | ], 63 | prompt=f"a photo of {with_article(classnames[idx_a])} and {with_article(classnames[idx_b])}" 64 | ) 65 | 66 | # Generate counting samples 67 | 68 | numbers = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"] 69 | 70 | def generate_counting_sample(rng: np.random.Generator, max_count=4): 71 | TAG = "counting" 72 | idx = rng.choice(len(classnames)) 73 | num = int(rng.integers(2, max_count, endpoint=True)) 74 | return dict( 75 | tag=TAG, 76 | include=[ 77 | {"class": classnames[idx], "count": num} 78 | ], 79 | exclude=[ 80 | {"class": classnames[idx], "count": num + 1} 81 | ], 82 | prompt=f"a photo of {numbers[num]} {make_plural(classnames[idx])}" 83 | ) 84 | 85 | # Generate color samples 86 | 87 | colors = ["red", "orange", "yellow", "green", "blue", "purple", "pink", "brown", "black", "white"] 88 | 89 | def generate_color_sample(rng: np.random.Generator): 90 | TAG = "colors" 91 | idx = rng.choice(len(classnames) - 1) + 1 92 | idx = (idx + classnames.index("person")) % len(classnames) # No "[COLOR] person" prompts 93 | color = colors[rng.choice(len(colors))] 94 | return dict( 95 | tag=TAG, 96 | include=[ 97 | {"class": classnames[idx], "count": 1, "color": color} 98 | ], 99 | prompt=f"a photo of {with_article(color)} {classnames[idx]}" 100 | ) 101 | 102 | # Generate position samples 103 | 104 | positions = ["left of", "right of", "above", "below"] 105 | 106 | def generate_position_sample(rng: np.random.Generator): 107 | TAG = "position" 108 | idx_a, idx_b = rng.choice(len(classnames), size=2, replace=False) 109 | position = positions[rng.choice(len(positions))] 110 | return dict( 111 | tag=TAG, 112 | include=[ 113 | {"class": classnames[idx_b], "count": 1}, 114 | {"class": classnames[idx_a], "count": 1, "position": (position, 0)} 115 | ], 116 | prompt=f"a photo of {with_article(classnames[idx_a])} {position} {with_article(classnames[idx_b])}" 117 | ) 118 | 119 | # Generate color attribution samples 120 | 121 | def generate_color_attribution_sample(rng: np.random.Generator): 122 | TAG = "color_attr" 123 | idxs = rng.choice(len(classnames) - 1, size=2, replace=False) + 1 124 | idx_a, idx_b = (idxs + classnames.index("person")) % len(classnames) # No "[COLOR] person" prompts 125 | cidx_a, cidx_b = rng.choice(len(colors), size=2, replace=False) 126 | return dict( 127 | tag=TAG, 128 | include=[ 129 | {"class": classnames[idx_a], "count": 1, "color": colors[cidx_a]}, 130 | {"class": classnames[idx_b], "count": 1, "color": colors[cidx_b]} 131 | ], 132 | prompt=f"a photo of {with_article(colors[cidx_a])} {classnames[idx_a]} and {with_article(colors[cidx_b])} {classnames[idx_b]}" 133 | ) 134 | 135 | 136 | # Generate evaluation suite 137 | 138 | def generate_suite(rng: np.random.Generator, n: int = 100, output_path: str = ""): 139 | samples = [] 140 | # Generate single object samples for all COCO classnames 141 | samples.extend(generate_single_object_sample(rng, size=len(classnames))) 142 | # Generate two object samples (~100) 143 | for _ in range(n): 144 | samples.append(generate_two_object_sample(rng)) 145 | # Generate counting samples 146 | for _ in range(n): 147 | samples.append(generate_counting_sample(rng, max_count=4)) 148 | # Generate color samples 149 | for _ in range(n): 150 | samples.append(generate_color_sample(rng)) 151 | # Generate position samples 152 | for _ in range(n): 153 | samples.append(generate_position_sample(rng)) 154 | # Generate color attribution samples 155 | for _ in range(n): 156 | samples.append(generate_color_attribution_sample(rng)) 157 | # De-duplicate 158 | unique_samples, used_samples = [], set() 159 | for sample in samples: 160 | sample_text = yaml.safe_dump(sample) 161 | if sample_text not in used_samples: 162 | unique_samples.append(sample) 163 | used_samples.add(sample_text) 164 | 165 | # Write to files 166 | os.makedirs(output_path, exist_ok=True) 167 | with open(os.path.join(output_path, "generation_prompts.txt"), "w") as fp: 168 | for sample in unique_samples: 169 | print(sample['prompt'], file=fp) 170 | with open(os.path.join(output_path, "evaluation_metadata.jsonl"), "w") as fp: 171 | for sample in unique_samples: 172 | print(json.dumps(sample), file=fp) 173 | 174 | 175 | if __name__ == "__main__": 176 | parser = argparse.ArgumentParser() 177 | parser.add_argument("--seed", type=int, default=43, help="generation seed (default: 43)") 178 | parser.add_argument("--num-prompts", "-n", type=int, default=100, help="number of prompts per task (default: 100)") 179 | parser.add_argument("--output-path", "-o", type=str, default="prompts", help="output folder for prompts and metadata (default: 'prompts/')") 180 | args = parser.parse_args() 181 | rng = np.random.default_rng(args.seed) 182 | generate_suite(rng, args.num_prompts, args.output_path) 183 | 184 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: geneval 2 | channels: 3 | - pytorch 4 | - nvidia/label/cuda-11.3.0 5 | - conda-forge 6 | - defaults 7 | dependencies: 8 | - _libgcc_mutex=0.1=main 9 | - _openmp_mutex=5.1=1_gnu 10 | - blas=1.0=mkl 11 | - brotlipy=0.7.0=py39h27cfd23_1003 12 | - bzip2=1.0.8=h7b6447c_0 13 | - ca-certificates=2023.11.17=hbcca054_0 14 | - certifi=2023.11.17=pyhd8ed1ab_0 15 | - cffi=1.15.1=py39h5eee18b_3 16 | - charset-normalizer=2.0.4=pyhd3eb1b0_0 17 | - colorama=0.4.6=pyhd8ed1ab_0 18 | - cryptography=39.0.1=py39h9ce1e76_0 19 | - cuda-nvcc=11.3.58=h2467b9f_0 20 | - cudatoolkit=11.3.1=h2bc3f7f_2 21 | - diffusers=0.24.0=pyhd8ed1ab_0 22 | - ffmpeg=4.3=hf484d3e_0 23 | - freetype=2.12.1=h4a9f257_0 24 | - giflib=5.2.1=h5eee18b_3 25 | - gmp=6.2.1=h295c915_3 26 | - gnutls=3.6.15=he1e5248_0 27 | - huggingface_hub=0.19.4=pyhd8ed1ab_0 28 | - idna=3.4=py39h06a4308_0 29 | - intel-openmp=2021.4.0=h06a4308_3561 30 | - jpeg=9e=h5eee18b_1 31 | - lame=3.100=h7b6447c_0 32 | - lcms2=2.12=h3be6417_0 33 | - ld_impl_linux-64=2.38=h1181459_1 34 | - lerc=3.0=h295c915_0 35 | - libdeflate=1.17=h5eee18b_0 36 | - libffi=3.4.4=h6a678d5_0 37 | - libgcc-ng=11.2.0=h1234567_1 38 | - libgomp=11.2.0=h1234567_1 39 | - libiconv=1.16=h7f8727e_2 40 | - libidn2=2.3.4=h5eee18b_0 41 | - libpng=1.6.39=h5eee18b_0 42 | - libstdcxx-ng=11.2.0=h1234567_1 43 | - libtasn1=4.19.0=h5eee18b_0 44 | - libtiff=4.5.0=h6a678d5_2 45 | - libunistring=0.9.10=h27cfd23_0 46 | - libwebp=1.2.4=h11a3e52_1 47 | - libwebp-base=1.2.4=h5eee18b_1 48 | - lz4-c=1.9.4=h6a678d5_0 49 | - mkl=2021.4.0=h06a4308_640 50 | - mkl-service=2.4.0=py39h7f8727e_0 51 | - mkl_fft=1.3.1=py39hd3c417c_0 52 | - mkl_random=1.2.2=py39h51133e4_0 53 | - ncurses=6.4=h6a678d5_0 54 | - nettle=3.7.3=hbbd107a_1 55 | - numpy=1.23.1=py39h6c91a56_0 56 | - numpy-base=1.23.1=py39ha15fc14_0 57 | - openh264=2.1.1=h4ff587b_0 58 | - openssl=1.1.1w=h7f8727e_0 59 | - pillow=9.4.0=py39h6a678d5_0 60 | - pip=20.3.3=py39h06a4308_0 61 | - pycparser=2.21=pyhd3eb1b0_0 62 | - pyopenssl=23.0.0=py39h06a4308_0 63 | - pysocks=1.7.1=py39h06a4308_0 64 | - python=3.9.16=h7a1cb2a_2 65 | - python_abi=3.9=2_cp39 66 | - pytorch=1.12.1=py3.9_cuda11.3_cudnn8.3.2_0 67 | - pytorch-mutex=1.0=cuda 68 | - pyyaml=6.0=py39hb9d737c_4 69 | - readline=8.2=h5eee18b_0 70 | - requests=2.29.0=py39h06a4308_0 71 | - setuptools=66.0.0=py39h06a4308_0 72 | - six=1.16.0=pyhd3eb1b0_1 73 | - sqlite=3.41.2=h5eee18b_0 74 | - tk=8.6.12=h1ccaba5_0 75 | - torchvision=0.13.1=py39_cu113 76 | - typing-extensions=4.5.0=hd8ed1ab_0 77 | - typing_extensions=4.5.0=pyha770c72_0 78 | - urllib3=1.26.15=py39h06a4308_0 79 | - wheel=0.38.4=py39h06a4308_0 80 | - xz=5.4.2=h5eee18b_0 81 | - yaml=0.2.5=h7f98852_2 82 | - zlib=1.2.13=h5eee18b_0 83 | - zstd=1.5.5=hc292b87_0 84 | - pip: 85 | - absl-py==1.4.0 86 | - addict==2.4.0 87 | - aiohttp==3.8.4 88 | - aiosignal==1.3.1 89 | - albumentations==1.3.0 90 | - altair==5.0.0 91 | - aniso8601==9.0.1 92 | - antlr4-python3-runtime==4.8 93 | - async-timeout==4.0.2 94 | - attrs==23.1.0 95 | - autofaiss==2.15.8 96 | - blinker==1.6.2 97 | - braceexpand==0.1.7 98 | - cachetools==5.3.0 99 | - click==8.1.3 100 | - clip-anytorch==2.5.2 101 | - clip-benchmark==1.4.0 102 | - clip-retrieval==2.37.0 103 | - cloudpickle==2.2.1 104 | - coloredlogs==15.0.1 105 | - contourpy==1.0.7 106 | - cycler==0.11.0 107 | - cython==0.29.34 108 | - dataclasses==0.6 109 | - decorator==5.1.1 110 | - docker-pycreds==0.4.0 111 | - einops==0.3.0 112 | - embedding-reader==1.5.1 113 | - exifread-nocycle==3.0.1 114 | - faiss-cpu==1.7.4 115 | - filelock==3.12.0 116 | - fire==0.4.0 117 | - flask==2.3.3 118 | - flask-cors==3.0.10 119 | - flask-restful==0.3.10 120 | - flatbuffers==23.5.9 121 | - fonttools==4.39.4 122 | - frozenlist==1.3.3 123 | - fsspec==2022.11.0 124 | - ftfy==6.1.1 125 | - future==0.18.3 126 | - gitdb==4.0.10 127 | - gitpython==3.1.31 128 | - google-auth==2.18.1 129 | - google-auth-oauthlib==1.0.0 130 | - grpcio==1.55.0 131 | - h5py==3.8.0 132 | - humanfriendly==10.0 133 | - imageio==2.9.0 134 | - imageio-ffmpeg==0.4.2 135 | - img2dataset==1.42.0 136 | - importlib-metadata==6.6.0 137 | - importlib-resources==5.12.0 138 | - invisible-watermark==0.1.5 139 | - itsdangerous==2.1.2 140 | - jinja2==3.1.2 141 | - joblib==1.2.0 142 | - jsonschema==4.17.3 143 | - kiwisolver==1.4.4 144 | - kornia==0.6.0 145 | - lazy-loader==0.2 146 | - markdown==3.4.3 147 | - markdown-it-py==2.2.0 148 | - markupsafe==2.1.2 149 | - matplotlib==3.7.1 150 | - mdurl==0.1.2 151 | - mmcv-full==1.7.1 152 | - mmengine==0.7.3 153 | - model-index==0.1.11 154 | - mpmath==1.3.0 155 | - multidict==6.0.4 156 | - multilingual-clip==1.0.10 157 | - networkx==3.1 158 | - nltk==3.8.1 159 | - nvidia-cublas-cu11==2022.4.8 160 | - nvidia-cublas-cu117==11.10.1.25 161 | - nvidia-cuda-runtime-cu11==2022.4.25 162 | - nvidia-cuda-runtime-cu117==11.7.60 163 | - nvidia-cudnn-cu11==2022.5.19 164 | - nvidia-cudnn-cu116==8.4.0.27 165 | - nvidia-cusolver-cu11==2022.4.8 166 | - nvidia-cusolver-cu117==11.3.5.50 167 | - nvidia-cusparse-cu11==2022.4.8 168 | - nvidia-cusparse-cu117==11.7.3.50 169 | - nvidia-pyindex==1.0.9 170 | - oauthlib==3.2.2 171 | - omegaconf==2.1.1 172 | - onnx==1.14.0 173 | - onnxruntime==1.14.1 174 | - open-clip-torch==2.20.0 175 | - opencv-python==4.6.0.66 176 | - opencv-python-headless==4.7.0.72 177 | - openmim==0.3.7 178 | - ordered-set==4.1.0 179 | - packaging==23.1 180 | - pandas==1.5.3 181 | - pathtools==0.1.2 182 | - prometheus-client==0.17.1 183 | - promise==2.3 184 | - protobuf==3.20.3 185 | - psutil==5.9.5 186 | - pyarrow==7.0.0 187 | - pyasn1==0.5.0 188 | - pyasn1-modules==0.3.0 189 | - pycocoevalcap==1.2 190 | - pycocotools==2.0.6 191 | - pydeck==0.8.1b0 192 | - pydeprecate==0.3.1 193 | - pygments==2.15.1 194 | - pympler==1.0.1 195 | - pyparsing==3.0.9 196 | - pyrsistent==0.19.3 197 | - python-dateutil==2.8.2 198 | - pytorch-lightning==1.4.2 199 | - pytz==2023.3 200 | - pywavelets==1.4.1 201 | - qudida==0.0.4 202 | - regex==2023.5.5 203 | - requests-oauthlib==1.3.1 204 | - rich==13.3.5 205 | - rsa==4.9 206 | - safetensors==0.3.1 207 | - scikit-image==0.20.0 208 | - scikit-learn==1.2.2 209 | - scipy==1.9.1 210 | - semver==3.0.0 211 | - sentence-transformers==2.2.2 212 | - sentencepiece==0.1.99 213 | - sentry-sdk==1.29.2 214 | - setproctitle==1.3.2 215 | - shapely==2.0.1 216 | - shortuuid==1.0.11 217 | - smmap==5.0.0 218 | - streamlit==1.12.1 219 | - streamlit-drawable-canvas==0.8.0 220 | - submitit==1.4.5 221 | - sympy==1.12 222 | - tabulate==0.9.0 223 | - tensorboard==2.13.0 224 | - tensorboard-data-server==0.7.0 225 | - termcolor==2.3.0 226 | - terminaltables==3.1.10 227 | - test-tube==0.7.5 228 | - threadpoolctl==3.1.0 229 | - tifffile==2023.4.12 230 | - timm==0.9.2 231 | - tokenizers==0.15.0 232 | - toml==0.10.2 233 | - tomli==2.0.1 234 | - toolz==0.12.0 235 | - torchmetrics==0.6.0 236 | - tornado==6.3.2 237 | - tqdm==4.65.0 238 | - transformers==4.36.1 239 | - tzdata==2023.3 240 | - tzlocal==5.0.1 241 | - validators==0.20.0 242 | - wandb==0.12.21 243 | - watchdog==3.0.0 244 | - wcwidth==0.2.6 245 | - webdataset==0.2.48 246 | - werkzeug==2.3.7 247 | - yapf==0.33.0 248 | - yarl==1.9.2 249 | - zipp==3.15.0 250 | -------------------------------------------------------------------------------- /evaluation/evaluate_images.py: -------------------------------------------------------------------------------- 1 | """ 2 | Evaluate generated images using Mask2Former (or other object detector model) 3 | """ 4 | 5 | import argparse 6 | import json 7 | import os 8 | import re 9 | import sys 10 | import time 11 | 12 | import warnings 13 | warnings.filterwarnings("ignore") 14 | 15 | import numpy as np 16 | import pandas as pd 17 | from PIL import Image, ImageOps 18 | import torch 19 | import mmdet 20 | from mmdet.apis import inference_detector, init_detector 21 | 22 | import open_clip 23 | from clip_benchmark.metrics import zeroshot_classification as zsc 24 | zsc.tqdm = lambda it, *args, **kwargs: it 25 | 26 | # Get directory path 27 | 28 | def parse_args(): 29 | parser = argparse.ArgumentParser() 30 | parser.add_argument("imagedir", type=str) 31 | parser.add_argument("--outfile", type=str, default="results.jsonl") 32 | parser.add_argument("--model-config", type=str, default=None) 33 | parser.add_argument("--model-path", type=str, default="./") 34 | # Other arguments 35 | parser.add_argument("--options", nargs="*", type=str, default=[]) 36 | args = parser.parse_args() 37 | args.options = dict(opt.split("=", 1) for opt in args.options) 38 | if args.model_config is None: 39 | args.model_config = os.path.join( 40 | os.path.dirname(mmdet.__file__), 41 | "../configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py" 42 | ) 43 | return args 44 | 45 | DEVICE = "cuda" if torch.cuda.is_available() else "cpu" 46 | assert DEVICE == "cuda" 47 | 48 | def timed(fn): 49 | def wrapper(*args, **kwargs): 50 | startt = time.time() 51 | result = fn(*args, **kwargs) 52 | endt = time.time() 53 | print(f'Function {fn.__name__!r} executed in {endt - startt:.3f}s', file=sys.stderr) 54 | return result 55 | return wrapper 56 | 57 | # Load models 58 | 59 | @timed 60 | def load_models(args): 61 | CONFIG_PATH = args.model_config 62 | OBJECT_DETECTOR = args.options.get('model', "mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco") 63 | CKPT_PATH = os.path.join(args.model_path, f"{OBJECT_DETECTOR}.pth") 64 | object_detector = init_detector(CONFIG_PATH, CKPT_PATH, device=DEVICE) 65 | 66 | clip_arch = args.options.get('clip_model', "ViT-L-14") 67 | clip_model, _, transform = open_clip.create_model_and_transforms(clip_arch, pretrained="openai", device=DEVICE) 68 | tokenizer = open_clip.get_tokenizer(clip_arch) 69 | 70 | with open(os.path.join(os.path.dirname(__file__), "object_names.txt")) as cls_file: 71 | classnames = [line.strip() for line in cls_file] 72 | 73 | return object_detector, (clip_model, transform, tokenizer), classnames 74 | 75 | 76 | COLORS = ["red", "orange", "yellow", "green", "blue", "purple", "pink", "brown", "black", "white"] 77 | COLOR_CLASSIFIERS = {} 78 | 79 | # Evaluation parts 80 | 81 | class ImageCrops(torch.utils.data.Dataset): 82 | def __init__(self, image: Image.Image, objects): 83 | self._image = image.convert("RGB") 84 | bgcolor = args.options.get('bgcolor', "#999") 85 | if bgcolor == "original": 86 | self._blank = self._image.copy() 87 | else: 88 | self._blank = Image.new("RGB", image.size, color=bgcolor) 89 | self._objects = objects 90 | 91 | def __len__(self): 92 | return len(self._objects) 93 | 94 | def __getitem__(self, index): 95 | box, mask = self._objects[index] 96 | if mask is not None: 97 | assert tuple(self._image.size[::-1]) == tuple(mask.shape), (index, self._image.size[::-1], mask.shape) 98 | image = Image.composite(self._image, self._blank, Image.fromarray(mask)) 99 | else: 100 | image = self._image 101 | if args.options.get('crop', '1') == '1': 102 | image = image.crop(box[:4]) 103 | # if args.save: 104 | # base_count = len(os.listdir(args.save)) 105 | # image.save(os.path.join(args.save, f"cropped_{base_count:05}.png")) 106 | return (transform(image), 0) 107 | 108 | 109 | def color_classification(image, bboxes, classname): 110 | if classname not in COLOR_CLASSIFIERS: 111 | COLOR_CLASSIFIERS[classname] = zsc.zero_shot_classifier( 112 | clip_model, tokenizer, COLORS, 113 | [ 114 | f"a photo of a {{c}} {classname}", 115 | f"a photo of a {{c}}-colored {classname}", 116 | f"a photo of a {{c}} object" 117 | ], 118 | DEVICE 119 | ) 120 | clf = COLOR_CLASSIFIERS[classname] 121 | dataloader = torch.utils.data.DataLoader( 122 | ImageCrops(image, bboxes), 123 | batch_size=16, num_workers=4 124 | ) 125 | with torch.no_grad(): 126 | pred, _ = zsc.run_classification(clip_model, clf, dataloader, DEVICE) 127 | return [COLORS[index.item()] for index in pred.argmax(1)] 128 | 129 | 130 | def compute_iou(box_a, box_b): 131 | area_fn = lambda box: max(box[2] - box[0] + 1, 0) * max(box[3] - box[1] + 1, 0) 132 | i_area = area_fn([ 133 | max(box_a[0], box_b[0]), max(box_a[1], box_b[1]), 134 | min(box_a[2], box_b[2]), min(box_a[3], box_b[3]) 135 | ]) 136 | u_area = area_fn(box_a) + area_fn(box_b) - i_area 137 | return i_area / u_area if u_area else 0 138 | 139 | 140 | def relative_position(obj_a, obj_b): 141 | """Give position of A relative to B, factoring in object dimensions""" 142 | boxes = np.array([obj_a[0], obj_b[0]])[:, :4].reshape(2, 2, 2) 143 | center_a, center_b = boxes.mean(axis=-2) 144 | dim_a, dim_b = np.abs(np.diff(boxes, axis=-2))[..., 0, :] 145 | offset = center_a - center_b 146 | # 147 | revised_offset = np.maximum(np.abs(offset) - POSITION_THRESHOLD * (dim_a + dim_b), 0) * np.sign(offset) 148 | if np.all(np.abs(revised_offset) < 1e-3): 149 | return set() 150 | # 151 | dx, dy = revised_offset / np.linalg.norm(offset) 152 | relations = set() 153 | if dx < -0.5: relations.add("left of") 154 | if dx > 0.5: relations.add("right of") 155 | if dy < -0.5: relations.add("above") 156 | if dy > 0.5: relations.add("below") 157 | return relations 158 | 159 | 160 | def evaluate(image, objects, metadata): 161 | """ 162 | Evaluate given image using detected objects on the global metadata specifications. 163 | Assumptions: 164 | * Metadata combines 'include' clauses with AND, and 'exclude' clauses with OR 165 | * All clauses are independent, i.e., duplicating a clause has no effect on the correctness 166 | * CHANGED: Color and position will only be evaluated on the most confidently predicted objects; 167 | therefore, objects are expected to appear in sorted order 168 | """ 169 | correct = True 170 | reason = [] 171 | matched_groups = [] 172 | # Check for expected objects 173 | for req in metadata.get('include', []): 174 | classname = req['class'] 175 | matched = True 176 | found_objects = objects.get(classname, [])[:req['count']] 177 | if len(found_objects) < req['count']: 178 | correct = matched = False 179 | reason.append(f"expected {classname}>={req['count']}, found {len(found_objects)}") 180 | else: 181 | if 'color' in req: 182 | # Color check 183 | colors = color_classification(image, found_objects, classname) 184 | if colors.count(req['color']) < req['count']: 185 | correct = matched = False 186 | reason.append( 187 | f"expected {req['color']} {classname}>={req['count']}, found " + 188 | f"{colors.count(req['color'])} {req['color']}; and " + 189 | ", ".join(f"{colors.count(c)} {c}" for c in COLORS if c in colors) 190 | ) 191 | if 'position' in req and matched: 192 | # Relative position check 193 | expected_rel, target_group = req['position'] 194 | if matched_groups[target_group] is None: 195 | correct = matched = False 196 | reason.append(f"no target for {classname} to be {expected_rel}") 197 | else: 198 | for obj in found_objects: 199 | for target_obj in matched_groups[target_group]: 200 | true_rels = relative_position(obj, target_obj) 201 | if expected_rel not in true_rels: 202 | correct = matched = False 203 | reason.append( 204 | f"expected {classname} {expected_rel} target, found " + 205 | f"{' and '.join(true_rels)} target" 206 | ) 207 | break 208 | if not matched: 209 | break 210 | if matched: 211 | matched_groups.append(found_objects) 212 | else: 213 | matched_groups.append(None) 214 | # Check for non-expected objects 215 | for req in metadata.get('exclude', []): 216 | classname = req['class'] 217 | if len(objects.get(classname, [])) >= req['count']: 218 | correct = False 219 | reason.append(f"expected {classname}<{req['count']}, found {len(objects[classname])}") 220 | return correct, "\n".join(reason) 221 | 222 | 223 | def evaluate_image(filepath, metadata): 224 | result = inference_detector(object_detector, filepath) 225 | bbox = result[0] if isinstance(result, tuple) else result 226 | segm = result[1] if isinstance(result, tuple) and len(result) > 1 else None 227 | image = ImageOps.exif_transpose(Image.open(filepath)) 228 | detected = {} 229 | # Determine bounding boxes to keep 230 | confidence_threshold = THRESHOLD if metadata['tag'] != "counting" else COUNTING_THRESHOLD 231 | for index, classname in enumerate(classnames): 232 | ordering = np.argsort(bbox[index][:, 4])[::-1] 233 | ordering = ordering[bbox[index][ordering, 4] > confidence_threshold] # Threshold 234 | ordering = ordering[:MAX_OBJECTS].tolist() # Limit number of detected objects per class 235 | detected[classname] = [] 236 | while ordering: 237 | max_obj = ordering.pop(0) 238 | detected[classname].append((bbox[index][max_obj], None if segm is None else segm[index][max_obj])) 239 | ordering = [ 240 | obj for obj in ordering 241 | if NMS_THRESHOLD == 1 or compute_iou(bbox[index][max_obj], bbox[index][obj]) < NMS_THRESHOLD 242 | ] 243 | if not detected[classname]: 244 | del detected[classname] 245 | # Evaluate 246 | is_correct, reason = evaluate(image, detected, metadata) 247 | return { 248 | 'filename': filepath, 249 | 'tag': metadata['tag'], 250 | 'prompt': metadata['prompt'], 251 | 'correct': is_correct, 252 | 'reason': reason, 253 | 'metadata': json.dumps(metadata), 254 | 'details': json.dumps({ 255 | key: [box.tolist() for box, _ in value] 256 | for key, value in detected.items() 257 | }) 258 | } 259 | 260 | 261 | def main(args): 262 | full_results = [] 263 | for subfolder in os.listdir(args.imagedir): 264 | folderpath = os.path.join(args.imagedir, subfolder) 265 | if not os.path.isdir(folderpath) or not subfolder.isdigit(): 266 | continue 267 | with open(os.path.join(folderpath, "metadata.jsonl")) as fp: 268 | metadata = json.load(fp) 269 | # Evaluate each image 270 | for imagename in os.listdir(os.path.join(folderpath, "samples")): 271 | imagepath = os.path.join(folderpath, "samples", imagename) 272 | if not os.path.isfile(imagepath) or not re.match(r"\d+\.png", imagename): 273 | continue 274 | result = evaluate_image(imagepath, metadata) 275 | full_results.append(result) 276 | # Save results 277 | if os.path.dirname(args.outfile): 278 | os.makedirs(os.path.dirname(args.outfile), exist_ok=True) 279 | with open(args.outfile, "w") as fp: 280 | pd.DataFrame(full_results).to_json(fp, orient="records", lines=True) 281 | 282 | 283 | if __name__ == "__main__": 284 | args = parse_args() 285 | object_detector, (clip_model, transform, tokenizer), classnames = load_models(args) 286 | THRESHOLD = float(args.options.get('threshold', 0.3)) 287 | COUNTING_THRESHOLD = float(args.options.get('counting_threshold', 0.9)) 288 | MAX_OBJECTS = int(args.options.get('max_objects', 16)) 289 | NMS_THRESHOLD = float(args.options.get('max_overlap', 1.0)) 290 | POSITION_THRESHOLD = float(args.options.get('position_threshold', 0.1)) 291 | 292 | main(args) 293 | -------------------------------------------------------------------------------- /prompts/generation_prompts.txt: -------------------------------------------------------------------------------- 1 | a photo of a bench 2 | a photo of a cow 3 | a photo of a bicycle 4 | a photo of a clock 5 | a photo of a carrot 6 | a photo of a suitcase 7 | a photo of a fork 8 | a photo of a surfboard 9 | a photo of a refrigerator 10 | a photo of a cup 11 | a photo of a microwave 12 | a photo of a potted plant 13 | a photo of a snowboard 14 | a photo of a zebra 15 | a photo of a parking meter 16 | a photo of a spoon 17 | a photo of a skateboard 18 | a photo of a car 19 | a photo of a motorcycle 20 | a photo of a traffic light 21 | a photo of a book 22 | a photo of a couch 23 | a photo of a backpack 24 | a photo of a computer keyboard 25 | a photo of a toaster 26 | a photo of a bird 27 | a photo of a bowl 28 | a photo of a dog 29 | a photo of a tie 30 | a photo of a laptop 31 | a photo of a computer mouse 32 | a photo of a sandwich 33 | a photo of a baseball bat 34 | a photo of a train 35 | a photo of a cell phone 36 | a photo of a chair 37 | a photo of a tv 38 | a photo of a broccoli 39 | a photo of a bed 40 | a photo of a skis 41 | a photo of a handbag 42 | a photo of a pizza 43 | a photo of a frisbee 44 | a photo of a scissors 45 | a photo of a bottle 46 | a photo of an elephant 47 | a photo of a toilet 48 | a photo of an oven 49 | a photo of an orange 50 | a photo of a person 51 | a photo of a teddy bear 52 | a photo of a vase 53 | a photo of a banana 54 | a photo of a toothbrush 55 | a photo of a tv remote 56 | a photo of a dining table 57 | a photo of a stop sign 58 | a photo of a sheep 59 | a photo of a fire hydrant 60 | a photo of an airplane 61 | a photo of a giraffe 62 | a photo of a horse 63 | a photo of a cat 64 | a photo of a donut 65 | a photo of a boat 66 | a photo of a baseball glove 67 | a photo of a hair drier 68 | a photo of a sink 69 | a photo of a cake 70 | a photo of a wine glass 71 | a photo of an apple 72 | a photo of a bus 73 | a photo of a tennis racket 74 | a photo of a knife 75 | a photo of a hot dog 76 | a photo of a truck 77 | a photo of an umbrella 78 | a photo of a sports ball 79 | a photo of a bear 80 | a photo of a kite 81 | a photo of a bench and a sports ball 82 | a photo of a toothbrush and a snowboard 83 | a photo of a toaster and an oven 84 | a photo of a broccoli and a vase 85 | a photo of a tennis racket and a wine glass 86 | a photo of a fork and a knife 87 | a photo of a hair drier and a cake 88 | a photo of a horse and a giraffe 89 | a photo of a horse and a computer keyboard 90 | a photo of a toothbrush and a carrot 91 | a photo of a cake and a zebra 92 | a photo of a hair drier and a bear 93 | a photo of a knife and a zebra 94 | a photo of a couch and a wine glass 95 | a photo of a frisbee and a vase 96 | a photo of a book and a laptop 97 | a photo of a dining table and a bear 98 | a photo of a frisbee and a couch 99 | a photo of a couch and a horse 100 | a photo of a toilet and a computer mouse 101 | a photo of a bottle and a refrigerator 102 | a photo of a potted plant and a backpack 103 | a photo of a skateboard and a cake 104 | a photo of a broccoli and a parking meter 105 | a photo of a zebra and a bed 106 | a photo of an oven and a bed 107 | a photo of a baseball bat and a fork 108 | a photo of a vase and a spoon 109 | a photo of a skateboard and a sink 110 | a photo of a pizza and a bench 111 | a photo of a bowl and a pizza 112 | a photo of a tennis racket and a bird 113 | a photo of a wine glass and a bear 114 | a photo of a fork and a book 115 | a photo of a scissors and a bowl 116 | a photo of a laptop and a carrot 117 | a photo of a stop sign and a bottle 118 | a photo of a microwave and a truck 119 | a photo of a person and a bear 120 | a photo of a frisbee and a cell phone 121 | a photo of a parking meter and a teddy bear 122 | a photo of a tennis racket and a bicycle 123 | a photo of a stop sign and a motorcycle 124 | a photo of a fire hydrant and a tennis racket 125 | a photo of a scissors and a sandwich 126 | a photo of a pizza and a book 127 | a photo of a giraffe and a computer mouse 128 | a photo of a stop sign and a toaster 129 | a photo of a computer mouse and a zebra 130 | a photo of a chair and a bench 131 | a photo of a tv and a carrot 132 | a photo of a surfboard and a suitcase 133 | a photo of a computer keyboard and a laptop 134 | a photo of a computer keyboard and a microwave 135 | a photo of a scissors and a bird 136 | a photo of a person and a snowboard 137 | a photo of a cow and a horse 138 | a photo of a handbag and a refrigerator 139 | a photo of a chair and a laptop 140 | a photo of a toothbrush and a bench 141 | a photo of a book and a baseball bat 142 | a photo of a horse and a train 143 | a photo of a bench and a vase 144 | a photo of a traffic light and a backpack 145 | a photo of a sports ball and a cow 146 | a photo of a computer mouse and a spoon 147 | a photo of a tv and a bicycle 148 | a photo of a bench and a snowboard 149 | a photo of a toothbrush and a toilet 150 | a photo of a person and an apple 151 | a photo of a sink and a sports ball 152 | a photo of a stop sign and a dog 153 | a photo of a knife and a stop sign 154 | a photo of a wine glass and a handbag 155 | a photo of a bowl and a skis 156 | a photo of a frisbee and an apple 157 | a photo of a computer keyboard and a cell phone 158 | a photo of a stop sign and a fork 159 | a photo of a potted plant and a boat 160 | a photo of a tv and a cell phone 161 | a photo of a tie and a broccoli 162 | a photo of a potted plant and a donut 163 | a photo of a person and a sink 164 | a photo of a couch and a snowboard 165 | a photo of a fork and a baseball glove 166 | a photo of an apple and a toothbrush 167 | a photo of a bus and a baseball glove 168 | a photo of a person and a stop sign 169 | a photo of a carrot and a couch 170 | a photo of a baseball bat and a bear 171 | a photo of a fire hydrant and a train 172 | a photo of a baseball glove and a carrot 173 | a photo of a microwave and a bench 174 | a photo of a cake and a stop sign 175 | a photo of a car and a computer mouse 176 | a photo of a suitcase and a dining table 177 | a photo of a person and a traffic light 178 | a photo of a cell phone and a horse 179 | a photo of a baseball bat and a giraffe 180 | a photo of two clocks 181 | a photo of two backpacks 182 | a photo of four handbags 183 | a photo of two frisbees 184 | a photo of three sports balls 185 | a photo of two bears 186 | a photo of two ties 187 | a photo of four sinks 188 | a photo of two toothbrushs 189 | a photo of three persons 190 | a photo of three tennis rackets 191 | a photo of four bowls 192 | a photo of four vases 193 | a photo of three cups 194 | a photo of four computer keyboards 195 | a photo of three sinks 196 | a photo of two ovens 197 | a photo of two toilets 198 | a photo of two bicycles 199 | a photo of two trains 200 | a photo of three oranges 201 | a photo of three buses 202 | a photo of three handbags 203 | a photo of three snowboards 204 | a photo of two snowboards 205 | a photo of four dogs 206 | a photo of three apples 207 | a photo of two sheeps 208 | a photo of three hot dogs 209 | a photo of three zebras 210 | a photo of three kites 211 | a photo of four apples 212 | a photo of three cell phones 213 | a photo of four baseball gloves 214 | a photo of three computer keyboards 215 | a photo of two beds 216 | a photo of two tv remotes 217 | a photo of three fire hydrants 218 | a photo of three books 219 | a photo of four giraffes 220 | a photo of two vases 221 | a photo of four donuts 222 | a photo of four chairs 223 | a photo of three baseball bats 224 | a photo of four stop signs 225 | a photo of two pizzas 226 | a photo of three refrigerators 227 | a photo of two fire hydrants 228 | a photo of three giraffes 229 | a photo of four tvs 230 | a photo of three wine glasses 231 | a photo of four broccolis 232 | a photo of three trucks 233 | a photo of two trucks 234 | a photo of two carrots 235 | a photo of two sandwichs 236 | a photo of four traffic lights 237 | a photo of four clocks 238 | a photo of two cars 239 | a photo of two bananas 240 | a photo of two wine glasses 241 | a photo of three pizzas 242 | a photo of four knifes 243 | a photo of three suitcases 244 | a photo of four zebras 245 | a photo of two teddy bears 246 | a photo of four skateboards 247 | a photo of four hot dogs 248 | a photo of three birds 249 | a photo of four boats 250 | a photo of four microwaves 251 | a photo of two hair driers 252 | a photo of three laptops 253 | a photo of three cows 254 | a photo of two parking meters 255 | a photo of four benchs 256 | a photo of three benchs 257 | a photo of four frisbees 258 | a photo of four books 259 | a photo of four buses 260 | a photo of a blue fire hydrant 261 | a photo of a pink car 262 | a photo of a purple cup 263 | a photo of a blue cow 264 | a photo of a yellow boat 265 | a photo of a blue umbrella 266 | a photo of a blue elephant 267 | a photo of a yellow elephant 268 | a photo of a red bicycle 269 | a photo of a purple suitcase 270 | a photo of a purple hair drier 271 | a photo of a white sandwich 272 | a photo of a purple elephant 273 | a photo of a green microwave 274 | a photo of a red zebra 275 | a photo of a red apple 276 | a photo of a yellow tv remote 277 | a photo of a blue toilet 278 | a photo of an orange orange 279 | a photo of a black donut 280 | a photo of a red vase 281 | a photo of a purple pizza 282 | a photo of a pink skateboard 283 | a photo of a green skateboard 284 | a photo of a purple bear 285 | a photo of a brown chair 286 | a photo of a brown computer keyboard 287 | a photo of an orange cow 288 | a photo of a brown skis 289 | a photo of a white kite 290 | a photo of a red dog 291 | a photo of a green couch 292 | a photo of a yellow airplane 293 | a photo of an orange tv 294 | a photo of a white scissors 295 | a photo of a pink cell phone 296 | a photo of a green surfboard 297 | a photo of a white fire hydrant 298 | a photo of a black bicycle 299 | a photo of a purple carrot 300 | a photo of a black dining table 301 | a photo of a purple potted plant 302 | a photo of a purple backpack 303 | a photo of a yellow train 304 | a photo of a pink potted plant 305 | a photo of a red giraffe 306 | a photo of a brown bear 307 | a photo of a black train 308 | a photo of an orange laptop 309 | a photo of a green hot dog 310 | a photo of a yellow parking meter 311 | a photo of a red potted plant 312 | a photo of a green traffic light 313 | a photo of a blue tv 314 | a photo of a brown refrigerator 315 | a photo of a black tv remote 316 | a photo of a purple scissors 317 | a photo of a yellow orange 318 | a photo of a brown toaster 319 | a photo of a red parking meter 320 | a photo of a brown orange 321 | a photo of a green clock 322 | a photo of a white sheep 323 | a photo of a yellow oven 324 | a photo of a green vase 325 | a photo of a black teddy bear 326 | a photo of a yellow carrot 327 | a photo of a black hot dog 328 | a photo of a red scissors 329 | a photo of a white teddy bear 330 | a photo of a black skis 331 | a photo of a blue dining table 332 | a photo of a black refrigerator 333 | a photo of a white dog 334 | a photo of an orange scissors 335 | a photo of a red cell phone 336 | a photo of a white orange 337 | a photo of a blue clock 338 | a photo of a blue carrot 339 | a photo of a green motorcycle 340 | a photo of a pink stop sign 341 | a photo of a black vase 342 | a photo of a black backpack 343 | a photo of a red car 344 | a photo of a green computer mouse 345 | a photo of a red backpack 346 | a photo of a green bus 347 | a photo of an orange toaster 348 | a photo of a yellow fork 349 | a photo of a pink parking meter 350 | a photo of a blue book 351 | a photo of a yellow broccoli 352 | a photo of an orange computer mouse 353 | a photo of a red cake 354 | a photo of a dog right of a teddy bear 355 | a photo of a wine glass above a kite 356 | a photo of a couch below a cup 357 | a photo of a laptop left of a cow 358 | a photo of a fork above a hair drier 359 | a photo of a tie right of a baseball bat 360 | a photo of a stop sign above a fork 361 | a photo of a bird below a skateboard 362 | a photo of an apple above a tv 363 | a photo of a train above a potted plant 364 | a photo of a truck left of a refrigerator 365 | a photo of a tv remote below a cow 366 | a photo of a bottle right of a train 367 | a photo of a dog above a cow 368 | a photo of a skateboard above a person 369 | a photo of a baseball glove below an umbrella 370 | a photo of a dining table right of an oven 371 | a photo of a hot dog left of a suitcase 372 | a photo of a bus below a toothbrush 373 | a photo of a backpack right of a sandwich 374 | a photo of a cake below a baseball bat 375 | a photo of a dog right of a tie 376 | a photo of a suitcase right of a boat 377 | a photo of a bear above a clock 378 | a photo of a tv remote left of an umbrella 379 | a photo of a sports ball left of an umbrella 380 | a photo of a train right of a dining table 381 | a photo of a hair drier below an elephant 382 | a photo of a tennis racket right of a spoon 383 | a photo of a wine glass right of a hot dog 384 | a photo of a computer mouse left of a bench 385 | a photo of a carrot left of an orange 386 | a photo of a kite above a toothbrush 387 | a photo of a toaster below a traffic light 388 | a photo of a cat below a baseball glove 389 | a photo of a skis right of a zebra 390 | a photo of a stop sign above a chair 391 | a photo of a stop sign above a parking meter 392 | a photo of a hot dog right of a skateboard 393 | a photo of a pizza below a computer keyboard 394 | a photo of a hair drier left of a toilet 395 | a photo of a cow left of a stop sign 396 | a photo of a suitcase above a skis 397 | a photo of a book above a laptop 398 | a photo of a toothbrush below a pizza 399 | a photo of a toilet left of a kite 400 | a photo of a tie above a sink 401 | a photo of a bird left of a couch 402 | a photo of a bed right of a sports ball 403 | a photo of an elephant below a surfboard 404 | a photo of a frisbee right of a motorcycle 405 | a photo of a vase above a fire hydrant 406 | a photo of a zebra left of an elephant 407 | a photo of a bench left of a bear 408 | a photo of a donut right of a bench 409 | a photo of a frisbee below a horse 410 | a photo of a computer keyboard above a snowboard 411 | a photo of a tv below a cow 412 | a photo of an elephant below a horse 413 | a photo of a suitcase left of a banana 414 | a photo of a train below an airplane 415 | a photo of a cat below a backpack 416 | a photo of a backpack below a cake 417 | a photo of a sandwich below a knife 418 | a photo of a bicycle above a parking meter 419 | a photo of a knife right of a suitcase 420 | a photo of a hot dog above a knife 421 | a photo of a zebra right of a parking meter 422 | a photo of a chair left of a zebra 423 | a photo of a cow below an airplane 424 | a photo of a cup left of an umbrella 425 | a photo of a zebra below a computer keyboard 426 | a photo of a zebra below a broccoli 427 | a photo of a laptop below a sports ball 428 | a photo of a truck left of a baseball bat 429 | a photo of a refrigerator above a baseball bat 430 | a photo of a tv above a baseball bat 431 | a photo of a baseball glove right of a bear 432 | a photo of a refrigerator below a scissors 433 | a photo of a dining table above a suitcase 434 | a photo of a parking meter above a broccoli 435 | a photo of a frisbee above a truck 436 | a photo of a pizza right of a banana 437 | a photo of a bus above a boat 438 | a photo of a cell phone left of a tennis racket 439 | a photo of a horse right of a broccoli 440 | a photo of a broccoli above a bottle 441 | a photo of a vase right of a horse 442 | a photo of a bear above a spoon 443 | a photo of a zebra right of a bed 444 | a photo of a cow right of a laptop 445 | a photo of a bed right of a frisbee 446 | a photo of a tie right of a motorcycle 447 | a photo of a laptop right of a tv 448 | a photo of a cell phone right of a chair 449 | a photo of a couch below a potted plant 450 | a photo of a clock below a tv 451 | a photo of a couch below a vase 452 | a photo of a donut below a cat 453 | a photo of a couch left of a toaster 454 | a photo of a purple wine glass and a black apple 455 | a photo of a green bus and a purple microwave 456 | a photo of a green skis and a brown airplane 457 | a photo of a yellow computer keyboard and a black sink 458 | a photo of a pink oven and a green motorcycle 459 | a photo of a purple parking meter and a red laptop 460 | a photo of a yellow skateboard and an orange computer mouse 461 | a photo of a red skis and a brown tie 462 | a photo of a pink skateboard and a black train 463 | a photo of a white handbag and a purple bed 464 | a photo of a purple elephant and a brown sports ball 465 | a photo of a purple dog and a black dining table 466 | a photo of a white dining table and a red car 467 | a photo of a blue cell phone and a green apple 468 | a photo of a red car and an orange potted plant 469 | a photo of a brown carrot and a white potted plant 470 | a photo of a black kite and a green bear 471 | a photo of a blue laptop and a brown bear 472 | a photo of a green teddy bear and a brown kite 473 | a photo of a yellow stop sign and a blue potted plant 474 | a photo of an orange snowboard and a green cat 475 | a photo of an orange truck and a pink sink 476 | a photo of a brown hot dog and a purple pizza 477 | a photo of a green couch and an orange umbrella 478 | a photo of a brown bed and a pink cell phone 479 | a photo of a black broccoli and a yellow cake 480 | a photo of a red train and a purple bear 481 | a photo of a purple tennis racket and a black sink 482 | a photo of a blue vase and a black banana 483 | a photo of a blue clock and a white cup 484 | a photo of a red umbrella and a blue couch 485 | a photo of a white handbag and a red giraffe 486 | a photo of a pink tv remote and a blue airplane 487 | a photo of a pink handbag and a black scissors 488 | a photo of a brown car and a pink hair drier 489 | a photo of a black bus and a brown cell phone 490 | a photo of a purple sheep and a pink banana 491 | a photo of a blue handbag and a white cell phone 492 | a photo of a white pizza and a green umbrella 493 | a photo of a white tie and a purple skateboard 494 | a photo of a yellow sports ball and a green boat 495 | a photo of a white wine glass and a brown giraffe 496 | a photo of a yellow bowl and a white baseball glove 497 | a photo of an orange microwave and a black spoon 498 | a photo of an orange skateboard and a pink bowl 499 | a photo of a blue toilet and a white suitcase 500 | a photo of a white boat and an orange hot dog 501 | a photo of a yellow dining table and a pink dog 502 | a photo of a red cake and a purple chair 503 | a photo of a blue tie and a pink dining table 504 | a photo of a blue cow and a black computer keyboard 505 | a photo of a yellow pizza and a green oven 506 | a photo of a red laptop and a brown car 507 | a photo of a purple computer keyboard and a blue scissors 508 | a photo of a green surfboard and an orange oven 509 | a photo of a yellow parking meter and a pink refrigerator 510 | a photo of a brown computer mouse and a purple bottle 511 | a photo of a red umbrella and a green cow 512 | a photo of a red giraffe and a black cell phone 513 | a photo of a brown oven and a purple train 514 | a photo of a blue baseball bat and a pink book 515 | a photo of a green cup and a yellow bowl 516 | a photo of a yellow suitcase and a brown bus 517 | a photo of an orange motorcycle and a pink donut 518 | a photo of an orange giraffe and a white baseball glove 519 | a photo of an orange handbag and a green carrot 520 | a photo of a black bottle and a white refrigerator 521 | a photo of a white dog and a blue potted plant 522 | a photo of an orange handbag and a red car 523 | a photo of a red stop sign and a blue book 524 | a photo of a yellow car and an orange toothbrush 525 | a photo of a black potted plant and a yellow toilet 526 | a photo of a brown dining table and a white suitcase 527 | a photo of an orange donut and a yellow stop sign 528 | a photo of a green suitcase and a blue boat 529 | a photo of an orange tennis racket and a yellow sports ball 530 | a photo of a purple computer keyboard and a red chair 531 | a photo of a purple suitcase and an orange pizza 532 | a photo of a white bottle and a blue sheep 533 | a photo of a purple backpack and a white umbrella 534 | a photo of an orange potted plant and a black spoon 535 | a photo of a green tennis racket and a black dog 536 | a photo of a yellow handbag and a blue refrigerator 537 | a photo of a pink broccoli and a red sink 538 | a photo of a red bowl and a pink sink 539 | a photo of a white toilet and a red apple 540 | a photo of a pink dining table and a black sandwich 541 | a photo of a black car and a green parking meter 542 | a photo of a yellow bird and a black motorcycle 543 | a photo of a brown giraffe and a white stop sign 544 | a photo of a white banana and a black elephant 545 | a photo of an orange cow and a purple sandwich 546 | a photo of a red clock and a black cell phone 547 | a photo of a brown knife and a blue donut 548 | a photo of a red cup and a pink handbag 549 | a photo of a yellow bicycle and a red motorcycle 550 | a photo of a red orange and a purple broccoli 551 | a photo of an orange traffic light and a white toilet 552 | a photo of a green cup and a red pizza 553 | a photo of a blue pizza and a yellow baseball glove 554 | -------------------------------------------------------------------------------- /prompts/evaluation_metadata.jsonl: -------------------------------------------------------------------------------- 1 | {"tag": "single_object", "include": [{"class": "bench", "count": 1}], "prompt": "a photo of a bench"} 2 | {"tag": "single_object", "include": [{"class": "cow", "count": 1}], "prompt": "a photo of a cow"} 3 | {"tag": "single_object", "include": [{"class": "bicycle", "count": 1}], "prompt": "a photo of a bicycle"} 4 | {"tag": "single_object", "include": [{"class": "clock", "count": 1}], "prompt": "a photo of a clock"} 5 | {"tag": "single_object", "include": [{"class": "carrot", "count": 1}], "prompt": "a photo of a carrot"} 6 | {"tag": "single_object", "include": [{"class": "suitcase", "count": 1}], "prompt": "a photo of a suitcase"} 7 | {"tag": "single_object", "include": [{"class": "fork", "count": 1}], "prompt": "a photo of a fork"} 8 | {"tag": "single_object", "include": [{"class": "surfboard", "count": 1}], "prompt": "a photo of a surfboard"} 9 | {"tag": "single_object", "include": [{"class": "refrigerator", "count": 1}], "prompt": "a photo of a refrigerator"} 10 | {"tag": "single_object", "include": [{"class": "cup", "count": 1}], "prompt": "a photo of a cup"} 11 | {"tag": "single_object", "include": [{"class": "microwave", "count": 1}], "prompt": "a photo of a microwave"} 12 | {"tag": "single_object", "include": [{"class": "potted plant", "count": 1}], "prompt": "a photo of a potted plant"} 13 | {"tag": "single_object", "include": [{"class": "snowboard", "count": 1}], "prompt": "a photo of a snowboard"} 14 | {"tag": "single_object", "include": [{"class": "zebra", "count": 1}], "prompt": "a photo of a zebra"} 15 | {"tag": "single_object", "include": [{"class": "parking meter", "count": 1}], "prompt": "a photo of a parking meter"} 16 | {"tag": "single_object", "include": [{"class": "spoon", "count": 1}], "prompt": "a photo of a spoon"} 17 | {"tag": "single_object", "include": [{"class": "skateboard", "count": 1}], "prompt": "a photo of a skateboard"} 18 | {"tag": "single_object", "include": [{"class": "car", "count": 1}], "prompt": "a photo of a car"} 19 | {"tag": "single_object", "include": [{"class": "motorcycle", "count": 1}], "prompt": "a photo of a motorcycle"} 20 | {"tag": "single_object", "include": [{"class": "traffic light", "count": 1}], "prompt": "a photo of a traffic light"} 21 | {"tag": "single_object", "include": [{"class": "book", "count": 1}], "prompt": "a photo of a book"} 22 | {"tag": "single_object", "include": [{"class": "couch", "count": 1}], "prompt": "a photo of a couch"} 23 | {"tag": "single_object", "include": [{"class": "backpack", "count": 1}], "prompt": "a photo of a backpack"} 24 | {"tag": "single_object", "include": [{"class": "computer keyboard", "count": 1}], "prompt": "a photo of a computer keyboard"} 25 | {"tag": "single_object", "include": [{"class": "toaster", "count": 1}], "prompt": "a photo of a toaster"} 26 | {"tag": "single_object", "include": [{"class": "bird", "count": 1}], "prompt": "a photo of a bird"} 27 | {"tag": "single_object", "include": [{"class": "bowl", "count": 1}], "prompt": "a photo of a bowl"} 28 | {"tag": "single_object", "include": [{"class": "dog", "count": 1}], "prompt": "a photo of a dog"} 29 | {"tag": "single_object", "include": [{"class": "tie", "count": 1}], "prompt": "a photo of a tie"} 30 | {"tag": "single_object", "include": [{"class": "laptop", "count": 1}], "prompt": "a photo of a laptop"} 31 | {"tag": "single_object", "include": [{"class": "computer mouse", "count": 1}], "prompt": "a photo of a computer mouse"} 32 | {"tag": "single_object", "include": [{"class": "sandwich", "count": 1}], "prompt": "a photo of a sandwich"} 33 | {"tag": "single_object", "include": [{"class": "baseball bat", "count": 1}], "prompt": "a photo of a baseball bat"} 34 | {"tag": "single_object", "include": [{"class": "train", "count": 1}], "prompt": "a photo of a train"} 35 | {"tag": "single_object", "include": [{"class": "cell phone", "count": 1}], "prompt": "a photo of a cell phone"} 36 | {"tag": "single_object", "include": [{"class": "chair", "count": 1}], "prompt": "a photo of a chair"} 37 | {"tag": "single_object", "include": [{"class": "tv", "count": 1}], "prompt": "a photo of a tv"} 38 | {"tag": "single_object", "include": [{"class": "broccoli", "count": 1}], "prompt": "a photo of a broccoli"} 39 | {"tag": "single_object", "include": [{"class": "bed", "count": 1}], "prompt": "a photo of a bed"} 40 | {"tag": "single_object", "include": [{"class": "skis", "count": 1}], "prompt": "a photo of a skis"} 41 | {"tag": "single_object", "include": [{"class": "handbag", "count": 1}], "prompt": "a photo of a handbag"} 42 | {"tag": "single_object", "include": [{"class": "pizza", "count": 1}], "prompt": "a photo of a pizza"} 43 | {"tag": "single_object", "include": [{"class": "frisbee", "count": 1}], "prompt": "a photo of a frisbee"} 44 | {"tag": "single_object", "include": [{"class": "scissors", "count": 1}], "prompt": "a photo of a scissors"} 45 | {"tag": "single_object", "include": [{"class": "bottle", "count": 1}], "prompt": "a photo of a bottle"} 46 | {"tag": "single_object", "include": [{"class": "elephant", "count": 1}], "prompt": "a photo of an elephant"} 47 | {"tag": "single_object", "include": [{"class": "toilet", "count": 1}], "prompt": "a photo of a toilet"} 48 | {"tag": "single_object", "include": [{"class": "oven", "count": 1}], "prompt": "a photo of an oven"} 49 | {"tag": "single_object", "include": [{"class": "orange", "count": 1}], "prompt": "a photo of an orange"} 50 | {"tag": "single_object", "include": [{"class": "person", "count": 1}], "prompt": "a photo of a person"} 51 | {"tag": "single_object", "include": [{"class": "teddy bear", "count": 1}], "prompt": "a photo of a teddy bear"} 52 | {"tag": "single_object", "include": [{"class": "vase", "count": 1}], "prompt": "a photo of a vase"} 53 | {"tag": "single_object", "include": [{"class": "banana", "count": 1}], "prompt": "a photo of a banana"} 54 | {"tag": "single_object", "include": [{"class": "toothbrush", "count": 1}], "prompt": "a photo of a toothbrush"} 55 | {"tag": "single_object", "include": [{"class": "tv remote", "count": 1}], "prompt": "a photo of a tv remote"} 56 | {"tag": "single_object", "include": [{"class": "dining table", "count": 1}], "prompt": "a photo of a dining table"} 57 | {"tag": "single_object", "include": [{"class": "stop sign", "count": 1}], "prompt": "a photo of a stop sign"} 58 | {"tag": "single_object", "include": [{"class": "sheep", "count": 1}], "prompt": "a photo of a sheep"} 59 | {"tag": "single_object", "include": [{"class": "fire hydrant", "count": 1}], "prompt": "a photo of a fire hydrant"} 60 | {"tag": "single_object", "include": [{"class": "airplane", "count": 1}], "prompt": "a photo of an airplane"} 61 | {"tag": "single_object", "include": [{"class": "giraffe", "count": 1}], "prompt": "a photo of a giraffe"} 62 | {"tag": "single_object", "include": [{"class": "horse", "count": 1}], "prompt": "a photo of a horse"} 63 | {"tag": "single_object", "include": [{"class": "cat", "count": 1}], "prompt": "a photo of a cat"} 64 | {"tag": "single_object", "include": [{"class": "donut", "count": 1}], "prompt": "a photo of a donut"} 65 | {"tag": "single_object", "include": [{"class": "boat", "count": 1}], "prompt": "a photo of a boat"} 66 | {"tag": "single_object", "include": [{"class": "baseball glove", "count": 1}], "prompt": "a photo of a baseball glove"} 67 | {"tag": "single_object", "include": [{"class": "hair drier", "count": 1}], "prompt": "a photo of a hair drier"} 68 | {"tag": "single_object", "include": [{"class": "sink", "count": 1}], "prompt": "a photo of a sink"} 69 | {"tag": "single_object", "include": [{"class": "cake", "count": 1}], "prompt": "a photo of a cake"} 70 | {"tag": "single_object", "include": [{"class": "wine glass", "count": 1}], "prompt": "a photo of a wine glass"} 71 | {"tag": "single_object", "include": [{"class": "apple", "count": 1}], "prompt": "a photo of an apple"} 72 | {"tag": "single_object", "include": [{"class": "bus", "count": 1}], "prompt": "a photo of a bus"} 73 | {"tag": "single_object", "include": [{"class": "tennis racket", "count": 1}], "prompt": "a photo of a tennis racket"} 74 | {"tag": "single_object", "include": [{"class": "knife", "count": 1}], "prompt": "a photo of a knife"} 75 | {"tag": "single_object", "include": [{"class": "hot dog", "count": 1}], "prompt": "a photo of a hot dog"} 76 | {"tag": "single_object", "include": [{"class": "truck", "count": 1}], "prompt": "a photo of a truck"} 77 | {"tag": "single_object", "include": [{"class": "umbrella", "count": 1}], "prompt": "a photo of an umbrella"} 78 | {"tag": "single_object", "include": [{"class": "sports ball", "count": 1}], "prompt": "a photo of a sports ball"} 79 | {"tag": "single_object", "include": [{"class": "bear", "count": 1}], "prompt": "a photo of a bear"} 80 | {"tag": "single_object", "include": [{"class": "kite", "count": 1}], "prompt": "a photo of a kite"} 81 | {"tag": "two_object", "include": [{"class": "bench", "count": 1}, {"class": "sports ball", "count": 1}], "prompt": "a photo of a bench and a sports ball"} 82 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a toothbrush and a snowboard"} 83 | {"tag": "two_object", "include": [{"class": "toaster", "count": 1}, {"class": "oven", "count": 1}], "prompt": "a photo of a toaster and an oven"} 84 | {"tag": "two_object", "include": [{"class": "broccoli", "count": 1}, {"class": "vase", "count": 1}], "prompt": "a photo of a broccoli and a vase"} 85 | {"tag": "two_object", "include": [{"class": "tennis racket", "count": 1}, {"class": "wine glass", "count": 1}], "prompt": "a photo of a tennis racket and a wine glass"} 86 | {"tag": "two_object", "include": [{"class": "fork", "count": 1}, {"class": "knife", "count": 1}], "prompt": "a photo of a fork and a knife"} 87 | {"tag": "two_object", "include": [{"class": "hair drier", "count": 1}, {"class": "cake", "count": 1}], "prompt": "a photo of a hair drier and a cake"} 88 | {"tag": "two_object", "include": [{"class": "horse", "count": 1}, {"class": "giraffe", "count": 1}], "prompt": "a photo of a horse and a giraffe"} 89 | {"tag": "two_object", "include": [{"class": "horse", "count": 1}, {"class": "computer keyboard", "count": 1}], "prompt": "a photo of a horse and a computer keyboard"} 90 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a toothbrush and a carrot"} 91 | {"tag": "two_object", "include": [{"class": "cake", "count": 1}, {"class": "zebra", "count": 1}], "prompt": "a photo of a cake and a zebra"} 92 | {"tag": "two_object", "include": [{"class": "hair drier", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a hair drier and a bear"} 93 | {"tag": "two_object", "include": [{"class": "knife", "count": 1}, {"class": "zebra", "count": 1}], "prompt": "a photo of a knife and a zebra"} 94 | {"tag": "two_object", "include": [{"class": "couch", "count": 1}, {"class": "wine glass", "count": 1}], "prompt": "a photo of a couch and a wine glass"} 95 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "vase", "count": 1}], "prompt": "a photo of a frisbee and a vase"} 96 | {"tag": "two_object", "include": [{"class": "book", "count": 1}, {"class": "laptop", "count": 1}], "prompt": "a photo of a book and a laptop"} 97 | {"tag": "two_object", "include": [{"class": "dining table", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a dining table and a bear"} 98 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "couch", "count": 1}], "prompt": "a photo of a frisbee and a couch"} 99 | {"tag": "two_object", "include": [{"class": "couch", "count": 1}, {"class": "horse", "count": 1}], "prompt": "a photo of a couch and a horse"} 100 | {"tag": "two_object", "include": [{"class": "toilet", "count": 1}, {"class": "computer mouse", "count": 1}], "prompt": "a photo of a toilet and a computer mouse"} 101 | {"tag": "two_object", "include": [{"class": "bottle", "count": 1}, {"class": "refrigerator", "count": 1}], "prompt": "a photo of a bottle and a refrigerator"} 102 | {"tag": "two_object", "include": [{"class": "potted plant", "count": 1}, {"class": "backpack", "count": 1}], "prompt": "a photo of a potted plant and a backpack"} 103 | {"tag": "two_object", "include": [{"class": "skateboard", "count": 1}, {"class": "cake", "count": 1}], "prompt": "a photo of a skateboard and a cake"} 104 | {"tag": "two_object", "include": [{"class": "broccoli", "count": 1}, {"class": "parking meter", "count": 1}], "prompt": "a photo of a broccoli and a parking meter"} 105 | {"tag": "two_object", "include": [{"class": "zebra", "count": 1}, {"class": "bed", "count": 1}], "prompt": "a photo of a zebra and a bed"} 106 | {"tag": "two_object", "include": [{"class": "oven", "count": 1}, {"class": "bed", "count": 1}], "prompt": "a photo of an oven and a bed"} 107 | {"tag": "two_object", "include": [{"class": "baseball bat", "count": 1}, {"class": "fork", "count": 1}], "prompt": "a photo of a baseball bat and a fork"} 108 | {"tag": "two_object", "include": [{"class": "vase", "count": 1}, {"class": "spoon", "count": 1}], "prompt": "a photo of a vase and a spoon"} 109 | {"tag": "two_object", "include": [{"class": "skateboard", "count": 1}, {"class": "sink", "count": 1}], "prompt": "a photo of a skateboard and a sink"} 110 | {"tag": "two_object", "include": [{"class": "pizza", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a pizza and a bench"} 111 | {"tag": "two_object", "include": [{"class": "bowl", "count": 1}, {"class": "pizza", "count": 1}], "prompt": "a photo of a bowl and a pizza"} 112 | {"tag": "two_object", "include": [{"class": "tennis racket", "count": 1}, {"class": "bird", "count": 1}], "prompt": "a photo of a tennis racket and a bird"} 113 | {"tag": "two_object", "include": [{"class": "wine glass", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a wine glass and a bear"} 114 | {"tag": "two_object", "include": [{"class": "fork", "count": 1}, {"class": "book", "count": 1}], "prompt": "a photo of a fork and a book"} 115 | {"tag": "two_object", "include": [{"class": "scissors", "count": 1}, {"class": "bowl", "count": 1}], "prompt": "a photo of a scissors and a bowl"} 116 | {"tag": "two_object", "include": [{"class": "laptop", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a laptop and a carrot"} 117 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "bottle", "count": 1}], "prompt": "a photo of a stop sign and a bottle"} 118 | {"tag": "two_object", "include": [{"class": "microwave", "count": 1}, {"class": "truck", "count": 1}], "prompt": "a photo of a microwave and a truck"} 119 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a person and a bear"} 120 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "cell phone", "count": 1}], "prompt": "a photo of a frisbee and a cell phone"} 121 | {"tag": "two_object", "include": [{"class": "parking meter", "count": 1}, {"class": "teddy bear", "count": 1}], "prompt": "a photo of a parking meter and a teddy bear"} 122 | {"tag": "two_object", "include": [{"class": "tennis racket", "count": 1}, {"class": "bicycle", "count": 1}], "prompt": "a photo of a tennis racket and a bicycle"} 123 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "motorcycle", "count": 1}], "prompt": "a photo of a stop sign and a motorcycle"} 124 | {"tag": "two_object", "include": [{"class": "fire hydrant", "count": 1}, {"class": "tennis racket", "count": 1}], "prompt": "a photo of a fire hydrant and a tennis racket"} 125 | {"tag": "two_object", "include": [{"class": "scissors", "count": 1}, {"class": "sandwich", "count": 1}], "prompt": "a photo of a scissors and a sandwich"} 126 | {"tag": "two_object", "include": [{"class": "pizza", "count": 1}, {"class": "book", "count": 1}], "prompt": "a photo of a pizza and a book"} 127 | {"tag": "two_object", "include": [{"class": "giraffe", "count": 1}, {"class": "computer mouse", "count": 1}], "prompt": "a photo of a giraffe and a computer mouse"} 128 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "toaster", "count": 1}], "prompt": "a photo of a stop sign and a toaster"} 129 | {"tag": "two_object", "include": [{"class": "computer mouse", "count": 1}, {"class": "zebra", "count": 1}], "prompt": "a photo of a computer mouse and a zebra"} 130 | {"tag": "two_object", "include": [{"class": "chair", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a chair and a bench"} 131 | {"tag": "two_object", "include": [{"class": "tv", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a tv and a carrot"} 132 | {"tag": "two_object", "include": [{"class": "surfboard", "count": 1}, {"class": "suitcase", "count": 1}], "prompt": "a photo of a surfboard and a suitcase"} 133 | {"tag": "two_object", "include": [{"class": "computer keyboard", "count": 1}, {"class": "laptop", "count": 1}], "prompt": "a photo of a computer keyboard and a laptop"} 134 | {"tag": "two_object", "include": [{"class": "computer keyboard", "count": 1}, {"class": "microwave", "count": 1}], "prompt": "a photo of a computer keyboard and a microwave"} 135 | {"tag": "two_object", "include": [{"class": "scissors", "count": 1}, {"class": "bird", "count": 1}], "prompt": "a photo of a scissors and a bird"} 136 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a person and a snowboard"} 137 | {"tag": "two_object", "include": [{"class": "cow", "count": 1}, {"class": "horse", "count": 1}], "prompt": "a photo of a cow and a horse"} 138 | {"tag": "two_object", "include": [{"class": "handbag", "count": 1}, {"class": "refrigerator", "count": 1}], "prompt": "a photo of a handbag and a refrigerator"} 139 | {"tag": "two_object", "include": [{"class": "chair", "count": 1}, {"class": "laptop", "count": 1}], "prompt": "a photo of a chair and a laptop"} 140 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a toothbrush and a bench"} 141 | {"tag": "two_object", "include": [{"class": "book", "count": 1}, {"class": "baseball bat", "count": 1}], "prompt": "a photo of a book and a baseball bat"} 142 | {"tag": "two_object", "include": [{"class": "horse", "count": 1}, {"class": "train", "count": 1}], "prompt": "a photo of a horse and a train"} 143 | {"tag": "two_object", "include": [{"class": "bench", "count": 1}, {"class": "vase", "count": 1}], "prompt": "a photo of a bench and a vase"} 144 | {"tag": "two_object", "include": [{"class": "traffic light", "count": 1}, {"class": "backpack", "count": 1}], "prompt": "a photo of a traffic light and a backpack"} 145 | {"tag": "two_object", "include": [{"class": "sports ball", "count": 1}, {"class": "cow", "count": 1}], "prompt": "a photo of a sports ball and a cow"} 146 | {"tag": "two_object", "include": [{"class": "computer mouse", "count": 1}, {"class": "spoon", "count": 1}], "prompt": "a photo of a computer mouse and a spoon"} 147 | {"tag": "two_object", "include": [{"class": "tv", "count": 1}, {"class": "bicycle", "count": 1}], "prompt": "a photo of a tv and a bicycle"} 148 | {"tag": "two_object", "include": [{"class": "bench", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a bench and a snowboard"} 149 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "toilet", "count": 1}], "prompt": "a photo of a toothbrush and a toilet"} 150 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "apple", "count": 1}], "prompt": "a photo of a person and an apple"} 151 | {"tag": "two_object", "include": [{"class": "sink", "count": 1}, {"class": "sports ball", "count": 1}], "prompt": "a photo of a sink and a sports ball"} 152 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "dog", "count": 1}], "prompt": "a photo of a stop sign and a dog"} 153 | {"tag": "two_object", "include": [{"class": "knife", "count": 1}, {"class": "stop sign", "count": 1}], "prompt": "a photo of a knife and a stop sign"} 154 | {"tag": "two_object", "include": [{"class": "wine glass", "count": 1}, {"class": "handbag", "count": 1}], "prompt": "a photo of a wine glass and a handbag"} 155 | {"tag": "two_object", "include": [{"class": "bowl", "count": 1}, {"class": "skis", "count": 1}], "prompt": "a photo of a bowl and a skis"} 156 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "apple", "count": 1}], "prompt": "a photo of a frisbee and an apple"} 157 | {"tag": "two_object", "include": [{"class": "computer keyboard", "count": 1}, {"class": "cell phone", "count": 1}], "prompt": "a photo of a computer keyboard and a cell phone"} 158 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "fork", "count": 1}], "prompt": "a photo of a stop sign and a fork"} 159 | {"tag": "two_object", "include": [{"class": "potted plant", "count": 1}, {"class": "boat", "count": 1}], "prompt": "a photo of a potted plant and a boat"} 160 | {"tag": "two_object", "include": [{"class": "tv", "count": 1}, {"class": "cell phone", "count": 1}], "prompt": "a photo of a tv and a cell phone"} 161 | {"tag": "two_object", "include": [{"class": "tie", "count": 1}, {"class": "broccoli", "count": 1}], "prompt": "a photo of a tie and a broccoli"} 162 | {"tag": "two_object", "include": [{"class": "potted plant", "count": 1}, {"class": "donut", "count": 1}], "prompt": "a photo of a potted plant and a donut"} 163 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "sink", "count": 1}], "prompt": "a photo of a person and a sink"} 164 | {"tag": "two_object", "include": [{"class": "couch", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a couch and a snowboard"} 165 | {"tag": "two_object", "include": [{"class": "fork", "count": 1}, {"class": "baseball glove", "count": 1}], "prompt": "a photo of a fork and a baseball glove"} 166 | {"tag": "two_object", "include": [{"class": "apple", "count": 1}, {"class": "toothbrush", "count": 1}], "prompt": "a photo of an apple and a toothbrush"} 167 | {"tag": "two_object", "include": [{"class": "bus", "count": 1}, {"class": "baseball glove", "count": 1}], "prompt": "a photo of a bus and a baseball glove"} 168 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "stop sign", "count": 1}], "prompt": "a photo of a person and a stop sign"} 169 | {"tag": "two_object", "include": [{"class": "carrot", "count": 1}, {"class": "couch", "count": 1}], "prompt": "a photo of a carrot and a couch"} 170 | {"tag": "two_object", "include": [{"class": "baseball bat", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a baseball bat and a bear"} 171 | {"tag": "two_object", "include": [{"class": "fire hydrant", "count": 1}, {"class": "train", "count": 1}], "prompt": "a photo of a fire hydrant and a train"} 172 | {"tag": "two_object", "include": [{"class": "baseball glove", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a baseball glove and a carrot"} 173 | {"tag": "two_object", "include": [{"class": "microwave", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a microwave and a bench"} 174 | {"tag": "two_object", "include": [{"class": "cake", "count": 1}, {"class": "stop sign", "count": 1}], "prompt": "a photo of a cake and a stop sign"} 175 | {"tag": "two_object", "include": [{"class": "car", "count": 1}, {"class": "computer mouse", "count": 1}], "prompt": "a photo of a car and a computer mouse"} 176 | {"tag": "two_object", "include": [{"class": "suitcase", "count": 1}, {"class": "dining table", "count": 1}], "prompt": "a photo of a suitcase and a dining table"} 177 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "traffic light", "count": 1}], "prompt": "a photo of a person and a traffic light"} 178 | {"tag": "two_object", "include": [{"class": "cell phone", "count": 1}, {"class": "horse", "count": 1}], "prompt": "a photo of a cell phone and a horse"} 179 | {"tag": "two_object", "include": [{"class": "baseball bat", "count": 1}, {"class": "giraffe", "count": 1}], "prompt": "a photo of a baseball bat and a giraffe"} 180 | {"tag": "counting", "include": [{"class": "clock", "count": 2}], "exclude": [{"class": "clock", "count": 3}], "prompt": "a photo of two clocks"} 181 | {"tag": "counting", "include": [{"class": "backpack", "count": 2}], "exclude": [{"class": "backpack", "count": 3}], "prompt": "a photo of two backpacks"} 182 | {"tag": "counting", "include": [{"class": "handbag", "count": 4}], "exclude": [{"class": "handbag", "count": 5}], "prompt": "a photo of four handbags"} 183 | {"tag": "counting", "include": [{"class": "frisbee", "count": 2}], "exclude": [{"class": "frisbee", "count": 3}], "prompt": "a photo of two frisbees"} 184 | {"tag": "counting", "include": [{"class": "sports ball", "count": 3}], "exclude": [{"class": "sports ball", "count": 4}], "prompt": "a photo of three sports balls"} 185 | {"tag": "counting", "include": [{"class": "bear", "count": 2}], "exclude": [{"class": "bear", "count": 3}], "prompt": "a photo of two bears"} 186 | {"tag": "counting", "include": [{"class": "tie", "count": 2}], "exclude": [{"class": "tie", "count": 3}], "prompt": "a photo of two ties"} 187 | {"tag": "counting", "include": [{"class": "sink", "count": 4}], "exclude": [{"class": "sink", "count": 5}], "prompt": "a photo of four sinks"} 188 | {"tag": "counting", "include": [{"class": "toothbrush", "count": 2}], "exclude": [{"class": "toothbrush", "count": 3}], "prompt": "a photo of two toothbrushs"} 189 | {"tag": "counting", "include": [{"class": "person", "count": 3}], "exclude": [{"class": "person", "count": 4}], "prompt": "a photo of three persons"} 190 | {"tag": "counting", "include": [{"class": "tennis racket", "count": 3}], "exclude": [{"class": "tennis racket", "count": 4}], "prompt": "a photo of three tennis rackets"} 191 | {"tag": "counting", "include": [{"class": "bowl", "count": 4}], "exclude": [{"class": "bowl", "count": 5}], "prompt": "a photo of four bowls"} 192 | {"tag": "counting", "include": [{"class": "vase", "count": 4}], "exclude": [{"class": "vase", "count": 5}], "prompt": "a photo of four vases"} 193 | {"tag": "counting", "include": [{"class": "cup", "count": 3}], "exclude": [{"class": "cup", "count": 4}], "prompt": "a photo of three cups"} 194 | {"tag": "counting", "include": [{"class": "computer keyboard", "count": 4}], "exclude": [{"class": "computer keyboard", "count": 5}], "prompt": "a photo of four computer keyboards"} 195 | {"tag": "counting", "include": [{"class": "sink", "count": 3}], "exclude": [{"class": "sink", "count": 4}], "prompt": "a photo of three sinks"} 196 | {"tag": "counting", "include": [{"class": "oven", "count": 2}], "exclude": [{"class": "oven", "count": 3}], "prompt": "a photo of two ovens"} 197 | {"tag": "counting", "include": [{"class": "toilet", "count": 2}], "exclude": [{"class": "toilet", "count": 3}], "prompt": "a photo of two toilets"} 198 | {"tag": "counting", "include": [{"class": "bicycle", "count": 2}], "exclude": [{"class": "bicycle", "count": 3}], "prompt": "a photo of two bicycles"} 199 | {"tag": "counting", "include": [{"class": "train", "count": 2}], "exclude": [{"class": "train", "count": 3}], "prompt": "a photo of two trains"} 200 | {"tag": "counting", "include": [{"class": "orange", "count": 3}], "exclude": [{"class": "orange", "count": 4}], "prompt": "a photo of three oranges"} 201 | {"tag": "counting", "include": [{"class": "bus", "count": 3}], "exclude": [{"class": "bus", "count": 4}], "prompt": "a photo of three buses"} 202 | {"tag": "counting", "include": [{"class": "handbag", "count": 3}], "exclude": [{"class": "handbag", "count": 4}], "prompt": "a photo of three handbags"} 203 | {"tag": "counting", "include": [{"class": "snowboard", "count": 3}], "exclude": [{"class": "snowboard", "count": 4}], "prompt": "a photo of three snowboards"} 204 | {"tag": "counting", "include": [{"class": "snowboard", "count": 2}], "exclude": [{"class": "snowboard", "count": 3}], "prompt": "a photo of two snowboards"} 205 | {"tag": "counting", "include": [{"class": "dog", "count": 4}], "exclude": [{"class": "dog", "count": 5}], "prompt": "a photo of four dogs"} 206 | {"tag": "counting", "include": [{"class": "apple", "count": 3}], "exclude": [{"class": "apple", "count": 4}], "prompt": "a photo of three apples"} 207 | {"tag": "counting", "include": [{"class": "sheep", "count": 2}], "exclude": [{"class": "sheep", "count": 3}], "prompt": "a photo of two sheeps"} 208 | {"tag": "counting", "include": [{"class": "hot dog", "count": 3}], "exclude": [{"class": "hot dog", "count": 4}], "prompt": "a photo of three hot dogs"} 209 | {"tag": "counting", "include": [{"class": "zebra", "count": 3}], "exclude": [{"class": "zebra", "count": 4}], "prompt": "a photo of three zebras"} 210 | {"tag": "counting", "include": [{"class": "kite", "count": 3}], "exclude": [{"class": "kite", "count": 4}], "prompt": "a photo of three kites"} 211 | {"tag": "counting", "include": [{"class": "apple", "count": 4}], "exclude": [{"class": "apple", "count": 5}], "prompt": "a photo of four apples"} 212 | {"tag": "counting", "include": [{"class": "cell phone", "count": 3}], "exclude": [{"class": "cell phone", "count": 4}], "prompt": "a photo of three cell phones"} 213 | {"tag": "counting", "include": [{"class": "baseball glove", "count": 4}], "exclude": [{"class": "baseball glove", "count": 5}], "prompt": "a photo of four baseball gloves"} 214 | {"tag": "counting", "include": [{"class": "computer keyboard", "count": 3}], "exclude": [{"class": "computer keyboard", "count": 4}], "prompt": "a photo of three computer keyboards"} 215 | {"tag": "counting", "include": [{"class": "bed", "count": 2}], "exclude": [{"class": "bed", "count": 3}], "prompt": "a photo of two beds"} 216 | {"tag": "counting", "include": [{"class": "tv remote", "count": 2}], "exclude": [{"class": "tv remote", "count": 3}], "prompt": "a photo of two tv remotes"} 217 | {"tag": "counting", "include": [{"class": "fire hydrant", "count": 3}], "exclude": [{"class": "fire hydrant", "count": 4}], "prompt": "a photo of three fire hydrants"} 218 | {"tag": "counting", "include": [{"class": "book", "count": 3}], "exclude": [{"class": "book", "count": 4}], "prompt": "a photo of three books"} 219 | {"tag": "counting", "include": [{"class": "giraffe", "count": 4}], "exclude": [{"class": "giraffe", "count": 5}], "prompt": "a photo of four giraffes"} 220 | {"tag": "counting", "include": [{"class": "vase", "count": 2}], "exclude": [{"class": "vase", "count": 3}], "prompt": "a photo of two vases"} 221 | {"tag": "counting", "include": [{"class": "donut", "count": 4}], "exclude": [{"class": "donut", "count": 5}], "prompt": "a photo of four donuts"} 222 | {"tag": "counting", "include": [{"class": "chair", "count": 4}], "exclude": [{"class": "chair", "count": 5}], "prompt": "a photo of four chairs"} 223 | {"tag": "counting", "include": [{"class": "baseball bat", "count": 3}], "exclude": [{"class": "baseball bat", "count": 4}], "prompt": "a photo of three baseball bats"} 224 | {"tag": "counting", "include": [{"class": "stop sign", "count": 4}], "exclude": [{"class": "stop sign", "count": 5}], "prompt": "a photo of four stop signs"} 225 | {"tag": "counting", "include": [{"class": "pizza", "count": 2}], "exclude": [{"class": "pizza", "count": 3}], "prompt": "a photo of two pizzas"} 226 | {"tag": "counting", "include": [{"class": "refrigerator", "count": 3}], "exclude": [{"class": "refrigerator", "count": 4}], "prompt": "a photo of three refrigerators"} 227 | {"tag": "counting", "include": [{"class": "fire hydrant", "count": 2}], "exclude": [{"class": "fire hydrant", "count": 3}], "prompt": "a photo of two fire hydrants"} 228 | {"tag": "counting", "include": [{"class": "giraffe", "count": 3}], "exclude": [{"class": "giraffe", "count": 4}], "prompt": "a photo of three giraffes"} 229 | {"tag": "counting", "include": [{"class": "tv", "count": 4}], "exclude": [{"class": "tv", "count": 5}], "prompt": "a photo of four tvs"} 230 | {"tag": "counting", "include": [{"class": "wine glass", "count": 3}], "exclude": [{"class": "wine glass", "count": 4}], "prompt": "a photo of three wine glasses"} 231 | {"tag": "counting", "include": [{"class": "broccoli", "count": 4}], "exclude": [{"class": "broccoli", "count": 5}], "prompt": "a photo of four broccolis"} 232 | {"tag": "counting", "include": [{"class": "truck", "count": 3}], "exclude": [{"class": "truck", "count": 4}], "prompt": "a photo of three trucks"} 233 | {"tag": "counting", "include": [{"class": "truck", "count": 2}], "exclude": [{"class": "truck", "count": 3}], "prompt": "a photo of two trucks"} 234 | {"tag": "counting", "include": [{"class": "carrot", "count": 2}], "exclude": [{"class": "carrot", "count": 3}], "prompt": "a photo of two carrots"} 235 | {"tag": "counting", "include": [{"class": "sandwich", "count": 2}], "exclude": [{"class": "sandwich", "count": 3}], "prompt": "a photo of two sandwichs"} 236 | {"tag": "counting", "include": [{"class": "traffic light", "count": 4}], "exclude": [{"class": "traffic light", "count": 5}], "prompt": "a photo of four traffic lights"} 237 | {"tag": "counting", "include": [{"class": "clock", "count": 4}], "exclude": [{"class": "clock", "count": 5}], "prompt": "a photo of four clocks"} 238 | {"tag": "counting", "include": [{"class": "car", "count": 2}], "exclude": [{"class": "car", "count": 3}], "prompt": "a photo of two cars"} 239 | {"tag": "counting", "include": [{"class": "banana", "count": 2}], "exclude": [{"class": "banana", "count": 3}], "prompt": "a photo of two bananas"} 240 | {"tag": "counting", "include": [{"class": "wine glass", "count": 2}], "exclude": [{"class": "wine glass", "count": 3}], "prompt": "a photo of two wine glasses"} 241 | {"tag": "counting", "include": [{"class": "pizza", "count": 3}], "exclude": [{"class": "pizza", "count": 4}], "prompt": "a photo of three pizzas"} 242 | {"tag": "counting", "include": [{"class": "knife", "count": 4}], "exclude": [{"class": "knife", "count": 5}], "prompt": "a photo of four knifes"} 243 | {"tag": "counting", "include": [{"class": "suitcase", "count": 3}], "exclude": [{"class": "suitcase", "count": 4}], "prompt": "a photo of three suitcases"} 244 | {"tag": "counting", "include": [{"class": "zebra", "count": 4}], "exclude": [{"class": "zebra", "count": 5}], "prompt": "a photo of four zebras"} 245 | {"tag": "counting", "include": [{"class": "teddy bear", "count": 2}], "exclude": [{"class": "teddy bear", "count": 3}], "prompt": "a photo of two teddy bears"} 246 | {"tag": "counting", "include": [{"class": "skateboard", "count": 4}], "exclude": [{"class": "skateboard", "count": 5}], "prompt": "a photo of four skateboards"} 247 | {"tag": "counting", "include": [{"class": "hot dog", "count": 4}], "exclude": [{"class": "hot dog", "count": 5}], "prompt": "a photo of four hot dogs"} 248 | {"tag": "counting", "include": [{"class": "bird", "count": 3}], "exclude": [{"class": "bird", "count": 4}], "prompt": "a photo of three birds"} 249 | {"tag": "counting", "include": [{"class": "boat", "count": 4}], "exclude": [{"class": "boat", "count": 5}], "prompt": "a photo of four boats"} 250 | {"tag": "counting", "include": [{"class": "microwave", "count": 4}], "exclude": [{"class": "microwave", "count": 5}], "prompt": "a photo of four microwaves"} 251 | {"tag": "counting", "include": [{"class": "hair drier", "count": 2}], "exclude": [{"class": "hair drier", "count": 3}], "prompt": "a photo of two hair driers"} 252 | {"tag": "counting", "include": [{"class": "laptop", "count": 3}], "exclude": [{"class": "laptop", "count": 4}], "prompt": "a photo of three laptops"} 253 | {"tag": "counting", "include": [{"class": "cow", "count": 3}], "exclude": [{"class": "cow", "count": 4}], "prompt": "a photo of three cows"} 254 | {"tag": "counting", "include": [{"class": "parking meter", "count": 2}], "exclude": [{"class": "parking meter", "count": 3}], "prompt": "a photo of two parking meters"} 255 | {"tag": "counting", "include": [{"class": "bench", "count": 4}], "exclude": [{"class": "bench", "count": 5}], "prompt": "a photo of four benchs"} 256 | {"tag": "counting", "include": [{"class": "bench", "count": 3}], "exclude": [{"class": "bench", "count": 4}], "prompt": "a photo of three benchs"} 257 | {"tag": "counting", "include": [{"class": "frisbee", "count": 4}], "exclude": [{"class": "frisbee", "count": 5}], "prompt": "a photo of four frisbees"} 258 | {"tag": "counting", "include": [{"class": "book", "count": 4}], "exclude": [{"class": "book", "count": 5}], "prompt": "a photo of four books"} 259 | {"tag": "counting", "include": [{"class": "bus", "count": 4}], "exclude": [{"class": "bus", "count": 5}], "prompt": "a photo of four buses"} 260 | {"tag": "colors", "include": [{"class": "fire hydrant", "count": 1, "color": "blue"}], "prompt": "a photo of a blue fire hydrant"} 261 | {"tag": "colors", "include": [{"class": "car", "count": 1, "color": "pink"}], "prompt": "a photo of a pink car"} 262 | {"tag": "colors", "include": [{"class": "cup", "count": 1, "color": "purple"}], "prompt": "a photo of a purple cup"} 263 | {"tag": "colors", "include": [{"class": "cow", "count": 1, "color": "blue"}], "prompt": "a photo of a blue cow"} 264 | {"tag": "colors", "include": [{"class": "boat", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow boat"} 265 | {"tag": "colors", "include": [{"class": "umbrella", "count": 1, "color": "blue"}], "prompt": "a photo of a blue umbrella"} 266 | {"tag": "colors", "include": [{"class": "elephant", "count": 1, "color": "blue"}], "prompt": "a photo of a blue elephant"} 267 | {"tag": "colors", "include": [{"class": "elephant", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow elephant"} 268 | {"tag": "colors", "include": [{"class": "bicycle", "count": 1, "color": "red"}], "prompt": "a photo of a red bicycle"} 269 | {"tag": "colors", "include": [{"class": "suitcase", "count": 1, "color": "purple"}], "prompt": "a photo of a purple suitcase"} 270 | {"tag": "colors", "include": [{"class": "hair drier", "count": 1, "color": "purple"}], "prompt": "a photo of a purple hair drier"} 271 | {"tag": "colors", "include": [{"class": "sandwich", "count": 1, "color": "white"}], "prompt": "a photo of a white sandwich"} 272 | {"tag": "colors", "include": [{"class": "elephant", "count": 1, "color": "purple"}], "prompt": "a photo of a purple elephant"} 273 | {"tag": "colors", "include": [{"class": "microwave", "count": 1, "color": "green"}], "prompt": "a photo of a green microwave"} 274 | {"tag": "colors", "include": [{"class": "zebra", "count": 1, "color": "red"}], "prompt": "a photo of a red zebra"} 275 | {"tag": "colors", "include": [{"class": "apple", "count": 1, "color": "red"}], "prompt": "a photo of a red apple"} 276 | {"tag": "colors", "include": [{"class": "tv remote", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow tv remote"} 277 | {"tag": "colors", "include": [{"class": "toilet", "count": 1, "color": "blue"}], "prompt": "a photo of a blue toilet"} 278 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "orange"}], "prompt": "a photo of an orange orange"} 279 | {"tag": "colors", "include": [{"class": "donut", "count": 1, "color": "black"}], "prompt": "a photo of a black donut"} 280 | {"tag": "colors", "include": [{"class": "vase", "count": 1, "color": "red"}], "prompt": "a photo of a red vase"} 281 | {"tag": "colors", "include": [{"class": "pizza", "count": 1, "color": "purple"}], "prompt": "a photo of a purple pizza"} 282 | {"tag": "colors", "include": [{"class": "skateboard", "count": 1, "color": "pink"}], "prompt": "a photo of a pink skateboard"} 283 | {"tag": "colors", "include": [{"class": "skateboard", "count": 1, "color": "green"}], "prompt": "a photo of a green skateboard"} 284 | {"tag": "colors", "include": [{"class": "bear", "count": 1, "color": "purple"}], "prompt": "a photo of a purple bear"} 285 | {"tag": "colors", "include": [{"class": "chair", "count": 1, "color": "brown"}], "prompt": "a photo of a brown chair"} 286 | {"tag": "colors", "include": [{"class": "computer keyboard", "count": 1, "color": "brown"}], "prompt": "a photo of a brown computer keyboard"} 287 | {"tag": "colors", "include": [{"class": "cow", "count": 1, "color": "orange"}], "prompt": "a photo of an orange cow"} 288 | {"tag": "colors", "include": [{"class": "skis", "count": 1, "color": "brown"}], "prompt": "a photo of a brown skis"} 289 | {"tag": "colors", "include": [{"class": "kite", "count": 1, "color": "white"}], "prompt": "a photo of a white kite"} 290 | {"tag": "colors", "include": [{"class": "dog", "count": 1, "color": "red"}], "prompt": "a photo of a red dog"} 291 | {"tag": "colors", "include": [{"class": "couch", "count": 1, "color": "green"}], "prompt": "a photo of a green couch"} 292 | {"tag": "colors", "include": [{"class": "airplane", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow airplane"} 293 | {"tag": "colors", "include": [{"class": "tv", "count": 1, "color": "orange"}], "prompt": "a photo of an orange tv"} 294 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "white"}], "prompt": "a photo of a white scissors"} 295 | {"tag": "colors", "include": [{"class": "cell phone", "count": 1, "color": "pink"}], "prompt": "a photo of a pink cell phone"} 296 | {"tag": "colors", "include": [{"class": "surfboard", "count": 1, "color": "green"}], "prompt": "a photo of a green surfboard"} 297 | {"tag": "colors", "include": [{"class": "fire hydrant", "count": 1, "color": "white"}], "prompt": "a photo of a white fire hydrant"} 298 | {"tag": "colors", "include": [{"class": "bicycle", "count": 1, "color": "black"}], "prompt": "a photo of a black bicycle"} 299 | {"tag": "colors", "include": [{"class": "carrot", "count": 1, "color": "purple"}], "prompt": "a photo of a purple carrot"} 300 | {"tag": "colors", "include": [{"class": "dining table", "count": 1, "color": "black"}], "prompt": "a photo of a black dining table"} 301 | {"tag": "colors", "include": [{"class": "potted plant", "count": 1, "color": "purple"}], "prompt": "a photo of a purple potted plant"} 302 | {"tag": "colors", "include": [{"class": "backpack", "count": 1, "color": "purple"}], "prompt": "a photo of a purple backpack"} 303 | {"tag": "colors", "include": [{"class": "train", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow train"} 304 | {"tag": "colors", "include": [{"class": "potted plant", "count": 1, "color": "pink"}], "prompt": "a photo of a pink potted plant"} 305 | {"tag": "colors", "include": [{"class": "giraffe", "count": 1, "color": "red"}], "prompt": "a photo of a red giraffe"} 306 | {"tag": "colors", "include": [{"class": "bear", "count": 1, "color": "brown"}], "prompt": "a photo of a brown bear"} 307 | {"tag": "colors", "include": [{"class": "train", "count": 1, "color": "black"}], "prompt": "a photo of a black train"} 308 | {"tag": "colors", "include": [{"class": "laptop", "count": 1, "color": "orange"}], "prompt": "a photo of an orange laptop"} 309 | {"tag": "colors", "include": [{"class": "hot dog", "count": 1, "color": "green"}], "prompt": "a photo of a green hot dog"} 310 | {"tag": "colors", "include": [{"class": "parking meter", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow parking meter"} 311 | {"tag": "colors", "include": [{"class": "potted plant", "count": 1, "color": "red"}], "prompt": "a photo of a red potted plant"} 312 | {"tag": "colors", "include": [{"class": "traffic light", "count": 1, "color": "green"}], "prompt": "a photo of a green traffic light"} 313 | {"tag": "colors", "include": [{"class": "tv", "count": 1, "color": "blue"}], "prompt": "a photo of a blue tv"} 314 | {"tag": "colors", "include": [{"class": "refrigerator", "count": 1, "color": "brown"}], "prompt": "a photo of a brown refrigerator"} 315 | {"tag": "colors", "include": [{"class": "tv remote", "count": 1, "color": "black"}], "prompt": "a photo of a black tv remote"} 316 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "purple"}], "prompt": "a photo of a purple scissors"} 317 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow orange"} 318 | {"tag": "colors", "include": [{"class": "toaster", "count": 1, "color": "brown"}], "prompt": "a photo of a brown toaster"} 319 | {"tag": "colors", "include": [{"class": "parking meter", "count": 1, "color": "red"}], "prompt": "a photo of a red parking meter"} 320 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "brown"}], "prompt": "a photo of a brown orange"} 321 | {"tag": "colors", "include": [{"class": "clock", "count": 1, "color": "green"}], "prompt": "a photo of a green clock"} 322 | {"tag": "colors", "include": [{"class": "sheep", "count": 1, "color": "white"}], "prompt": "a photo of a white sheep"} 323 | {"tag": "colors", "include": [{"class": "oven", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow oven"} 324 | {"tag": "colors", "include": [{"class": "vase", "count": 1, "color": "green"}], "prompt": "a photo of a green vase"} 325 | {"tag": "colors", "include": [{"class": "teddy bear", "count": 1, "color": "black"}], "prompt": "a photo of a black teddy bear"} 326 | {"tag": "colors", "include": [{"class": "carrot", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow carrot"} 327 | {"tag": "colors", "include": [{"class": "hot dog", "count": 1, "color": "black"}], "prompt": "a photo of a black hot dog"} 328 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "red"}], "prompt": "a photo of a red scissors"} 329 | {"tag": "colors", "include": [{"class": "teddy bear", "count": 1, "color": "white"}], "prompt": "a photo of a white teddy bear"} 330 | {"tag": "colors", "include": [{"class": "skis", "count": 1, "color": "black"}], "prompt": "a photo of a black skis"} 331 | {"tag": "colors", "include": [{"class": "dining table", "count": 1, "color": "blue"}], "prompt": "a photo of a blue dining table"} 332 | {"tag": "colors", "include": [{"class": "refrigerator", "count": 1, "color": "black"}], "prompt": "a photo of a black refrigerator"} 333 | {"tag": "colors", "include": [{"class": "dog", "count": 1, "color": "white"}], "prompt": "a photo of a white dog"} 334 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "orange"}], "prompt": "a photo of an orange scissors"} 335 | {"tag": "colors", "include": [{"class": "cell phone", "count": 1, "color": "red"}], "prompt": "a photo of a red cell phone"} 336 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "white"}], "prompt": "a photo of a white orange"} 337 | {"tag": "colors", "include": [{"class": "clock", "count": 1, "color": "blue"}], "prompt": "a photo of a blue clock"} 338 | {"tag": "colors", "include": [{"class": "carrot", "count": 1, "color": "blue"}], "prompt": "a photo of a blue carrot"} 339 | {"tag": "colors", "include": [{"class": "motorcycle", "count": 1, "color": "green"}], "prompt": "a photo of a green motorcycle"} 340 | {"tag": "colors", "include": [{"class": "stop sign", "count": 1, "color": "pink"}], "prompt": "a photo of a pink stop sign"} 341 | {"tag": "colors", "include": [{"class": "vase", "count": 1, "color": "black"}], "prompt": "a photo of a black vase"} 342 | {"tag": "colors", "include": [{"class": "backpack", "count": 1, "color": "black"}], "prompt": "a photo of a black backpack"} 343 | {"tag": "colors", "include": [{"class": "car", "count": 1, "color": "red"}], "prompt": "a photo of a red car"} 344 | {"tag": "colors", "include": [{"class": "computer mouse", "count": 1, "color": "green"}], "prompt": "a photo of a green computer mouse"} 345 | {"tag": "colors", "include": [{"class": "backpack", "count": 1, "color": "red"}], "prompt": "a photo of a red backpack"} 346 | {"tag": "colors", "include": [{"class": "bus", "count": 1, "color": "green"}], "prompt": "a photo of a green bus"} 347 | {"tag": "colors", "include": [{"class": "toaster", "count": 1, "color": "orange"}], "prompt": "a photo of an orange toaster"} 348 | {"tag": "colors", "include": [{"class": "fork", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow fork"} 349 | {"tag": "colors", "include": [{"class": "parking meter", "count": 1, "color": "pink"}], "prompt": "a photo of a pink parking meter"} 350 | {"tag": "colors", "include": [{"class": "book", "count": 1, "color": "blue"}], "prompt": "a photo of a blue book"} 351 | {"tag": "colors", "include": [{"class": "broccoli", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow broccoli"} 352 | {"tag": "colors", "include": [{"class": "computer mouse", "count": 1, "color": "orange"}], "prompt": "a photo of an orange computer mouse"} 353 | {"tag": "colors", "include": [{"class": "cake", "count": 1, "color": "red"}], "prompt": "a photo of a red cake"} 354 | {"tag": "position", "include": [{"class": "teddy bear", "count": 1}, {"class": "dog", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a dog right of a teddy bear"} 355 | {"tag": "position", "include": [{"class": "kite", "count": 1}, {"class": "wine glass", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a wine glass above a kite"} 356 | {"tag": "position", "include": [{"class": "cup", "count": 1}, {"class": "couch", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a couch below a cup"} 357 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "laptop", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a laptop left of a cow"} 358 | {"tag": "position", "include": [{"class": "hair drier", "count": 1}, {"class": "fork", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a fork above a hair drier"} 359 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "tie", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a tie right of a baseball bat"} 360 | {"tag": "position", "include": [{"class": "fork", "count": 1}, {"class": "stop sign", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a stop sign above a fork"} 361 | {"tag": "position", "include": [{"class": "skateboard", "count": 1}, {"class": "bird", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a bird below a skateboard"} 362 | {"tag": "position", "include": [{"class": "tv", "count": 1}, {"class": "apple", "count": 1, "position": ["above", 0]}], "prompt": "a photo of an apple above a tv"} 363 | {"tag": "position", "include": [{"class": "potted plant", "count": 1}, {"class": "train", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a train above a potted plant"} 364 | {"tag": "position", "include": [{"class": "refrigerator", "count": 1}, {"class": "truck", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a truck left of a refrigerator"} 365 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "tv remote", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a tv remote below a cow"} 366 | {"tag": "position", "include": [{"class": "train", "count": 1}, {"class": "bottle", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a bottle right of a train"} 367 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "dog", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a dog above a cow"} 368 | {"tag": "position", "include": [{"class": "person", "count": 1}, {"class": "skateboard", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a skateboard above a person"} 369 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "baseball glove", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a baseball glove below an umbrella"} 370 | {"tag": "position", "include": [{"class": "oven", "count": 1}, {"class": "dining table", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a dining table right of an oven"} 371 | {"tag": "position", "include": [{"class": "suitcase", "count": 1}, {"class": "hot dog", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a hot dog left of a suitcase"} 372 | {"tag": "position", "include": [{"class": "toothbrush", "count": 1}, {"class": "bus", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a bus below a toothbrush"} 373 | {"tag": "position", "include": [{"class": "sandwich", "count": 1}, {"class": "backpack", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a backpack right of a sandwich"} 374 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "cake", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cake below a baseball bat"} 375 | {"tag": "position", "include": [{"class": "tie", "count": 1}, {"class": "dog", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a dog right of a tie"} 376 | {"tag": "position", "include": [{"class": "boat", "count": 1}, {"class": "suitcase", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a suitcase right of a boat"} 377 | {"tag": "position", "include": [{"class": "clock", "count": 1}, {"class": "bear", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bear above a clock"} 378 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "tv remote", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a tv remote left of an umbrella"} 379 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "sports ball", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a sports ball left of an umbrella"} 380 | {"tag": "position", "include": [{"class": "dining table", "count": 1}, {"class": "train", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a train right of a dining table"} 381 | {"tag": "position", "include": [{"class": "elephant", "count": 1}, {"class": "hair drier", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a hair drier below an elephant"} 382 | {"tag": "position", "include": [{"class": "spoon", "count": 1}, {"class": "tennis racket", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a tennis racket right of a spoon"} 383 | {"tag": "position", "include": [{"class": "hot dog", "count": 1}, {"class": "wine glass", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a wine glass right of a hot dog"} 384 | {"tag": "position", "include": [{"class": "bench", "count": 1}, {"class": "computer mouse", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a computer mouse left of a bench"} 385 | {"tag": "position", "include": [{"class": "orange", "count": 1}, {"class": "carrot", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a carrot left of an orange"} 386 | {"tag": "position", "include": [{"class": "toothbrush", "count": 1}, {"class": "kite", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a kite above a toothbrush"} 387 | {"tag": "position", "include": [{"class": "traffic light", "count": 1}, {"class": "toaster", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a toaster below a traffic light"} 388 | {"tag": "position", "include": [{"class": "baseball glove", "count": 1}, {"class": "cat", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cat below a baseball glove"} 389 | {"tag": "position", "include": [{"class": "zebra", "count": 1}, {"class": "skis", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a skis right of a zebra"} 390 | {"tag": "position", "include": [{"class": "chair", "count": 1}, {"class": "stop sign", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a stop sign above a chair"} 391 | {"tag": "position", "include": [{"class": "parking meter", "count": 1}, {"class": "stop sign", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a stop sign above a parking meter"} 392 | {"tag": "position", "include": [{"class": "skateboard", "count": 1}, {"class": "hot dog", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a hot dog right of a skateboard"} 393 | {"tag": "position", "include": [{"class": "computer keyboard", "count": 1}, {"class": "pizza", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a pizza below a computer keyboard"} 394 | {"tag": "position", "include": [{"class": "toilet", "count": 1}, {"class": "hair drier", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a hair drier left of a toilet"} 395 | {"tag": "position", "include": [{"class": "stop sign", "count": 1}, {"class": "cow", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a cow left of a stop sign"} 396 | {"tag": "position", "include": [{"class": "skis", "count": 1}, {"class": "suitcase", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a suitcase above a skis"} 397 | {"tag": "position", "include": [{"class": "laptop", "count": 1}, {"class": "book", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a book above a laptop"} 398 | {"tag": "position", "include": [{"class": "pizza", "count": 1}, {"class": "toothbrush", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a toothbrush below a pizza"} 399 | {"tag": "position", "include": [{"class": "kite", "count": 1}, {"class": "toilet", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a toilet left of a kite"} 400 | {"tag": "position", "include": [{"class": "sink", "count": 1}, {"class": "tie", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a tie above a sink"} 401 | {"tag": "position", "include": [{"class": "couch", "count": 1}, {"class": "bird", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a bird left of a couch"} 402 | {"tag": "position", "include": [{"class": "sports ball", "count": 1}, {"class": "bed", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a bed right of a sports ball"} 403 | {"tag": "position", "include": [{"class": "surfboard", "count": 1}, {"class": "elephant", "count": 1, "position": ["below", 0]}], "prompt": "a photo of an elephant below a surfboard"} 404 | {"tag": "position", "include": [{"class": "motorcycle", "count": 1}, {"class": "frisbee", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a frisbee right of a motorcycle"} 405 | {"tag": "position", "include": [{"class": "fire hydrant", "count": 1}, {"class": "vase", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a vase above a fire hydrant"} 406 | {"tag": "position", "include": [{"class": "elephant", "count": 1}, {"class": "zebra", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a zebra left of an elephant"} 407 | {"tag": "position", "include": [{"class": "bear", "count": 1}, {"class": "bench", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a bench left of a bear"} 408 | {"tag": "position", "include": [{"class": "bench", "count": 1}, {"class": "donut", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a donut right of a bench"} 409 | {"tag": "position", "include": [{"class": "horse", "count": 1}, {"class": "frisbee", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a frisbee below a horse"} 410 | {"tag": "position", "include": [{"class": "snowboard", "count": 1}, {"class": "computer keyboard", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a computer keyboard above a snowboard"} 411 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "tv", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a tv below a cow"} 412 | {"tag": "position", "include": [{"class": "horse", "count": 1}, {"class": "elephant", "count": 1, "position": ["below", 0]}], "prompt": "a photo of an elephant below a horse"} 413 | {"tag": "position", "include": [{"class": "banana", "count": 1}, {"class": "suitcase", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a suitcase left of a banana"} 414 | {"tag": "position", "include": [{"class": "airplane", "count": 1}, {"class": "train", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a train below an airplane"} 415 | {"tag": "position", "include": [{"class": "backpack", "count": 1}, {"class": "cat", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cat below a backpack"} 416 | {"tag": "position", "include": [{"class": "cake", "count": 1}, {"class": "backpack", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a backpack below a cake"} 417 | {"tag": "position", "include": [{"class": "knife", "count": 1}, {"class": "sandwich", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a sandwich below a knife"} 418 | {"tag": "position", "include": [{"class": "parking meter", "count": 1}, {"class": "bicycle", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bicycle above a parking meter"} 419 | {"tag": "position", "include": [{"class": "suitcase", "count": 1}, {"class": "knife", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a knife right of a suitcase"} 420 | {"tag": "position", "include": [{"class": "knife", "count": 1}, {"class": "hot dog", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a hot dog above a knife"} 421 | {"tag": "position", "include": [{"class": "parking meter", "count": 1}, {"class": "zebra", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a zebra right of a parking meter"} 422 | {"tag": "position", "include": [{"class": "zebra", "count": 1}, {"class": "chair", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a chair left of a zebra"} 423 | {"tag": "position", "include": [{"class": "airplane", "count": 1}, {"class": "cow", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cow below an airplane"} 424 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "cup", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a cup left of an umbrella"} 425 | {"tag": "position", "include": [{"class": "computer keyboard", "count": 1}, {"class": "zebra", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a zebra below a computer keyboard"} 426 | {"tag": "position", "include": [{"class": "broccoli", "count": 1}, {"class": "zebra", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a zebra below a broccoli"} 427 | {"tag": "position", "include": [{"class": "sports ball", "count": 1}, {"class": "laptop", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a laptop below a sports ball"} 428 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "truck", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a truck left of a baseball bat"} 429 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "refrigerator", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a refrigerator above a baseball bat"} 430 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "tv", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a tv above a baseball bat"} 431 | {"tag": "position", "include": [{"class": "bear", "count": 1}, {"class": "baseball glove", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a baseball glove right of a bear"} 432 | {"tag": "position", "include": [{"class": "scissors", "count": 1}, {"class": "refrigerator", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a refrigerator below a scissors"} 433 | {"tag": "position", "include": [{"class": "suitcase", "count": 1}, {"class": "dining table", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a dining table above a suitcase"} 434 | {"tag": "position", "include": [{"class": "broccoli", "count": 1}, {"class": "parking meter", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a parking meter above a broccoli"} 435 | {"tag": "position", "include": [{"class": "truck", "count": 1}, {"class": "frisbee", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a frisbee above a truck"} 436 | {"tag": "position", "include": [{"class": "banana", "count": 1}, {"class": "pizza", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a pizza right of a banana"} 437 | {"tag": "position", "include": [{"class": "boat", "count": 1}, {"class": "bus", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bus above a boat"} 438 | {"tag": "position", "include": [{"class": "tennis racket", "count": 1}, {"class": "cell phone", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a cell phone left of a tennis racket"} 439 | {"tag": "position", "include": [{"class": "broccoli", "count": 1}, {"class": "horse", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a horse right of a broccoli"} 440 | {"tag": "position", "include": [{"class": "bottle", "count": 1}, {"class": "broccoli", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a broccoli above a bottle"} 441 | {"tag": "position", "include": [{"class": "horse", "count": 1}, {"class": "vase", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a vase right of a horse"} 442 | {"tag": "position", "include": [{"class": "spoon", "count": 1}, {"class": "bear", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bear above a spoon"} 443 | {"tag": "position", "include": [{"class": "bed", "count": 1}, {"class": "zebra", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a zebra right of a bed"} 444 | {"tag": "position", "include": [{"class": "laptop", "count": 1}, {"class": "cow", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a cow right of a laptop"} 445 | {"tag": "position", "include": [{"class": "frisbee", "count": 1}, {"class": "bed", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a bed right of a frisbee"} 446 | {"tag": "position", "include": [{"class": "motorcycle", "count": 1}, {"class": "tie", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a tie right of a motorcycle"} 447 | {"tag": "position", "include": [{"class": "tv", "count": 1}, {"class": "laptop", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a laptop right of a tv"} 448 | {"tag": "position", "include": [{"class": "chair", "count": 1}, {"class": "cell phone", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a cell phone right of a chair"} 449 | {"tag": "position", "include": [{"class": "potted plant", "count": 1}, {"class": "couch", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a couch below a potted plant"} 450 | {"tag": "position", "include": [{"class": "tv", "count": 1}, {"class": "clock", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a clock below a tv"} 451 | {"tag": "position", "include": [{"class": "vase", "count": 1}, {"class": "couch", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a couch below a vase"} 452 | {"tag": "position", "include": [{"class": "cat", "count": 1}, {"class": "donut", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a donut below a cat"} 453 | {"tag": "position", "include": [{"class": "toaster", "count": 1}, {"class": "couch", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a couch left of a toaster"} 454 | {"tag": "color_attr", "include": [{"class": "wine glass", "count": 1, "color": "purple"}, {"class": "apple", "count": 1, "color": "black"}], "prompt": "a photo of a purple wine glass and a black apple"} 455 | {"tag": "color_attr", "include": [{"class": "bus", "count": 1, "color": "green"}, {"class": "microwave", "count": 1, "color": "purple"}], "prompt": "a photo of a green bus and a purple microwave"} 456 | {"tag": "color_attr", "include": [{"class": "skis", "count": 1, "color": "green"}, {"class": "airplane", "count": 1, "color": "brown"}], "prompt": "a photo of a green skis and a brown airplane"} 457 | {"tag": "color_attr", "include": [{"class": "computer keyboard", "count": 1, "color": "yellow"}, {"class": "sink", "count": 1, "color": "black"}], "prompt": "a photo of a yellow computer keyboard and a black sink"} 458 | {"tag": "color_attr", "include": [{"class": "oven", "count": 1, "color": "pink"}, {"class": "motorcycle", "count": 1, "color": "green"}], "prompt": "a photo of a pink oven and a green motorcycle"} 459 | {"tag": "color_attr", "include": [{"class": "parking meter", "count": 1, "color": "purple"}, {"class": "laptop", "count": 1, "color": "red"}], "prompt": "a photo of a purple parking meter and a red laptop"} 460 | {"tag": "color_attr", "include": [{"class": "skateboard", "count": 1, "color": "yellow"}, {"class": "computer mouse", "count": 1, "color": "orange"}], "prompt": "a photo of a yellow skateboard and an orange computer mouse"} 461 | {"tag": "color_attr", "include": [{"class": "skis", "count": 1, "color": "red"}, {"class": "tie", "count": 1, "color": "brown"}], "prompt": "a photo of a red skis and a brown tie"} 462 | {"tag": "color_attr", "include": [{"class": "skateboard", "count": 1, "color": "pink"}, {"class": "train", "count": 1, "color": "black"}], "prompt": "a photo of a pink skateboard and a black train"} 463 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "white"}, {"class": "bed", "count": 1, "color": "purple"}], "prompt": "a photo of a white handbag and a purple bed"} 464 | {"tag": "color_attr", "include": [{"class": "elephant", "count": 1, "color": "purple"}, {"class": "sports ball", "count": 1, "color": "brown"}], "prompt": "a photo of a purple elephant and a brown sports ball"} 465 | {"tag": "color_attr", "include": [{"class": "dog", "count": 1, "color": "purple"}, {"class": "dining table", "count": 1, "color": "black"}], "prompt": "a photo of a purple dog and a black dining table"} 466 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "white"}, {"class": "car", "count": 1, "color": "red"}], "prompt": "a photo of a white dining table and a red car"} 467 | {"tag": "color_attr", "include": [{"class": "cell phone", "count": 1, "color": "blue"}, {"class": "apple", "count": 1, "color": "green"}], "prompt": "a photo of a blue cell phone and a green apple"} 468 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "red"}, {"class": "potted plant", "count": 1, "color": "orange"}], "prompt": "a photo of a red car and an orange potted plant"} 469 | {"tag": "color_attr", "include": [{"class": "carrot", "count": 1, "color": "brown"}, {"class": "potted plant", "count": 1, "color": "white"}], "prompt": "a photo of a brown carrot and a white potted plant"} 470 | {"tag": "color_attr", "include": [{"class": "kite", "count": 1, "color": "black"}, {"class": "bear", "count": 1, "color": "green"}], "prompt": "a photo of a black kite and a green bear"} 471 | {"tag": "color_attr", "include": [{"class": "laptop", "count": 1, "color": "blue"}, {"class": "bear", "count": 1, "color": "brown"}], "prompt": "a photo of a blue laptop and a brown bear"} 472 | {"tag": "color_attr", "include": [{"class": "teddy bear", "count": 1, "color": "green"}, {"class": "kite", "count": 1, "color": "brown"}], "prompt": "a photo of a green teddy bear and a brown kite"} 473 | {"tag": "color_attr", "include": [{"class": "stop sign", "count": 1, "color": "yellow"}, {"class": "potted plant", "count": 1, "color": "blue"}], "prompt": "a photo of a yellow stop sign and a blue potted plant"} 474 | {"tag": "color_attr", "include": [{"class": "snowboard", "count": 1, "color": "orange"}, {"class": "cat", "count": 1, "color": "green"}], "prompt": "a photo of an orange snowboard and a green cat"} 475 | {"tag": "color_attr", "include": [{"class": "truck", "count": 1, "color": "orange"}, {"class": "sink", "count": 1, "color": "pink"}], "prompt": "a photo of an orange truck and a pink sink"} 476 | {"tag": "color_attr", "include": [{"class": "hot dog", "count": 1, "color": "brown"}, {"class": "pizza", "count": 1, "color": "purple"}], "prompt": "a photo of a brown hot dog and a purple pizza"} 477 | {"tag": "color_attr", "include": [{"class": "couch", "count": 1, "color": "green"}, {"class": "umbrella", "count": 1, "color": "orange"}], "prompt": "a photo of a green couch and an orange umbrella"} 478 | {"tag": "color_attr", "include": [{"class": "bed", "count": 1, "color": "brown"}, {"class": "cell phone", "count": 1, "color": "pink"}], "prompt": "a photo of a brown bed and a pink cell phone"} 479 | {"tag": "color_attr", "include": [{"class": "broccoli", "count": 1, "color": "black"}, {"class": "cake", "count": 1, "color": "yellow"}], "prompt": "a photo of a black broccoli and a yellow cake"} 480 | {"tag": "color_attr", "include": [{"class": "train", "count": 1, "color": "red"}, {"class": "bear", "count": 1, "color": "purple"}], "prompt": "a photo of a red train and a purple bear"} 481 | {"tag": "color_attr", "include": [{"class": "tennis racket", "count": 1, "color": "purple"}, {"class": "sink", "count": 1, "color": "black"}], "prompt": "a photo of a purple tennis racket and a black sink"} 482 | {"tag": "color_attr", "include": [{"class": "vase", "count": 1, "color": "blue"}, {"class": "banana", "count": 1, "color": "black"}], "prompt": "a photo of a blue vase and a black banana"} 483 | {"tag": "color_attr", "include": [{"class": "clock", "count": 1, "color": "blue"}, {"class": "cup", "count": 1, "color": "white"}], "prompt": "a photo of a blue clock and a white cup"} 484 | {"tag": "color_attr", "include": [{"class": "umbrella", "count": 1, "color": "red"}, {"class": "couch", "count": 1, "color": "blue"}], "prompt": "a photo of a red umbrella and a blue couch"} 485 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "white"}, {"class": "giraffe", "count": 1, "color": "red"}], "prompt": "a photo of a white handbag and a red giraffe"} 486 | {"tag": "color_attr", "include": [{"class": "tv remote", "count": 1, "color": "pink"}, {"class": "airplane", "count": 1, "color": "blue"}], "prompt": "a photo of a pink tv remote and a blue airplane"} 487 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "pink"}, {"class": "scissors", "count": 1, "color": "black"}], "prompt": "a photo of a pink handbag and a black scissors"} 488 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "brown"}, {"class": "hair drier", "count": 1, "color": "pink"}], "prompt": "a photo of a brown car and a pink hair drier"} 489 | {"tag": "color_attr", "include": [{"class": "bus", "count": 1, "color": "black"}, {"class": "cell phone", "count": 1, "color": "brown"}], "prompt": "a photo of a black bus and a brown cell phone"} 490 | {"tag": "color_attr", "include": [{"class": "sheep", "count": 1, "color": "purple"}, {"class": "banana", "count": 1, "color": "pink"}], "prompt": "a photo of a purple sheep and a pink banana"} 491 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "blue"}, {"class": "cell phone", "count": 1, "color": "white"}], "prompt": "a photo of a blue handbag and a white cell phone"} 492 | {"tag": "color_attr", "include": [{"class": "pizza", "count": 1, "color": "white"}, {"class": "umbrella", "count": 1, "color": "green"}], "prompt": "a photo of a white pizza and a green umbrella"} 493 | {"tag": "color_attr", "include": [{"class": "tie", "count": 1, "color": "white"}, {"class": "skateboard", "count": 1, "color": "purple"}], "prompt": "a photo of a white tie and a purple skateboard"} 494 | {"tag": "color_attr", "include": [{"class": "sports ball", "count": 1, "color": "yellow"}, {"class": "boat", "count": 1, "color": "green"}], "prompt": "a photo of a yellow sports ball and a green boat"} 495 | {"tag": "color_attr", "include": [{"class": "wine glass", "count": 1, "color": "white"}, {"class": "giraffe", "count": 1, "color": "brown"}], "prompt": "a photo of a white wine glass and a brown giraffe"} 496 | {"tag": "color_attr", "include": [{"class": "bowl", "count": 1, "color": "yellow"}, {"class": "baseball glove", "count": 1, "color": "white"}], "prompt": "a photo of a yellow bowl and a white baseball glove"} 497 | {"tag": "color_attr", "include": [{"class": "microwave", "count": 1, "color": "orange"}, {"class": "spoon", "count": 1, "color": "black"}], "prompt": "a photo of an orange microwave and a black spoon"} 498 | {"tag": "color_attr", "include": [{"class": "skateboard", "count": 1, "color": "orange"}, {"class": "bowl", "count": 1, "color": "pink"}], "prompt": "a photo of an orange skateboard and a pink bowl"} 499 | {"tag": "color_attr", "include": [{"class": "toilet", "count": 1, "color": "blue"}, {"class": "suitcase", "count": 1, "color": "white"}], "prompt": "a photo of a blue toilet and a white suitcase"} 500 | {"tag": "color_attr", "include": [{"class": "boat", "count": 1, "color": "white"}, {"class": "hot dog", "count": 1, "color": "orange"}], "prompt": "a photo of a white boat and an orange hot dog"} 501 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "yellow"}, {"class": "dog", "count": 1, "color": "pink"}], "prompt": "a photo of a yellow dining table and a pink dog"} 502 | {"tag": "color_attr", "include": [{"class": "cake", "count": 1, "color": "red"}, {"class": "chair", "count": 1, "color": "purple"}], "prompt": "a photo of a red cake and a purple chair"} 503 | {"tag": "color_attr", "include": [{"class": "tie", "count": 1, "color": "blue"}, {"class": "dining table", "count": 1, "color": "pink"}], "prompt": "a photo of a blue tie and a pink dining table"} 504 | {"tag": "color_attr", "include": [{"class": "cow", "count": 1, "color": "blue"}, {"class": "computer keyboard", "count": 1, "color": "black"}], "prompt": "a photo of a blue cow and a black computer keyboard"} 505 | {"tag": "color_attr", "include": [{"class": "pizza", "count": 1, "color": "yellow"}, {"class": "oven", "count": 1, "color": "green"}], "prompt": "a photo of a yellow pizza and a green oven"} 506 | {"tag": "color_attr", "include": [{"class": "laptop", "count": 1, "color": "red"}, {"class": "car", "count": 1, "color": "brown"}], "prompt": "a photo of a red laptop and a brown car"} 507 | {"tag": "color_attr", "include": [{"class": "computer keyboard", "count": 1, "color": "purple"}, {"class": "scissors", "count": 1, "color": "blue"}], "prompt": "a photo of a purple computer keyboard and a blue scissors"} 508 | {"tag": "color_attr", "include": [{"class": "surfboard", "count": 1, "color": "green"}, {"class": "oven", "count": 1, "color": "orange"}], "prompt": "a photo of a green surfboard and an orange oven"} 509 | {"tag": "color_attr", "include": [{"class": "parking meter", "count": 1, "color": "yellow"}, {"class": "refrigerator", "count": 1, "color": "pink"}], "prompt": "a photo of a yellow parking meter and a pink refrigerator"} 510 | {"tag": "color_attr", "include": [{"class": "computer mouse", "count": 1, "color": "brown"}, {"class": "bottle", "count": 1, "color": "purple"}], "prompt": "a photo of a brown computer mouse and a purple bottle"} 511 | {"tag": "color_attr", "include": [{"class": "umbrella", "count": 1, "color": "red"}, {"class": "cow", "count": 1, "color": "green"}], "prompt": "a photo of a red umbrella and a green cow"} 512 | {"tag": "color_attr", "include": [{"class": "giraffe", "count": 1, "color": "red"}, {"class": "cell phone", "count": 1, "color": "black"}], "prompt": "a photo of a red giraffe and a black cell phone"} 513 | {"tag": "color_attr", "include": [{"class": "oven", "count": 1, "color": "brown"}, {"class": "train", "count": 1, "color": "purple"}], "prompt": "a photo of a brown oven and a purple train"} 514 | {"tag": "color_attr", "include": [{"class": "baseball bat", "count": 1, "color": "blue"}, {"class": "book", "count": 1, "color": "pink"}], "prompt": "a photo of a blue baseball bat and a pink book"} 515 | {"tag": "color_attr", "include": [{"class": "cup", "count": 1, "color": "green"}, {"class": "bowl", "count": 1, "color": "yellow"}], "prompt": "a photo of a green cup and a yellow bowl"} 516 | {"tag": "color_attr", "include": [{"class": "suitcase", "count": 1, "color": "yellow"}, {"class": "bus", "count": 1, "color": "brown"}], "prompt": "a photo of a yellow suitcase and a brown bus"} 517 | {"tag": "color_attr", "include": [{"class": "motorcycle", "count": 1, "color": "orange"}, {"class": "donut", "count": 1, "color": "pink"}], "prompt": "a photo of an orange motorcycle and a pink donut"} 518 | {"tag": "color_attr", "include": [{"class": "giraffe", "count": 1, "color": "orange"}, {"class": "baseball glove", "count": 1, "color": "white"}], "prompt": "a photo of an orange giraffe and a white baseball glove"} 519 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "orange"}, {"class": "carrot", "count": 1, "color": "green"}], "prompt": "a photo of an orange handbag and a green carrot"} 520 | {"tag": "color_attr", "include": [{"class": "bottle", "count": 1, "color": "black"}, {"class": "refrigerator", "count": 1, "color": "white"}], "prompt": "a photo of a black bottle and a white refrigerator"} 521 | {"tag": "color_attr", "include": [{"class": "dog", "count": 1, "color": "white"}, {"class": "potted plant", "count": 1, "color": "blue"}], "prompt": "a photo of a white dog and a blue potted plant"} 522 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "orange"}, {"class": "car", "count": 1, "color": "red"}], "prompt": "a photo of an orange handbag and a red car"} 523 | {"tag": "color_attr", "include": [{"class": "stop sign", "count": 1, "color": "red"}, {"class": "book", "count": 1, "color": "blue"}], "prompt": "a photo of a red stop sign and a blue book"} 524 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "yellow"}, {"class": "toothbrush", "count": 1, "color": "orange"}], "prompt": "a photo of a yellow car and an orange toothbrush"} 525 | {"tag": "color_attr", "include": [{"class": "potted plant", "count": 1, "color": "black"}, {"class": "toilet", "count": 1, "color": "yellow"}], "prompt": "a photo of a black potted plant and a yellow toilet"} 526 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "brown"}, {"class": "suitcase", "count": 1, "color": "white"}], "prompt": "a photo of a brown dining table and a white suitcase"} 527 | {"tag": "color_attr", "include": [{"class": "donut", "count": 1, "color": "orange"}, {"class": "stop sign", "count": 1, "color": "yellow"}], "prompt": "a photo of an orange donut and a yellow stop sign"} 528 | {"tag": "color_attr", "include": [{"class": "suitcase", "count": 1, "color": "green"}, {"class": "boat", "count": 1, "color": "blue"}], "prompt": "a photo of a green suitcase and a blue boat"} 529 | {"tag": "color_attr", "include": [{"class": "tennis racket", "count": 1, "color": "orange"}, {"class": "sports ball", "count": 1, "color": "yellow"}], "prompt": "a photo of an orange tennis racket and a yellow sports ball"} 530 | {"tag": "color_attr", "include": [{"class": "computer keyboard", "count": 1, "color": "purple"}, {"class": "chair", "count": 1, "color": "red"}], "prompt": "a photo of a purple computer keyboard and a red chair"} 531 | {"tag": "color_attr", "include": [{"class": "suitcase", "count": 1, "color": "purple"}, {"class": "pizza", "count": 1, "color": "orange"}], "prompt": "a photo of a purple suitcase and an orange pizza"} 532 | {"tag": "color_attr", "include": [{"class": "bottle", "count": 1, "color": "white"}, {"class": "sheep", "count": 1, "color": "blue"}], "prompt": "a photo of a white bottle and a blue sheep"} 533 | {"tag": "color_attr", "include": [{"class": "backpack", "count": 1, "color": "purple"}, {"class": "umbrella", "count": 1, "color": "white"}], "prompt": "a photo of a purple backpack and a white umbrella"} 534 | {"tag": "color_attr", "include": [{"class": "potted plant", "count": 1, "color": "orange"}, {"class": "spoon", "count": 1, "color": "black"}], "prompt": "a photo of an orange potted plant and a black spoon"} 535 | {"tag": "color_attr", "include": [{"class": "tennis racket", "count": 1, "color": "green"}, {"class": "dog", "count": 1, "color": "black"}], "prompt": "a photo of a green tennis racket and a black dog"} 536 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "yellow"}, {"class": "refrigerator", "count": 1, "color": "blue"}], "prompt": "a photo of a yellow handbag and a blue refrigerator"} 537 | {"tag": "color_attr", "include": [{"class": "broccoli", "count": 1, "color": "pink"}, {"class": "sink", "count": 1, "color": "red"}], "prompt": "a photo of a pink broccoli and a red sink"} 538 | {"tag": "color_attr", "include": [{"class": "bowl", "count": 1, "color": "red"}, {"class": "sink", "count": 1, "color": "pink"}], "prompt": "a photo of a red bowl and a pink sink"} 539 | {"tag": "color_attr", "include": [{"class": "toilet", "count": 1, "color": "white"}, {"class": "apple", "count": 1, "color": "red"}], "prompt": "a photo of a white toilet and a red apple"} 540 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "pink"}, {"class": "sandwich", "count": 1, "color": "black"}], "prompt": "a photo of a pink dining table and a black sandwich"} 541 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "black"}, {"class": "parking meter", "count": 1, "color": "green"}], "prompt": "a photo of a black car and a green parking meter"} 542 | {"tag": "color_attr", "include": [{"class": "bird", "count": 1, "color": "yellow"}, {"class": "motorcycle", "count": 1, "color": "black"}], "prompt": "a photo of a yellow bird and a black motorcycle"} 543 | {"tag": "color_attr", "include": [{"class": "giraffe", "count": 1, "color": "brown"}, {"class": "stop sign", "count": 1, "color": "white"}], "prompt": "a photo of a brown giraffe and a white stop sign"} 544 | {"tag": "color_attr", "include": [{"class": "banana", "count": 1, "color": "white"}, {"class": "elephant", "count": 1, "color": "black"}], "prompt": "a photo of a white banana and a black elephant"} 545 | {"tag": "color_attr", "include": [{"class": "cow", "count": 1, "color": "orange"}, {"class": "sandwich", "count": 1, "color": "purple"}], "prompt": "a photo of an orange cow and a purple sandwich"} 546 | {"tag": "color_attr", "include": [{"class": "clock", "count": 1, "color": "red"}, {"class": "cell phone", "count": 1, "color": "black"}], "prompt": "a photo of a red clock and a black cell phone"} 547 | {"tag": "color_attr", "include": [{"class": "knife", "count": 1, "color": "brown"}, {"class": "donut", "count": 1, "color": "blue"}], "prompt": "a photo of a brown knife and a blue donut"} 548 | {"tag": "color_attr", "include": [{"class": "cup", "count": 1, "color": "red"}, {"class": "handbag", "count": 1, "color": "pink"}], "prompt": "a photo of a red cup and a pink handbag"} 549 | {"tag": "color_attr", "include": [{"class": "bicycle", "count": 1, "color": "yellow"}, {"class": "motorcycle", "count": 1, "color": "red"}], "prompt": "a photo of a yellow bicycle and a red motorcycle"} 550 | {"tag": "color_attr", "include": [{"class": "orange", "count": 1, "color": "red"}, {"class": "broccoli", "count": 1, "color": "purple"}], "prompt": "a photo of a red orange and a purple broccoli"} 551 | {"tag": "color_attr", "include": [{"class": "traffic light", "count": 1, "color": "orange"}, {"class": "toilet", "count": 1, "color": "white"}], "prompt": "a photo of an orange traffic light and a white toilet"} 552 | {"tag": "color_attr", "include": [{"class": "cup", "count": 1, "color": "green"}, {"class": "pizza", "count": 1, "color": "red"}], "prompt": "a photo of a green cup and a red pizza"} 553 | {"tag": "color_attr", "include": [{"class": "pizza", "count": 1, "color": "blue"}, {"class": "baseball glove", "count": 1, "color": "yellow"}], "prompt": "a photo of a blue pizza and a yellow baseball glove"} 554 | --------------------------------------------------------------------------------