├── images
│   └── geneval_figure_1.png
├── evaluation
│   ├── download_models.sh
│   ├── object_names.txt
│   ├── summary_scores.py
│   └── evaluate_images.py
├── prompts
│   ├── object_names.txt
│   ├── create_prompts.py
│   ├── generation_prompts.txt
│   └── evaluation_metadata.jsonl
├── LICENSE
├── .gitignore
├── README.md
├── generation
│   └── diffusers_generate.py
└── environment.yml
/images/geneval_figure_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/djghosh13/geneval/HEAD/images/geneval_figure_1.png
--------------------------------------------------------------------------------
/evaluation/download_models.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Download Mask2Former object detection config and weights.
#
# Usage: download_models.sh <output_dir>
#
# The checkpoint is saved as
#   <output_dir>/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.pth
# When no output directory is given nothing is downloaded; a usage hint is
# printed to stderr and the script still exits successfully.

if [ -n "$1" ]
then
    target="$1/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.pth"
    mkdir -p "$1" || exit 1
    # wget -O creates the output file before the transfer starts, so a failed
    # download would otherwise leave an empty or truncated checkpoint behind.
    if ! wget https://download.openmmlab.com/mmdetection/v2.0/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco_20220504_001756-743b7d99.pth -O "$target"
    then
        rm -f "$target"
        exit 1
    fi
else
    echo "Usage: $0 <output_dir>" >&2
fi
10 |
--------------------------------------------------------------------------------
/prompts/object_names.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | couch
59 | potted plant
60 | bed
61 | dining table
62 | toilet
63 | tv
64 | laptop
65 | computer mouse
66 | tv remote
67 | computer keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/evaluation/object_names.txt:
--------------------------------------------------------------------------------
1 | person
2 | bicycle
3 | car
4 | motorcycle
5 | airplane
6 | bus
7 | train
8 | truck
9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | couch
59 | potted plant
60 | bed
61 | dining table
62 | toilet
63 | tv
64 | laptop
65 | computer mouse
66 | tv remote
67 | computer keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush
81 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Dhruba Ghosh
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/evaluation/summary_scores.py:
--------------------------------------------------------------------------------
1 | # Get results of evaluation
2 |
3 | import argparse
4 | import os
5 |
6 | import numpy as np
7 | import pandas as pd
8 |
9 |
# Command line: a single positional argument, the results JSONL file
parser = argparse.ArgumentParser()
parser.add_argument("filename", type=str)
args = parser.parse_args()

# Load classnames (the list lives next to this script)

_names_path = os.path.join(os.path.dirname(__file__), "object_names.txt")
with open(_names_path) as cls_file:
    classnames = [line.strip() for line in cls_file]
# Map "multi word" class names to their index using "multi_word" keys
cls_to_idx = {"_".join(cls.split()): idx for idx, cls in enumerate(classnames)}

# Load results (one JSON record per line)

df = pd.read_json(args.filename, orient="records", lines=True)

# Measure overall success

# A prompt counts as correct as soon as any one of its images is correct
_by_prompt = df.groupby('metadata')
_image_accuracy = df['correct'].mean()
_prompt_accuracy = _by_prompt['correct'].any().mean()

print("Summary")
print("=======")
print(f"Total images: {len(df)}")
print(f"Total prompts: {len(_by_prompt)}")
print(f"% correct images: {_image_accuracy:.2%}")
print(f"% correct prompts: {_prompt_accuracy:.2%}")
print()

# By group

task_scores = []

print("Task breakdown")
print("==============")
# sort=False keeps the tasks in the order they first appear in the results
for tag, task_df in df.groupby('tag', sort=False):
    _hits = task_df['correct']
    _accuracy = _hits.mean()
    task_scores.append(_accuracy)
    print(f"{tag:<16} = {_accuracy:.2%} ({_hits.sum()} / {len(task_df)})")
print()

# The overall score is the unweighted mean of the per-task accuracies
print(f"Overall score (avg. over tasks): {np.mean(task_scores):.5f}")
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GenEval: An Object-Focused Framework for Evaluating Text-to-Image Alignment
2 |
3 | This repository contains code for the paper [GenEval: An Object-Focused Framework for Evaluating Text-to-Image Alignment](https://arxiv.org/abs/2310.11513) by Dhruba Ghosh, Hanna Hajishirzi, and Ludwig Schmidt.
4 |
5 | TLDR: We demonstrate the advantages of evaluating text-to-image models using existing object detection methods, to produce a fine-grained instance-level analysis of compositional capabilities.
6 |
7 | ### Abstract
8 | *Recent breakthroughs in diffusion models, multimodal pretraining, and efficient finetuning have led to an explosion of text-to-image generative models.
9 | Given human evaluation is expensive and difficult to scale, automated methods are critical for evaluating the increasingly large number of new models.
10 | However, most current automated evaluation metrics like FID or CLIPScore only offer a holistic measure of image quality or image-text alignment, and are unsuited for fine-grained or instance-level analysis.
11 | In this paper, we introduce GenEval, an object-focused framework to evaluate compositional image properties such as object co-occurrence, position, count, and color.
12 | We show that current object detection models can be leveraged to evaluate text-to-image models on a variety of generation tasks with strong human agreement, and that other discriminative vision models can be linked to this pipeline to further verify properties like object color.
13 | We then evaluate several open-source text-to-image models and analyze their relative generative capabilities on our benchmark.
14 | We find that recent models demonstrate significant improvement on these tasks, though they are still lacking in complex capabilities such as spatial relations and attribute binding.
15 | Finally, we demonstrate how GenEval might be used to help discover existing failure modes, in order to inform development of the next generation of text-to-image models.*
16 |
17 | ### Summary figure
18 |
19 |
20 | ![GenEval summary figure](images/geneval_figure_1.png)
21 |
22 |
23 | ### Main results
24 |
25 | | Model | Overall | Single object | Two object | Counting | Colors | Position | Color attribution |
26 | | ----- | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: | :-----: |
27 | | CLIP retrieval (baseline) | **0.35** | 0.89 | 0.22 | 0.37 | 0.62 | 0.03 | 0.00 |
28 | | minDALL-E | **0.23** | 0.73 | 0.11 | 0.12 | 0.37 | 0.02 | 0.01 |
29 | | Stable Diffusion v1.5 | **0.43** | 0.97 | 0.38 | 0.35 | 0.76 | 0.04 | 0.06 |
30 | | Stable Diffusion v2.1 | **0.50** | 0.98 | 0.51 | 0.44 | 0.85 | 0.07 | 0.17 |
31 | | Stable Diffusion XL | **0.55** | 0.98 | 0.74 | 0.39 | 0.85 | 0.15 | 0.23 |
32 | | IF-XL | **0.61** | 0.97 | 0.74 | 0.66 | 0.81 | 0.13 | 0.35 |
33 |
34 | ## Code
35 |
36 | ### Setup
37 |
38 | Install the dependencies, including `mmdet`, and download the Mask2Former object detector:
39 | ```bash
40 | git clone https://github.com/djghosh13/geneval.git
41 | cd geneval
42 | conda env create -f environment.yml
43 | conda activate geneval
44 | ./evaluation/download_models.sh "<OBJECT_DETECTOR_FOLDER>/"
45 |
46 | git clone https://github.com/open-mmlab/mmdetection.git
47 | cd mmdetection; git checkout 2.x
48 | pip install -v -e .
49 | ```
50 |
51 | The original GenEval prompts from the paper are already in `prompts/`, but you can sample new prompts with different random seeds using
52 | ```bash
53 | python prompts/create_prompts.py --seed <SEED> -n <NUM_PROMPTS> -o "<PROMPT_FOLDER>/"
54 | ```
55 |
56 | ### Image generation
57 |
58 | Sample image generation code for Stable Diffusion models is given in `generation/diffusers_generate.py`. Run
59 | ```bash
60 | python generation/diffusers_generate.py \
61 | "<PROMPT_FOLDER>/evaluation_metadata.jsonl" \
62 | --model "stable-diffusion-v1-5/stable-diffusion-v1-5" \
63 | --outdir "<IMAGE_FOLDER>"
64 | ```
65 | to generate 4 images per prompt using Stable Diffusion v1.5 and save in `<IMAGE_FOLDER>`.
66 |
67 | The generated format should be
68 | ```
69 | <IMAGE_FOLDER>/
70 | 00000/
71 | metadata.jsonl
72 | grid.png
73 | samples/
74 | 0000.png
75 | 0001.png
76 | 0002.png
77 | 0003.png
78 | 00001/
79 | ...
80 | ```
81 | where `metadata.jsonl` contains the `N`-th line from `evaluation_metadata.jsonl`. `grid.png` is optional here.
82 |
83 | ### Evaluation
84 |
85 | ```bash
86 | python evaluation/evaluate_images.py \
87 | "<IMAGE_FOLDER>" \
88 | --outfile "<RESULTS_FOLDER>/results.jsonl" \
89 | --model-path "<OBJECT_DETECTOR_FOLDER>"
90 | ```
91 |
92 | This will result in a JSONL file with each line corresponding to an image. In particular, each line has a `correct` key and a `reason` key specifying whether the generated image was deemed correct and, if applicable, why it was marked incorrect. You can run
93 |
94 | ```bash
95 | python evaluation/summary_scores.py "<RESULTS_FOLDER>/results.jsonl"
96 | ```
97 |
98 | to get the score across each task, and the overall GenEval score.
99 |
--------------------------------------------------------------------------------
/generation/diffusers_generate.py:
--------------------------------------------------------------------------------
1 | """Adapted from TODO"""
2 |
3 | import argparse
4 | import json
5 | import os
6 |
7 | import torch
8 | import numpy as np
9 | from PIL import Image
10 | from tqdm import tqdm, trange
11 | from einops import rearrange
12 | from torchvision.utils import make_grid
13 | from torchvision.transforms import ToTensor
14 | from pytorch_lightning import seed_everything
15 | from diffusers import DiffusionPipeline, StableDiffusionPipeline
16 |
17 |
18 | torch.set_grad_enabled(False)
19 |
20 |
def parse_args():
    """Parse the command-line options of the image generation script.

    Returns:
        argparse.Namespace with ``metadata_file``, ``model``, ``outdir``,
        ``n_samples``, ``steps``, ``negative_prompt``, ``H``, ``W``,
        ``scale``, ``seed``, ``batch_size`` and ``skip_grid``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "metadata_file",
        type=str,
        help="JSONL file containing lines of metadata for each prompt"
    )
    parser.add_argument(
        "--model",
        type=str,
        # Same model id the README uses for Stable Diffusion v1.5.
        default="stable-diffusion-v1-5/stable-diffusion-v1-5",
        help="Huggingface model name"
    )
    parser.add_argument(
        "--outdir",
        type=str,
        nargs="?",
        help="dir to write results to",
        # A bare `--outdir` (no value) falls back to the default directory
        # instead of producing None, which would break the path joins later.
        const="outputs",
        default="outputs"
    )
    parser.add_argument(
        "--n_samples",
        type=int,
        default=4,
        help="number of samples",
    )
    parser.add_argument(
        "--steps",
        type=int,
        default=50,
        help="number of ddim sampling steps",
    )
    parser.add_argument(
        "--negative-prompt",
        type=str,
        nargs="?",
        # Used when the flag is given without a value.
        const="ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face",
        default=None,
        help="negative prompt for guidance"
    )
    parser.add_argument(
        "--H",
        type=int,
        default=None,
        help="image height, in pixel space",
    )
    parser.add_argument(
        "--W",
        type=int,
        default=None,
        help="image width, in pixel space",
    )
    parser.add_argument(
        "--scale",
        type=float,
        default=9.0,
        help="unconditional guidance scale: eps = eps(x, empty) + scale * (eps(x, cond) - eps(x, empty))",
    )
    parser.add_argument(
        "--seed",
        type=int,
        default=42,
        help="the seed (for reproducible sampling)",
    )
    parser.add_argument(
        "--batch_size",
        type=int,
        default=1,
        help="how many samples can be produced simultaneously",
    )
    parser.add_argument(
        "--skip_grid",
        action="store_true",
        help="skip saving grid",
    )
    opt = parser.parse_args()
    return opt
98 |
99 |
def main(opt):
    """Generate ``opt.n_samples`` images for every prompt in the metadata file.

    For the prompt at (0-based) position ``index`` this writes, under
    ``<opt.outdir>/<index as 5 digits>/``:
      * ``metadata.jsonl`` - the prompt's metadata record,
      * ``samples/<k as 5 digits>.png`` - the generated images,
      * ``grid.png`` - all samples in one grid (unless ``opt.skip_grid``).

    Args:
        opt: namespace as returned by ``parse_args``.
    """
    # Load prompts; blank lines (e.g. a trailing newline) are not records
    with open(opt.metadata_file) as fp:
        metadatas = [json.loads(line) for line in fp if line.strip()]

    # Load model
    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
    # Half precision is only requested on GPU; on CPU fall back to float32
    dtype = torch.float16 if device.type == "cuda" else torch.float32
    if opt.model == "stabilityai/stable-diffusion-xl-base-1.0":
        model = DiffusionPipeline.from_pretrained(opt.model, torch_dtype=dtype, use_safetensors=True, variant="fp16")
        model.enable_xformers_memory_efficient_attention()
    else:
        model = StableDiffusionPipeline.from_pretrained(opt.model, torch_dtype=dtype)
    model = model.to(device)
    model.enable_attention_slicing()

    for index, metadata in enumerate(metadatas):
        # Re-seed for every prompt so each prompt's samples are reproducible
        # independently of the prompts generated before it
        seed_everything(opt.seed)

        outpath = os.path.join(opt.outdir, f"{index:0>5}")
        os.makedirs(outpath, exist_ok=True)

        prompt = metadata['prompt']
        n_rows = batch_size = opt.batch_size
        print(f"Prompt ({index: >3}/{len(metadatas)}): '{prompt}'")

        sample_path = os.path.join(outpath, "samples")
        os.makedirs(sample_path, exist_ok=True)
        with open(os.path.join(outpath, "metadata.jsonl"), "w") as fp:
            json.dump(metadata, fp)

        sample_count = 0
        all_samples = []

        with torch.no_grad():
            n_batches = (opt.n_samples + batch_size - 1) // batch_size
            for _ in trange(n_batches, desc="Sampling"):
                # Generate images; the last batch may be smaller than batch_size
                samples = model(
                    prompt,
                    height=opt.H,
                    width=opt.W,
                    num_inference_steps=opt.steps,
                    guidance_scale=opt.scale,
                    num_images_per_prompt=min(batch_size, opt.n_samples - sample_count),
                    negative_prompt=opt.negative_prompt or None
                ).images
                for sample in samples:
                    sample.save(os.path.join(sample_path, f"{sample_count:05}.png"))
                    sample_count += 1
                if not opt.skip_grid:
                    all_samples.append(torch.stack([ToTensor()(sample) for sample in samples], 0))

            if not opt.skip_grid and all_samples:
                # additionally, save as grid
                # Concatenate along the batch axis rather than stacking:
                # batches can differ in size when n_samples is not a
                # multiple of batch_size, which torch.stack cannot handle
                grid = torch.cat(all_samples, 0)
                grid = make_grid(grid, nrow=n_rows)

                # to image
                grid = 255. * rearrange(grid, 'c h w -> h w c').cpu().numpy()
                grid = Image.fromarray(grid.astype(np.uint8))
                grid.save(os.path.join(outpath, 'grid.png'))
                del grid
        del all_samples

    print("Done.")
165 |
166 |
167 | if __name__ == "__main__":
168 | opt = parse_args()
169 | main(opt)
170 |
--------------------------------------------------------------------------------
/prompts/create_prompts.py:
--------------------------------------------------------------------------------
1 | """
2 | Generate prompts for evaluation
3 | """
4 |
5 | import argparse
6 | import json
7 | import os
8 | import yaml
9 |
10 | import numpy as np
11 |
12 | # Load classnames
13 |
# Resolve the class list relative to this script rather than the current
# working directory, so `python prompts/create_prompts.py` works from the
# repository root as well as from inside `prompts/`.
with open(os.path.join(os.path.dirname(os.path.abspath(__file__)), "object_names.txt")) as cls_file:
    classnames = [line.strip() for line in cls_file]
16 |
17 | # Proper a vs an
18 |
def with_article(name: str):
    """Return *name* prefixed with "an" if it starts with a lowercase vowel, else "a"."""
    article = "an" if name[0] in "aeiou" else "a"
    return f"{article} {name}"
23 |
24 | # Proper plural
25 |
def make_plural(name: str):
    """Return a naive plural of *name*: append "es" after a final "s", otherwise "s".

    No other English pluralization rule is applied (e.g. "bench" -> "benchs").
    """
    suffix = "es" if name[-1] == "s" else "s"
    return name + suffix
30 |
31 | # Generates single object samples
32 |
def generate_single_object_sample(rng: np.random.Generator, size: int = None):
    """Sample "single_object" prompts over distinct classes.

    Args:
        rng: random generator used to pick the classes.
        size: number of samples to draw. ``None`` draws one sample and
            returns it as a bare dict instead of a list. Values larger than
            the number of classes are clamped (with a printed notice), since
            classes are drawn without replacement.

    Returns:
        A list of sample dicts (keys ``tag``, ``include``, ``prompt``), or a
        single such dict when ``size`` is ``None``.
    """
    TAG = "single_object"
    # Decide scalar-vs-list before any comparison: `None > int` raises TypeError
    return_scalar = size is None
    if not return_scalar and size > len(classnames):
        size = len(classnames)
        print(f"Not enough distinct classes, generating only {size} samples")
    # None (and 0) fall back to a single sample
    size = size or 1
    idxs = rng.choice(len(classnames), size=size, replace=False)
    samples = [dict(
        tag=TAG,
        include=[
            {"class": classnames[idx], "count": 1}
        ],
        prompt=f"a photo of {with_article(classnames[idx])}"
    ) for idx in idxs]
    if return_scalar:
        return samples[0]
    return samples
51 |
52 | # Generate two object samples
53 |
def generate_two_object_sample(rng: np.random.Generator):
    """Sample one "two_object" prompt naming two distinct classes."""
    first, second = (
        classnames[i]
        for i in rng.choice(len(classnames), size=2, replace=False)
    )
    sample = {
        "tag": "two_object",
        "include": [
            {"class": first, "count": 1},
            {"class": second, "count": 1},
        ],
        "prompt": f"a photo of {with_article(first)} and {with_article(second)}",
    }
    return sample
65 |
66 | # Generate counting samples
67 |
# Number words indexed by their value; bounds the counts that can be spelled out
numbers = ["zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten"]

def generate_counting_sample(rng: np.random.Generator, max_count=4):
    """Sample one "counting" prompt asking for 2..max_count instances of a class.

    Args:
        rng: random generator used to pick the class and the count.
        max_count: largest count that may be requested (inclusive). Must be
            at least 2 and at most the largest entry of ``numbers``.

    Returns:
        A sample dict whose ``include`` entry requires exactly the drawn
        count and whose ``exclude`` entry lists count + 1 for the same class.

    Raises:
        ValueError: if ``max_count`` is outside the supported range.
    """
    # Validate up front: an oversized max_count would otherwise fail with an
    # IndexError only on the draws that happen to exceed the word list
    if not 2 <= max_count < len(numbers):
        raise ValueError(
            f"max_count must be between 2 and {len(numbers) - 1}, got {max_count}"
        )
    TAG = "counting"
    idx = rng.choice(len(classnames))
    num = int(rng.integers(2, max_count, endpoint=True))
    return dict(
        tag=TAG,
        include=[
            {"class": classnames[idx], "count": num}
        ],
        exclude=[
            {"class": classnames[idx], "count": num + 1}
        ],
        prompt=f"a photo of {numbers[num]} {make_plural(classnames[idx])}"
    )
84 |
85 | # Generate color samples
86 |
# Color words available to the color-based tasks
colors = [
    "red", "orange", "yellow", "green", "blue",
    "purple", "pink", "brown", "black", "white",
]

def generate_color_sample(rng: np.random.Generator):
    """Sample one "colors" prompt: a single colored object of a non-person class."""
    n_classes = len(classnames)
    # Draw an offset in [1, n_classes - 1] and shift it past "person", so the
    # resulting index can never be the "person" class itself
    offset = rng.choice(n_classes - 1) + 1
    idx = (offset + classnames.index("person")) % n_classes  # No "[COLOR] person" prompts
    color = colors[rng.choice(len(colors))]
    name = classnames[idx]
    return {
        "tag": "colors",
        "include": [
            {"class": name, "count": 1, "color": color},
        ],
        "prompt": f"a photo of {with_article(color)} {name}",
    }
101 |
102 | # Generate position samples
103 |
# Spatial relations, phrased so that "<A> <relation> <B>" reads naturally
positions = [
    "left of",
    "right of",
    "above",
    "below",
]

def generate_position_sample(rng: np.random.Generator):
    """Sample one "position" prompt placing one object relative to another."""
    idx_a, idx_b = rng.choice(len(classnames), size=2, replace=False)
    position = positions[rng.choice(len(positions))]
    subject = classnames[idx_a]
    anchor = classnames[idx_b]
    # The anchor is listed first; the subject's relation refers to the
    # include entry at index 0, i.e. the anchor
    include = [
        {"class": anchor, "count": 1},
        {"class": subject, "count": 1, "position": (position, 0)},
    ]
    return {
        "tag": "position",
        "include": include,
        "prompt": f"a photo of {with_article(subject)} {position} {with_article(anchor)}",
    }
118 |
119 | # Generate color attribution samples
120 |
def generate_color_attribution_sample(rng: np.random.Generator):
    """Sample one "color_attr" prompt: two distinct non-person classes in two distinct colors."""
    n_classes = len(classnames)
    # Offsets in [1, n_classes - 1], shifted past "person", never land on it
    offsets = rng.choice(n_classes - 1, size=2, replace=False) + 1
    idx_a, idx_b = (offsets + classnames.index("person")) % n_classes  # No "[COLOR] person" prompts
    cidx_a, cidx_b = rng.choice(len(colors), size=2, replace=False)
    name_a, color_a = classnames[idx_a], colors[cidx_a]
    name_b, color_b = classnames[idx_b], colors[cidx_b]
    return {
        "tag": "color_attr",
        "include": [
            {"class": name_a, "count": 1, "color": color_a},
            {"class": name_b, "count": 1, "color": color_b},
        ],
        "prompt": f"a photo of {with_article(color_a)} {name_a} and {with_article(color_b)} {name_b}",
    }
134 |
135 |
136 | # Generate evaluation suite
137 |
def generate_suite(rng: np.random.Generator, n: int = 100, output_path: str = ""):
    """Generate the full prompt suite and write it to disk.

    Draws one single-object sample per class, then ``n`` samples for each of
    the two-object, counting, color, position and color-attribution tasks,
    drops exact duplicates (keeping first occurrences), and writes:
      * ``generation_prompts.txt`` - one prompt per line,
      * ``evaluation_metadata.jsonl`` - one JSON sample per line.

    Args:
        rng: random generator driving all sampling.
        n: number of samples drawn per task (before de-duplication).
        output_path: folder for the two output files; the empty string
            writes into the current working directory.
    """
    samples = []
    # Generate single object samples for all COCO classnames
    samples.extend(generate_single_object_sample(rng, size=len(classnames)))
    # Generate two object samples (~100)
    for _ in range(n):
        samples.append(generate_two_object_sample(rng))
    # Generate counting samples
    for _ in range(n):
        samples.append(generate_counting_sample(rng, max_count=4))
    # Generate color samples
    for _ in range(n):
        samples.append(generate_color_sample(rng))
    # Generate position samples
    for _ in range(n):
        samples.append(generate_position_sample(rng))
    # Generate color attribution samples
    for _ in range(n):
        samples.append(generate_color_attribution_sample(rng))
    # De-duplicate, using each sample's YAML dump as its identity
    unique_samples, used_samples = [], set()
    for sample in samples:
        sample_text = yaml.safe_dump(sample)
        if sample_text not in used_samples:
            unique_samples.append(sample)
            used_samples.add(sample_text)

    # Write to files
    # os.makedirs("") raises FileNotFoundError, so only create a folder when
    # one was actually named
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    with open(os.path.join(output_path, "generation_prompts.txt"), "w") as fp:
        for sample in unique_samples:
            print(sample['prompt'], file=fp)
    with open(os.path.join(output_path, "evaluation_metadata.jsonl"), "w") as fp:
        for sample in unique_samples:
            print(json.dumps(sample), file=fp)
173 |
174 |
if __name__ == "__main__":
    # Script entry point: read the options, seed the generator, build the suite
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--seed",
        type=int,
        default=43,
        help="generation seed (default: 43)",
    )
    cli.add_argument(
        "--num-prompts", "-n",
        type=int,
        default=100,
        help="number of prompts per task (default: 100)",
    )
    cli.add_argument(
        "--output-path", "-o",
        type=str,
        default="prompts",
        help="output folder for prompts and metadata (default: 'prompts/')",
    )
    args = cli.parse_args()
    rng = np.random.default_rng(args.seed)
    generate_suite(rng, args.num_prompts, args.output_path)
183 |
184 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: geneval
2 | channels:
3 | - pytorch
4 | - nvidia/label/cuda-11.3.0
5 | - conda-forge
6 | - defaults
7 | dependencies:
8 | - _libgcc_mutex=0.1=main
9 | - _openmp_mutex=5.1=1_gnu
10 | - blas=1.0=mkl
11 | - brotlipy=0.7.0=py39h27cfd23_1003
12 | - bzip2=1.0.8=h7b6447c_0
13 | - ca-certificates=2023.11.17=hbcca054_0
14 | - certifi=2023.11.17=pyhd8ed1ab_0
15 | - cffi=1.15.1=py39h5eee18b_3
16 | - charset-normalizer=2.0.4=pyhd3eb1b0_0
17 | - colorama=0.4.6=pyhd8ed1ab_0
18 | - cryptography=39.0.1=py39h9ce1e76_0
19 | - cuda-nvcc=11.3.58=h2467b9f_0
20 | - cudatoolkit=11.3.1=h2bc3f7f_2
21 | - diffusers=0.24.0=pyhd8ed1ab_0
22 | - ffmpeg=4.3=hf484d3e_0
23 | - freetype=2.12.1=h4a9f257_0
24 | - giflib=5.2.1=h5eee18b_3
25 | - gmp=6.2.1=h295c915_3
26 | - gnutls=3.6.15=he1e5248_0
27 | - huggingface_hub=0.19.4=pyhd8ed1ab_0
28 | - idna=3.4=py39h06a4308_0
29 | - intel-openmp=2021.4.0=h06a4308_3561
30 | - jpeg=9e=h5eee18b_1
31 | - lame=3.100=h7b6447c_0
32 | - lcms2=2.12=h3be6417_0
33 | - ld_impl_linux-64=2.38=h1181459_1
34 | - lerc=3.0=h295c915_0
35 | - libdeflate=1.17=h5eee18b_0
36 | - libffi=3.4.4=h6a678d5_0
37 | - libgcc-ng=11.2.0=h1234567_1
38 | - libgomp=11.2.0=h1234567_1
39 | - libiconv=1.16=h7f8727e_2
40 | - libidn2=2.3.4=h5eee18b_0
41 | - libpng=1.6.39=h5eee18b_0
42 | - libstdcxx-ng=11.2.0=h1234567_1
43 | - libtasn1=4.19.0=h5eee18b_0
44 | - libtiff=4.5.0=h6a678d5_2
45 | - libunistring=0.9.10=h27cfd23_0
46 | - libwebp=1.2.4=h11a3e52_1
47 | - libwebp-base=1.2.4=h5eee18b_1
48 | - lz4-c=1.9.4=h6a678d5_0
49 | - mkl=2021.4.0=h06a4308_640
50 | - mkl-service=2.4.0=py39h7f8727e_0
51 | - mkl_fft=1.3.1=py39hd3c417c_0
52 | - mkl_random=1.2.2=py39h51133e4_0
53 | - ncurses=6.4=h6a678d5_0
54 | - nettle=3.7.3=hbbd107a_1
55 | - numpy=1.23.1=py39h6c91a56_0
56 | - numpy-base=1.23.1=py39ha15fc14_0
57 | - openh264=2.1.1=h4ff587b_0
58 | - openssl=1.1.1w=h7f8727e_0
59 | - pillow=9.4.0=py39h6a678d5_0
60 | - pip=20.3.3=py39h06a4308_0
61 | - pycparser=2.21=pyhd3eb1b0_0
62 | - pyopenssl=23.0.0=py39h06a4308_0
63 | - pysocks=1.7.1=py39h06a4308_0
64 | - python=3.9.16=h7a1cb2a_2
65 | - python_abi=3.9=2_cp39
66 | - pytorch=1.12.1=py3.9_cuda11.3_cudnn8.3.2_0
67 | - pytorch-mutex=1.0=cuda
68 | - pyyaml=6.0=py39hb9d737c_4
69 | - readline=8.2=h5eee18b_0
70 | - requests=2.29.0=py39h06a4308_0
71 | - setuptools=66.0.0=py39h06a4308_0
72 | - six=1.16.0=pyhd3eb1b0_1
73 | - sqlite=3.41.2=h5eee18b_0
74 | - tk=8.6.12=h1ccaba5_0
75 | - torchvision=0.13.1=py39_cu113
76 | - typing-extensions=4.5.0=hd8ed1ab_0
77 | - typing_extensions=4.5.0=pyha770c72_0
78 | - urllib3=1.26.15=py39h06a4308_0
79 | - wheel=0.38.4=py39h06a4308_0
80 | - xz=5.4.2=h5eee18b_0
81 | - yaml=0.2.5=h7f98852_2
82 | - zlib=1.2.13=h5eee18b_0
83 | - zstd=1.5.5=hc292b87_0
84 | - pip:
85 | - absl-py==1.4.0
86 | - addict==2.4.0
87 | - aiohttp==3.8.4
88 | - aiosignal==1.3.1
89 | - albumentations==1.3.0
90 | - altair==5.0.0
91 | - aniso8601==9.0.1
92 | - antlr4-python3-runtime==4.8
93 | - async-timeout==4.0.2
94 | - attrs==23.1.0
95 | - autofaiss==2.15.8
96 | - blinker==1.6.2
97 | - braceexpand==0.1.7
98 | - cachetools==5.3.0
99 | - click==8.1.3
100 | - clip-anytorch==2.5.2
101 | - clip-benchmark==1.4.0
102 | - clip-retrieval==2.37.0
103 | - cloudpickle==2.2.1
104 | - coloredlogs==15.0.1
105 | - contourpy==1.0.7
106 | - cycler==0.11.0
107 | - cython==0.29.34
108 | - dataclasses==0.6
109 | - decorator==5.1.1
110 | - docker-pycreds==0.4.0
111 | - einops==0.3.0
112 | - embedding-reader==1.5.1
113 | - exifread-nocycle==3.0.1
114 | - faiss-cpu==1.7.4
115 | - filelock==3.12.0
116 | - fire==0.4.0
117 | - flask==2.3.3
118 | - flask-cors==3.0.10
119 | - flask-restful==0.3.10
120 | - flatbuffers==23.5.9
121 | - fonttools==4.39.4
122 | - frozenlist==1.3.3
123 | - fsspec==2022.11.0
124 | - ftfy==6.1.1
125 | - future==0.18.3
126 | - gitdb==4.0.10
127 | - gitpython==3.1.31
128 | - google-auth==2.18.1
129 | - google-auth-oauthlib==1.0.0
130 | - grpcio==1.55.0
131 | - h5py==3.8.0
132 | - humanfriendly==10.0
133 | - imageio==2.9.0
134 | - imageio-ffmpeg==0.4.2
135 | - img2dataset==1.42.0
136 | - importlib-metadata==6.6.0
137 | - importlib-resources==5.12.0
138 | - invisible-watermark==0.1.5
139 | - itsdangerous==2.1.2
140 | - jinja2==3.1.2
141 | - joblib==1.2.0
142 | - jsonschema==4.17.3
143 | - kiwisolver==1.4.4
144 | - kornia==0.6.0
145 | - lazy-loader==0.2
146 | - markdown==3.4.3
147 | - markdown-it-py==2.2.0
148 | - markupsafe==2.1.2
149 | - matplotlib==3.7.1
150 | - mdurl==0.1.2
151 | - mmcv-full==1.7.1
152 | - mmengine==0.7.3
153 | - model-index==0.1.11
154 | - mpmath==1.3.0
155 | - multidict==6.0.4
156 | - multilingual-clip==1.0.10
157 | - networkx==3.1
158 | - nltk==3.8.1
159 | - nvidia-cublas-cu11==2022.4.8
160 | - nvidia-cublas-cu117==11.10.1.25
161 | - nvidia-cuda-runtime-cu11==2022.4.25
162 | - nvidia-cuda-runtime-cu117==11.7.60
163 | - nvidia-cudnn-cu11==2022.5.19
164 | - nvidia-cudnn-cu116==8.4.0.27
165 | - nvidia-cusolver-cu11==2022.4.8
166 | - nvidia-cusolver-cu117==11.3.5.50
167 | - nvidia-cusparse-cu11==2022.4.8
168 | - nvidia-cusparse-cu117==11.7.3.50
169 | - nvidia-pyindex==1.0.9
170 | - oauthlib==3.2.2
171 | - omegaconf==2.1.1
172 | - onnx==1.14.0
173 | - onnxruntime==1.14.1
174 | - open-clip-torch==2.20.0
175 | - opencv-python==4.6.0.66
176 | - opencv-python-headless==4.7.0.72
177 | - openmim==0.3.7
178 | - ordered-set==4.1.0
179 | - packaging==23.1
180 | - pandas==1.5.3
181 | - pathtools==0.1.2
182 | - prometheus-client==0.17.1
183 | - promise==2.3
184 | - protobuf==3.20.3
185 | - psutil==5.9.5
186 | - pyarrow==7.0.0
187 | - pyasn1==0.5.0
188 | - pyasn1-modules==0.3.0
189 | - pycocoevalcap==1.2
190 | - pycocotools==2.0.6
191 | - pydeck==0.8.1b0
192 | - pydeprecate==0.3.1
193 | - pygments==2.15.1
194 | - pympler==1.0.1
195 | - pyparsing==3.0.9
196 | - pyrsistent==0.19.3
197 | - python-dateutil==2.8.2
198 | - pytorch-lightning==1.4.2
199 | - pytz==2023.3
200 | - pywavelets==1.4.1
201 | - qudida==0.0.4
202 | - regex==2023.5.5
203 | - requests-oauthlib==1.3.1
204 | - rich==13.3.5
205 | - rsa==4.9
206 | - safetensors==0.3.1
207 | - scikit-image==0.20.0
208 | - scikit-learn==1.2.2
209 | - scipy==1.9.1
210 | - semver==3.0.0
211 | - sentence-transformers==2.2.2
212 | - sentencepiece==0.1.99
213 | - sentry-sdk==1.29.2
214 | - setproctitle==1.3.2
215 | - shapely==2.0.1
216 | - shortuuid==1.0.11
217 | - smmap==5.0.0
218 | - streamlit==1.12.1
219 | - streamlit-drawable-canvas==0.8.0
220 | - submitit==1.4.5
221 | - sympy==1.12
222 | - tabulate==0.9.0
223 | - tensorboard==2.13.0
224 | - tensorboard-data-server==0.7.0
225 | - termcolor==2.3.0
226 | - terminaltables==3.1.10
227 | - test-tube==0.7.5
228 | - threadpoolctl==3.1.0
229 | - tifffile==2023.4.12
230 | - timm==0.9.2
231 | - tokenizers==0.15.0
232 | - toml==0.10.2
233 | - tomli==2.0.1
234 | - toolz==0.12.0
235 | - torchmetrics==0.6.0
236 | - tornado==6.3.2
237 | - tqdm==4.65.0
238 | - transformers==4.36.1
239 | - tzdata==2023.3
240 | - tzlocal==5.0.1
241 | - validators==0.20.0
242 | - wandb==0.12.21
243 | - watchdog==3.0.0
244 | - wcwidth==0.2.6
245 | - webdataset==0.2.48
246 | - werkzeug==2.3.7
247 | - yapf==0.33.0
248 | - yarl==1.9.2
249 | - zipp==3.15.0
250 |
--------------------------------------------------------------------------------
/evaluation/evaluate_images.py:
--------------------------------------------------------------------------------
1 | """
2 | Evaluate generated images using Mask2Former (or other object detector model)
3 | """
4 |
5 | import argparse
6 | import json
7 | import os
8 | import re
9 | import sys
10 | import time
11 |
12 | import warnings
13 | warnings.filterwarnings("ignore")
14 |
15 | import numpy as np
16 | import pandas as pd
17 | from PIL import Image, ImageOps
18 | import torch
19 | import mmdet
20 | from mmdet.apis import inference_detector, init_detector
21 |
22 | import open_clip
23 | from clip_benchmark.metrics import zeroshot_classification as zsc
24 | zsc.tqdm = lambda it, *args, **kwargs: it
25 |
26 | # Parse command-line arguments (image directory, output file, model paths, options)
27 |
def parse_args(argv=None):
    """Parse the command-line arguments of the evaluation script.

    Args:
        argv: Optional list of argument strings. When ``None`` (the default)
            argparse reads ``sys.argv[1:]``, which is the original behavior.

    Returns:
        argparse.Namespace with ``imagedir``, ``outfile``, ``model_config``,
        ``model_path`` and ``options``. ``options`` is converted from the list
        of ``KEY=VALUE`` strings into a ``dict`` mapping KEY to the (string)
        VALUE; only the first ``=`` separates key from value.

    Raises:
        SystemExit: via ``parser.error`` when an ``--options`` entry does not
            contain ``=`` (previously an opaque ``ValueError`` from ``dict``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("imagedir", type=str)
    parser.add_argument("--outfile", type=str, default="results.jsonl")
    parser.add_argument("--model-config", type=str, default=None)
    parser.add_argument("--model-path", type=str, default="./")
    # Other arguments
    parser.add_argument("--options", nargs="*", type=str, default=[])
    args = parser.parse_args(argv)

    # Reject malformed entries up front so the user gets a usage message
    # instead of a traceback from the dict() construction below.
    malformed = [opt for opt in args.options if "=" not in opt]
    if malformed:
        parser.error(
            "--options entries must look like KEY=VALUE, got: " + ", ".join(malformed)
        )
    args.options = dict(opt.split("=", 1) for opt in args.options)

    if args.model_config is None:
        # Default to the Mask2Former config located relative to the installed mmdet package
        args.model_config = os.path.join(
            os.path.dirname(mmdet.__file__),
            "../configs/mask2former/mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco.py"
        )
    return args
44 |
# Device on which the object detector and the CLIP model are loaded.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Fail with an explicit error rather than `assert`: assertions are stripped
# under `python -O`, which would silently let the script continue on CPU.
if DEVICE != "cuda":
    raise RuntimeError(
        "evaluate_images.py requires a CUDA device, but torch.cuda.is_available() is False"
    )
47 |
def timed(fn):
    """Decorator that reports the run time of every call to *fn* on stderr.

    The wrapper forwards all positional and keyword arguments, returns the
    wrapped function's result unchanged, and prints
    ``Function '<name>' executed in <seconds>s`` after a successful call.
    Nothing is printed when *fn* raises.

    Args:
        fn: The callable to time.

    Returns:
        A wrapper that keeps *fn*'s name and docstring.
    """
    import functools  # local import so this block does not depend on the file header

    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        # perf_counter is monotonic, so the elapsed time cannot be distorted
        # by system clock adjustments the way time.time() can.
        startt = time.perf_counter()
        result = fn(*args, **kwargs)
        endt = time.perf_counter()
        print(f'Function {fn.__name__!r} executed in {endt - startt:.3f}s', file=sys.stderr)
        return result
    return wrapper
56 |
57 | # Load models
58 |
@timed
def load_models(args):
    """Load the object detector, the CLIP components and the class-name list.

    Args:
        args: Parsed arguments; reads ``model_config``, ``model_path`` and the
            ``options`` dict (keys ``model`` and ``clip_model``).

    Returns:
        A triple ``(object_detector, (clip_model, transform, tokenizer), classnames)``
        where ``classnames`` holds the stripped lines of ``object_names.txt``
        located next to this file.
    """
    options = args.options

    # Object detector: the checkpoint is "<model name>.pth" inside args.model_path
    detector_name = options.get('model', "mask2former_swin-s-p4-w7-224_lsj_8x2_50e_coco")
    checkpoint_path = os.path.join(args.model_path, f"{detector_name}.pth")
    object_detector = init_detector(args.model_config, checkpoint_path, device=DEVICE)

    # CLIP model, its image transform and its tokenizer
    clip_arch = options.get('clip_model', "ViT-L-14")
    clip_model, _, transform = open_clip.create_model_and_transforms(
        clip_arch, pretrained="openai", device=DEVICE
    )
    tokenizer = open_clip.get_tokenizer(clip_arch)

    # One class name per line
    names_path = os.path.join(os.path.dirname(__file__), "object_names.txt")
    with open(names_path) as cls_file:
        classnames = list(map(str.strip, cls_file))

    return object_detector, (clip_model, transform, tokenizer), classnames
74 |
75 |
# Label set for zero-shot color classification; predictions index into this list.
COLORS = [
    "red", "orange", "yellow", "green", "blue",
    "purple", "pink", "brown", "black", "white",
]
# Cache of color classifiers, keyed by object class name (filled lazily).
COLOR_CLASSIFIERS = dict()
78 |
79 | # Evaluation parts
80 |
class ImageCrops(torch.utils.data.Dataset):
    """Dataset producing one preprocessed view of the image per detected object.

    Each item is ``(transform(view), 0)``; the second element is a constant
    placeholder label. Behavior is controlled by the module-level ``args.options``:
    ``bgcolor`` (fill color behind masked objects, or ``"original"`` to keep the
    image itself) and ``crop`` (``'1'`` crops the view to the object's box).
    """

    def __init__(self, image: Image.Image, objects):
        rgb_image = image.convert("RGB")
        self._image = rgb_image
        background = args.options.get('bgcolor', "#999")
        # With "original" the background is the image itself, so masking is a no-op
        self._blank = (
            rgb_image.copy()
            if background == "original"
            else Image.new("RGB", image.size, color=background)
        )
        self._objects = objects

    def __len__(self):
        return len(self._objects)

    def __getitem__(self, index):
        box, mask = self._objects[index]
        source = self._image
        if mask is None:
            view = source
        else:
            # PIL sizes are (width, height) while the mask shape is (rows, cols)
            assert tuple(source.size[::-1]) == tuple(mask.shape), (index, source.size[::-1], mask.shape)
            view = Image.composite(source, self._blank, Image.fromarray(mask))
        if args.options.get('crop', '1') == '1':
            view = view.crop(box[:4])
        return (transform(view), 0)
107 |
108 |
def color_classification(image, bboxes, classname):
    """Predict a color name for each detected object of class *classname*.

    A zero-shot classifier over ``COLORS`` is built once per class name and
    cached in ``COLOR_CLASSIFIERS``. The objects are turned into views with
    ``ImageCrops`` and classified in batches.

    Args:
        image: The PIL image the objects were detected in.
        bboxes: Sequence of ``(box, mask)`` pairs as consumed by ``ImageCrops``.
        classname: Object class name inserted into the text prompts.

    Returns:
        List of color names (entries of ``COLORS``), one per object.
    """
    try:
        clf = COLOR_CLASSIFIERS[classname]
    except KeyError:
        # "{c}" stays a literal placeholder for the classifier builder to fill
        templates = [
            f"a photo of a {{c}} {classname}",
            f"a photo of a {{c}}-colored {classname}",
            f"a photo of a {{c}} object"
        ]
        clf = COLOR_CLASSIFIERS[classname] = zsc.zero_shot_classifier(
            clip_model, tokenizer, COLORS, templates, DEVICE
        )

    crops = ImageCrops(image, bboxes)
    dataloader = torch.utils.data.DataLoader(crops, batch_size=16, num_workers=4)
    with torch.no_grad():
        pred, _ = zsc.run_classification(clip_model, clf, dataloader, DEVICE)
    best_indices = pred.argmax(1)
    return [COLORS[index.item()] for index in best_indices]
128 |
129 |
def compute_iou(box_a, box_b):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box_a: Indexable ``(x1, y1, x2, y2, ...)``; only the first four
            entries are read, so a trailing score is allowed.
        box_b: Same layout as *box_a*.

    Returns:
        The ratio intersection / union as a float. Widths and heights are
        measured as ``x2 - x1 + 1`` (clamped at 0). Returns ``0.0`` when the
        union area is zero.
    """
    def _area(box):
        width = max(box[2] - box[0] + 1, 0)
        height = max(box[3] - box[1] + 1, 0)
        return width * height

    # Overlap rectangle; it has a clamped extent of 0 when the boxes are disjoint
    overlap = (
        max(box_a[0], box_b[0]), max(box_a[1], box_b[1]),
        min(box_a[2], box_b[2]), min(box_a[3], box_b[3]),
    )
    i_area = _area(overlap)
    u_area = _area(box_a) + _area(box_b) - i_area
    return i_area / u_area if u_area else 0.0
138 |
139 |
def relative_position(obj_a, obj_b, threshold=None):
    """Give position of A relative to B, factoring in object dimensions.

    Args:
        obj_a: ``(box, mask)`` pair; only ``box[:4]`` = ``(x1, y1, x2, y2)`` is used.
        obj_b: Same layout as *obj_a*.
        threshold: Fraction of the summed box dimensions that is subtracted
            from the center offset on each axis before deciding a relation.
            Defaults to the module-level ``POSITION_THRESHOLD`` when ``None``,
            which is the original behavior.

    Returns:
        A set containing any of ``"left of"``, ``"right of"``, ``"above"``,
        ``"below"``. The set is empty when the reduced offset is below 1e-3 on
        both axes. Negative x offsets map to "left of" and negative y offsets
        to "above".
    """
    if threshold is None:
        # Fall back to the script-level setting (only defined when run as a script)
        threshold = POSITION_THRESHOLD
    # Shape (2 objects, 2 corners, 2 coordinates)
    boxes = np.array([obj_a[0], obj_b[0]])[:, :4].reshape(2, 2, 2)
    center_a, center_b = boxes.mean(axis=-2)
    dim_a, dim_b = np.abs(np.diff(boxes, axis=-2))[..., 0, :]
    offset = center_a - center_b
    # Shrink the offset by a margin proportional to the object sizes, keeping its sign
    revised_offset = np.maximum(np.abs(offset) - threshold * (dim_a + dim_b), 0) * np.sign(offset)
    if np.all(np.abs(revised_offset) < 1e-3):
        return set()
    # Normalize by the length of the *unreduced* offset; a zero-length offset
    # cannot reach this line because its reduced offset is zero as well.
    dx, dy = revised_offset / np.linalg.norm(offset)
    relations = set()
    if dx < -0.5:
        relations.add("left of")
    if dx > 0.5:
        relations.add("right of")
    if dy < -0.5:
        relations.add("above")
    if dy > 0.5:
        relations.add("below")
    return relations
158 |
159 |
def evaluate(image, objects, metadata):
    """
    Evaluate given image using detected objects on the global metadata specifications.
    Assumptions:
    * Metadata combines 'include' clauses with AND, and 'exclude' clauses with OR
    * All clauses are independent, i.e., duplicating a clause has no effect on the correctness
    * CHANGED: Color and position will only be evaluated on the most confidently predicted objects;
        therefore, objects are expected to appear in sorted order

    Args:
        image: Image passed through to ``color_classification`` for color checks.
        objects: Dict mapping class name to a list of ``(box, mask)`` detections.
        metadata: Dict with optional ``'include'`` and ``'exclude'`` lists of
            requirements. Each requirement has ``'class'`` and ``'count'``;
            include requirements may add ``'color'`` and ``'position'``
            (a pair ``(relation, index of an earlier include group)``).

    Returns:
        ``(correct, reason)`` where ``reason`` is a newline-joined description
        of every failed requirement (empty when everything passed).
    """
    correct = True
    reason = []
    # One entry per include requirement: the matched objects, or None on failure
    matched_groups = []
    # Check for expected objects
    for req in metadata.get('include', []):
        classname = req['class']
        matched = True
        # Only the first `count` detections are considered
        found_objects = objects.get(classname, [])[:req['count']]
        if len(found_objects) < req['count']:
            correct = matched = False
            reason.append(f"expected {classname}>={req['count']}, found {len(found_objects)}")
        else:
            if 'color' in req:
                # Color check
                colors = color_classification(image, found_objects, classname)
                if colors.count(req['color']) < req['count']:
                    correct = matched = False
                    reason.append(
                        f"expected {req['color']} {classname}>={req['count']}, found " +
                        f"{colors.count(req['color'])} {req['color']}; and " +
                        ", ".join(f"{colors.count(c)} {c}" for c in COLORS if c in colors)
                    )
            if 'position' in req and matched:
                # Relative position check
                expected_rel, target_group = req['position']
                if matched_groups[target_group] is None:
                    correct = matched = False
                    reason.append(f"no target for {classname} to be {expected_rel}")
                else:
                    # Every object must hold the relation to every target object
                    for obj in found_objects:
                        for target_obj in matched_groups[target_group]:
                            true_rels = relative_position(obj, target_obj)
                            if expected_rel not in true_rels:
                                correct = matched = False
                                # Sort so the message does not depend on set iteration order
                                reason.append(
                                    f"expected {classname} {expected_rel} target, found " +
                                    f"{' and '.join(sorted(true_rels))} target"
                                )
                                break
                        if not matched:
                            break
        if matched:
            matched_groups.append(found_objects)
        else:
            matched_groups.append(None)
    # Check for non-expected objects
    for req in metadata.get('exclude', []):
        classname = req['class']
        # Reuse the fetched list: indexing `objects` directly raised KeyError
        # for an absent class when the count was 0.
        excluded_found = objects.get(classname, [])
        if len(excluded_found) >= req['count']:
            correct = False
            reason.append(f"expected {classname}<{req['count']}, found {len(excluded_found)}")
    return correct, "\n".join(reason)
221 |
222 |
def evaluate_image(filepath, metadata):
    """Detect objects in one image and evaluate them against *metadata*.

    Args:
        filepath: Path of the image file; passed to the detector and opened with PIL.
        metadata: Prompt specification; ``'tag'`` and ``'prompt'`` are read
            here and the whole dict is forwarded to ``evaluate``.

    Returns:
        Dict with ``filename``, ``tag``, ``prompt``, ``correct``, ``reason``,
        ``metadata`` (JSON string) and ``details`` (JSON string mapping each
        detected class name to the list of its kept boxes).
    """
    result = inference_detector(object_detector, filepath)
    # The detector returns either the boxes alone or a (boxes, masks, ...) tuple
    bbox = result[0] if isinstance(result, tuple) else result
    segm = result[1] if isinstance(result, tuple) and len(result) > 1 else None
    # Close the file handle right away instead of leaving it to the garbage
    # collector; exif_transpose returns a new image, so it stays usable.
    with Image.open(filepath) as raw_image:
        image = ImageOps.exif_transpose(raw_image)
    detected = {}
    # Determine bounding boxes to keep
    confidence_threshold = THRESHOLD if metadata['tag'] != "counting" else COUNTING_THRESHOLD
    for index, classname in enumerate(classnames):
        # Column 4 holds the value used as detection confidence; sort descending
        ordering = np.argsort(bbox[index][:, 4])[::-1]
        ordering = ordering[bbox[index][ordering, 4] > confidence_threshold]  # Threshold
        ordering = ordering[:MAX_OBJECTS].tolist()  # Limit number of detected objects per class
        detected[classname] = []
        # Greedy suppression: keep the best remaining box, then drop the boxes
        # overlapping it by NMS_THRESHOLD or more (a threshold of 1 disables this)
        while ordering:
            max_obj = ordering.pop(0)
            detected[classname].append((bbox[index][max_obj], None if segm is None else segm[index][max_obj]))
            ordering = [
                obj for obj in ordering
                if NMS_THRESHOLD == 1 or compute_iou(bbox[index][max_obj], bbox[index][obj]) < NMS_THRESHOLD
            ]
        if not detected[classname]:
            del detected[classname]
    # Evaluate
    is_correct, reason = evaluate(image, detected, metadata)
    return {
        'filename': filepath,
        'tag': metadata['tag'],
        'prompt': metadata['prompt'],
        'correct': is_correct,
        'reason': reason,
        'metadata': json.dumps(metadata),
        'details': json.dumps({
            key: [box.tolist() for box, _ in value]
            for key, value in detected.items()
        })
    }
259 |
260 |
def main(args):
    """Evaluate every sample image under ``args.imagedir`` and write the results.

    Expected layout: ``<imagedir>/<digits>/metadata.jsonl`` (a single JSON
    document) and ``<imagedir>/<digits>/samples/<digits>.png``. Entries that do
    not match this layout are skipped.

    Args:
        args: Namespace providing ``imagedir`` and ``outfile``.

    Side effects:
        Creates the parent directory of ``args.outfile`` if needed and writes
        one JSON record per evaluated image (JSON lines).
    """
    full_results = []
    # Sorted so the order of the output records is reproducible across runs
    for subfolder in sorted(os.listdir(args.imagedir)):
        folderpath = os.path.join(args.imagedir, subfolder)
        if not os.path.isdir(folderpath) or not subfolder.isdigit():
            continue
        with open(os.path.join(folderpath, "metadata.jsonl")) as fp:
            metadata = json.load(fp)
        # Evaluate each image
        samples_dir = os.path.join(folderpath, "samples")
        for imagename in sorted(os.listdir(samples_dir)):
            imagepath = os.path.join(samples_dir, imagename)
            # fullmatch: re.match only anchors the start and would also accept
            # names such as "0001.png.bak"
            if not os.path.isfile(imagepath) or not re.fullmatch(r"\d+\.png", imagename):
                continue
            result = evaluate_image(imagepath, metadata)
            full_results.append(result)
    # Save results
    if os.path.dirname(args.outfile):
        os.makedirs(os.path.dirname(args.outfile), exist_ok=True)
    with open(args.outfile, "w") as fp:
        pd.DataFrame(full_results).to_json(fp, orient="records", lines=True)
281 |
282 |
# Script entry point. The names bound here are module globals that the
# functions in this file read by name, so they must be set before main() runs.
if __name__ == "__main__":
    args = parse_args()
    object_detector, (clip_model, transform, tokenizer), classnames = load_models(args)
    # Tunables taken from --options KEY=VALUE (values arrive as strings):
    # confidence cutoff for every tag except "counting"
    THRESHOLD = float(args.options.get('threshold', 0.3))
    # confidence cutoff used when the metadata tag is "counting"
    COUNTING_THRESHOLD = float(args.options.get('counting_threshold', 0.9))
    # maximum number of detections kept per class
    MAX_OBJECTS = int(args.options.get('max_objects', 16))
    # IoU at or above which a lower-ranked box is dropped; 1.0 disables suppression
    NMS_THRESHOLD = float(args.options.get('max_overlap', 1.0))
    # margin factor used by relative_position
    POSITION_THRESHOLD = float(args.options.get('position_threshold', 0.1))

    main(args)
293 |
--------------------------------------------------------------------------------
/prompts/generation_prompts.txt:
--------------------------------------------------------------------------------
1 | a photo of a bench
2 | a photo of a cow
3 | a photo of a bicycle
4 | a photo of a clock
5 | a photo of a carrot
6 | a photo of a suitcase
7 | a photo of a fork
8 | a photo of a surfboard
9 | a photo of a refrigerator
10 | a photo of a cup
11 | a photo of a microwave
12 | a photo of a potted plant
13 | a photo of a snowboard
14 | a photo of a zebra
15 | a photo of a parking meter
16 | a photo of a spoon
17 | a photo of a skateboard
18 | a photo of a car
19 | a photo of a motorcycle
20 | a photo of a traffic light
21 | a photo of a book
22 | a photo of a couch
23 | a photo of a backpack
24 | a photo of a computer keyboard
25 | a photo of a toaster
26 | a photo of a bird
27 | a photo of a bowl
28 | a photo of a dog
29 | a photo of a tie
30 | a photo of a laptop
31 | a photo of a computer mouse
32 | a photo of a sandwich
33 | a photo of a baseball bat
34 | a photo of a train
35 | a photo of a cell phone
36 | a photo of a chair
37 | a photo of a tv
38 | a photo of a broccoli
39 | a photo of a bed
40 | a photo of a skis
41 | a photo of a handbag
42 | a photo of a pizza
43 | a photo of a frisbee
44 | a photo of a scissors
45 | a photo of a bottle
46 | a photo of an elephant
47 | a photo of a toilet
48 | a photo of an oven
49 | a photo of an orange
50 | a photo of a person
51 | a photo of a teddy bear
52 | a photo of a vase
53 | a photo of a banana
54 | a photo of a toothbrush
55 | a photo of a tv remote
56 | a photo of a dining table
57 | a photo of a stop sign
58 | a photo of a sheep
59 | a photo of a fire hydrant
60 | a photo of an airplane
61 | a photo of a giraffe
62 | a photo of a horse
63 | a photo of a cat
64 | a photo of a donut
65 | a photo of a boat
66 | a photo of a baseball glove
67 | a photo of a hair drier
68 | a photo of a sink
69 | a photo of a cake
70 | a photo of a wine glass
71 | a photo of an apple
72 | a photo of a bus
73 | a photo of a tennis racket
74 | a photo of a knife
75 | a photo of a hot dog
76 | a photo of a truck
77 | a photo of an umbrella
78 | a photo of a sports ball
79 | a photo of a bear
80 | a photo of a kite
81 | a photo of a bench and a sports ball
82 | a photo of a toothbrush and a snowboard
83 | a photo of a toaster and an oven
84 | a photo of a broccoli and a vase
85 | a photo of a tennis racket and a wine glass
86 | a photo of a fork and a knife
87 | a photo of a hair drier and a cake
88 | a photo of a horse and a giraffe
89 | a photo of a horse and a computer keyboard
90 | a photo of a toothbrush and a carrot
91 | a photo of a cake and a zebra
92 | a photo of a hair drier and a bear
93 | a photo of a knife and a zebra
94 | a photo of a couch and a wine glass
95 | a photo of a frisbee and a vase
96 | a photo of a book and a laptop
97 | a photo of a dining table and a bear
98 | a photo of a frisbee and a couch
99 | a photo of a couch and a horse
100 | a photo of a toilet and a computer mouse
101 | a photo of a bottle and a refrigerator
102 | a photo of a potted plant and a backpack
103 | a photo of a skateboard and a cake
104 | a photo of a broccoli and a parking meter
105 | a photo of a zebra and a bed
106 | a photo of an oven and a bed
107 | a photo of a baseball bat and a fork
108 | a photo of a vase and a spoon
109 | a photo of a skateboard and a sink
110 | a photo of a pizza and a bench
111 | a photo of a bowl and a pizza
112 | a photo of a tennis racket and a bird
113 | a photo of a wine glass and a bear
114 | a photo of a fork and a book
115 | a photo of a scissors and a bowl
116 | a photo of a laptop and a carrot
117 | a photo of a stop sign and a bottle
118 | a photo of a microwave and a truck
119 | a photo of a person and a bear
120 | a photo of a frisbee and a cell phone
121 | a photo of a parking meter and a teddy bear
122 | a photo of a tennis racket and a bicycle
123 | a photo of a stop sign and a motorcycle
124 | a photo of a fire hydrant and a tennis racket
125 | a photo of a scissors and a sandwich
126 | a photo of a pizza and a book
127 | a photo of a giraffe and a computer mouse
128 | a photo of a stop sign and a toaster
129 | a photo of a computer mouse and a zebra
130 | a photo of a chair and a bench
131 | a photo of a tv and a carrot
132 | a photo of a surfboard and a suitcase
133 | a photo of a computer keyboard and a laptop
134 | a photo of a computer keyboard and a microwave
135 | a photo of a scissors and a bird
136 | a photo of a person and a snowboard
137 | a photo of a cow and a horse
138 | a photo of a handbag and a refrigerator
139 | a photo of a chair and a laptop
140 | a photo of a toothbrush and a bench
141 | a photo of a book and a baseball bat
142 | a photo of a horse and a train
143 | a photo of a bench and a vase
144 | a photo of a traffic light and a backpack
145 | a photo of a sports ball and a cow
146 | a photo of a computer mouse and a spoon
147 | a photo of a tv and a bicycle
148 | a photo of a bench and a snowboard
149 | a photo of a toothbrush and a toilet
150 | a photo of a person and an apple
151 | a photo of a sink and a sports ball
152 | a photo of a stop sign and a dog
153 | a photo of a knife and a stop sign
154 | a photo of a wine glass and a handbag
155 | a photo of a bowl and a skis
156 | a photo of a frisbee and an apple
157 | a photo of a computer keyboard and a cell phone
158 | a photo of a stop sign and a fork
159 | a photo of a potted plant and a boat
160 | a photo of a tv and a cell phone
161 | a photo of a tie and a broccoli
162 | a photo of a potted plant and a donut
163 | a photo of a person and a sink
164 | a photo of a couch and a snowboard
165 | a photo of a fork and a baseball glove
166 | a photo of an apple and a toothbrush
167 | a photo of a bus and a baseball glove
168 | a photo of a person and a stop sign
169 | a photo of a carrot and a couch
170 | a photo of a baseball bat and a bear
171 | a photo of a fire hydrant and a train
172 | a photo of a baseball glove and a carrot
173 | a photo of a microwave and a bench
174 | a photo of a cake and a stop sign
175 | a photo of a car and a computer mouse
176 | a photo of a suitcase and a dining table
177 | a photo of a person and a traffic light
178 | a photo of a cell phone and a horse
179 | a photo of a baseball bat and a giraffe
180 | a photo of two clocks
181 | a photo of two backpacks
182 | a photo of four handbags
183 | a photo of two frisbees
184 | a photo of three sports balls
185 | a photo of two bears
186 | a photo of two ties
187 | a photo of four sinks
188 | a photo of two toothbrushs
189 | a photo of three persons
190 | a photo of three tennis rackets
191 | a photo of four bowls
192 | a photo of four vases
193 | a photo of three cups
194 | a photo of four computer keyboards
195 | a photo of three sinks
196 | a photo of two ovens
197 | a photo of two toilets
198 | a photo of two bicycles
199 | a photo of two trains
200 | a photo of three oranges
201 | a photo of three buses
202 | a photo of three handbags
203 | a photo of three snowboards
204 | a photo of two snowboards
205 | a photo of four dogs
206 | a photo of three apples
207 | a photo of two sheeps
208 | a photo of three hot dogs
209 | a photo of three zebras
210 | a photo of three kites
211 | a photo of four apples
212 | a photo of three cell phones
213 | a photo of four baseball gloves
214 | a photo of three computer keyboards
215 | a photo of two beds
216 | a photo of two tv remotes
217 | a photo of three fire hydrants
218 | a photo of three books
219 | a photo of four giraffes
220 | a photo of two vases
221 | a photo of four donuts
222 | a photo of four chairs
223 | a photo of three baseball bats
224 | a photo of four stop signs
225 | a photo of two pizzas
226 | a photo of three refrigerators
227 | a photo of two fire hydrants
228 | a photo of three giraffes
229 | a photo of four tvs
230 | a photo of three wine glasses
231 | a photo of four broccolis
232 | a photo of three trucks
233 | a photo of two trucks
234 | a photo of two carrots
235 | a photo of two sandwichs
236 | a photo of four traffic lights
237 | a photo of four clocks
238 | a photo of two cars
239 | a photo of two bananas
240 | a photo of two wine glasses
241 | a photo of three pizzas
242 | a photo of four knifes
243 | a photo of three suitcases
244 | a photo of four zebras
245 | a photo of two teddy bears
246 | a photo of four skateboards
247 | a photo of four hot dogs
248 | a photo of three birds
249 | a photo of four boats
250 | a photo of four microwaves
251 | a photo of two hair driers
252 | a photo of three laptops
253 | a photo of three cows
254 | a photo of two parking meters
255 | a photo of four benchs
256 | a photo of three benchs
257 | a photo of four frisbees
258 | a photo of four books
259 | a photo of four buses
260 | a photo of a blue fire hydrant
261 | a photo of a pink car
262 | a photo of a purple cup
263 | a photo of a blue cow
264 | a photo of a yellow boat
265 | a photo of a blue umbrella
266 | a photo of a blue elephant
267 | a photo of a yellow elephant
268 | a photo of a red bicycle
269 | a photo of a purple suitcase
270 | a photo of a purple hair drier
271 | a photo of a white sandwich
272 | a photo of a purple elephant
273 | a photo of a green microwave
274 | a photo of a red zebra
275 | a photo of a red apple
276 | a photo of a yellow tv remote
277 | a photo of a blue toilet
278 | a photo of an orange orange
279 | a photo of a black donut
280 | a photo of a red vase
281 | a photo of a purple pizza
282 | a photo of a pink skateboard
283 | a photo of a green skateboard
284 | a photo of a purple bear
285 | a photo of a brown chair
286 | a photo of a brown computer keyboard
287 | a photo of an orange cow
288 | a photo of a brown skis
289 | a photo of a white kite
290 | a photo of a red dog
291 | a photo of a green couch
292 | a photo of a yellow airplane
293 | a photo of an orange tv
294 | a photo of a white scissors
295 | a photo of a pink cell phone
296 | a photo of a green surfboard
297 | a photo of a white fire hydrant
298 | a photo of a black bicycle
299 | a photo of a purple carrot
300 | a photo of a black dining table
301 | a photo of a purple potted plant
302 | a photo of a purple backpack
303 | a photo of a yellow train
304 | a photo of a pink potted plant
305 | a photo of a red giraffe
306 | a photo of a brown bear
307 | a photo of a black train
308 | a photo of an orange laptop
309 | a photo of a green hot dog
310 | a photo of a yellow parking meter
311 | a photo of a red potted plant
312 | a photo of a green traffic light
313 | a photo of a blue tv
314 | a photo of a brown refrigerator
315 | a photo of a black tv remote
316 | a photo of a purple scissors
317 | a photo of a yellow orange
318 | a photo of a brown toaster
319 | a photo of a red parking meter
320 | a photo of a brown orange
321 | a photo of a green clock
322 | a photo of a white sheep
323 | a photo of a yellow oven
324 | a photo of a green vase
325 | a photo of a black teddy bear
326 | a photo of a yellow carrot
327 | a photo of a black hot dog
328 | a photo of a red scissors
329 | a photo of a white teddy bear
330 | a photo of a black skis
331 | a photo of a blue dining table
332 | a photo of a black refrigerator
333 | a photo of a white dog
334 | a photo of an orange scissors
335 | a photo of a red cell phone
336 | a photo of a white orange
337 | a photo of a blue clock
338 | a photo of a blue carrot
339 | a photo of a green motorcycle
340 | a photo of a pink stop sign
341 | a photo of a black vase
342 | a photo of a black backpack
343 | a photo of a red car
344 | a photo of a green computer mouse
345 | a photo of a red backpack
346 | a photo of a green bus
347 | a photo of an orange toaster
348 | a photo of a yellow fork
349 | a photo of a pink parking meter
350 | a photo of a blue book
351 | a photo of a yellow broccoli
352 | a photo of an orange computer mouse
353 | a photo of a red cake
354 | a photo of a dog right of a teddy bear
355 | a photo of a wine glass above a kite
356 | a photo of a couch below a cup
357 | a photo of a laptop left of a cow
358 | a photo of a fork above a hair drier
359 | a photo of a tie right of a baseball bat
360 | a photo of a stop sign above a fork
361 | a photo of a bird below a skateboard
362 | a photo of an apple above a tv
363 | a photo of a train above a potted plant
364 | a photo of a truck left of a refrigerator
365 | a photo of a tv remote below a cow
366 | a photo of a bottle right of a train
367 | a photo of a dog above a cow
368 | a photo of a skateboard above a person
369 | a photo of a baseball glove below an umbrella
370 | a photo of a dining table right of an oven
371 | a photo of a hot dog left of a suitcase
372 | a photo of a bus below a toothbrush
373 | a photo of a backpack right of a sandwich
374 | a photo of a cake below a baseball bat
375 | a photo of a dog right of a tie
376 | a photo of a suitcase right of a boat
377 | a photo of a bear above a clock
378 | a photo of a tv remote left of an umbrella
379 | a photo of a sports ball left of an umbrella
380 | a photo of a train right of a dining table
381 | a photo of a hair drier below an elephant
382 | a photo of a tennis racket right of a spoon
383 | a photo of a wine glass right of a hot dog
384 | a photo of a computer mouse left of a bench
385 | a photo of a carrot left of an orange
386 | a photo of a kite above a toothbrush
387 | a photo of a toaster below a traffic light
388 | a photo of a cat below a baseball glove
389 | a photo of a skis right of a zebra
390 | a photo of a stop sign above a chair
391 | a photo of a stop sign above a parking meter
392 | a photo of a hot dog right of a skateboard
393 | a photo of a pizza below a computer keyboard
394 | a photo of a hair drier left of a toilet
395 | a photo of a cow left of a stop sign
396 | a photo of a suitcase above a skis
397 | a photo of a book above a laptop
398 | a photo of a toothbrush below a pizza
399 | a photo of a toilet left of a kite
400 | a photo of a tie above a sink
401 | a photo of a bird left of a couch
402 | a photo of a bed right of a sports ball
403 | a photo of an elephant below a surfboard
404 | a photo of a frisbee right of a motorcycle
405 | a photo of a vase above a fire hydrant
406 | a photo of a zebra left of an elephant
407 | a photo of a bench left of a bear
408 | a photo of a donut right of a bench
409 | a photo of a frisbee below a horse
410 | a photo of a computer keyboard above a snowboard
411 | a photo of a tv below a cow
412 | a photo of an elephant below a horse
413 | a photo of a suitcase left of a banana
414 | a photo of a train below an airplane
415 | a photo of a cat below a backpack
416 | a photo of a backpack below a cake
417 | a photo of a sandwich below a knife
418 | a photo of a bicycle above a parking meter
419 | a photo of a knife right of a suitcase
420 | a photo of a hot dog above a knife
421 | a photo of a zebra right of a parking meter
422 | a photo of a chair left of a zebra
423 | a photo of a cow below an airplane
424 | a photo of a cup left of an umbrella
425 | a photo of a zebra below a computer keyboard
426 | a photo of a zebra below a broccoli
427 | a photo of a laptop below a sports ball
428 | a photo of a truck left of a baseball bat
429 | a photo of a refrigerator above a baseball bat
430 | a photo of a tv above a baseball bat
431 | a photo of a baseball glove right of a bear
432 | a photo of a refrigerator below a scissors
433 | a photo of a dining table above a suitcase
434 | a photo of a parking meter above a broccoli
435 | a photo of a frisbee above a truck
436 | a photo of a pizza right of a banana
437 | a photo of a bus above a boat
438 | a photo of a cell phone left of a tennis racket
439 | a photo of a horse right of a broccoli
440 | a photo of a broccoli above a bottle
441 | a photo of a vase right of a horse
442 | a photo of a bear above a spoon
443 | a photo of a zebra right of a bed
444 | a photo of a cow right of a laptop
445 | a photo of a bed right of a frisbee
446 | a photo of a tie right of a motorcycle
447 | a photo of a laptop right of a tv
448 | a photo of a cell phone right of a chair
449 | a photo of a couch below a potted plant
450 | a photo of a clock below a tv
451 | a photo of a couch below a vase
452 | a photo of a donut below a cat
453 | a photo of a couch left of a toaster
454 | a photo of a purple wine glass and a black apple
455 | a photo of a green bus and a purple microwave
456 | a photo of a green skis and a brown airplane
457 | a photo of a yellow computer keyboard and a black sink
458 | a photo of a pink oven and a green motorcycle
459 | a photo of a purple parking meter and a red laptop
460 | a photo of a yellow skateboard and an orange computer mouse
461 | a photo of a red skis and a brown tie
462 | a photo of a pink skateboard and a black train
463 | a photo of a white handbag and a purple bed
464 | a photo of a purple elephant and a brown sports ball
465 | a photo of a purple dog and a black dining table
466 | a photo of a white dining table and a red car
467 | a photo of a blue cell phone and a green apple
468 | a photo of a red car and an orange potted plant
469 | a photo of a brown carrot and a white potted plant
470 | a photo of a black kite and a green bear
471 | a photo of a blue laptop and a brown bear
472 | a photo of a green teddy bear and a brown kite
473 | a photo of a yellow stop sign and a blue potted plant
474 | a photo of an orange snowboard and a green cat
475 | a photo of an orange truck and a pink sink
476 | a photo of a brown hot dog and a purple pizza
477 | a photo of a green couch and an orange umbrella
478 | a photo of a brown bed and a pink cell phone
479 | a photo of a black broccoli and a yellow cake
480 | a photo of a red train and a purple bear
481 | a photo of a purple tennis racket and a black sink
482 | a photo of a blue vase and a black banana
483 | a photo of a blue clock and a white cup
484 | a photo of a red umbrella and a blue couch
485 | a photo of a white handbag and a red giraffe
486 | a photo of a pink tv remote and a blue airplane
487 | a photo of a pink handbag and a black scissors
488 | a photo of a brown car and a pink hair drier
489 | a photo of a black bus and a brown cell phone
490 | a photo of a purple sheep and a pink banana
491 | a photo of a blue handbag and a white cell phone
492 | a photo of a white pizza and a green umbrella
493 | a photo of a white tie and a purple skateboard
494 | a photo of a yellow sports ball and a green boat
495 | a photo of a white wine glass and a brown giraffe
496 | a photo of a yellow bowl and a white baseball glove
497 | a photo of an orange microwave and a black spoon
498 | a photo of an orange skateboard and a pink bowl
499 | a photo of a blue toilet and a white suitcase
500 | a photo of a white boat and an orange hot dog
501 | a photo of a yellow dining table and a pink dog
502 | a photo of a red cake and a purple chair
503 | a photo of a blue tie and a pink dining table
504 | a photo of a blue cow and a black computer keyboard
505 | a photo of a yellow pizza and a green oven
506 | a photo of a red laptop and a brown car
507 | a photo of a purple computer keyboard and a blue scissors
508 | a photo of a green surfboard and an orange oven
509 | a photo of a yellow parking meter and a pink refrigerator
510 | a photo of a brown computer mouse and a purple bottle
511 | a photo of a red umbrella and a green cow
512 | a photo of a red giraffe and a black cell phone
513 | a photo of a brown oven and a purple train
514 | a photo of a blue baseball bat and a pink book
515 | a photo of a green cup and a yellow bowl
516 | a photo of a yellow suitcase and a brown bus
517 | a photo of an orange motorcycle and a pink donut
518 | a photo of an orange giraffe and a white baseball glove
519 | a photo of an orange handbag and a green carrot
520 | a photo of a black bottle and a white refrigerator
521 | a photo of a white dog and a blue potted plant
522 | a photo of an orange handbag and a red car
523 | a photo of a red stop sign and a blue book
524 | a photo of a yellow car and an orange toothbrush
525 | a photo of a black potted plant and a yellow toilet
526 | a photo of a brown dining table and a white suitcase
527 | a photo of an orange donut and a yellow stop sign
528 | a photo of a green suitcase and a blue boat
529 | a photo of an orange tennis racket and a yellow sports ball
530 | a photo of a purple computer keyboard and a red chair
531 | a photo of a purple suitcase and an orange pizza
532 | a photo of a white bottle and a blue sheep
533 | a photo of a purple backpack and a white umbrella
534 | a photo of an orange potted plant and a black spoon
535 | a photo of a green tennis racket and a black dog
536 | a photo of a yellow handbag and a blue refrigerator
537 | a photo of a pink broccoli and a red sink
538 | a photo of a red bowl and a pink sink
539 | a photo of a white toilet and a red apple
540 | a photo of a pink dining table and a black sandwich
541 | a photo of a black car and a green parking meter
542 | a photo of a yellow bird and a black motorcycle
543 | a photo of a brown giraffe and a white stop sign
544 | a photo of a white banana and a black elephant
545 | a photo of an orange cow and a purple sandwich
546 | a photo of a red clock and a black cell phone
547 | a photo of a brown knife and a blue donut
548 | a photo of a red cup and a pink handbag
549 | a photo of a yellow bicycle and a red motorcycle
550 | a photo of a red orange and a purple broccoli
551 | a photo of an orange traffic light and a white toilet
552 | a photo of a green cup and a red pizza
553 | a photo of a blue pizza and a yellow baseball glove
554 |
--------------------------------------------------------------------------------
/prompts/evaluation_metadata.jsonl:
--------------------------------------------------------------------------------
1 | {"tag": "single_object", "include": [{"class": "bench", "count": 1}], "prompt": "a photo of a bench"}
2 | {"tag": "single_object", "include": [{"class": "cow", "count": 1}], "prompt": "a photo of a cow"}
3 | {"tag": "single_object", "include": [{"class": "bicycle", "count": 1}], "prompt": "a photo of a bicycle"}
4 | {"tag": "single_object", "include": [{"class": "clock", "count": 1}], "prompt": "a photo of a clock"}
5 | {"tag": "single_object", "include": [{"class": "carrot", "count": 1}], "prompt": "a photo of a carrot"}
6 | {"tag": "single_object", "include": [{"class": "suitcase", "count": 1}], "prompt": "a photo of a suitcase"}
7 | {"tag": "single_object", "include": [{"class": "fork", "count": 1}], "prompt": "a photo of a fork"}
8 | {"tag": "single_object", "include": [{"class": "surfboard", "count": 1}], "prompt": "a photo of a surfboard"}
9 | {"tag": "single_object", "include": [{"class": "refrigerator", "count": 1}], "prompt": "a photo of a refrigerator"}
10 | {"tag": "single_object", "include": [{"class": "cup", "count": 1}], "prompt": "a photo of a cup"}
11 | {"tag": "single_object", "include": [{"class": "microwave", "count": 1}], "prompt": "a photo of a microwave"}
12 | {"tag": "single_object", "include": [{"class": "potted plant", "count": 1}], "prompt": "a photo of a potted plant"}
13 | {"tag": "single_object", "include": [{"class": "snowboard", "count": 1}], "prompt": "a photo of a snowboard"}
14 | {"tag": "single_object", "include": [{"class": "zebra", "count": 1}], "prompt": "a photo of a zebra"}
15 | {"tag": "single_object", "include": [{"class": "parking meter", "count": 1}], "prompt": "a photo of a parking meter"}
16 | {"tag": "single_object", "include": [{"class": "spoon", "count": 1}], "prompt": "a photo of a spoon"}
17 | {"tag": "single_object", "include": [{"class": "skateboard", "count": 1}], "prompt": "a photo of a skateboard"}
18 | {"tag": "single_object", "include": [{"class": "car", "count": 1}], "prompt": "a photo of a car"}
19 | {"tag": "single_object", "include": [{"class": "motorcycle", "count": 1}], "prompt": "a photo of a motorcycle"}
20 | {"tag": "single_object", "include": [{"class": "traffic light", "count": 1}], "prompt": "a photo of a traffic light"}
21 | {"tag": "single_object", "include": [{"class": "book", "count": 1}], "prompt": "a photo of a book"}
22 | {"tag": "single_object", "include": [{"class": "couch", "count": 1}], "prompt": "a photo of a couch"}
23 | {"tag": "single_object", "include": [{"class": "backpack", "count": 1}], "prompt": "a photo of a backpack"}
24 | {"tag": "single_object", "include": [{"class": "computer keyboard", "count": 1}], "prompt": "a photo of a computer keyboard"}
25 | {"tag": "single_object", "include": [{"class": "toaster", "count": 1}], "prompt": "a photo of a toaster"}
26 | {"tag": "single_object", "include": [{"class": "bird", "count": 1}], "prompt": "a photo of a bird"}
27 | {"tag": "single_object", "include": [{"class": "bowl", "count": 1}], "prompt": "a photo of a bowl"}
28 | {"tag": "single_object", "include": [{"class": "dog", "count": 1}], "prompt": "a photo of a dog"}
29 | {"tag": "single_object", "include": [{"class": "tie", "count": 1}], "prompt": "a photo of a tie"}
30 | {"tag": "single_object", "include": [{"class": "laptop", "count": 1}], "prompt": "a photo of a laptop"}
31 | {"tag": "single_object", "include": [{"class": "computer mouse", "count": 1}], "prompt": "a photo of a computer mouse"}
32 | {"tag": "single_object", "include": [{"class": "sandwich", "count": 1}], "prompt": "a photo of a sandwich"}
33 | {"tag": "single_object", "include": [{"class": "baseball bat", "count": 1}], "prompt": "a photo of a baseball bat"}
34 | {"tag": "single_object", "include": [{"class": "train", "count": 1}], "prompt": "a photo of a train"}
35 | {"tag": "single_object", "include": [{"class": "cell phone", "count": 1}], "prompt": "a photo of a cell phone"}
36 | {"tag": "single_object", "include": [{"class": "chair", "count": 1}], "prompt": "a photo of a chair"}
37 | {"tag": "single_object", "include": [{"class": "tv", "count": 1}], "prompt": "a photo of a tv"}
38 | {"tag": "single_object", "include": [{"class": "broccoli", "count": 1}], "prompt": "a photo of a broccoli"}
39 | {"tag": "single_object", "include": [{"class": "bed", "count": 1}], "prompt": "a photo of a bed"}
40 | {"tag": "single_object", "include": [{"class": "skis", "count": 1}], "prompt": "a photo of a skis"}
41 | {"tag": "single_object", "include": [{"class": "handbag", "count": 1}], "prompt": "a photo of a handbag"}
42 | {"tag": "single_object", "include": [{"class": "pizza", "count": 1}], "prompt": "a photo of a pizza"}
43 | {"tag": "single_object", "include": [{"class": "frisbee", "count": 1}], "prompt": "a photo of a frisbee"}
44 | {"tag": "single_object", "include": [{"class": "scissors", "count": 1}], "prompt": "a photo of a scissors"}
45 | {"tag": "single_object", "include": [{"class": "bottle", "count": 1}], "prompt": "a photo of a bottle"}
46 | {"tag": "single_object", "include": [{"class": "elephant", "count": 1}], "prompt": "a photo of an elephant"}
47 | {"tag": "single_object", "include": [{"class": "toilet", "count": 1}], "prompt": "a photo of a toilet"}
48 | {"tag": "single_object", "include": [{"class": "oven", "count": 1}], "prompt": "a photo of an oven"}
49 | {"tag": "single_object", "include": [{"class": "orange", "count": 1}], "prompt": "a photo of an orange"}
50 | {"tag": "single_object", "include": [{"class": "person", "count": 1}], "prompt": "a photo of a person"}
51 | {"tag": "single_object", "include": [{"class": "teddy bear", "count": 1}], "prompt": "a photo of a teddy bear"}
52 | {"tag": "single_object", "include": [{"class": "vase", "count": 1}], "prompt": "a photo of a vase"}
53 | {"tag": "single_object", "include": [{"class": "banana", "count": 1}], "prompt": "a photo of a banana"}
54 | {"tag": "single_object", "include": [{"class": "toothbrush", "count": 1}], "prompt": "a photo of a toothbrush"}
55 | {"tag": "single_object", "include": [{"class": "tv remote", "count": 1}], "prompt": "a photo of a tv remote"}
56 | {"tag": "single_object", "include": [{"class": "dining table", "count": 1}], "prompt": "a photo of a dining table"}
57 | {"tag": "single_object", "include": [{"class": "stop sign", "count": 1}], "prompt": "a photo of a stop sign"}
58 | {"tag": "single_object", "include": [{"class": "sheep", "count": 1}], "prompt": "a photo of a sheep"}
59 | {"tag": "single_object", "include": [{"class": "fire hydrant", "count": 1}], "prompt": "a photo of a fire hydrant"}
60 | {"tag": "single_object", "include": [{"class": "airplane", "count": 1}], "prompt": "a photo of an airplane"}
61 | {"tag": "single_object", "include": [{"class": "giraffe", "count": 1}], "prompt": "a photo of a giraffe"}
62 | {"tag": "single_object", "include": [{"class": "horse", "count": 1}], "prompt": "a photo of a horse"}
63 | {"tag": "single_object", "include": [{"class": "cat", "count": 1}], "prompt": "a photo of a cat"}
64 | {"tag": "single_object", "include": [{"class": "donut", "count": 1}], "prompt": "a photo of a donut"}
65 | {"tag": "single_object", "include": [{"class": "boat", "count": 1}], "prompt": "a photo of a boat"}
66 | {"tag": "single_object", "include": [{"class": "baseball glove", "count": 1}], "prompt": "a photo of a baseball glove"}
67 | {"tag": "single_object", "include": [{"class": "hair drier", "count": 1}], "prompt": "a photo of a hair drier"}
68 | {"tag": "single_object", "include": [{"class": "sink", "count": 1}], "prompt": "a photo of a sink"}
69 | {"tag": "single_object", "include": [{"class": "cake", "count": 1}], "prompt": "a photo of a cake"}
70 | {"tag": "single_object", "include": [{"class": "wine glass", "count": 1}], "prompt": "a photo of a wine glass"}
71 | {"tag": "single_object", "include": [{"class": "apple", "count": 1}], "prompt": "a photo of an apple"}
72 | {"tag": "single_object", "include": [{"class": "bus", "count": 1}], "prompt": "a photo of a bus"}
73 | {"tag": "single_object", "include": [{"class": "tennis racket", "count": 1}], "prompt": "a photo of a tennis racket"}
74 | {"tag": "single_object", "include": [{"class": "knife", "count": 1}], "prompt": "a photo of a knife"}
75 | {"tag": "single_object", "include": [{"class": "hot dog", "count": 1}], "prompt": "a photo of a hot dog"}
76 | {"tag": "single_object", "include": [{"class": "truck", "count": 1}], "prompt": "a photo of a truck"}
77 | {"tag": "single_object", "include": [{"class": "umbrella", "count": 1}], "prompt": "a photo of an umbrella"}
78 | {"tag": "single_object", "include": [{"class": "sports ball", "count": 1}], "prompt": "a photo of a sports ball"}
79 | {"tag": "single_object", "include": [{"class": "bear", "count": 1}], "prompt": "a photo of a bear"}
80 | {"tag": "single_object", "include": [{"class": "kite", "count": 1}], "prompt": "a photo of a kite"}
81 | {"tag": "two_object", "include": [{"class": "bench", "count": 1}, {"class": "sports ball", "count": 1}], "prompt": "a photo of a bench and a sports ball"}
82 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a toothbrush and a snowboard"}
83 | {"tag": "two_object", "include": [{"class": "toaster", "count": 1}, {"class": "oven", "count": 1}], "prompt": "a photo of a toaster and an oven"}
84 | {"tag": "two_object", "include": [{"class": "broccoli", "count": 1}, {"class": "vase", "count": 1}], "prompt": "a photo of a broccoli and a vase"}
85 | {"tag": "two_object", "include": [{"class": "tennis racket", "count": 1}, {"class": "wine glass", "count": 1}], "prompt": "a photo of a tennis racket and a wine glass"}
86 | {"tag": "two_object", "include": [{"class": "fork", "count": 1}, {"class": "knife", "count": 1}], "prompt": "a photo of a fork and a knife"}
87 | {"tag": "two_object", "include": [{"class": "hair drier", "count": 1}, {"class": "cake", "count": 1}], "prompt": "a photo of a hair drier and a cake"}
88 | {"tag": "two_object", "include": [{"class": "horse", "count": 1}, {"class": "giraffe", "count": 1}], "prompt": "a photo of a horse and a giraffe"}
89 | {"tag": "two_object", "include": [{"class": "horse", "count": 1}, {"class": "computer keyboard", "count": 1}], "prompt": "a photo of a horse and a computer keyboard"}
90 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a toothbrush and a carrot"}
91 | {"tag": "two_object", "include": [{"class": "cake", "count": 1}, {"class": "zebra", "count": 1}], "prompt": "a photo of a cake and a zebra"}
92 | {"tag": "two_object", "include": [{"class": "hair drier", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a hair drier and a bear"}
93 | {"tag": "two_object", "include": [{"class": "knife", "count": 1}, {"class": "zebra", "count": 1}], "prompt": "a photo of a knife and a zebra"}
94 | {"tag": "two_object", "include": [{"class": "couch", "count": 1}, {"class": "wine glass", "count": 1}], "prompt": "a photo of a couch and a wine glass"}
95 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "vase", "count": 1}], "prompt": "a photo of a frisbee and a vase"}
96 | {"tag": "two_object", "include": [{"class": "book", "count": 1}, {"class": "laptop", "count": 1}], "prompt": "a photo of a book and a laptop"}
97 | {"tag": "two_object", "include": [{"class": "dining table", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a dining table and a bear"}
98 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "couch", "count": 1}], "prompt": "a photo of a frisbee and a couch"}
99 | {"tag": "two_object", "include": [{"class": "couch", "count": 1}, {"class": "horse", "count": 1}], "prompt": "a photo of a couch and a horse"}
100 | {"tag": "two_object", "include": [{"class": "toilet", "count": 1}, {"class": "computer mouse", "count": 1}], "prompt": "a photo of a toilet and a computer mouse"}
101 | {"tag": "two_object", "include": [{"class": "bottle", "count": 1}, {"class": "refrigerator", "count": 1}], "prompt": "a photo of a bottle and a refrigerator"}
102 | {"tag": "two_object", "include": [{"class": "potted plant", "count": 1}, {"class": "backpack", "count": 1}], "prompt": "a photo of a potted plant and a backpack"}
103 | {"tag": "two_object", "include": [{"class": "skateboard", "count": 1}, {"class": "cake", "count": 1}], "prompt": "a photo of a skateboard and a cake"}
104 | {"tag": "two_object", "include": [{"class": "broccoli", "count": 1}, {"class": "parking meter", "count": 1}], "prompt": "a photo of a broccoli and a parking meter"}
105 | {"tag": "two_object", "include": [{"class": "zebra", "count": 1}, {"class": "bed", "count": 1}], "prompt": "a photo of a zebra and a bed"}
106 | {"tag": "two_object", "include": [{"class": "oven", "count": 1}, {"class": "bed", "count": 1}], "prompt": "a photo of an oven and a bed"}
107 | {"tag": "two_object", "include": [{"class": "baseball bat", "count": 1}, {"class": "fork", "count": 1}], "prompt": "a photo of a baseball bat and a fork"}
108 | {"tag": "two_object", "include": [{"class": "vase", "count": 1}, {"class": "spoon", "count": 1}], "prompt": "a photo of a vase and a spoon"}
109 | {"tag": "two_object", "include": [{"class": "skateboard", "count": 1}, {"class": "sink", "count": 1}], "prompt": "a photo of a skateboard and a sink"}
110 | {"tag": "two_object", "include": [{"class": "pizza", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a pizza and a bench"}
111 | {"tag": "two_object", "include": [{"class": "bowl", "count": 1}, {"class": "pizza", "count": 1}], "prompt": "a photo of a bowl and a pizza"}
112 | {"tag": "two_object", "include": [{"class": "tennis racket", "count": 1}, {"class": "bird", "count": 1}], "prompt": "a photo of a tennis racket and a bird"}
113 | {"tag": "two_object", "include": [{"class": "wine glass", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a wine glass and a bear"}
114 | {"tag": "two_object", "include": [{"class": "fork", "count": 1}, {"class": "book", "count": 1}], "prompt": "a photo of a fork and a book"}
115 | {"tag": "two_object", "include": [{"class": "scissors", "count": 1}, {"class": "bowl", "count": 1}], "prompt": "a photo of a scissors and a bowl"}
116 | {"tag": "two_object", "include": [{"class": "laptop", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a laptop and a carrot"}
117 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "bottle", "count": 1}], "prompt": "a photo of a stop sign and a bottle"}
118 | {"tag": "two_object", "include": [{"class": "microwave", "count": 1}, {"class": "truck", "count": 1}], "prompt": "a photo of a microwave and a truck"}
119 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a person and a bear"}
120 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "cell phone", "count": 1}], "prompt": "a photo of a frisbee and a cell phone"}
121 | {"tag": "two_object", "include": [{"class": "parking meter", "count": 1}, {"class": "teddy bear", "count": 1}], "prompt": "a photo of a parking meter and a teddy bear"}
122 | {"tag": "two_object", "include": [{"class": "tennis racket", "count": 1}, {"class": "bicycle", "count": 1}], "prompt": "a photo of a tennis racket and a bicycle"}
123 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "motorcycle", "count": 1}], "prompt": "a photo of a stop sign and a motorcycle"}
124 | {"tag": "two_object", "include": [{"class": "fire hydrant", "count": 1}, {"class": "tennis racket", "count": 1}], "prompt": "a photo of a fire hydrant and a tennis racket"}
125 | {"tag": "two_object", "include": [{"class": "scissors", "count": 1}, {"class": "sandwich", "count": 1}], "prompt": "a photo of a scissors and a sandwich"}
126 | {"tag": "two_object", "include": [{"class": "pizza", "count": 1}, {"class": "book", "count": 1}], "prompt": "a photo of a pizza and a book"}
127 | {"tag": "two_object", "include": [{"class": "giraffe", "count": 1}, {"class": "computer mouse", "count": 1}], "prompt": "a photo of a giraffe and a computer mouse"}
128 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "toaster", "count": 1}], "prompt": "a photo of a stop sign and a toaster"}
129 | {"tag": "two_object", "include": [{"class": "computer mouse", "count": 1}, {"class": "zebra", "count": 1}], "prompt": "a photo of a computer mouse and a zebra"}
130 | {"tag": "two_object", "include": [{"class": "chair", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a chair and a bench"}
131 | {"tag": "two_object", "include": [{"class": "tv", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a tv and a carrot"}
132 | {"tag": "two_object", "include": [{"class": "surfboard", "count": 1}, {"class": "suitcase", "count": 1}], "prompt": "a photo of a surfboard and a suitcase"}
133 | {"tag": "two_object", "include": [{"class": "computer keyboard", "count": 1}, {"class": "laptop", "count": 1}], "prompt": "a photo of a computer keyboard and a laptop"}
134 | {"tag": "two_object", "include": [{"class": "computer keyboard", "count": 1}, {"class": "microwave", "count": 1}], "prompt": "a photo of a computer keyboard and a microwave"}
135 | {"tag": "two_object", "include": [{"class": "scissors", "count": 1}, {"class": "bird", "count": 1}], "prompt": "a photo of a scissors and a bird"}
136 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a person and a snowboard"}
137 | {"tag": "two_object", "include": [{"class": "cow", "count": 1}, {"class": "horse", "count": 1}], "prompt": "a photo of a cow and a horse"}
138 | {"tag": "two_object", "include": [{"class": "handbag", "count": 1}, {"class": "refrigerator", "count": 1}], "prompt": "a photo of a handbag and a refrigerator"}
139 | {"tag": "two_object", "include": [{"class": "chair", "count": 1}, {"class": "laptop", "count": 1}], "prompt": "a photo of a chair and a laptop"}
140 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a toothbrush and a bench"}
141 | {"tag": "two_object", "include": [{"class": "book", "count": 1}, {"class": "baseball bat", "count": 1}], "prompt": "a photo of a book and a baseball bat"}
142 | {"tag": "two_object", "include": [{"class": "horse", "count": 1}, {"class": "train", "count": 1}], "prompt": "a photo of a horse and a train"}
143 | {"tag": "two_object", "include": [{"class": "bench", "count": 1}, {"class": "vase", "count": 1}], "prompt": "a photo of a bench and a vase"}
144 | {"tag": "two_object", "include": [{"class": "traffic light", "count": 1}, {"class": "backpack", "count": 1}], "prompt": "a photo of a traffic light and a backpack"}
145 | {"tag": "two_object", "include": [{"class": "sports ball", "count": 1}, {"class": "cow", "count": 1}], "prompt": "a photo of a sports ball and a cow"}
146 | {"tag": "two_object", "include": [{"class": "computer mouse", "count": 1}, {"class": "spoon", "count": 1}], "prompt": "a photo of a computer mouse and a spoon"}
147 | {"tag": "two_object", "include": [{"class": "tv", "count": 1}, {"class": "bicycle", "count": 1}], "prompt": "a photo of a tv and a bicycle"}
148 | {"tag": "two_object", "include": [{"class": "bench", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a bench and a snowboard"}
149 | {"tag": "two_object", "include": [{"class": "toothbrush", "count": 1}, {"class": "toilet", "count": 1}], "prompt": "a photo of a toothbrush and a toilet"}
150 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "apple", "count": 1}], "prompt": "a photo of a person and an apple"}
151 | {"tag": "two_object", "include": [{"class": "sink", "count": 1}, {"class": "sports ball", "count": 1}], "prompt": "a photo of a sink and a sports ball"}
152 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "dog", "count": 1}], "prompt": "a photo of a stop sign and a dog"}
153 | {"tag": "two_object", "include": [{"class": "knife", "count": 1}, {"class": "stop sign", "count": 1}], "prompt": "a photo of a knife and a stop sign"}
154 | {"tag": "two_object", "include": [{"class": "wine glass", "count": 1}, {"class": "handbag", "count": 1}], "prompt": "a photo of a wine glass and a handbag"}
155 | {"tag": "two_object", "include": [{"class": "bowl", "count": 1}, {"class": "skis", "count": 1}], "prompt": "a photo of a bowl and a skis"}
156 | {"tag": "two_object", "include": [{"class": "frisbee", "count": 1}, {"class": "apple", "count": 1}], "prompt": "a photo of a frisbee and an apple"}
157 | {"tag": "two_object", "include": [{"class": "computer keyboard", "count": 1}, {"class": "cell phone", "count": 1}], "prompt": "a photo of a computer keyboard and a cell phone"}
158 | {"tag": "two_object", "include": [{"class": "stop sign", "count": 1}, {"class": "fork", "count": 1}], "prompt": "a photo of a stop sign and a fork"}
159 | {"tag": "two_object", "include": [{"class": "potted plant", "count": 1}, {"class": "boat", "count": 1}], "prompt": "a photo of a potted plant and a boat"}
160 | {"tag": "two_object", "include": [{"class": "tv", "count": 1}, {"class": "cell phone", "count": 1}], "prompt": "a photo of a tv and a cell phone"}
161 | {"tag": "two_object", "include": [{"class": "tie", "count": 1}, {"class": "broccoli", "count": 1}], "prompt": "a photo of a tie and a broccoli"}
162 | {"tag": "two_object", "include": [{"class": "potted plant", "count": 1}, {"class": "donut", "count": 1}], "prompt": "a photo of a potted plant and a donut"}
163 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "sink", "count": 1}], "prompt": "a photo of a person and a sink"}
164 | {"tag": "two_object", "include": [{"class": "couch", "count": 1}, {"class": "snowboard", "count": 1}], "prompt": "a photo of a couch and a snowboard"}
165 | {"tag": "two_object", "include": [{"class": "fork", "count": 1}, {"class": "baseball glove", "count": 1}], "prompt": "a photo of a fork and a baseball glove"}
166 | {"tag": "two_object", "include": [{"class": "apple", "count": 1}, {"class": "toothbrush", "count": 1}], "prompt": "a photo of an apple and a toothbrush"}
167 | {"tag": "two_object", "include": [{"class": "bus", "count": 1}, {"class": "baseball glove", "count": 1}], "prompt": "a photo of a bus and a baseball glove"}
168 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "stop sign", "count": 1}], "prompt": "a photo of a person and a stop sign"}
169 | {"tag": "two_object", "include": [{"class": "carrot", "count": 1}, {"class": "couch", "count": 1}], "prompt": "a photo of a carrot and a couch"}
170 | {"tag": "two_object", "include": [{"class": "baseball bat", "count": 1}, {"class": "bear", "count": 1}], "prompt": "a photo of a baseball bat and a bear"}
171 | {"tag": "two_object", "include": [{"class": "fire hydrant", "count": 1}, {"class": "train", "count": 1}], "prompt": "a photo of a fire hydrant and a train"}
172 | {"tag": "two_object", "include": [{"class": "baseball glove", "count": 1}, {"class": "carrot", "count": 1}], "prompt": "a photo of a baseball glove and a carrot"}
173 | {"tag": "two_object", "include": [{"class": "microwave", "count": 1}, {"class": "bench", "count": 1}], "prompt": "a photo of a microwave and a bench"}
174 | {"tag": "two_object", "include": [{"class": "cake", "count": 1}, {"class": "stop sign", "count": 1}], "prompt": "a photo of a cake and a stop sign"}
175 | {"tag": "two_object", "include": [{"class": "car", "count": 1}, {"class": "computer mouse", "count": 1}], "prompt": "a photo of a car and a computer mouse"}
176 | {"tag": "two_object", "include": [{"class": "suitcase", "count": 1}, {"class": "dining table", "count": 1}], "prompt": "a photo of a suitcase and a dining table"}
177 | {"tag": "two_object", "include": [{"class": "person", "count": 1}, {"class": "traffic light", "count": 1}], "prompt": "a photo of a person and a traffic light"}
178 | {"tag": "two_object", "include": [{"class": "cell phone", "count": 1}, {"class": "horse", "count": 1}], "prompt": "a photo of a cell phone and a horse"}
179 | {"tag": "two_object", "include": [{"class": "baseball bat", "count": 1}, {"class": "giraffe", "count": 1}], "prompt": "a photo of a baseball bat and a giraffe"}
180 | {"tag": "counting", "include": [{"class": "clock", "count": 2}], "exclude": [{"class": "clock", "count": 3}], "prompt": "a photo of two clocks"}
181 | {"tag": "counting", "include": [{"class": "backpack", "count": 2}], "exclude": [{"class": "backpack", "count": 3}], "prompt": "a photo of two backpacks"}
182 | {"tag": "counting", "include": [{"class": "handbag", "count": 4}], "exclude": [{"class": "handbag", "count": 5}], "prompt": "a photo of four handbags"}
183 | {"tag": "counting", "include": [{"class": "frisbee", "count": 2}], "exclude": [{"class": "frisbee", "count": 3}], "prompt": "a photo of two frisbees"}
184 | {"tag": "counting", "include": [{"class": "sports ball", "count": 3}], "exclude": [{"class": "sports ball", "count": 4}], "prompt": "a photo of three sports balls"}
185 | {"tag": "counting", "include": [{"class": "bear", "count": 2}], "exclude": [{"class": "bear", "count": 3}], "prompt": "a photo of two bears"}
186 | {"tag": "counting", "include": [{"class": "tie", "count": 2}], "exclude": [{"class": "tie", "count": 3}], "prompt": "a photo of two ties"}
187 | {"tag": "counting", "include": [{"class": "sink", "count": 4}], "exclude": [{"class": "sink", "count": 5}], "prompt": "a photo of four sinks"}
188 | {"tag": "counting", "include": [{"class": "toothbrush", "count": 2}], "exclude": [{"class": "toothbrush", "count": 3}], "prompt": "a photo of two toothbrushs"}
189 | {"tag": "counting", "include": [{"class": "person", "count": 3}], "exclude": [{"class": "person", "count": 4}], "prompt": "a photo of three persons"}
190 | {"tag": "counting", "include": [{"class": "tennis racket", "count": 3}], "exclude": [{"class": "tennis racket", "count": 4}], "prompt": "a photo of three tennis rackets"}
191 | {"tag": "counting", "include": [{"class": "bowl", "count": 4}], "exclude": [{"class": "bowl", "count": 5}], "prompt": "a photo of four bowls"}
192 | {"tag": "counting", "include": [{"class": "vase", "count": 4}], "exclude": [{"class": "vase", "count": 5}], "prompt": "a photo of four vases"}
193 | {"tag": "counting", "include": [{"class": "cup", "count": 3}], "exclude": [{"class": "cup", "count": 4}], "prompt": "a photo of three cups"}
194 | {"tag": "counting", "include": [{"class": "computer keyboard", "count": 4}], "exclude": [{"class": "computer keyboard", "count": 5}], "prompt": "a photo of four computer keyboards"}
195 | {"tag": "counting", "include": [{"class": "sink", "count": 3}], "exclude": [{"class": "sink", "count": 4}], "prompt": "a photo of three sinks"}
196 | {"tag": "counting", "include": [{"class": "oven", "count": 2}], "exclude": [{"class": "oven", "count": 3}], "prompt": "a photo of two ovens"}
197 | {"tag": "counting", "include": [{"class": "toilet", "count": 2}], "exclude": [{"class": "toilet", "count": 3}], "prompt": "a photo of two toilets"}
198 | {"tag": "counting", "include": [{"class": "bicycle", "count": 2}], "exclude": [{"class": "bicycle", "count": 3}], "prompt": "a photo of two bicycles"}
199 | {"tag": "counting", "include": [{"class": "train", "count": 2}], "exclude": [{"class": "train", "count": 3}], "prompt": "a photo of two trains"}
200 | {"tag": "counting", "include": [{"class": "orange", "count": 3}], "exclude": [{"class": "orange", "count": 4}], "prompt": "a photo of three oranges"}
201 | {"tag": "counting", "include": [{"class": "bus", "count": 3}], "exclude": [{"class": "bus", "count": 4}], "prompt": "a photo of three buses"}
202 | {"tag": "counting", "include": [{"class": "handbag", "count": 3}], "exclude": [{"class": "handbag", "count": 4}], "prompt": "a photo of three handbags"}
203 | {"tag": "counting", "include": [{"class": "snowboard", "count": 3}], "exclude": [{"class": "snowboard", "count": 4}], "prompt": "a photo of three snowboards"}
204 | {"tag": "counting", "include": [{"class": "snowboard", "count": 2}], "exclude": [{"class": "snowboard", "count": 3}], "prompt": "a photo of two snowboards"}
205 | {"tag": "counting", "include": [{"class": "dog", "count": 4}], "exclude": [{"class": "dog", "count": 5}], "prompt": "a photo of four dogs"}
206 | {"tag": "counting", "include": [{"class": "apple", "count": 3}], "exclude": [{"class": "apple", "count": 4}], "prompt": "a photo of three apples"}
207 | {"tag": "counting", "include": [{"class": "sheep", "count": 2}], "exclude": [{"class": "sheep", "count": 3}], "prompt": "a photo of two sheeps"}
208 | {"tag": "counting", "include": [{"class": "hot dog", "count": 3}], "exclude": [{"class": "hot dog", "count": 4}], "prompt": "a photo of three hot dogs"}
209 | {"tag": "counting", "include": [{"class": "zebra", "count": 3}], "exclude": [{"class": "zebra", "count": 4}], "prompt": "a photo of three zebras"}
210 | {"tag": "counting", "include": [{"class": "kite", "count": 3}], "exclude": [{"class": "kite", "count": 4}], "prompt": "a photo of three kites"}
211 | {"tag": "counting", "include": [{"class": "apple", "count": 4}], "exclude": [{"class": "apple", "count": 5}], "prompt": "a photo of four apples"}
212 | {"tag": "counting", "include": [{"class": "cell phone", "count": 3}], "exclude": [{"class": "cell phone", "count": 4}], "prompt": "a photo of three cell phones"}
213 | {"tag": "counting", "include": [{"class": "baseball glove", "count": 4}], "exclude": [{"class": "baseball glove", "count": 5}], "prompt": "a photo of four baseball gloves"}
214 | {"tag": "counting", "include": [{"class": "computer keyboard", "count": 3}], "exclude": [{"class": "computer keyboard", "count": 4}], "prompt": "a photo of three computer keyboards"}
215 | {"tag": "counting", "include": [{"class": "bed", "count": 2}], "exclude": [{"class": "bed", "count": 3}], "prompt": "a photo of two beds"}
216 | {"tag": "counting", "include": [{"class": "tv remote", "count": 2}], "exclude": [{"class": "tv remote", "count": 3}], "prompt": "a photo of two tv remotes"}
217 | {"tag": "counting", "include": [{"class": "fire hydrant", "count": 3}], "exclude": [{"class": "fire hydrant", "count": 4}], "prompt": "a photo of three fire hydrants"}
218 | {"tag": "counting", "include": [{"class": "book", "count": 3}], "exclude": [{"class": "book", "count": 4}], "prompt": "a photo of three books"}
219 | {"tag": "counting", "include": [{"class": "giraffe", "count": 4}], "exclude": [{"class": "giraffe", "count": 5}], "prompt": "a photo of four giraffes"}
220 | {"tag": "counting", "include": [{"class": "vase", "count": 2}], "exclude": [{"class": "vase", "count": 3}], "prompt": "a photo of two vases"}
221 | {"tag": "counting", "include": [{"class": "donut", "count": 4}], "exclude": [{"class": "donut", "count": 5}], "prompt": "a photo of four donuts"}
222 | {"tag": "counting", "include": [{"class": "chair", "count": 4}], "exclude": [{"class": "chair", "count": 5}], "prompt": "a photo of four chairs"}
223 | {"tag": "counting", "include": [{"class": "baseball bat", "count": 3}], "exclude": [{"class": "baseball bat", "count": 4}], "prompt": "a photo of three baseball bats"}
224 | {"tag": "counting", "include": [{"class": "stop sign", "count": 4}], "exclude": [{"class": "stop sign", "count": 5}], "prompt": "a photo of four stop signs"}
225 | {"tag": "counting", "include": [{"class": "pizza", "count": 2}], "exclude": [{"class": "pizza", "count": 3}], "prompt": "a photo of two pizzas"}
226 | {"tag": "counting", "include": [{"class": "refrigerator", "count": 3}], "exclude": [{"class": "refrigerator", "count": 4}], "prompt": "a photo of three refrigerators"}
227 | {"tag": "counting", "include": [{"class": "fire hydrant", "count": 2}], "exclude": [{"class": "fire hydrant", "count": 3}], "prompt": "a photo of two fire hydrants"}
228 | {"tag": "counting", "include": [{"class": "giraffe", "count": 3}], "exclude": [{"class": "giraffe", "count": 4}], "prompt": "a photo of three giraffes"}
229 | {"tag": "counting", "include": [{"class": "tv", "count": 4}], "exclude": [{"class": "tv", "count": 5}], "prompt": "a photo of four tvs"}
230 | {"tag": "counting", "include": [{"class": "wine glass", "count": 3}], "exclude": [{"class": "wine glass", "count": 4}], "prompt": "a photo of three wine glasses"}
231 | {"tag": "counting", "include": [{"class": "broccoli", "count": 4}], "exclude": [{"class": "broccoli", "count": 5}], "prompt": "a photo of four broccolis"}
232 | {"tag": "counting", "include": [{"class": "truck", "count": 3}], "exclude": [{"class": "truck", "count": 4}], "prompt": "a photo of three trucks"}
233 | {"tag": "counting", "include": [{"class": "truck", "count": 2}], "exclude": [{"class": "truck", "count": 3}], "prompt": "a photo of two trucks"}
234 | {"tag": "counting", "include": [{"class": "carrot", "count": 2}], "exclude": [{"class": "carrot", "count": 3}], "prompt": "a photo of two carrots"}
235 | {"tag": "counting", "include": [{"class": "sandwich", "count": 2}], "exclude": [{"class": "sandwich", "count": 3}], "prompt": "a photo of two sandwichs"}
236 | {"tag": "counting", "include": [{"class": "traffic light", "count": 4}], "exclude": [{"class": "traffic light", "count": 5}], "prompt": "a photo of four traffic lights"}
237 | {"tag": "counting", "include": [{"class": "clock", "count": 4}], "exclude": [{"class": "clock", "count": 5}], "prompt": "a photo of four clocks"}
238 | {"tag": "counting", "include": [{"class": "car", "count": 2}], "exclude": [{"class": "car", "count": 3}], "prompt": "a photo of two cars"}
239 | {"tag": "counting", "include": [{"class": "banana", "count": 2}], "exclude": [{"class": "banana", "count": 3}], "prompt": "a photo of two bananas"}
240 | {"tag": "counting", "include": [{"class": "wine glass", "count": 2}], "exclude": [{"class": "wine glass", "count": 3}], "prompt": "a photo of two wine glasses"}
241 | {"tag": "counting", "include": [{"class": "pizza", "count": 3}], "exclude": [{"class": "pizza", "count": 4}], "prompt": "a photo of three pizzas"}
242 | {"tag": "counting", "include": [{"class": "knife", "count": 4}], "exclude": [{"class": "knife", "count": 5}], "prompt": "a photo of four knifes"}
243 | {"tag": "counting", "include": [{"class": "suitcase", "count": 3}], "exclude": [{"class": "suitcase", "count": 4}], "prompt": "a photo of three suitcases"}
244 | {"tag": "counting", "include": [{"class": "zebra", "count": 4}], "exclude": [{"class": "zebra", "count": 5}], "prompt": "a photo of four zebras"}
245 | {"tag": "counting", "include": [{"class": "teddy bear", "count": 2}], "exclude": [{"class": "teddy bear", "count": 3}], "prompt": "a photo of two teddy bears"}
246 | {"tag": "counting", "include": [{"class": "skateboard", "count": 4}], "exclude": [{"class": "skateboard", "count": 5}], "prompt": "a photo of four skateboards"}
247 | {"tag": "counting", "include": [{"class": "hot dog", "count": 4}], "exclude": [{"class": "hot dog", "count": 5}], "prompt": "a photo of four hot dogs"}
248 | {"tag": "counting", "include": [{"class": "bird", "count": 3}], "exclude": [{"class": "bird", "count": 4}], "prompt": "a photo of three birds"}
249 | {"tag": "counting", "include": [{"class": "boat", "count": 4}], "exclude": [{"class": "boat", "count": 5}], "prompt": "a photo of four boats"}
250 | {"tag": "counting", "include": [{"class": "microwave", "count": 4}], "exclude": [{"class": "microwave", "count": 5}], "prompt": "a photo of four microwaves"}
251 | {"tag": "counting", "include": [{"class": "hair drier", "count": 2}], "exclude": [{"class": "hair drier", "count": 3}], "prompt": "a photo of two hair driers"}
252 | {"tag": "counting", "include": [{"class": "laptop", "count": 3}], "exclude": [{"class": "laptop", "count": 4}], "prompt": "a photo of three laptops"}
253 | {"tag": "counting", "include": [{"class": "cow", "count": 3}], "exclude": [{"class": "cow", "count": 4}], "prompt": "a photo of three cows"}
254 | {"tag": "counting", "include": [{"class": "parking meter", "count": 2}], "exclude": [{"class": "parking meter", "count": 3}], "prompt": "a photo of two parking meters"}
255 | {"tag": "counting", "include": [{"class": "bench", "count": 4}], "exclude": [{"class": "bench", "count": 5}], "prompt": "a photo of four benchs"}
256 | {"tag": "counting", "include": [{"class": "bench", "count": 3}], "exclude": [{"class": "bench", "count": 4}], "prompt": "a photo of three benchs"}
257 | {"tag": "counting", "include": [{"class": "frisbee", "count": 4}], "exclude": [{"class": "frisbee", "count": 5}], "prompt": "a photo of four frisbees"}
258 | {"tag": "counting", "include": [{"class": "book", "count": 4}], "exclude": [{"class": "book", "count": 5}], "prompt": "a photo of four books"}
259 | {"tag": "counting", "include": [{"class": "bus", "count": 4}], "exclude": [{"class": "bus", "count": 5}], "prompt": "a photo of four buses"}
260 | {"tag": "colors", "include": [{"class": "fire hydrant", "count": 1, "color": "blue"}], "prompt": "a photo of a blue fire hydrant"}
261 | {"tag": "colors", "include": [{"class": "car", "count": 1, "color": "pink"}], "prompt": "a photo of a pink car"}
262 | {"tag": "colors", "include": [{"class": "cup", "count": 1, "color": "purple"}], "prompt": "a photo of a purple cup"}
263 | {"tag": "colors", "include": [{"class": "cow", "count": 1, "color": "blue"}], "prompt": "a photo of a blue cow"}
264 | {"tag": "colors", "include": [{"class": "boat", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow boat"}
265 | {"tag": "colors", "include": [{"class": "umbrella", "count": 1, "color": "blue"}], "prompt": "a photo of a blue umbrella"}
266 | {"tag": "colors", "include": [{"class": "elephant", "count": 1, "color": "blue"}], "prompt": "a photo of a blue elephant"}
267 | {"tag": "colors", "include": [{"class": "elephant", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow elephant"}
268 | {"tag": "colors", "include": [{"class": "bicycle", "count": 1, "color": "red"}], "prompt": "a photo of a red bicycle"}
269 | {"tag": "colors", "include": [{"class": "suitcase", "count": 1, "color": "purple"}], "prompt": "a photo of a purple suitcase"}
270 | {"tag": "colors", "include": [{"class": "hair drier", "count": 1, "color": "purple"}], "prompt": "a photo of a purple hair drier"}
271 | {"tag": "colors", "include": [{"class": "sandwich", "count": 1, "color": "white"}], "prompt": "a photo of a white sandwich"}
272 | {"tag": "colors", "include": [{"class": "elephant", "count": 1, "color": "purple"}], "prompt": "a photo of a purple elephant"}
273 | {"tag": "colors", "include": [{"class": "microwave", "count": 1, "color": "green"}], "prompt": "a photo of a green microwave"}
274 | {"tag": "colors", "include": [{"class": "zebra", "count": 1, "color": "red"}], "prompt": "a photo of a red zebra"}
275 | {"tag": "colors", "include": [{"class": "apple", "count": 1, "color": "red"}], "prompt": "a photo of a red apple"}
276 | {"tag": "colors", "include": [{"class": "tv remote", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow tv remote"}
277 | {"tag": "colors", "include": [{"class": "toilet", "count": 1, "color": "blue"}], "prompt": "a photo of a blue toilet"}
278 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "orange"}], "prompt": "a photo of an orange orange"}
279 | {"tag": "colors", "include": [{"class": "donut", "count": 1, "color": "black"}], "prompt": "a photo of a black donut"}
280 | {"tag": "colors", "include": [{"class": "vase", "count": 1, "color": "red"}], "prompt": "a photo of a red vase"}
281 | {"tag": "colors", "include": [{"class": "pizza", "count": 1, "color": "purple"}], "prompt": "a photo of a purple pizza"}
282 | {"tag": "colors", "include": [{"class": "skateboard", "count": 1, "color": "pink"}], "prompt": "a photo of a pink skateboard"}
283 | {"tag": "colors", "include": [{"class": "skateboard", "count": 1, "color": "green"}], "prompt": "a photo of a green skateboard"}
284 | {"tag": "colors", "include": [{"class": "bear", "count": 1, "color": "purple"}], "prompt": "a photo of a purple bear"}
285 | {"tag": "colors", "include": [{"class": "chair", "count": 1, "color": "brown"}], "prompt": "a photo of a brown chair"}
286 | {"tag": "colors", "include": [{"class": "computer keyboard", "count": 1, "color": "brown"}], "prompt": "a photo of a brown computer keyboard"}
287 | {"tag": "colors", "include": [{"class": "cow", "count": 1, "color": "orange"}], "prompt": "a photo of an orange cow"}
288 | {"tag": "colors", "include": [{"class": "skis", "count": 1, "color": "brown"}], "prompt": "a photo of a brown skis"}
289 | {"tag": "colors", "include": [{"class": "kite", "count": 1, "color": "white"}], "prompt": "a photo of a white kite"}
290 | {"tag": "colors", "include": [{"class": "dog", "count": 1, "color": "red"}], "prompt": "a photo of a red dog"}
291 | {"tag": "colors", "include": [{"class": "couch", "count": 1, "color": "green"}], "prompt": "a photo of a green couch"}
292 | {"tag": "colors", "include": [{"class": "airplane", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow airplane"}
293 | {"tag": "colors", "include": [{"class": "tv", "count": 1, "color": "orange"}], "prompt": "a photo of an orange tv"}
294 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "white"}], "prompt": "a photo of a white scissors"}
295 | {"tag": "colors", "include": [{"class": "cell phone", "count": 1, "color": "pink"}], "prompt": "a photo of a pink cell phone"}
296 | {"tag": "colors", "include": [{"class": "surfboard", "count": 1, "color": "green"}], "prompt": "a photo of a green surfboard"}
297 | {"tag": "colors", "include": [{"class": "fire hydrant", "count": 1, "color": "white"}], "prompt": "a photo of a white fire hydrant"}
298 | {"tag": "colors", "include": [{"class": "bicycle", "count": 1, "color": "black"}], "prompt": "a photo of a black bicycle"}
299 | {"tag": "colors", "include": [{"class": "carrot", "count": 1, "color": "purple"}], "prompt": "a photo of a purple carrot"}
300 | {"tag": "colors", "include": [{"class": "dining table", "count": 1, "color": "black"}], "prompt": "a photo of a black dining table"}
301 | {"tag": "colors", "include": [{"class": "potted plant", "count": 1, "color": "purple"}], "prompt": "a photo of a purple potted plant"}
302 | {"tag": "colors", "include": [{"class": "backpack", "count": 1, "color": "purple"}], "prompt": "a photo of a purple backpack"}
303 | {"tag": "colors", "include": [{"class": "train", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow train"}
304 | {"tag": "colors", "include": [{"class": "potted plant", "count": 1, "color": "pink"}], "prompt": "a photo of a pink potted plant"}
305 | {"tag": "colors", "include": [{"class": "giraffe", "count": 1, "color": "red"}], "prompt": "a photo of a red giraffe"}
306 | {"tag": "colors", "include": [{"class": "bear", "count": 1, "color": "brown"}], "prompt": "a photo of a brown bear"}
307 | {"tag": "colors", "include": [{"class": "train", "count": 1, "color": "black"}], "prompt": "a photo of a black train"}
308 | {"tag": "colors", "include": [{"class": "laptop", "count": 1, "color": "orange"}], "prompt": "a photo of an orange laptop"}
309 | {"tag": "colors", "include": [{"class": "hot dog", "count": 1, "color": "green"}], "prompt": "a photo of a green hot dog"}
310 | {"tag": "colors", "include": [{"class": "parking meter", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow parking meter"}
311 | {"tag": "colors", "include": [{"class": "potted plant", "count": 1, "color": "red"}], "prompt": "a photo of a red potted plant"}
312 | {"tag": "colors", "include": [{"class": "traffic light", "count": 1, "color": "green"}], "prompt": "a photo of a green traffic light"}
313 | {"tag": "colors", "include": [{"class": "tv", "count": 1, "color": "blue"}], "prompt": "a photo of a blue tv"}
314 | {"tag": "colors", "include": [{"class": "refrigerator", "count": 1, "color": "brown"}], "prompt": "a photo of a brown refrigerator"}
315 | {"tag": "colors", "include": [{"class": "tv remote", "count": 1, "color": "black"}], "prompt": "a photo of a black tv remote"}
316 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "purple"}], "prompt": "a photo of a purple scissors"}
317 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow orange"}
318 | {"tag": "colors", "include": [{"class": "toaster", "count": 1, "color": "brown"}], "prompt": "a photo of a brown toaster"}
319 | {"tag": "colors", "include": [{"class": "parking meter", "count": 1, "color": "red"}], "prompt": "a photo of a red parking meter"}
320 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "brown"}], "prompt": "a photo of a brown orange"}
321 | {"tag": "colors", "include": [{"class": "clock", "count": 1, "color": "green"}], "prompt": "a photo of a green clock"}
322 | {"tag": "colors", "include": [{"class": "sheep", "count": 1, "color": "white"}], "prompt": "a photo of a white sheep"}
323 | {"tag": "colors", "include": [{"class": "oven", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow oven"}
324 | {"tag": "colors", "include": [{"class": "vase", "count": 1, "color": "green"}], "prompt": "a photo of a green vase"}
325 | {"tag": "colors", "include": [{"class": "teddy bear", "count": 1, "color": "black"}], "prompt": "a photo of a black teddy bear"}
326 | {"tag": "colors", "include": [{"class": "carrot", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow carrot"}
327 | {"tag": "colors", "include": [{"class": "hot dog", "count": 1, "color": "black"}], "prompt": "a photo of a black hot dog"}
328 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "red"}], "prompt": "a photo of a red scissors"}
329 | {"tag": "colors", "include": [{"class": "teddy bear", "count": 1, "color": "white"}], "prompt": "a photo of a white teddy bear"}
330 | {"tag": "colors", "include": [{"class": "skis", "count": 1, "color": "black"}], "prompt": "a photo of a black skis"}
331 | {"tag": "colors", "include": [{"class": "dining table", "count": 1, "color": "blue"}], "prompt": "a photo of a blue dining table"}
332 | {"tag": "colors", "include": [{"class": "refrigerator", "count": 1, "color": "black"}], "prompt": "a photo of a black refrigerator"}
333 | {"tag": "colors", "include": [{"class": "dog", "count": 1, "color": "white"}], "prompt": "a photo of a white dog"}
334 | {"tag": "colors", "include": [{"class": "scissors", "count": 1, "color": "orange"}], "prompt": "a photo of an orange scissors"}
335 | {"tag": "colors", "include": [{"class": "cell phone", "count": 1, "color": "red"}], "prompt": "a photo of a red cell phone"}
336 | {"tag": "colors", "include": [{"class": "orange", "count": 1, "color": "white"}], "prompt": "a photo of a white orange"}
337 | {"tag": "colors", "include": [{"class": "clock", "count": 1, "color": "blue"}], "prompt": "a photo of a blue clock"}
338 | {"tag": "colors", "include": [{"class": "carrot", "count": 1, "color": "blue"}], "prompt": "a photo of a blue carrot"}
339 | {"tag": "colors", "include": [{"class": "motorcycle", "count": 1, "color": "green"}], "prompt": "a photo of a green motorcycle"}
340 | {"tag": "colors", "include": [{"class": "stop sign", "count": 1, "color": "pink"}], "prompt": "a photo of a pink stop sign"}
341 | {"tag": "colors", "include": [{"class": "vase", "count": 1, "color": "black"}], "prompt": "a photo of a black vase"}
342 | {"tag": "colors", "include": [{"class": "backpack", "count": 1, "color": "black"}], "prompt": "a photo of a black backpack"}
343 | {"tag": "colors", "include": [{"class": "car", "count": 1, "color": "red"}], "prompt": "a photo of a red car"}
344 | {"tag": "colors", "include": [{"class": "computer mouse", "count": 1, "color": "green"}], "prompt": "a photo of a green computer mouse"}
345 | {"tag": "colors", "include": [{"class": "backpack", "count": 1, "color": "red"}], "prompt": "a photo of a red backpack"}
346 | {"tag": "colors", "include": [{"class": "bus", "count": 1, "color": "green"}], "prompt": "a photo of a green bus"}
347 | {"tag": "colors", "include": [{"class": "toaster", "count": 1, "color": "orange"}], "prompt": "a photo of an orange toaster"}
348 | {"tag": "colors", "include": [{"class": "fork", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow fork"}
349 | {"tag": "colors", "include": [{"class": "parking meter", "count": 1, "color": "pink"}], "prompt": "a photo of a pink parking meter"}
350 | {"tag": "colors", "include": [{"class": "book", "count": 1, "color": "blue"}], "prompt": "a photo of a blue book"}
351 | {"tag": "colors", "include": [{"class": "broccoli", "count": 1, "color": "yellow"}], "prompt": "a photo of a yellow broccoli"}
352 | {"tag": "colors", "include": [{"class": "computer mouse", "count": 1, "color": "orange"}], "prompt": "a photo of an orange computer mouse"}
353 | {"tag": "colors", "include": [{"class": "cake", "count": 1, "color": "red"}], "prompt": "a photo of a red cake"}
354 | {"tag": "position", "include": [{"class": "teddy bear", "count": 1}, {"class": "dog", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a dog right of a teddy bear"}
355 | {"tag": "position", "include": [{"class": "kite", "count": 1}, {"class": "wine glass", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a wine glass above a kite"}
356 | {"tag": "position", "include": [{"class": "cup", "count": 1}, {"class": "couch", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a couch below a cup"}
357 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "laptop", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a laptop left of a cow"}
358 | {"tag": "position", "include": [{"class": "hair drier", "count": 1}, {"class": "fork", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a fork above a hair drier"}
359 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "tie", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a tie right of a baseball bat"}
360 | {"tag": "position", "include": [{"class": "fork", "count": 1}, {"class": "stop sign", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a stop sign above a fork"}
361 | {"tag": "position", "include": [{"class": "skateboard", "count": 1}, {"class": "bird", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a bird below a skateboard"}
362 | {"tag": "position", "include": [{"class": "tv", "count": 1}, {"class": "apple", "count": 1, "position": ["above", 0]}], "prompt": "a photo of an apple above a tv"}
363 | {"tag": "position", "include": [{"class": "potted plant", "count": 1}, {"class": "train", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a train above a potted plant"}
364 | {"tag": "position", "include": [{"class": "refrigerator", "count": 1}, {"class": "truck", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a truck left of a refrigerator"}
365 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "tv remote", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a tv remote below a cow"}
366 | {"tag": "position", "include": [{"class": "train", "count": 1}, {"class": "bottle", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a bottle right of a train"}
367 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "dog", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a dog above a cow"}
368 | {"tag": "position", "include": [{"class": "person", "count": 1}, {"class": "skateboard", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a skateboard above a person"}
369 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "baseball glove", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a baseball glove below an umbrella"}
370 | {"tag": "position", "include": [{"class": "oven", "count": 1}, {"class": "dining table", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a dining table right of an oven"}
371 | {"tag": "position", "include": [{"class": "suitcase", "count": 1}, {"class": "hot dog", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a hot dog left of a suitcase"}
372 | {"tag": "position", "include": [{"class": "toothbrush", "count": 1}, {"class": "bus", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a bus below a toothbrush"}
373 | {"tag": "position", "include": [{"class": "sandwich", "count": 1}, {"class": "backpack", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a backpack right of a sandwich"}
374 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "cake", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cake below a baseball bat"}
375 | {"tag": "position", "include": [{"class": "tie", "count": 1}, {"class": "dog", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a dog right of a tie"}
376 | {"tag": "position", "include": [{"class": "boat", "count": 1}, {"class": "suitcase", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a suitcase right of a boat"}
377 | {"tag": "position", "include": [{"class": "clock", "count": 1}, {"class": "bear", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bear above a clock"}
378 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "tv remote", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a tv remote left of an umbrella"}
379 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "sports ball", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a sports ball left of an umbrella"}
380 | {"tag": "position", "include": [{"class": "dining table", "count": 1}, {"class": "train", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a train right of a dining table"}
381 | {"tag": "position", "include": [{"class": "elephant", "count": 1}, {"class": "hair drier", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a hair drier below an elephant"}
382 | {"tag": "position", "include": [{"class": "spoon", "count": 1}, {"class": "tennis racket", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a tennis racket right of a spoon"}
383 | {"tag": "position", "include": [{"class": "hot dog", "count": 1}, {"class": "wine glass", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a wine glass right of a hot dog"}
384 | {"tag": "position", "include": [{"class": "bench", "count": 1}, {"class": "computer mouse", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a computer mouse left of a bench"}
385 | {"tag": "position", "include": [{"class": "orange", "count": 1}, {"class": "carrot", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a carrot left of an orange"}
386 | {"tag": "position", "include": [{"class": "toothbrush", "count": 1}, {"class": "kite", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a kite above a toothbrush"}
387 | {"tag": "position", "include": [{"class": "traffic light", "count": 1}, {"class": "toaster", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a toaster below a traffic light"}
388 | {"tag": "position", "include": [{"class": "baseball glove", "count": 1}, {"class": "cat", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cat below a baseball glove"}
389 | {"tag": "position", "include": [{"class": "zebra", "count": 1}, {"class": "skis", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a skis right of a zebra"}
390 | {"tag": "position", "include": [{"class": "chair", "count": 1}, {"class": "stop sign", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a stop sign above a chair"}
391 | {"tag": "position", "include": [{"class": "parking meter", "count": 1}, {"class": "stop sign", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a stop sign above a parking meter"}
392 | {"tag": "position", "include": [{"class": "skateboard", "count": 1}, {"class": "hot dog", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a hot dog right of a skateboard"}
393 | {"tag": "position", "include": [{"class": "computer keyboard", "count": 1}, {"class": "pizza", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a pizza below a computer keyboard"}
394 | {"tag": "position", "include": [{"class": "toilet", "count": 1}, {"class": "hair drier", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a hair drier left of a toilet"}
395 | {"tag": "position", "include": [{"class": "stop sign", "count": 1}, {"class": "cow", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a cow left of a stop sign"}
396 | {"tag": "position", "include": [{"class": "skis", "count": 1}, {"class": "suitcase", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a suitcase above a skis"}
397 | {"tag": "position", "include": [{"class": "laptop", "count": 1}, {"class": "book", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a book above a laptop"}
398 | {"tag": "position", "include": [{"class": "pizza", "count": 1}, {"class": "toothbrush", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a toothbrush below a pizza"}
399 | {"tag": "position", "include": [{"class": "kite", "count": 1}, {"class": "toilet", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a toilet left of a kite"}
400 | {"tag": "position", "include": [{"class": "sink", "count": 1}, {"class": "tie", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a tie above a sink"}
401 | {"tag": "position", "include": [{"class": "couch", "count": 1}, {"class": "bird", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a bird left of a couch"}
402 | {"tag": "position", "include": [{"class": "sports ball", "count": 1}, {"class": "bed", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a bed right of a sports ball"}
403 | {"tag": "position", "include": [{"class": "surfboard", "count": 1}, {"class": "elephant", "count": 1, "position": ["below", 0]}], "prompt": "a photo of an elephant below a surfboard"}
404 | {"tag": "position", "include": [{"class": "motorcycle", "count": 1}, {"class": "frisbee", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a frisbee right of a motorcycle"}
405 | {"tag": "position", "include": [{"class": "fire hydrant", "count": 1}, {"class": "vase", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a vase above a fire hydrant"}
406 | {"tag": "position", "include": [{"class": "elephant", "count": 1}, {"class": "zebra", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a zebra left of an elephant"}
407 | {"tag": "position", "include": [{"class": "bear", "count": 1}, {"class": "bench", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a bench left of a bear"}
408 | {"tag": "position", "include": [{"class": "bench", "count": 1}, {"class": "donut", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a donut right of a bench"}
409 | {"tag": "position", "include": [{"class": "horse", "count": 1}, {"class": "frisbee", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a frisbee below a horse"}
410 | {"tag": "position", "include": [{"class": "snowboard", "count": 1}, {"class": "computer keyboard", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a computer keyboard above a snowboard"}
411 | {"tag": "position", "include": [{"class": "cow", "count": 1}, {"class": "tv", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a tv below a cow"}
412 | {"tag": "position", "include": [{"class": "horse", "count": 1}, {"class": "elephant", "count": 1, "position": ["below", 0]}], "prompt": "a photo of an elephant below a horse"}
413 | {"tag": "position", "include": [{"class": "banana", "count": 1}, {"class": "suitcase", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a suitcase left of a banana"}
414 | {"tag": "position", "include": [{"class": "airplane", "count": 1}, {"class": "train", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a train below an airplane"}
415 | {"tag": "position", "include": [{"class": "backpack", "count": 1}, {"class": "cat", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cat below a backpack"}
416 | {"tag": "position", "include": [{"class": "cake", "count": 1}, {"class": "backpack", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a backpack below a cake"}
417 | {"tag": "position", "include": [{"class": "knife", "count": 1}, {"class": "sandwich", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a sandwich below a knife"}
418 | {"tag": "position", "include": [{"class": "parking meter", "count": 1}, {"class": "bicycle", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bicycle above a parking meter"}
419 | {"tag": "position", "include": [{"class": "suitcase", "count": 1}, {"class": "knife", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a knife right of a suitcase"}
420 | {"tag": "position", "include": [{"class": "knife", "count": 1}, {"class": "hot dog", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a hot dog above a knife"}
421 | {"tag": "position", "include": [{"class": "parking meter", "count": 1}, {"class": "zebra", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a zebra right of a parking meter"}
422 | {"tag": "position", "include": [{"class": "zebra", "count": 1}, {"class": "chair", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a chair left of a zebra"}
423 | {"tag": "position", "include": [{"class": "airplane", "count": 1}, {"class": "cow", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a cow below an airplane"}
424 | {"tag": "position", "include": [{"class": "umbrella", "count": 1}, {"class": "cup", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a cup left of an umbrella"}
425 | {"tag": "position", "include": [{"class": "computer keyboard", "count": 1}, {"class": "zebra", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a zebra below a computer keyboard"}
426 | {"tag": "position", "include": [{"class": "broccoli", "count": 1}, {"class": "zebra", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a zebra below a broccoli"}
427 | {"tag": "position", "include": [{"class": "sports ball", "count": 1}, {"class": "laptop", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a laptop below a sports ball"}
428 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "truck", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a truck left of a baseball bat"}
429 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "refrigerator", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a refrigerator above a baseball bat"}
430 | {"tag": "position", "include": [{"class": "baseball bat", "count": 1}, {"class": "tv", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a tv above a baseball bat"}
431 | {"tag": "position", "include": [{"class": "bear", "count": 1}, {"class": "baseball glove", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a baseball glove right of a bear"}
432 | {"tag": "position", "include": [{"class": "scissors", "count": 1}, {"class": "refrigerator", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a refrigerator below a scissors"}
433 | {"tag": "position", "include": [{"class": "suitcase", "count": 1}, {"class": "dining table", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a dining table above a suitcase"}
434 | {"tag": "position", "include": [{"class": "broccoli", "count": 1}, {"class": "parking meter", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a parking meter above a broccoli"}
435 | {"tag": "position", "include": [{"class": "truck", "count": 1}, {"class": "frisbee", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a frisbee above a truck"}
436 | {"tag": "position", "include": [{"class": "banana", "count": 1}, {"class": "pizza", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a pizza right of a banana"}
437 | {"tag": "position", "include": [{"class": "boat", "count": 1}, {"class": "bus", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bus above a boat"}
438 | {"tag": "position", "include": [{"class": "tennis racket", "count": 1}, {"class": "cell phone", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a cell phone left of a tennis racket"}
439 | {"tag": "position", "include": [{"class": "broccoli", "count": 1}, {"class": "horse", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a horse right of a broccoli"}
440 | {"tag": "position", "include": [{"class": "bottle", "count": 1}, {"class": "broccoli", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a broccoli above a bottle"}
441 | {"tag": "position", "include": [{"class": "horse", "count": 1}, {"class": "vase", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a vase right of a horse"}
442 | {"tag": "position", "include": [{"class": "spoon", "count": 1}, {"class": "bear", "count": 1, "position": ["above", 0]}], "prompt": "a photo of a bear above a spoon"}
443 | {"tag": "position", "include": [{"class": "bed", "count": 1}, {"class": "zebra", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a zebra right of a bed"}
444 | {"tag": "position", "include": [{"class": "laptop", "count": 1}, {"class": "cow", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a cow right of a laptop"}
445 | {"tag": "position", "include": [{"class": "frisbee", "count": 1}, {"class": "bed", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a bed right of a frisbee"}
446 | {"tag": "position", "include": [{"class": "motorcycle", "count": 1}, {"class": "tie", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a tie right of a motorcycle"}
447 | {"tag": "position", "include": [{"class": "tv", "count": 1}, {"class": "laptop", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a laptop right of a tv"}
448 | {"tag": "position", "include": [{"class": "chair", "count": 1}, {"class": "cell phone", "count": 1, "position": ["right of", 0]}], "prompt": "a photo of a cell phone right of a chair"}
449 | {"tag": "position", "include": [{"class": "potted plant", "count": 1}, {"class": "couch", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a couch below a potted plant"}
450 | {"tag": "position", "include": [{"class": "tv", "count": 1}, {"class": "clock", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a clock below a tv"}
451 | {"tag": "position", "include": [{"class": "vase", "count": 1}, {"class": "couch", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a couch below a vase"}
452 | {"tag": "position", "include": [{"class": "cat", "count": 1}, {"class": "donut", "count": 1, "position": ["below", 0]}], "prompt": "a photo of a donut below a cat"}
453 | {"tag": "position", "include": [{"class": "toaster", "count": 1}, {"class": "couch", "count": 1, "position": ["left of", 0]}], "prompt": "a photo of a couch left of a toaster"}
454 | {"tag": "color_attr", "include": [{"class": "wine glass", "count": 1, "color": "purple"}, {"class": "apple", "count": 1, "color": "black"}], "prompt": "a photo of a purple wine glass and a black apple"}
455 | {"tag": "color_attr", "include": [{"class": "bus", "count": 1, "color": "green"}, {"class": "microwave", "count": 1, "color": "purple"}], "prompt": "a photo of a green bus and a purple microwave"}
456 | {"tag": "color_attr", "include": [{"class": "skis", "count": 1, "color": "green"}, {"class": "airplane", "count": 1, "color": "brown"}], "prompt": "a photo of a green skis and a brown airplane"}
457 | {"tag": "color_attr", "include": [{"class": "computer keyboard", "count": 1, "color": "yellow"}, {"class": "sink", "count": 1, "color": "black"}], "prompt": "a photo of a yellow computer keyboard and a black sink"}
458 | {"tag": "color_attr", "include": [{"class": "oven", "count": 1, "color": "pink"}, {"class": "motorcycle", "count": 1, "color": "green"}], "prompt": "a photo of a pink oven and a green motorcycle"}
459 | {"tag": "color_attr", "include": [{"class": "parking meter", "count": 1, "color": "purple"}, {"class": "laptop", "count": 1, "color": "red"}], "prompt": "a photo of a purple parking meter and a red laptop"}
460 | {"tag": "color_attr", "include": [{"class": "skateboard", "count": 1, "color": "yellow"}, {"class": "computer mouse", "count": 1, "color": "orange"}], "prompt": "a photo of a yellow skateboard and an orange computer mouse"}
461 | {"tag": "color_attr", "include": [{"class": "skis", "count": 1, "color": "red"}, {"class": "tie", "count": 1, "color": "brown"}], "prompt": "a photo of a red skis and a brown tie"}
462 | {"tag": "color_attr", "include": [{"class": "skateboard", "count": 1, "color": "pink"}, {"class": "train", "count": 1, "color": "black"}], "prompt": "a photo of a pink skateboard and a black train"}
463 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "white"}, {"class": "bed", "count": 1, "color": "purple"}], "prompt": "a photo of a white handbag and a purple bed"}
464 | {"tag": "color_attr", "include": [{"class": "elephant", "count": 1, "color": "purple"}, {"class": "sports ball", "count": 1, "color": "brown"}], "prompt": "a photo of a purple elephant and a brown sports ball"}
465 | {"tag": "color_attr", "include": [{"class": "dog", "count": 1, "color": "purple"}, {"class": "dining table", "count": 1, "color": "black"}], "prompt": "a photo of a purple dog and a black dining table"}
466 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "white"}, {"class": "car", "count": 1, "color": "red"}], "prompt": "a photo of a white dining table and a red car"}
467 | {"tag": "color_attr", "include": [{"class": "cell phone", "count": 1, "color": "blue"}, {"class": "apple", "count": 1, "color": "green"}], "prompt": "a photo of a blue cell phone and a green apple"}
468 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "red"}, {"class": "potted plant", "count": 1, "color": "orange"}], "prompt": "a photo of a red car and an orange potted plant"}
469 | {"tag": "color_attr", "include": [{"class": "carrot", "count": 1, "color": "brown"}, {"class": "potted plant", "count": 1, "color": "white"}], "prompt": "a photo of a brown carrot and a white potted plant"}
470 | {"tag": "color_attr", "include": [{"class": "kite", "count": 1, "color": "black"}, {"class": "bear", "count": 1, "color": "green"}], "prompt": "a photo of a black kite and a green bear"}
471 | {"tag": "color_attr", "include": [{"class": "laptop", "count": 1, "color": "blue"}, {"class": "bear", "count": 1, "color": "brown"}], "prompt": "a photo of a blue laptop and a brown bear"}
472 | {"tag": "color_attr", "include": [{"class": "teddy bear", "count": 1, "color": "green"}, {"class": "kite", "count": 1, "color": "brown"}], "prompt": "a photo of a green teddy bear and a brown kite"}
473 | {"tag": "color_attr", "include": [{"class": "stop sign", "count": 1, "color": "yellow"}, {"class": "potted plant", "count": 1, "color": "blue"}], "prompt": "a photo of a yellow stop sign and a blue potted plant"}
474 | {"tag": "color_attr", "include": [{"class": "snowboard", "count": 1, "color": "orange"}, {"class": "cat", "count": 1, "color": "green"}], "prompt": "a photo of an orange snowboard and a green cat"}
475 | {"tag": "color_attr", "include": [{"class": "truck", "count": 1, "color": "orange"}, {"class": "sink", "count": 1, "color": "pink"}], "prompt": "a photo of an orange truck and a pink sink"}
476 | {"tag": "color_attr", "include": [{"class": "hot dog", "count": 1, "color": "brown"}, {"class": "pizza", "count": 1, "color": "purple"}], "prompt": "a photo of a brown hot dog and a purple pizza"}
477 | {"tag": "color_attr", "include": [{"class": "couch", "count": 1, "color": "green"}, {"class": "umbrella", "count": 1, "color": "orange"}], "prompt": "a photo of a green couch and an orange umbrella"}
478 | {"tag": "color_attr", "include": [{"class": "bed", "count": 1, "color": "brown"}, {"class": "cell phone", "count": 1, "color": "pink"}], "prompt": "a photo of a brown bed and a pink cell phone"}
479 | {"tag": "color_attr", "include": [{"class": "broccoli", "count": 1, "color": "black"}, {"class": "cake", "count": 1, "color": "yellow"}], "prompt": "a photo of a black broccoli and a yellow cake"}
480 | {"tag": "color_attr", "include": [{"class": "train", "count": 1, "color": "red"}, {"class": "bear", "count": 1, "color": "purple"}], "prompt": "a photo of a red train and a purple bear"}
481 | {"tag": "color_attr", "include": [{"class": "tennis racket", "count": 1, "color": "purple"}, {"class": "sink", "count": 1, "color": "black"}], "prompt": "a photo of a purple tennis racket and a black sink"}
482 | {"tag": "color_attr", "include": [{"class": "vase", "count": 1, "color": "blue"}, {"class": "banana", "count": 1, "color": "black"}], "prompt": "a photo of a blue vase and a black banana"}
483 | {"tag": "color_attr", "include": [{"class": "clock", "count": 1, "color": "blue"}, {"class": "cup", "count": 1, "color": "white"}], "prompt": "a photo of a blue clock and a white cup"}
484 | {"tag": "color_attr", "include": [{"class": "umbrella", "count": 1, "color": "red"}, {"class": "couch", "count": 1, "color": "blue"}], "prompt": "a photo of a red umbrella and a blue couch"}
485 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "white"}, {"class": "giraffe", "count": 1, "color": "red"}], "prompt": "a photo of a white handbag and a red giraffe"}
486 | {"tag": "color_attr", "include": [{"class": "tv remote", "count": 1, "color": "pink"}, {"class": "airplane", "count": 1, "color": "blue"}], "prompt": "a photo of a pink tv remote and a blue airplane"}
487 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "pink"}, {"class": "scissors", "count": 1, "color": "black"}], "prompt": "a photo of a pink handbag and a black scissors"}
488 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "brown"}, {"class": "hair drier", "count": 1, "color": "pink"}], "prompt": "a photo of a brown car and a pink hair drier"}
489 | {"tag": "color_attr", "include": [{"class": "bus", "count": 1, "color": "black"}, {"class": "cell phone", "count": 1, "color": "brown"}], "prompt": "a photo of a black bus and a brown cell phone"}
490 | {"tag": "color_attr", "include": [{"class": "sheep", "count": 1, "color": "purple"}, {"class": "banana", "count": 1, "color": "pink"}], "prompt": "a photo of a purple sheep and a pink banana"}
491 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "blue"}, {"class": "cell phone", "count": 1, "color": "white"}], "prompt": "a photo of a blue handbag and a white cell phone"}
492 | {"tag": "color_attr", "include": [{"class": "pizza", "count": 1, "color": "white"}, {"class": "umbrella", "count": 1, "color": "green"}], "prompt": "a photo of a white pizza and a green umbrella"}
493 | {"tag": "color_attr", "include": [{"class": "tie", "count": 1, "color": "white"}, {"class": "skateboard", "count": 1, "color": "purple"}], "prompt": "a photo of a white tie and a purple skateboard"}
494 | {"tag": "color_attr", "include": [{"class": "sports ball", "count": 1, "color": "yellow"}, {"class": "boat", "count": 1, "color": "green"}], "prompt": "a photo of a yellow sports ball and a green boat"}
495 | {"tag": "color_attr", "include": [{"class": "wine glass", "count": 1, "color": "white"}, {"class": "giraffe", "count": 1, "color": "brown"}], "prompt": "a photo of a white wine glass and a brown giraffe"}
496 | {"tag": "color_attr", "include": [{"class": "bowl", "count": 1, "color": "yellow"}, {"class": "baseball glove", "count": 1, "color": "white"}], "prompt": "a photo of a yellow bowl and a white baseball glove"}
497 | {"tag": "color_attr", "include": [{"class": "microwave", "count": 1, "color": "orange"}, {"class": "spoon", "count": 1, "color": "black"}], "prompt": "a photo of an orange microwave and a black spoon"}
498 | {"tag": "color_attr", "include": [{"class": "skateboard", "count": 1, "color": "orange"}, {"class": "bowl", "count": 1, "color": "pink"}], "prompt": "a photo of an orange skateboard and a pink bowl"}
499 | {"tag": "color_attr", "include": [{"class": "toilet", "count": 1, "color": "blue"}, {"class": "suitcase", "count": 1, "color": "white"}], "prompt": "a photo of a blue toilet and a white suitcase"}
500 | {"tag": "color_attr", "include": [{"class": "boat", "count": 1, "color": "white"}, {"class": "hot dog", "count": 1, "color": "orange"}], "prompt": "a photo of a white boat and an orange hot dog"}
501 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "yellow"}, {"class": "dog", "count": 1, "color": "pink"}], "prompt": "a photo of a yellow dining table and a pink dog"}
502 | {"tag": "color_attr", "include": [{"class": "cake", "count": 1, "color": "red"}, {"class": "chair", "count": 1, "color": "purple"}], "prompt": "a photo of a red cake and a purple chair"}
503 | {"tag": "color_attr", "include": [{"class": "tie", "count": 1, "color": "blue"}, {"class": "dining table", "count": 1, "color": "pink"}], "prompt": "a photo of a blue tie and a pink dining table"}
504 | {"tag": "color_attr", "include": [{"class": "cow", "count": 1, "color": "blue"}, {"class": "computer keyboard", "count": 1, "color": "black"}], "prompt": "a photo of a blue cow and a black computer keyboard"}
505 | {"tag": "color_attr", "include": [{"class": "pizza", "count": 1, "color": "yellow"}, {"class": "oven", "count": 1, "color": "green"}], "prompt": "a photo of a yellow pizza and a green oven"}
506 | {"tag": "color_attr", "include": [{"class": "laptop", "count": 1, "color": "red"}, {"class": "car", "count": 1, "color": "brown"}], "prompt": "a photo of a red laptop and a brown car"}
507 | {"tag": "color_attr", "include": [{"class": "computer keyboard", "count": 1, "color": "purple"}, {"class": "scissors", "count": 1, "color": "blue"}], "prompt": "a photo of a purple computer keyboard and a blue scissors"}
508 | {"tag": "color_attr", "include": [{"class": "surfboard", "count": 1, "color": "green"}, {"class": "oven", "count": 1, "color": "orange"}], "prompt": "a photo of a green surfboard and an orange oven"}
509 | {"tag": "color_attr", "include": [{"class": "parking meter", "count": 1, "color": "yellow"}, {"class": "refrigerator", "count": 1, "color": "pink"}], "prompt": "a photo of a yellow parking meter and a pink refrigerator"}
510 | {"tag": "color_attr", "include": [{"class": "computer mouse", "count": 1, "color": "brown"}, {"class": "bottle", "count": 1, "color": "purple"}], "prompt": "a photo of a brown computer mouse and a purple bottle"}
511 | {"tag": "color_attr", "include": [{"class": "umbrella", "count": 1, "color": "red"}, {"class": "cow", "count": 1, "color": "green"}], "prompt": "a photo of a red umbrella and a green cow"}
512 | {"tag": "color_attr", "include": [{"class": "giraffe", "count": 1, "color": "red"}, {"class": "cell phone", "count": 1, "color": "black"}], "prompt": "a photo of a red giraffe and a black cell phone"}
513 | {"tag": "color_attr", "include": [{"class": "oven", "count": 1, "color": "brown"}, {"class": "train", "count": 1, "color": "purple"}], "prompt": "a photo of a brown oven and a purple train"}
514 | {"tag": "color_attr", "include": [{"class": "baseball bat", "count": 1, "color": "blue"}, {"class": "book", "count": 1, "color": "pink"}], "prompt": "a photo of a blue baseball bat and a pink book"}
515 | {"tag": "color_attr", "include": [{"class": "cup", "count": 1, "color": "green"}, {"class": "bowl", "count": 1, "color": "yellow"}], "prompt": "a photo of a green cup and a yellow bowl"}
516 | {"tag": "color_attr", "include": [{"class": "suitcase", "count": 1, "color": "yellow"}, {"class": "bus", "count": 1, "color": "brown"}], "prompt": "a photo of a yellow suitcase and a brown bus"}
517 | {"tag": "color_attr", "include": [{"class": "motorcycle", "count": 1, "color": "orange"}, {"class": "donut", "count": 1, "color": "pink"}], "prompt": "a photo of an orange motorcycle and a pink donut"}
518 | {"tag": "color_attr", "include": [{"class": "giraffe", "count": 1, "color": "orange"}, {"class": "baseball glove", "count": 1, "color": "white"}], "prompt": "a photo of an orange giraffe and a white baseball glove"}
519 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "orange"}, {"class": "carrot", "count": 1, "color": "green"}], "prompt": "a photo of an orange handbag and a green carrot"}
520 | {"tag": "color_attr", "include": [{"class": "bottle", "count": 1, "color": "black"}, {"class": "refrigerator", "count": 1, "color": "white"}], "prompt": "a photo of a black bottle and a white refrigerator"}
521 | {"tag": "color_attr", "include": [{"class": "dog", "count": 1, "color": "white"}, {"class": "potted plant", "count": 1, "color": "blue"}], "prompt": "a photo of a white dog and a blue potted plant"}
522 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "orange"}, {"class": "car", "count": 1, "color": "red"}], "prompt": "a photo of an orange handbag and a red car"}
523 | {"tag": "color_attr", "include": [{"class": "stop sign", "count": 1, "color": "red"}, {"class": "book", "count": 1, "color": "blue"}], "prompt": "a photo of a red stop sign and a blue book"}
524 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "yellow"}, {"class": "toothbrush", "count": 1, "color": "orange"}], "prompt": "a photo of a yellow car and an orange toothbrush"}
525 | {"tag": "color_attr", "include": [{"class": "potted plant", "count": 1, "color": "black"}, {"class": "toilet", "count": 1, "color": "yellow"}], "prompt": "a photo of a black potted plant and a yellow toilet"}
526 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "brown"}, {"class": "suitcase", "count": 1, "color": "white"}], "prompt": "a photo of a brown dining table and a white suitcase"}
527 | {"tag": "color_attr", "include": [{"class": "donut", "count": 1, "color": "orange"}, {"class": "stop sign", "count": 1, "color": "yellow"}], "prompt": "a photo of an orange donut and a yellow stop sign"}
528 | {"tag": "color_attr", "include": [{"class": "suitcase", "count": 1, "color": "green"}, {"class": "boat", "count": 1, "color": "blue"}], "prompt": "a photo of a green suitcase and a blue boat"}
529 | {"tag": "color_attr", "include": [{"class": "tennis racket", "count": 1, "color": "orange"}, {"class": "sports ball", "count": 1, "color": "yellow"}], "prompt": "a photo of an orange tennis racket and a yellow sports ball"}
530 | {"tag": "color_attr", "include": [{"class": "computer keyboard", "count": 1, "color": "purple"}, {"class": "chair", "count": 1, "color": "red"}], "prompt": "a photo of a purple computer keyboard and a red chair"}
531 | {"tag": "color_attr", "include": [{"class": "suitcase", "count": 1, "color": "purple"}, {"class": "pizza", "count": 1, "color": "orange"}], "prompt": "a photo of a purple suitcase and an orange pizza"}
532 | {"tag": "color_attr", "include": [{"class": "bottle", "count": 1, "color": "white"}, {"class": "sheep", "count": 1, "color": "blue"}], "prompt": "a photo of a white bottle and a blue sheep"}
533 | {"tag": "color_attr", "include": [{"class": "backpack", "count": 1, "color": "purple"}, {"class": "umbrella", "count": 1, "color": "white"}], "prompt": "a photo of a purple backpack and a white umbrella"}
534 | {"tag": "color_attr", "include": [{"class": "potted plant", "count": 1, "color": "orange"}, {"class": "spoon", "count": 1, "color": "black"}], "prompt": "a photo of an orange potted plant and a black spoon"}
535 | {"tag": "color_attr", "include": [{"class": "tennis racket", "count": 1, "color": "green"}, {"class": "dog", "count": 1, "color": "black"}], "prompt": "a photo of a green tennis racket and a black dog"}
536 | {"tag": "color_attr", "include": [{"class": "handbag", "count": 1, "color": "yellow"}, {"class": "refrigerator", "count": 1, "color": "blue"}], "prompt": "a photo of a yellow handbag and a blue refrigerator"}
537 | {"tag": "color_attr", "include": [{"class": "broccoli", "count": 1, "color": "pink"}, {"class": "sink", "count": 1, "color": "red"}], "prompt": "a photo of a pink broccoli and a red sink"}
538 | {"tag": "color_attr", "include": [{"class": "bowl", "count": 1, "color": "red"}, {"class": "sink", "count": 1, "color": "pink"}], "prompt": "a photo of a red bowl and a pink sink"}
539 | {"tag": "color_attr", "include": [{"class": "toilet", "count": 1, "color": "white"}, {"class": "apple", "count": 1, "color": "red"}], "prompt": "a photo of a white toilet and a red apple"}
540 | {"tag": "color_attr", "include": [{"class": "dining table", "count": 1, "color": "pink"}, {"class": "sandwich", "count": 1, "color": "black"}], "prompt": "a photo of a pink dining table and a black sandwich"}
541 | {"tag": "color_attr", "include": [{"class": "car", "count": 1, "color": "black"}, {"class": "parking meter", "count": 1, "color": "green"}], "prompt": "a photo of a black car and a green parking meter"}
542 | {"tag": "color_attr", "include": [{"class": "bird", "count": 1, "color": "yellow"}, {"class": "motorcycle", "count": 1, "color": "black"}], "prompt": "a photo of a yellow bird and a black motorcycle"}
543 | {"tag": "color_attr", "include": [{"class": "giraffe", "count": 1, "color": "brown"}, {"class": "stop sign", "count": 1, "color": "white"}], "prompt": "a photo of a brown giraffe and a white stop sign"}
544 | {"tag": "color_attr", "include": [{"class": "banana", "count": 1, "color": "white"}, {"class": "elephant", "count": 1, "color": "black"}], "prompt": "a photo of a white banana and a black elephant"}
545 | {"tag": "color_attr", "include": [{"class": "cow", "count": 1, "color": "orange"}, {"class": "sandwich", "count": 1, "color": "purple"}], "prompt": "a photo of an orange cow and a purple sandwich"}
546 | {"tag": "color_attr", "include": [{"class": "clock", "count": 1, "color": "red"}, {"class": "cell phone", "count": 1, "color": "black"}], "prompt": "a photo of a red clock and a black cell phone"}
547 | {"tag": "color_attr", "include": [{"class": "knife", "count": 1, "color": "brown"}, {"class": "donut", "count": 1, "color": "blue"}], "prompt": "a photo of a brown knife and a blue donut"}
548 | {"tag": "color_attr", "include": [{"class": "cup", "count": 1, "color": "red"}, {"class": "handbag", "count": 1, "color": "pink"}], "prompt": "a photo of a red cup and a pink handbag"}
549 | {"tag": "color_attr", "include": [{"class": "bicycle", "count": 1, "color": "yellow"}, {"class": "motorcycle", "count": 1, "color": "red"}], "prompt": "a photo of a yellow bicycle and a red motorcycle"}
550 | {"tag": "color_attr", "include": [{"class": "orange", "count": 1, "color": "red"}, {"class": "broccoli", "count": 1, "color": "purple"}], "prompt": "a photo of a red orange and a purple broccoli"}
551 | {"tag": "color_attr", "include": [{"class": "traffic light", "count": 1, "color": "orange"}, {"class": "toilet", "count": 1, "color": "white"}], "prompt": "a photo of an orange traffic light and a white toilet"}
552 | {"tag": "color_attr", "include": [{"class": "cup", "count": 1, "color": "green"}, {"class": "pizza", "count": 1, "color": "red"}], "prompt": "a photo of a green cup and a red pizza"}
553 | {"tag": "color_attr", "include": [{"class": "pizza", "count": 1, "color": "blue"}, {"class": "baseball glove", "count": 1, "color": "yellow"}], "prompt": "a photo of a blue pizza and a yellow baseball glove"}
554 |
--------------------------------------------------------------------------------