'
130 |
131 | dataset = RefCOCODataset(test=ds_collections[args.dataset],
132 | tokenizer=tokenizer,
133 | prompt=prompt)
134 |
135 | dataloader = torch.utils.data.DataLoader(
136 | dataset=dataset,
137 | sampler=InferenceSampler(len(dataset)),
138 | batch_size=args.batch_size,
139 | num_workers=args.num_workers,
140 | pin_memory=True,
141 | drop_last=True,
142 | collate_fn=partial(collate_fn, tokenizer=tokenizer),
143 | )
144 |
145 | outputs = []
146 | for _, (input_ids, attention_mask, bboxes,
147 | hws) in tqdm(enumerate(dataloader)):
148 | pred = model.generate(
149 | input_ids=input_ids.cuda(),
150 | attention_mask=attention_mask.cuda(),
151 | do_sample=False,
152 | num_beams=1,
153 | max_new_tokens=28,
154 | min_new_tokens=10,
155 | length_penalty=1,
156 | num_return_sequences=1,
157 | use_cache=True,
158 | pad_token_id=tokenizer.eod_id,
159 | eos_token_id=tokenizer.eod_id,
160 | )
161 | answers = [
162 | tokenizer.decode(_[input_ids.size(1):].cpu(),
163 | skip_special_tokens=True) for _ in pred
164 | ]
165 |
166 | for bbox, hw, answer in zip(bboxes, hws, answers):
167 | outputs.append({
168 | 'answer': answer,
169 | 'gt_bbox': bbox,
170 | 'hw': hw,
171 | })
172 |
173 | torch.distributed.barrier()
174 |
175 | world_size = torch.distributed.get_world_size()
176 | merged_outputs = [None for _ in range(world_size)]
177 | torch.distributed.all_gather_object(merged_outputs, outputs)
178 |
179 | merged_outputs = [_ for _ in itertools.chain.from_iterable(merged_outputs)]
180 | PATTERN = re.compile(r'\((.*?)\),\((.*?)\)')
181 |
182 | if torch.distributed.get_rank() == 0:
183 | correct = total_cnt = 0
184 | for i, output in enumerate(merged_outputs):
185 | predict_bbox = re.findall(PATTERN, output['answer'])
186 | try:
187 | if ',' not in predict_bbox[0][0] or ',' not in predict_bbox[0][
188 | 1]:
189 | predict_bbox = (0., 0., 0., 0.)
190 | else:
191 | x1, y1 = [
192 | float(tmp) for tmp in predict_bbox[0][0].split(',')
193 | ]
194 | x2, y2 = [
195 | float(tmp) for tmp in predict_bbox[0][1].split(',')
196 | ]
197 | predict_bbox = (x1, y1, x2, y2)
198 | except:
199 | predict_bbox = (0., 0., 0., 0.)
200 | target_bbox = torch.tensor(output['gt_bbox'],
201 | dtype=torch.float32).view(-1, 4)
202 | predict_bbox = torch.tensor(predict_bbox,
203 | dtype=torch.float32).view(-1, 4) / 999
204 | predict_bbox[:, 0::2] *= output['hw'][1]
205 | predict_bbox[:, 1::2] *= output['hw'][0]
206 | iou, _ = box_iou(predict_bbox, target_bbox)
207 | iou = iou.item()
208 | total_cnt += 1
209 | if iou >= 0.5:
210 | correct += 1
211 |
212 | print(f"Evaluating {args.dataset} ...")
213 | print(f'Precision @ 1: {correct / total_cnt} \n')
214 | torch.distributed.barrier()
215 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/evaluate_multiple_choice.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import json
4 | import os
5 | from functools import partial
6 |
7 | import torch
8 | from tqdm import tqdm
9 | from transformers import AutoModelForCausalLM, AutoTokenizer
10 |
11 | multiple_choices = ['A', 'B', 'C', 'D', 'E']
12 |
13 | ds_collections = {
14 | 'scienceqa_test_img': {
15 | 'test': 'data/scienceqa/scienceqa_test_img.jsonl',
16 | }
17 | }
18 |
19 |
def collate_fn(batches, pad_token_id):
    """Flatten per-question candidate sequences into one left-padded batch.

    Args:
        batches: list of dataset items; each is a dict with the keys
            ``input_tokens`` (one token-id list per answer option),
            ``target_lengths`` and ``answer``.
        pad_token_id: token id used to left-pad shorter sequences.

    Returns:
        A tuple ``(input_tokens, attention_mask, target_lengths, answers,
        chunk_sizes)``.  ``input_tokens`` is a ``LongTensor`` holding every
        candidate sequence of every item, one per row.  ``attention_mask``
        is a float tensor of the same shape, 0 on padding and 1 elsewhere.
        ``chunk_sizes[i]`` is the number of rows contributed by item ``i``.
    """
    per_item_tokens = [item['input_tokens'] for item in batches]
    target_lengths = [item['target_lengths'] for item in batches]
    answers = [item['answer'] for item in batches]

    # Number of candidate rows per item, so callers can split results back.
    chunk_sizes = [len(candidates) for candidates in per_item_tokens]

    sequences = list(itertools.chain.from_iterable(per_item_tokens))
    lengths = [len(seq) for seq in sequences]
    max_length = max(lengths)
    pad_counts = [max_length - length for length in lengths]

    input_tokens = torch.LongTensor([
        [pad_token_id] * pad + list(seq)
        for seq, pad in zip(sequences, pad_counts)
    ])

    # Build the mask from the padding length instead of comparing against
    # pad_token_id: a genuine token that happens to equal the pad id must
    # stay visible to the model.
    positions = torch.arange(max_length).unsqueeze(0)
    attention_mask = (
        positions >= torch.tensor(pad_counts).unsqueeze(1)).float()

    return input_tokens, attention_mask, target_lengths, answers, chunk_sizes
38 |
39 |
class MultipleChoiceDataste(torch.utils.data.Dataset):
    """Multiple-choice dataset read from a JSON-lines file.

    Each non-blank line is a JSON object with the keys ``image``, ``hint``,
    ``question``, ``choices`` and ``answer``.  For every question one token
    sequence per answer option is produced (prompt followed by the option
    letter).
    """

    def __init__(self, test, prompt, tokenizer):
        """Load the raw lines of ``test``.

        Args:
            test: path of the JSON-lines file.
            prompt: format string with four positional slots
                (image, hint, question, options text).
            tokenizer: callable whose result exposes ``input_ids``.
        """
        # Close the file deterministically and drop blank lines, which
        # would otherwise fail in json.loads when the item is fetched.
        with open(test, encoding='utf-8') as f:
            self.datas = [line for line in f if line.strip()]
        self.prompt = prompt
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.datas)

    def __getitem__(self, idx):
        """Return the tokenized candidates for question ``idx``.

        Returns:
            dict with ``input_tokens`` (prompt tokens + option tokens, one
            list per option), ``target_lengths`` (token count of each
            option) and ``answer`` (copied from the record).
        """
        data = json.loads(self.datas[idx].strip())
        image = data['image']
        hint = data['hint'] if data['hint'] else 'N/A'
        question = data['question']

        choices = data['choices']
        choice_txt = '\n'.join(
            '{}. {}'.format(multiple_choices[i], choice)
            for i, choice in enumerate(choices))

        prompt = self.prompt.format(image, hint, question, choice_txt)

        prompt_tokens = self.tokenizer(prompt).input_ids
        # Each candidate continuation is a space followed by the letter.
        target_tokens = [
            self.tokenizer(' ' + letter).input_ids
            for letter in multiple_choices[:len(choices)]
        ]

        return {
            'input_tokens': [prompt_tokens + t for t in target_tokens],
            'target_lengths': [len(t) for t in target_tokens],
            'answer': data['answer'],
        }
76 |
77 |
class InferenceSampler(torch.utils.data.sampler.Sampler):
    """Sampler that gives each distributed rank one contiguous index range.

    The ``size`` indices are split into ``world_size`` shards whose sizes
    differ by at most one; the first ``size % world_size`` ranks get the
    extra element.  The rank and world size are read from
    ``torch.distributed`` when the sampler is constructed.
    """

    def __init__(self, size):
        """Compute the index range of the current rank.

        Args:
            size: total number of samples; must be positive.

        Raises:
            ValueError: if ``size`` is not positive.
        """
        self._size = int(size)
        # Explicit check: an ``assert`` would be stripped under ``-O``.
        if self._size <= 0:
            raise ValueError(f'size must be positive, got {size!r}')
        self._rank = torch.distributed.get_rank()
        self._world_size = torch.distributed.get_world_size()
        # Use the int-normalized size consistently.
        self._local_indices = self._get_local_indices(
            self._size, self._world_size, self._rank)

    @staticmethod
    def _get_local_indices(total_size, world_size, rank):
        """Return the ``range`` of indices owned by ``rank``."""
        shard_size, left = divmod(total_size, world_size)
        # Ranks below ``left`` hold one extra element each, so this rank's
        # start is shifted by the number of such ranks before it.
        begin = rank * shard_size + min(rank, left)
        end = begin + shard_size + int(rank < left)
        return range(begin, end)

    def __iter__(self):
        yield from self._local_indices

    def __len__(self):
        return len(self._local_indices)
103 |
104 |
if __name__ == '__main__':
    # Distributed multiple-choice evaluation: every rank scores its shard
    # of the dataset, then per-question 0/1 results are gathered and rank 0
    # prints the accuracy.

    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', type=str, default='')
    parser.add_argument('--dataset', type=str, default='')
    parser.add_argument('--batch-size', type=int, default=1)
    parser.add_argument('--num-workers', type=int, default=1)
    args = parser.parse_args()

    torch.distributed.init_process_group(
        backend='nccl',
        world_size=int(os.getenv('WORLD_SIZE', '1')),
        rank=int(os.getenv('RANK', '0')),
    )

    torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

    model = AutoModelForCausalLM.from_pretrained(
        args.checkpoint, device_map='cuda', trust_remote_code=True).eval()

    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint,
                                              trust_remote_code=True)

    # Slots: image path, hint, question, options text.  The image path is
    # wrapped in <img> tags (restored here; the tags had been lost from the
    # literal, leaving it split over two lines).
    prompt = '<img>{}</img>Context: {}\nQuestion: {}\nOptions: {}\nAnswer:'

    dataset = MultipleChoiceDataste(test=ds_collections[args.dataset]['test'],
                                    prompt=prompt,
                                    tokenizer=tokenizer)
    dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        sampler=InferenceSampler(len(dataset)),
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=partial(collate_fn, pad_token_id=tokenizer.eod_id),
    )

    results = []
    with torch.no_grad():
        for (input_tokens, attention_mask, target_lengths, answers,
             chunk_sizes) in tqdm(dataloader):

            # Teacher forcing: feed tokens [0, n-1) and score tokens [1, n).
            outputs = model(
                input_ids=input_tokens[:, :-1].cuda(),
                attention_mask=attention_mask[:, :-1].cuda(),
                return_dict=True,
            )
            losses = torch.nn.functional.cross_entropy(
                outputs.logits.permute(0, 2, 1),
                input_tokens[:, 1:].cuda(),
                reduction='none')

            # One chunk of candidate rows per question.
            losses = losses.split(chunk_sizes, dim=0)

            # ``answers`` is the batch list; ``answer`` the per-question
            # value (the two used to share one name).
            for loss, target_length, answer in zip(losses, target_lengths,
                                                   answers):
                # Mean loss over the trailing option tokens of each row;
                # the option with the lowest loss is the prediction.
                target_loss = torch.stack([
                    loss[row, -length:].mean()
                    for row, length in enumerate(target_length)
                ])
                pred = target_loss.argmin().item()
                results.append(1 if pred == answer else 0)

    torch.distributed.barrier()

    world_size = torch.distributed.get_world_size()
    merged_results = [None for _ in range(world_size)]
    torch.distributed.all_gather_object(merged_results, results)

    merged_results = list(itertools.chain.from_iterable(merged_results))

    if torch.distributed.get_rank() == 0:
        print(f"Evaluating {args.dataset} ...")
        print(f'Acc@1: {sum(merged_results) / len(merged_results)}')

    torch.distributed.barrier()
186 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mmbench/MMBENCH.md:
--------------------------------------------------------------------------------
1 | # MMBench Evaluation
2 |
3 | ## Data
4 |
5 | ```bash
6 | /cpfs01/shared/public/shusheng.yss/workspace/23082502_qwenvl_eval_test/eval_mm/data/mmbench
7 | ```
8 |
9 | ## Dev
10 |
11 | ```bash
12 | checkpoint=/PATH/TO/CHECKPOINT
13 | ds=mmbench_dev_20230712
14 | python -m torch.distributed.launch --use-env \
15 | --nproc_per_node ${NPROC_PER_NODE:-8} \
16 | --nnodes ${WORLD_SIZE:-1} \
17 | --node_rank ${RANK:-0} \
18 | --master_addr ${MASTER_ADDR:-127.0.0.1} \
19 | --master_port ${MASTER_PORT:-12345} \
20 | evaluate_multiple_choice_mmbench.py \
21 | --checkpoint $checkpoint \
22 | --dataset $ds \
23 | --batch-size 2 \
24 | --num-workers 2
25 |
26 | # the results will be saved to mmbench_dev_20230712.json
27 |
28 | # without consistency constraint
29 |
30 | python mmbench_evaluation.py
31 |
32 | # with consistency constraint
33 |
34 | python mmbench_evaluation_tricky.py
35 |
36 | ```
37 |
38 | ## Test
39 |
40 | ```bash
41 | checkpoint=/PATH/TO/CHECKPOINT
42 | ds=mmbench_test_20230712
43 | python -m torch.distributed.launch --use-env \
44 | --nproc_per_node ${NPROC_PER_NODE:-8} \
45 | --nnodes ${WORLD_SIZE:-1} \
46 | --node_rank ${RANK:-0} \
47 | --master_addr ${MASTER_ADDR:-127.0.0.1} \
48 | --master_port ${MASTER_PORT:-12345} \
49 | evaluate_multiple_choice_mmbench.py \
50 | --checkpoint $checkpoint \
51 | --dataset $ds \
52 | --batch-size 2 \
53 | --num-workers 2
54 |
55 | # the results will be saved to mmbench_test_20230712.json
56 |
57 | # convert to submission format with consistency constraint
58 |
59 | python mmbench_predict_to_submission.py
60 |
61 | ```
62 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mmbench/evaluate_multiple_choice_mmbench.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import json
4 | import os
5 | from functools import partial
6 |
7 | import torch
8 | from tqdm import tqdm
9 | from transformers import AutoModelForCausalLM, AutoTokenizer
10 |
11 | multiple_choices = ['A', 'B', 'C', 'D', 'E']
12 |
13 | ds_collections = {
14 | 'mmbench_dev_20230712': {
15 | 'test': 'data/mmbench/mmbench_dev_20230712/mmbench_dev_20230712.jsonl',
16 | },
17 | 'mmbench_test_20230712': {
18 | 'test': 'data/mmbench/mmbench_test_20230712/mmbench_test_20230712.jsonl',
19 | }
20 | }
21 |
def collate_fn(batches, pad_token_id):
    """Flatten per-question candidate sequences into one left-padded batch.

    Args:
        batches: list of dataset items; each is a dict with the keys
            ``index``, ``input_tokens`` (one token-id list per answer
            option) and ``target_lengths``.
        pad_token_id: token id used to left-pad shorter sequences.

    Returns:
        A tuple ``(input_tokens, attention_mask, target_lengths,
        chunk_sizes, indexes)``.  ``input_tokens`` is a ``LongTensor``
        holding every candidate sequence of every item, one per row.
        ``attention_mask`` is a float tensor of the same shape, 0 on padding
        and 1 elsewhere.  ``chunk_sizes[i]`` is the number of rows
        contributed by item ``i``.
    """
    indexes = [item['index'] for item in batches]

    per_item_tokens = [item['input_tokens'] for item in batches]
    target_lengths = [item['target_lengths'] for item in batches]

    # Number of candidate rows per item, so callers can split results back.
    chunk_sizes = [len(candidates) for candidates in per_item_tokens]

    sequences = list(itertools.chain.from_iterable(per_item_tokens))
    lengths = [len(seq) for seq in sequences]
    max_length = max(lengths)
    pad_counts = [max_length - length for length in lengths]

    input_tokens = torch.LongTensor([
        [pad_token_id] * pad + list(seq)
        for seq, pad in zip(sequences, pad_counts)
    ])

    # Build the mask from the padding length instead of comparing against
    # pad_token_id: a genuine token that happens to equal the pad id must
    # stay visible to the model.
    positions = torch.arange(max_length).unsqueeze(0)
    attention_mask = (
        positions >= torch.tensor(pad_counts).unsqueeze(1)).float()

    return input_tokens, attention_mask, target_lengths, chunk_sizes, indexes
41 |
42 |
class MultipleChoiceDataste(torch.utils.data.Dataset):
    """MMBench multiple-choice dataset read from a JSON-lines file.

    Each non-blank line is a JSON object with the keys ``index``, ``image``,
    ``hint``, ``question`` and ``choices``.  For every question one token
    sequence per answer option is produced (prompt followed by the option
    letter).  No ground-truth answer is returned.
    """

    def __init__(self, test, prompt, tokenizer):
        """Load the raw lines of ``test``.

        Args:
            test: path of the JSON-lines file.
            prompt: format string with four positional slots
                (image, hint, question, options text).
            tokenizer: callable whose result exposes ``input_ids``.
        """
        # Close the file deterministically and drop blank lines, which
        # would otherwise fail in json.loads when the item is fetched.
        with open(test, encoding='utf-8') as f:
            self.datas = [line for line in f if line.strip()]
        self.prompt = prompt
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.datas)

    def __getitem__(self, idx):
        """Return the tokenized candidates for question ``idx``.

        Returns:
            dict with ``index`` (copied from the record), ``input_tokens``
            (prompt tokens + option tokens, one list per option) and
            ``target_lengths`` (token count of each option).
        """
        data = json.loads(self.datas[idx].strip())
        index = data['index']
        image = data['image']
        hint = data['hint'] if data['hint'] else 'N/A'
        question = data['question']

        choices = data['choices']
        choice_txt = '\n'.join(
            '{}. {}'.format(multiple_choices[i], choice)
            for i, choice in enumerate(choices))

        prompt = self.prompt.format(image, hint, question, choice_txt)

        prompt_tokens = self.tokenizer(prompt).input_ids
        # Each candidate continuation is a space followed by the letter.
        target_tokens = [
            self.tokenizer(' ' + letter).input_ids
            for letter in multiple_choices[:len(choices)]
        ]

        return {
            'index': index,
            'input_tokens': [prompt_tokens + t for t in target_tokens],
            'target_lengths': [len(t) for t in target_tokens],
        }
81 |
82 |
class InferenceSampler(torch.utils.data.sampler.Sampler):
    """Sampler that gives each distributed rank one contiguous index range.

    The ``size`` indices are split into ``world_size`` shards whose sizes
    differ by at most one; the first ``size % world_size`` ranks get the
    extra element.  The rank and world size are read from
    ``torch.distributed`` when the sampler is constructed.
    """

    def __init__(self, size):
        """Compute the index range of the current rank.

        Args:
            size: total number of samples; must be positive.

        Raises:
            ValueError: if ``size`` is not positive.
        """
        self._size = int(size)
        # Explicit check: an ``assert`` would be stripped under ``-O``.
        if self._size <= 0:
            raise ValueError(f'size must be positive, got {size!r}')
        self._rank = torch.distributed.get_rank()
        self._world_size = torch.distributed.get_world_size()
        # Use the int-normalized size consistently.
        self._local_indices = self._get_local_indices(
            self._size, self._world_size, self._rank)

    @staticmethod
    def _get_local_indices(total_size, world_size, rank):
        """Return the ``range`` of indices owned by ``rank``."""
        shard_size, left = divmod(total_size, world_size)
        # Ranks below ``left`` hold one extra element each, so this rank's
        # start is shifted by the number of such ranks before it.
        begin = rank * shard_size + min(rank, left)
        end = begin + shard_size + int(rank < left)
        return range(begin, end)

    def __iter__(self):
        yield from self._local_indices

    def __len__(self):
        return len(self._local_indices)
108 |
109 |
if __name__ == '__main__':
    # Distributed MMBench inference: every rank scores its shard of the
    # dataset, the per-question predictions are gathered, and rank 0 writes
    # them to ``<dataset>.json``.

    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', type=str, default='')
    parser.add_argument('--dataset', type=str, default='')
    parser.add_argument('--batch-size', type=int, default=1)
    parser.add_argument('--num-workers', type=int, default=1)
    args = parser.parse_args()

    torch.distributed.init_process_group(
        backend='nccl',
        world_size=int(os.getenv('WORLD_SIZE', '1')),
        rank=int(os.getenv('RANK', '0')),
    )

    torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

    model = AutoModelForCausalLM.from_pretrained(
        args.checkpoint, device_map='cuda', trust_remote_code=True).eval()

    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint,
                                              trust_remote_code=True)

    # Slots: image path, hint, question, options text.  The image path is
    # wrapped in <img> tags (restored here; the tags had been lost from the
    # literal, leaving it split over two lines).
    prompt = '<img>{}</img>Context: {}\nQuestion: {}\nOptions: {}\nAnswer:'

    dataset = MultipleChoiceDataste(test=ds_collections[args.dataset]['test'],
                                    prompt=prompt,
                                    tokenizer=tokenizer)
    dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        sampler=InferenceSampler(len(dataset)),
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=partial(collate_fn, pad_token_id=tokenizer.eod_id),
    )

    results = []
    with torch.no_grad():
        for (input_tokens, attention_mask, target_lengths, chunk_sizes,
             indexes) in tqdm(dataloader):

            # Teacher forcing: feed tokens [0, n-1) and score tokens [1, n).
            outputs = model(
                input_ids=input_tokens[:, :-1].cuda(),
                attention_mask=attention_mask[:, :-1].cuda(),
                return_dict=True,
            )
            losses = torch.nn.functional.cross_entropy(
                outputs.logits.permute(0, 2, 1),
                input_tokens[:, 1:].cuda(),
                reduction='none')

            # One chunk of candidate rows per question.
            losses = losses.split(chunk_sizes, dim=0)

            for loss, target_length, index in zip(losses, target_lengths,
                                                  indexes):
                # Mean loss over the trailing option tokens of each row;
                # the option with the lowest loss is the prediction.
                target_loss = torch.stack([
                    loss[row, -length:].mean()
                    for row, length in enumerate(target_length)
                ])
                pred = target_loss.argmin().item()

                results.append({
                    "index": index,
                    "prediction": pred,
                })

    torch.distributed.barrier()

    world_size = torch.distributed.get_world_size()
    merged_results = [None for _ in range(world_size)]
    torch.distributed.all_gather_object(merged_results, results)

    merged_results = list(itertools.chain.from_iterable(merged_results))

    if torch.distributed.get_rank() == 0:
        # Context manager so the JSON is flushed and the handle closed
        # before the final barrier.
        with open(f"{args.dataset}.json", "w") as fout:
            json.dump(merged_results, fout)

    torch.distributed.barrier()
190 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mmbench/mmbench_converter_dev.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import io
3 | import base64
4 | import json
5 | from PIL import Image
6 |
7 | '''
8 | This script converts the mmbench_dev TSV file to JSONL.
9 | '''
10 |
import os  # used only below, to create the image output directory

datas = pd.read_csv("data/mmbench/mmbench_dev_20230712/mmbench_dev_20230712.tsv", sep='\t')

global_choices = ['A', 'B', 'C', 'D']


def decode_base64_to_image(base64_string):
    """Decode a base64-encoded image payload into a PIL image."""
    image_data = base64.b64decode(base64_string)
    image = Image.open(io.BytesIO(image_data))
    return image


# image.save() does not create missing directories.
os.makedirs("data/mmbench/mmbench_dev_20230712/images", exist_ok=True)

with open('./data/mmbench/mmbench_dev_20230712/mmbench_dev_20230712.jsonl', 'w') as f:
    for _, data in datas.iterrows():
        index = int(data['index'])
        question = data['question']
        hint = data['hint'] if not pd.isna(data['hint']) else 'N/A'

        # Keep only the options that are present for this question.
        choices = [data[opt] for opt in global_choices if not pd.isna(data[opt])]

        # NOTE(review): the answer is an index into global_choices, which
        # matches ``choices`` only while missing options are trailing ones.
        answer = global_choices.index(data['answer'])

        image = decode_base64_to_image(data['image'])
        # JPEG cannot store alpha or palette images; convert those first.
        if image.mode in ('RGBA', 'LA', 'P'):
            image = image.convert('RGB')
        image.save("data/mmbench/mmbench_dev_20230712/images/%d.jpg" % index)

        f.write(json.dumps({
            "index": index,
            "image": "data/mmbench/mmbench_dev_20230712/images/%d.jpg" % index,
            "hint": hint,
            "question": question,
            "choices": choices,
            "answer": answer,
        }) + "\n")
48 |
49 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mmbench/mmbench_converter_test.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import io
3 | import base64
4 | import json
5 | from PIL import Image
6 |
7 | '''
8 | This script converts the mmbench_test TSV file to JSONL.
9 | It is very similar to mmbench_converter_dev, except that the test split has no answers for accuracy calculation.
10 | '''
11 |
import os  # used only below, to create the image output directory

datas = pd.read_csv("data/mmbench/mmbench_test_20230712/mmbench_test_20230712.tsv", sep='\t')

global_choices = ['A', 'B', 'C', 'D']


def decode_base64_to_image(base64_string):
    """Decode a base64-encoded image payload into a PIL image."""
    image_data = base64.b64decode(base64_string)
    image = Image.open(io.BytesIO(image_data))
    return image


# image.save() does not create missing directories.
os.makedirs("data/mmbench/mmbench_test_20230712/images", exist_ok=True)

with open('./data/mmbench/mmbench_test_20230712/mmbench_test_20230712.jsonl', 'w') as f:
    for _, data in datas.iterrows():
        index = int(data['index'])
        question = data['question']
        hint = data['hint'] if not pd.isna(data['hint']) else 'N/A'

        # Keep only the options that are present for this question.
        choices = [data[opt] for opt in global_choices if not pd.isna(data[opt])]

        image = decode_base64_to_image(data['image'])
        # JPEG cannot store alpha or palette images; convert those first.
        if image.mode in ('RGBA', 'LA', 'P'):
            image = image.convert('RGB')
        image.save("data/mmbench/mmbench_test_20230712/images/%d.jpg" % index)

        # No "answer" field is written for the test split.
        f.write(json.dumps({
            "index": index,
            "image": "data/mmbench/mmbench_test_20230712/images/%d.jpg" % index,
            "hint": hint,
            "question": question,
            "choices": choices,
        }) + "\n")
49 |
50 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mmbench/mmbench_evaluation.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import json
3 |
4 | '''
5 | This script provides `global top-1 accuracy` metric calculation for mmbench_dev.
6 | '''
7 |
# Question ids of the form ``k * CYCLE_STRIDE + base`` (k = 0..3) are
# treated as one group; a group counts as correct only when every
# variant that has a prediction is answered correctly.
CYCLE_STRIDE = 1000000

with open('mmbench_dev_20230712.json') as fin:
    predictions = json.load(fin)

index2predictions = {pred['index']: pred['prediction'] for pred in predictions}

datas = pd.read_csv("data/mmbench/mmbench_dev_20230712/mmbench_dev_20230712.tsv", sep='\t')

glb_opts = ['A', 'B', 'C', 'D']
index2answer = {}
for _, data in datas.iterrows():
    index2answer[data['index']] = glb_opts.index(data['answer'])

# Base id of every group that has at least one prediction.
identity_indexes = list({int(i % CYCLE_STRIDE) for i in index2predictions})

correct = 0
total = 0
for index in identity_indexes:
    for cycle in range(4):
        cycle_index = int(cycle * CYCLE_STRIDE + index)
        if index2predictions.get(cycle_index, None) is None:
            continue  # this variant was not predicted
        if index2predictions[cycle_index] != index2answer[cycle_index]:
            # Report the first wrong variant and fail the whole group.
            print(cycle_index)
            break
    else:
        # Loop finished without a wrong variant.
        correct += 1
    total += 1

print(correct, total)
40 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mmbench/mmbench_evaluation_tricky.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import json
3 | import random
4 |
5 | '''
6 | This script provides metric calculation for mmbench_dev using the same accuracy algorithm as the OpenCompass server.
7 | '''
8 |
# Question ids of the form ``k * CYCLE_STRIDE + base`` (k = 0..3) are
# treated as one group; the group's prediction is the majority vote of
# the option texts predicted for its variants.
CYCLE_STRIDE = 1000000

with open('mmbench_dev_20230712.json') as fin:
    predictions = json.load(fin)

index2predictions = {pred['index']: pred['prediction'] for pred in predictions}


from collections import Counter


def most_common_elements(lst):
    """Return a most frequent element of ``lst``; ties are broken at random."""
    counter = Counter(lst)
    max_count = max(counter.values())
    most_common = [element for element, count in counter.items() if count == max_count]
    return random.choice(most_common)


datas = pd.read_csv("data/mmbench/mmbench_dev_20230712/mmbench_dev_20230712.tsv", sep='\t')

glb_opts = ['A', 'B', 'C', 'D']
index2answer = {}
index2choices = {}
index2rawanswer = {}
for _, data in datas.iterrows():
    # Option texts that are present for this question, in A..D order.
    choices = [data[opt] for opt in glb_opts if not pd.isna(data[opt])]
    index2choices[data['index']] = choices

    answer_pos = glb_opts.index(data['answer'])
    index2answer[data['index']] = answer_pos
    index2rawanswer[data['index']] = choices[answer_pos]

# Base id of every group that has at least one prediction.
identity_indexes = list({int(i % CYCLE_STRIDE) for i in index2predictions})

correct = 0
total = 0
for index in identity_indexes:
    raw_preds = []
    raw_answer = []
    for cycle in range(4):
        cycle_index = int(cycle * CYCLE_STRIDE + index)
        if index2predictions.get(cycle_index, None) is not None:
            # Compare option texts, not positions.
            raw_answer = index2rawanswer[cycle_index]
            raw_pred = index2choices[cycle_index][index2predictions[cycle_index]]
            raw_preds.append(raw_pred)

    if len(set(raw_preds)) == 1:
        if raw_preds[0] == raw_answer:
            correct += 1
    else:
        result = most_common_elements(raw_preds)
        if result == raw_answer:
            correct += 1

    total += 1

print(correct, total, correct / total * 100.)
67 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mmbench/mmbench_predict_to_submission.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import json
3 | import random
4 |
5 | '''
6 | This script converts the output file of our inference script to the format expected by the OpenCompass evaluation server.
7 | '''
8 |
# Question ids of the form ``k * CYCLE_STRIDE + base`` (k = 0..3) are
# treated as one group that is forced to share a single predicted
# option text.
CYCLE_STRIDE = 1000000

with open('mmbench_test_20230712.json') as fin:
    predictions = json.load(fin)

index2predictions = {pred['index']: pred['prediction'] for pred in predictions}

from collections import Counter


def most_common_elements(lst):
    """Return a most frequent element of ``lst``; ties are broken at random."""
    counter = Counter(lst)
    max_count = max(counter.values())
    most_common = [element for element, count in counter.items() if count == max_count]
    print(most_common)
    return random.choice(most_common)


datas = pd.read_csv("data/mmbench/mmbench_test_20230712/mmbench_test_20230712.tsv", sep='\t')

# The image column is not written to the submission sheet.
datas = datas.drop('image', axis=1)

glb_opts = ['A', 'B', 'C', 'D']
index2choices = {}
for _, data in datas.iterrows():
    # Option texts that are present for this question, in A..D order.
    index2choices[data['index']] = [
        data[opt] for opt in glb_opts if not pd.isna(data[opt])
    ]

# Base id of every group that has at least one prediction.
identity_indexes = list({int(i % CYCLE_STRIDE) for i in index2predictions})


processed_index2predictions = {}
for index in identity_indexes:
    # Ids of this group's variants that actually have a prediction.
    cycle_indexes = [
        cycle_index
        for cycle_index in (int(cycle * CYCLE_STRIDE + index) for cycle in range(4))
        if index2predictions.get(cycle_index, None) is not None
    ]

    raw_preds = [
        index2choices[cycle_index][index2predictions[cycle_index]]
        for cycle_index in cycle_indexes
    ]

    if len(set(raw_preds)) == 1:
        pred_answer = raw_preds[0]
    else:
        pred_answer = most_common_elements(raw_preds)

    print(index, pred_answer)
    # Map the agreed option text back to its position in each variant.
    for cycle_index in cycle_indexes:
        processed_index2predictions[cycle_index] = index2choices[cycle_index].index(pred_answer)


predictions = [
    glb_opts[processed_index2predictions[data['index']]]
    for _, data in datas.iterrows()
]

datas['prediction'] = predictions
datas.to_excel("mmbench_test_20230712_230831_constrained.xlsx", index=False)
# "constrained" means the model is forced to give the same answer every time a question is tested
74 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mme/EVAL_MME.md:
--------------------------------------------------------------------------------
1 | # MME Benchmark
2 |
3 | [MME](https://github.com/BradyFU/Awesome-Multimodal-Large-Language-Models/tree/Evaluation) is a comprehensive evaluation benchmark for multimodal large language models. It measures both perception and cognition abilities on a total of 14 subtasks, including existence, count, position, color, poster, celebrity, scene, landmark, artwork, OCR, commonsense reasoning, numerical calculation, text translation, and code reasoning.
4 |
5 | Qwen-VL-Chat achieves state-of-the-art results on both the perception and cognition evaluations.
6 |
7 | Perception Evaluation
8 |
9 | | Rank | Model | Version | Score |
10 | |:----:|:---------------:|:------------------------:|:-------:|
11 | | 1 | **[Qwen-VL-Chat](https://github.com/QwenLM/Qwen-VL/)**| **[Qwen-7B](https://github.com/QwenLM/Qwen-7B)** | **1487.57** |
12 | | 2 | Skywork-MM | Skywork-MM-13B | 1419.08 |
13 | | 3 | MMICL | FlanT5xxl | 1376.00 |
14 | | 4 | Lynx | vicuna-7b | 1373.23 |
15 | | 5 | BLIVA | FlanT5xxl | 1337.73 |
16 |
17 | Cognition Evaluation
18 |
19 | | Rank | Model | Version | Score |
20 | |:----:|:----------------:|:--------------:|:----------:|
21 | | 1 | **[Qwen-VL-Chat](https://github.com/QwenLM/Qwen-VL/)** | **[Qwen-7B](https://github.com/QwenLM/Qwen-7B)** | **360.71** |
22 | | 2 | MMICL | FlanT5xxl | 360.36 |
23 | | 3 | Skywork-MM | Skywork-MM-13B | 356.43 |
24 | | 4 | BLIVA | FlanT5xxl | 331.43 |
25 | | 5 | LRV-Instruction | LRV-7B | 328.21 |
26 |
27 | Full Metrics
28 |
29 | ```
30 | =========== Perception ===========
31 | total score: 1487.576330532213
32 |
33 | existence score: 158.33333333333331
34 | count score: 150.0
35 | position score: 128.33333333333334
36 | color score: 170.0
37 | posters score: 178.57142857142856
38 | celebrity score: 120.58823529411764
39 | scene score: 152.25
40 | landmark score: 164.0
41 | artwork score: 125.5
42 | OCR score: 140.0
43 |
44 |
45 | =========== Cognition ===========
46 | total score: 360.71428571428567
47 |
48 | commonsense_reasoning score: 130.7142857142857
49 | numerical_calculation score: 40.0
50 | text_translation score: 147.5
51 | code_reasoning score: 42.5
52 | ```
53 |
54 | ## How To Reproduce Results of MME Benchmark
55 |
56 | 1. Download MME images and eval_tool from the [MME repo](https://github.com/BradyFU/Awesome-Multimodal-Large-Language-Models/blob/Evaluation/README.md)
57 | 2. Rearrange images by executing `python get_images.py`
58 | 3. Evaluate Qwen-VL-Chat results by executing `python eval.py`
59 | 4. Calculate MME results by executing `python calculation.py --results_dir Qwen-VL-Chat`; the calculation script comes from the MME eval_tool.
60 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mme/cognition.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/Qwen-VL/eval_mm/mme/cognition.jpg
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mme/eval.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tqdm import tqdm
3 |
4 | from transformers import AutoModelForCausalLM, AutoTokenizer
5 | from transformers.generation import GenerationConfig
6 |
# Runs Qwen-VL-Chat over every question file in ``root`` and writes one
# result file of the same name into ``output``, with the model response
# appended as a fourth tab-separated column.
checkpoint = 'Qwen/Qwen-VL-Chat'
tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, device_map='cuda', trust_remote_code=True).eval()

model.generation_config = GenerationConfig.from_pretrained(checkpoint, trust_remote_code=True)
model.generation_config.top_p = 0.01


root = 'Your_Results'
output = 'Qwen-VL-Chat'
os.makedirs(output, exist_ok=True)
for filename in os.listdir(root):
    # The file name without ``.txt`` is the image sub-directory; kept in
    # its own variable instead of overwriting the loop variable.
    task_name = filename.replace('.txt', '')
    with open(os.path.join(root, filename), 'r') as fin, \
            open(os.path.join(output, filename), 'w') as fout:
        lines = fin.read().splitlines()
        for line in tqdm(lines):
            # Each line: image file name, question, ground truth.
            img, question, gt = line.strip().split('\t')
            img_path = os.path.join('images', task_name, img)
            assert os.path.exists(img_path), img_path
            # The image path is wrapped in <img> tags (restored here; the
            # tags had been lost, leaving the literal split over two lines).
            query = f'<img>{img_path}</img>\n{question}'
            response, _ = model.chat(tokenizer, query=query, history=None)

            print(img, question, gt, response, sep='\t', file=fout)
31 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mme/get_images.py:
--------------------------------------------------------------------------------
1 | import os
2 | from tqdm import tqdm
3 |
# Rebuild ./images from ../MME_Benchmark_release by shelling out to
# rm/mkdir/cp.  Some tasks are copied as whole directories (``cp -r``);
# others get their image files and their question/answer files copied
# together into one ``images/<task>`` directory.
# NOTE: any existing ./images directory is deleted first.
os.system('rm -rf images')
os.system('mkdir images')

os.system('cp -r ../MME_Benchmark_release/OCR images/')

# artwork: copy the question/answer files, then only the images whose
# names appear in the first tab-separated column of LaVIN/artwork.txt.
os.system('mkdir images/artwork')
os.system('cp ../MME_Benchmark_release/artwork/questions_answers_YN/* images/artwork/')
with open('LaVIN/artwork.txt') as fin:
    paths = [ line.strip().split('\t', 1)[0] for line in fin ]
    # De-duplicate so each image is copied once.
    paths = list(set(paths))
    for path in tqdm(paths):
        os.system(f'cp ../MME_Benchmark_release/artwork/images/toy_dataset/{path} images/artwork/{path}')

# celebrity: images and question/answer files go into one directory.
os.system('mkdir images/celebrity')
os.system('cp ../MME_Benchmark_release/celebrity/images/* images/celebrity/')
os.system('cp ../MME_Benchmark_release/celebrity/questions_answers_YN/* images/celebrity/')

os.system('cp -r ../MME_Benchmark_release/code_reasoning images/')

os.system('cp -r ../MME_Benchmark_release/color images/')

os.system('cp -r ../MME_Benchmark_release/commonsense_reasoning images/')

os.system('cp -r ../MME_Benchmark_release/count images/')

os.system('cp -r ../MME_Benchmark_release/existence images/')

# landmark: images and question/answer files go into one directory.
os.system('mkdir images/landmark')
os.system('cp ../MME_Benchmark_release/landmark/images/* images/landmark/')
os.system('cp ../MME_Benchmark_release/landmark/questions_answers_YN/* images/landmark/')

os.system('cp -r ../MME_Benchmark_release/numerical_calculation images/')

os.system('cp -r ../MME_Benchmark_release/position images/')

# posters: images and question/answer files go into one directory.
os.system('mkdir images/posters')
os.system('cp ../MME_Benchmark_release/posters/images/* images/posters/')
os.system('cp ../MME_Benchmark_release/posters/questions_answers_YN/* images/posters/')

# scene: images and question/answer files go into one directory.
os.system('mkdir images/scene')
os.system('cp ../MME_Benchmark_release/scene/images/* images/scene/')
os.system('cp ../MME_Benchmark_release/scene/questions_answers_YN/* images/scene/')

os.system('cp -r ../MME_Benchmark_release/text_translation images/')
48 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/mme/perception.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/Qwen-VL/eval_mm/mme/perception.jpg
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/seed_bench/EVAL_SEED.md:
--------------------------------------------------------------------------------
1 | # Seed-Bench Evaluation
2 |
3 | [SEED-Bench](https://huggingface.co/spaces/AILab-CVC/SEED-Bench_Leaderboard) is a multimodal benchmark of 19K multiple-choice questions with accurate human annotations for evaluating Multimodal LLMs, covering 12 evaluation dimensions including both **image** and **video** understanding.
4 |
5 | Qwen-VL and Qwen-VL-Chat achieve SOTAs on this benchmark.
6 |
7 |
8 |
9 |
10 |
11 | ## How To Process Video by Qwen-VL
12 |
13 | Qwen-VL and Qwen-VL-Chat were not trained on any video data or video tasks, but they can understand some videos in a zero-shot way. For the video question-answering task, we uniformly sample four frames from each video. These frames are treated as separate images and are stitched into the context. For example:
14 |
15 | ```
16 | {
17 | "question_id": "v0",
18 | "prompt": "<img>video_imgs_4/v0_0.jpg</img>\n<img>video_imgs_4/v0_1.jpg</img>\n<img>video_imgs_4/v0_2.jpg</img>\n<img>video_imgs_4/v0_3.jpg</img>\nQuestion: Can you identify the action taking place in the video?\nOptions: A. pretending to take something out of something\nB. pretending to take something from somewhere\nC. feigning to insert something into something\nD. simulating putting something onto something\nAnswer:"
19 | }
20 | ```
21 |
22 | The above JSON line can be used as input to `eval_mm/seed_bench/eval.py`, which outputs the following result:
23 | ```
24 | {"question_id": "v0", "prediction": "B"}
25 | ```
26 |
27 | Please see [eval_mm/seed_bench/eval.py](eval.py) for more inference details.
28 |
29 | ## How To Reproduce Results of Seed-Bench
30 |
31 | 1. Download all images and videos by following the [instruction](https://github.com/AILab-CVC/SEED-Bench/blob/main/DATASET.md). Then modify the root path in `eval_mm/seed_bench/trans.py` with your customized path.
32 | ```
33 | # path of SEED-Bench.json, download from https://huggingface.co/datasets/AILab-CVC/SEED-Bench/blob/main/SEED-Bench.json
34 | seed_bench_input_path = 'SEED-Bench.json'
35 | # root directory of evaluation dimension 1-9, following https://github.com/AILab-CVC/SEED-Bench/blob/main/DATASET.md
36 | cc3m_dir = "/YOUR_PATH_TO/seed_bench_image"
37 | # root directory of evaluation dimension 10
38 | dimension10_dir = "/YOUR_PATH_TO/SSV2/videos"
39 | # root directory of evaluation dimension 11
40 | dimension11_dir = "/YOUR_PATH_TO/EPIC-KITCHENS/3h91syskeag572hl6tvuovwv4d/videos/test"
41 | # root directory of evaluation dimension 12
42 | dimension12_dir = "/YOUR_PATH_TO/BreakfastII_15fps_qvga_sync"
43 | ```
44 |
45 | 2. Generate input files of Qwen-VL with the JSON formatting.
46 | ```
47 | cd eval_mm/seed_bench/
48 | python trans.py
49 | ```
50 | This script will output two JSONL files and one directory. `image_input.jsonl` is the input file of image evaluation and `video_input_4.jsonl` is the input file of video evaluation by 4 frames. The directory `video_imgs_4` contains all 4-framed images extracted from videos. We provide our [image_input.jsonl](http://ofasys-wlcb.oss-cn-wulanchabu.aliyuncs.com/Qwen-VL/evaluation/seed_bench/image_input.jsonl) and [video_input_4.jsonl](http://ofasys-wlcb.oss-cn-wulanchabu.aliyuncs.com/Qwen-VL/evaluation/seed_bench/video_input_4.jsonl) here for reference.
51 |
52 | 3. Produce the results of Seed-Bench.
53 | ```
54 | # The number of available GPUs
55 | export NPROC_PER_NODE=8
56 |
57 | # Produce the Qwen-VL-Chat results of image understanding
58 | python -m torch.distributed.launch --use-env \
59 | --nproc_per_node ${NPROC_PER_NODE:-8} \
60 | --nnodes ${WORLD_SIZE:-1} \
61 | --node_rank ${RANK:-0} \
62 | --master_addr ${MASTER_ADDR:-127.0.0.1} \
63 | --master_port ${MASTER_PORT:-12345} \
64 | eval.py \
65 | --checkpoint Qwen/Qwen-VL-Chat \
66 | --dataset image_input.jsonl \
67 | --batch-size 4 \
68 | --num-workers 2
69 | # Collect the result files
70 | cat result_?.jsonl >results_chat_img.jsonl
71 | rm result_?.jsonl
72 |
73 | # Produce the results of video understanding
74 | python -m torch.distributed.launch --use-env \
75 | --nproc_per_node ${NPROC_PER_NODE:-8} \
76 | --nnodes ${WORLD_SIZE:-1} \
77 | --node_rank ${RANK:-0} \
78 | --master_addr ${MASTER_ADDR:-127.0.0.1} \
79 | --master_port ${MASTER_PORT:-12345} \
80 | eval.py \
81 | --checkpoint Qwen/Qwen-VL-Chat \
82 | --dataset video_input_4.jsonl \
83 | --batch-size 2 \
84 | --num-workers 1
85 | # Collect the result files
86 | cat result_?.jsonl >results_chat_vid.jsonl
87 | rm result_?.jsonl
88 |
89 | # The file `results_chat.jsonl` can be submitted to the leaderboard
90 | cat results_chat_img.jsonl results_chat_vid.jsonl >results_chat.jsonl
91 | ```
92 |
93 | You can reproduce the Seed-Bench results of Qwen-VL by replacing `Qwen/Qwen-VL-Chat` with `Qwen/Qwen-VL` in the above script.
94 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/seed_bench/eval.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import itertools
3 | import json
4 | import os
5 | from functools import partial
6 |
7 | import torch
8 | from tqdm import tqdm
9 | from transformers import AutoModelForCausalLM, AutoTokenizer
10 | from transformers.generation import GenerationConfig
11 |
12 |
def collate_fn(batches, pad_token_id):
    """Flatten per-sample candidate sequences into one left-padded batch.

    Each element of ``batches`` is a dict with the keys ``input_tokens``
    (a list of token-id lists), ``target_lengths``, ``answer`` and
    ``question_id``.

    Returns a 6-tuple: the padded ``LongTensor`` of all candidate sequences,
    a float attention mask (0.0 where the token equals ``pad_token_id``,
    1.0 elsewhere), the per-sample target lengths, the answers, the number
    of candidate sequences contributed by each sample, and the question ids.
    """
    target_lengths, answers, question_ids = [], [], []
    chunk_sizes, flat_sequences = [], []
    for sample in batches:
        candidates = sample['input_tokens']
        target_lengths.append(sample['target_lengths'])
        answers.append(sample['answer'])
        question_ids.append(sample['question_id'])
        chunk_sizes.append(len(candidates))
        flat_sequences.extend(candidates)

    # Pad on the left so that every sequence ends at the same position.
    longest = max([len(sequence) for sequence in flat_sequences])
    padded = [[pad_token_id] * (longest - len(sequence)) + sequence
              for sequence in flat_sequences]
    token_tensor = torch.LongTensor(padded)

    # Any position holding the pad id is masked out.
    attention_mask = 1 - token_tensor.eq(pad_token_id).float()

    return (token_tensor, attention_mask, target_lengths, answers,
            chunk_sizes, question_ids)
32 |
33 |
class MultipleChoiceDataste(torch.utils.data.Dataset):
    """Dataset over a JSON-lines file of multiple-choice questions.

    Every line of the file is parsed as one JSON object; ``__getitem__``
    reads its ``prompt``, ``answer`` and ``question_id`` keys.
    """

    def __init__(self, test, tokenizer):
        """Load every line of the file ``test`` and keep ``tokenizer``."""
        self.tokenizer = tokenizer
        with open(test) as fin:
            self.datas = [json.loads(raw.strip()) for raw in tqdm(fin)]

    def __len__(self):
        """Return the number of loaded records."""
        return len(self.datas)

    def __getitem__(self, idx):
        """Build the four candidate sequences (prompt + ' A'..' D') for a record.

        Returns a dict with ``input_tokens`` (one token-id list per option),
        ``target_lengths`` (token count of each option suffix), ``answer``
        and ``question_id``.
        """
        record = self.datas[idx]
        prompt_ids = self.tokenizer(record['prompt']).input_ids
        # Each option letter is tokenized with a leading space.
        option_ids = [
            self.tokenizer(' ' + letter).input_ids for letter in 'ABCD'
        ]

        return {
            'input_tokens': [prompt_ids + ids for ids in option_ids],
            'target_lengths': [len(ids) for ids in option_ids],
            'answer': record['answer'],
            'question_id': record['question_id'],
        }
63 |
64 |
class InferenceSampler(torch.utils.data.sampler.Sampler):
    """Sampler that yields one contiguous slice of indices per process.

    The slice is chosen from ``range(size)`` using the rank and world size
    reported by ``torch.distributed``.
    """

    def __init__(self, size):
        """Record ``size`` and compute the index range of the current rank."""
        self._size = int(size)
        assert size > 0
        self._rank = torch.distributed.get_rank()
        self._world_size = torch.distributed.get_world_size()
        self._local_indices = self._get_local_indices(
            size, self._world_size, self._rank)

    @staticmethod
    def _get_local_indices(total_size, world_size, rank):
        """Return the ``range`` of indices assigned to ``rank``.

        Shards differ in size by at most one; the first
        ``total_size % world_size`` ranks receive the extra element.
        """
        base, remainder = divmod(total_size, world_size)
        per_rank = [
            base + 1 if shard < remainder else base
            for shard in range(world_size)
        ]

        first = sum(per_rank[:rank])
        last = min(sum(per_rank[:rank + 1]), total_size)
        return range(first, last)

    def __iter__(self):
        """Yield the indices owned by this rank in ascending order."""
        for index in self._local_indices:
            yield index

    def __len__(self):
        """Return how many indices this rank owns."""
        return len(self._local_indices)
90 |
91 |
if __name__ == '__main__':
    # Distributed multiple-choice evaluation: every rank scores its shard of
    # the dataset, writes its predictions to 'result_<rank>.jsonl', and rank 0
    # prints the accuracy gathered from all ranks.

    parser = argparse.ArgumentParser()
    parser.add_argument('--checkpoint', type=str, default='')
    parser.add_argument('--dataset', type=str, default='')
    parser.add_argument('--batch-size', type=int, default=1)
    parser.add_argument('--num-workers', type=int, default=1)
    args = parser.parse_args()

    torch.distributed.init_process_group(
        backend='nccl',
        world_size=int(os.getenv('WORLD_SIZE', '1')),
        rank=int(os.getenv('RANK', '0')),
    )

    torch.cuda.set_device(int(os.getenv('LOCAL_RANK', 0)))

    model = AutoModelForCausalLM.from_pretrained(
        args.checkpoint, device_map='cuda', trust_remote_code=True).eval()

    tokenizer = AutoTokenizer.from_pretrained(args.checkpoint,
                                              trust_remote_code=True)
    model.generation_config = GenerationConfig.from_pretrained(
        args.checkpoint, trust_remote_code=True)
    model.generation_config.top_p = 0.01

    dataset = MultipleChoiceDataste(test=args.dataset, tokenizer=tokenizer)
    dataloader = torch.utils.data.DataLoader(
        dataset=dataset,
        sampler=InferenceSampler(len(dataset)),
        batch_size=args.batch_size,
        num_workers=args.num_workers,
        pin_memory=True,
        drop_last=False,
        collate_fn=partial(collate_fn, pad_token_id=tokenizer.eod_id),
    )

    results = []
    result_path = 'result_{}.jsonl'.format(torch.distributed.get_rank())
    # The context manager closes the result file even if inference raises.
    with open(result_path, 'w') as fout, torch.no_grad():
        for (input_tokens, attention_mask, target_lengths, answers,
             chunk_sizes, question_ids) in tqdm(dataloader):

            # Feed tokens [0, T-1) and score them against tokens [1, T):
            # the logits at position t are compared with token t + 1.
            outputs = model(
                input_ids=input_tokens[:, :-1].cuda(),
                attention_mask=attention_mask[:, :-1].cuda(),
                return_dict=True,
            )
            # cross_entropy expects the class dimension second, hence the
            # permute; reduction='none' keeps one loss value per token.
            losses = torch.nn.functional.cross_entropy(
                outputs.logits.permute(0, 2, 1),
                input_tokens[:, 1:].cuda(),
                reduction='none')

            # Regroup the flattened candidate rows by question.
            losses = losses.split(chunk_sizes, dim=0)

            for loss, target_length, answer, question_id in zip(
                    losses, target_lengths, answers, question_ids):

                # Score each option by the mean loss over its own trailing
                # tokens only; the shared prompt part is ignored.
                target_loss = loss.mean(-1)
                for option, length in enumerate(target_length):
                    target_loss[option] = loss[option, -length:].mean()

                # The option with the lowest loss wins; 0 -> 'A', 1 -> 'B', ...
                pred = chr(target_loss.argmin().item() + 65)
                results.append(int(pred == answer))
                answer_record = {
                    'question_id': question_id,
                    'prediction': pred
                }
                print(json.dumps(answer_record), file=fout)

    torch.distributed.barrier()

    # Collect the 0/1 correctness flags of every rank on every rank.
    world_size = torch.distributed.get_world_size()
    merged_results = [None for _ in range(world_size)]
    torch.distributed.all_gather_object(merged_results, results)

    merged_results = list(itertools.chain.from_iterable(merged_results))

    if torch.distributed.get_rank() == 0:
        print(f"Evaluating {args.dataset} ...")
        print(f'Acc@1: {sum(merged_results) / len(merged_results)}')

    torch.distributed.barrier()
180 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/seed_bench/leaderboard.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/Qwen-VL/eval_mm/seed_bench/leaderboard.jpg
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/seed_bench/trans.py:
--------------------------------------------------------------------------------
1 | import os
2 | import av
3 | import json
4 |
5 | import torch
6 | import numpy as np
7 | from PIL import Image
8 | from tqdm import tqdm
9 | from decord import VideoReader, cpu
10 |
11 | # path of SEED-Bench.json, download from https://huggingface.co/datasets/AILab-CVC/SEED-Bench/blob/main/SEED-Bench.json
12 | seed_bench_input_path = 'SEED-Bench.json'
13 | # root directory of evaluation dimension 1-9, following https://github.com/AILab-CVC/SEED-Bench/blob/main/DATASET.md
14 | cc3m_dir = "/YOUR_PATH_TO/seed_bench_image"
15 | # root directory of evaluation dimension 10
16 | dimension10_dir = "/YOUR_PATH_TO/SSV2/videos"
17 | # root directory of evaluation dimension 11
18 | dimension11_dir = "/YOUR_PATH_TO/EPIC-KITCHENS/3h91syskeag572hl6tvuovwv4d/videos/test"
19 | # root directory of evaluation dimension 12
20 | dimension12_dir = "/YOUR_PATH_TO/BreakfastII_15fps_qvga_sync"
21 |
def is_integer_string(s):
    """Return True if ``int(s)`` succeeds, False if it raises ValueError."""
    try:
        int(s)
    except ValueError:
        return False
    else:
        return True
28 |
def filter_questions(data, task='all'):
    """Select the questions of ``data`` that belong to ``task``.

    ``task`` may be ``'all'`` (returns ``data`` itself), ``'image'``
    (question_type_id 1-9), ``'video'`` (question_type_id 10-12), or a
    string holding a single integer question_type_id.

    Raises ValueError for any other ``task`` value.
    """
    if task == "all":
        return data
    if task == "image":
        return [item for item in data if 1 <= item["question_type_id"] <= 9]
    if task == "video":
        return [item for item in data if 10 <= item["question_type_id"] <= 12]
    if not is_integer_string(task):
        raise ValueError(f"Invalid task: {task}")

    wanted_type = int(task)
    return [item for item in data if item["question_type_id"] == wanted_type]
40 |
def get_index(num_frames, num_segments):
    """Return ``num_segments`` frame offsets sampled from ``num_frames`` frames.

    When more segments are requested than frames exist, every frame offset
    ``0 .. num_frames - 1`` is returned instead.
    """
    if num_segments > num_frames:
        # Not enough frames: take all of them.
        return np.array(list(range(num_frames)))

    # Uniform sampling: evenly spaced steps, shifted by half a step.
    stride = float(num_frames - 1) / num_segments
    first = int(stride / 2)
    return np.array([
        first + int(np.round(stride * step)) for step in range(num_segments)
    ])
54 |
# Load every SEED-Bench question once; the sections below filter it.
with open(seed_bench_input_path) as fin:
    qa_anno = json.load(fin)['questions']

# ---- Image questions (question_type_id 1-9) -> image_input.jsonl ----
i_anno = filter_questions(qa_anno, 'image')
with open('image_input.jsonl', 'w') as fout:
    for qa_item in tqdm(i_anno):
        # Join with a path separator so the root directory works with or
        # without a trailing slash.
        data_path = os.path.join(cc3m_dir, qa_item['data_id'].lstrip('/'))
        choices = [qa_item['choice_a'], qa_item['choice_b'],
                   qa_item['choice_c'], qa_item['choice_d']]
        # Label the options 'A. ...' to 'D. ...', one per line.
        choice_txt = '\n'.join(
            '{}. {}'.format(chr(i + 65), c) for i, c in enumerate(choices))
        # The image path is wrapped in <img>...</img> tags ahead of the text.
        prompt = '<img>{}</img>\nQuestion: {}\nOptions: {}\nAnswer:'.format(
            data_path, qa_item['question'], choice_txt)
        print(json.dumps({
            'question_id': qa_item['question_id'],
            'prompt': prompt,
            'answer': qa_item['answer'],
        }), file=fout)
75 |
# ---- Video questions (question_type_id 10-12) -> video_input_<n>.jsonl ----
# Number of frames extracted from every video.
n_frames = 8
# NOTE(review): this removes 'video_input_<n>', while the frames are written
# to 'video_imgs_<n>' and the output file is 'video_input_<n>.jsonl'.
# Confirm which path was meant to be cleared.
os.system('rm -rf video_input_' + str(n_frames))
os.makedirs('video_imgs_' + str(n_frames), exist_ok=True)

v_anno = filter_questions(qa_anno, 'video')
with open('video_input_{}.jsonl'.format(n_frames), 'w') as fout:
    for qa_item in tqdm(v_anno):
        # Join with a path separator so the root directories work with or
        # without a trailing slash.
        if qa_item['question_type_id'] == 12:
            data_path = os.path.join(dimension12_dir,
                                     qa_item['data_id'].lstrip('/'))
        elif qa_item['question_type_id'] == 11:
            # Only the file name of data_id is used for dimension 11.
            data_path = os.path.join(dimension11_dir,
                                     qa_item['data_id'].split('/')[-1])
        elif qa_item['question_type_id'] == 10:
            data_path = os.path.join(dimension10_dir,
                                     qa_item['data_id'].lstrip('/'))
        else:
            # filter_questions(..., 'video') only yields types 10-12.
            raise AssertionError(str(qa_item))
        print(data_path)

        use_pyav = False
        if 'segment' in qa_item.keys():
            segment = qa_item['segment']
            if isinstance(segment[0], int):
                # using pyav for decoding videos in evaluation dimension 12
                use_pyav = True
            start, end = segment[0], segment[1]
        else:
            start = 0.0
            end = 0.0

        if use_pyav:
            # using pyav for decoding videos in evaluation dimension 12;
            # the container is closed as soon as all frames are decoded.
            with av.open(data_path) as reader:
                frames = [
                    torch.from_numpy(f.to_rgb().to_ndarray())
                    for f in reader.decode(video=0)
                ]
            video_len = len(frames)
            # Integer segment bounds are used directly as frame indices.
            start_frame, end_frame = start, end
            end_frame = min(end_frame, video_len)
            offset = get_index(end_frame - start_frame, n_frames)
            frame_indices = offset + start_frame
            images = torch.stack([frames[idx] for idx in frame_indices]).numpy()
        else:
            # using decord for decoding videos in evaluation dimension 10-11
            try:
                vr = VideoReader(data_path, num_threads=1, ctx=cpu(0))
                video_len = len(vr)
                fps = vr.get_avg_fps()
                if 'segment' in qa_item.keys():
                    # obtain start and end frame for the video segment in
                    # evaluation dimension 11 (bounds scaled by fps and
                    # clamped to the valid frame range)
                    start_frame = int(min(max(start * fps, 0), video_len - 1))
                    end_frame = int(min(max(end * fps, 0), video_len - 1))
                    tot_frames = int(end_frame - start_frame)
                    offset = get_index(tot_frames, n_frames)
                    frame_indices = offset + start_frame
                else:
                    # sample frames of the video in evaluation dimension 10
                    frame_indices = get_index(video_len - 1, n_frames)
                vr.seek(0)
                images = vr.get_batch(frame_indices).asnumpy()
            except Exception as e:
                # Best effort: record the failure in place of the prompt and
                # move on to the next question.
                print(json.dumps({
                    'question_id': qa_item['question_id'],
                    'prompt': "Error" + str(e),
                    'answer': qa_item['answer'],
                }), file=fout)
                continue

        # Save every sampled frame as a JPEG and reference it in the prompt,
        # one <img>...</img> tag per line.
        prompt = ''
        for i in range(images.shape[0]):
            data = Image.fromarray(images[i])
            img_path = 'video_imgs_{}/{}_{}.jpg'.format(
                n_frames, qa_item['question_id'], i)
            data.save(img_path)
            prompt += '<img>' + img_path + '</img>\n'

        choices = [qa_item['choice_a'], qa_item['choice_b'],
                   qa_item['choice_c'], qa_item['choice_d']]
        # Label the options 'A. ...' to 'D. ...', one per line.
        choice_txt = '\n'.join(
            '{}. {}'.format(chr(i + 65), c) for i, c in enumerate(choices))

        prompt += 'Question: {}\nOptions: {}\nAnswer:'.format(
            qa_item['question'], choice_txt)
        print(json.dumps({
            'question_id': qa_item['question_id'],
            'prompt': prompt,
            'answer': qa_item['answer'],
        }), file=fout)
160 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/eval_mm/vqa.py:
--------------------------------------------------------------------------------
1 | """Copyright (c) 2022, salesforce.com, inc.
2 |
3 | All rights reserved.
4 | SPDX-License-Identifier: BSD-3-Clause
5 | For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
6 | """
7 |
8 | __author__ = 'aagrawal'
9 | __version__ = '0.9'
10 |
11 | # Interface for accessing the VQA dataset.
12 |
13 | # This code is based on the code written by Tsung-Yi Lin for MSCOCO Python API available at the following link:
14 | # (https://github.com/pdollar/coco/blob/master/PythonAPI/pycocotools/coco.py).
15 |
16 | # The following functions are defined:
17 | # VQA - VQA class that loads VQA annotation file and prepares data structures.
18 | # getQuesIds - Get question ids that satisfy given filter conditions.
19 | # getImgIds - Get image ids that satisfy given filter conditions.
20 | # loadQA - Load questions and answers with the specified question ids.
21 | # showQA - Display the specified questions and answers.
22 | # loadRes - Load result file and create result object.
23 |
24 | # Help on each function can be accessed by: "help(COCO.function)"
25 |
26 | import copy
27 | import datetime
28 | import json
29 |
30 |
class VQA:
    """Helper for loading, indexing and querying VQA annotations/questions."""

    def __init__(self, annotation_file=None, question_file=None):
        """Constructor of VQA helper class for reading and visualizing
        questions and answers.

        The index is only built when both file names are given; otherwise
        an empty object is created.

        :param annotation_file (str): location of VQA annotation file
        :param question_file (str): location of VQA question file
        :return:
        """
        # load dataset
        self.dataset = {}
        self.questions = {}
        self.qa = {}
        self.qqa = {}
        self.imgToQA = {}
        if annotation_file is not None and question_file is not None:
            print('loading VQA annotations and questions into memory...')
            with open(annotation_file, 'r') as fin:
                dataset = json.load(fin)
            with open(question_file, 'r') as fin:
                questions = json.load(fin)
            self.dataset = dataset
            self.questions = questions
            self.createIndex()

    @staticmethod
    def _as_list(value):
        """Return ``value`` as a list, wrapping a single item if necessary."""
        if isinstance(value, (list, tuple)):
            return list(value)
        return [value]

    def createIndex(self):
        """Build the lookup tables ``qa``, ``qqa`` and ``imgToQA``.

        ``qa`` maps question id -> annotation, ``qqa`` maps question id ->
        question, ``imgToQA`` maps image id -> list of annotations.
        """
        # create index
        print('creating index...')
        imgToQA = {ann['image_id']: [] for ann in self.dataset['annotations']}
        qa = {ann['question_id']: [] for ann in self.dataset['annotations']}
        qqa = {ann['question_id']: [] for ann in self.dataset['annotations']}
        for ann in self.dataset['annotations']:
            imgToQA[ann['image_id']] += [ann]
            qa[ann['question_id']] = ann
        for ques in self.questions['questions']:
            qqa[ques['question_id']] = ques
        print('index created!')

        # create class members
        self.qa = qa
        self.qqa = qqa
        self.imgToQA = imgToQA

    def info(self):
        """Print information about the VQA annotation file.

        :return:
        """
        for key, value in self.dataset['info'].items():
            print('%s: %s' % (key, value))

    def getQuesIds(self, imgIds=[], quesTypes=[], ansTypes=[]):
        """Get question ids that satisfy given filter conditions. default skips
        that filter.

        :param imgIds (int array) : get question ids for given imgs
               quesTypes (str array) : get question ids for given question types
               ansTypes (str array) : get question ids for given answer types
        :return: ids (int array) : integer array of question ids
        """
        # The default lists are never mutated; each argument is copied here.
        imgIds = self._as_list(imgIds)
        quesTypes = self._as_list(quesTypes)
        ansTypes = self._as_list(ansTypes)

        if imgIds:
            # Unknown image ids are silently skipped.
            anns = [
                ann for imgId in imgIds if imgId in self.imgToQA
                for ann in self.imgToQA[imgId]
            ]
        else:
            anns = self.dataset['annotations']
        if quesTypes:
            anns = [ann for ann in anns if ann['question_type'] in quesTypes]
        if ansTypes:
            anns = [ann for ann in anns if ann['answer_type'] in ansTypes]
        return [ann['question_id'] for ann in anns]

    def getImgIds(self, quesIds=[], quesTypes=[], ansTypes=[]):
        """Get image ids that satisfy given filter conditions. default skips
        that filter.

        :param quesIds (int array) : get image ids for given question ids
               quesTypes (str array) : get image ids for given question types
               ansTypes (str array) : get image ids for given answer types
        :return: ids (int array) : integer array of image ids
        """
        # The default lists are never mutated; each argument is copied here.
        quesIds = self._as_list(quesIds)
        quesTypes = self._as_list(quesTypes)
        ansTypes = self._as_list(ansTypes)

        if quesIds:
            # self.qa maps a question id to ONE annotation dict, so the
            # annotations are collected directly (they cannot be summed).
            anns = [self.qa[quesId] for quesId in quesIds if quesId in self.qa]
        else:
            anns = self.dataset['annotations']
        if quesTypes:
            anns = [ann for ann in anns if ann['question_type'] in quesTypes]
        if ansTypes:
            anns = [ann for ann in anns if ann['answer_type'] in ansTypes]
        return [ann['image_id'] for ann in anns]

    def loadQA(self, ids=[]):
        """Load questions and answers with the specified question ids.

        :param ids (int array) : integer ids specifying question ids
        :return: qa (object array) : loaded qa objects; None when ``ids`` is
                 neither a list/tuple nor an int
        """
        if isinstance(ids, (list, tuple)):
            return [self.qa[quesId] for quesId in ids]
        if isinstance(ids, int):
            return [self.qa[ids]]
        return None

    def showQA(self, anns):
        """Display the specified annotations.

        :param anns (array of object): annotations to display
        :return: 0 when ``anns`` is empty, otherwise None
        """
        if len(anns) == 0:
            return 0
        for ann in anns:
            quesId = ann['question_id']
            print('Question: %s' % (self.qqa[quesId]['question']))
            for ans in ann['answers']:
                print('Answer %d: %s' % (ans['answer_id'], ans['answer']))

    def loadRes(self, resFile, quesFile):
        """Load result file and return a result object.

        :param resFile (str) : file name of result file
        :param quesFile (str) : file name of the question file
        :return: res (obj) : result api object
        :raises AssertionError: when the results are not a list, do not cover
                 exactly the question ids of this object, or (for the
                 'Multiple Choice' task type) contain an answer that is not
                 one of the choices
        """
        res = VQA()
        with open(quesFile) as fin:
            res.questions = json.load(fin)
        # Dataset-level metadata is copied from this object's questions.
        res.dataset['info'] = copy.deepcopy(self.questions['info'])
        res.dataset['task_type'] = copy.deepcopy(self.questions['task_type'])
        res.dataset['data_type'] = copy.deepcopy(self.questions['data_type'])
        res.dataset['data_subtype'] = copy.deepcopy(
            self.questions['data_subtype'])
        res.dataset['license'] = copy.deepcopy(self.questions['license'])

        print('Loading and preparing results...     ')
        # Timezone-aware replacement for the deprecated utcnow(); only the
        # elapsed time is reported.
        time_t = datetime.datetime.now(datetime.timezone.utc)
        with open(resFile) as fin:
            anns = json.load(fin)
        assert type(anns) == list, 'results is not an array of objects'
        annsQuesIds = [ann['question_id'] for ann in anns]
        assert set(annsQuesIds) == set(
            self.getQuesIds()
        ), 'Results do not correspond to current VQA set. Either the results do not have predictions for all question ids in annotation file or there is atleast one question id that does not belong to the question ids in the annotation file.'
        for ann in anns:
            quesId = ann['question_id']
            if res.dataset['task_type'] == 'Multiple Choice':
                assert (
                    ann['answer'] in self.qqa[quesId]['multiple_choices']
                ), 'predicted answer is not one of the multiple choices'
            # Copy the image id and type labels from the ground truth so the
            # result annotations can be filtered like the originals.
            qaAnn = self.qa[quesId]
            ann['image_id'] = qaAnn['image_id']
            ann['question_type'] = qaAnn['question_type']
            ann['answer_type'] = qaAnn['answer_type']
        elapsed = datetime.datetime.now(datetime.timezone.utc) - time_t
        print('DONE (t=%0.2fs)' % (elapsed.total_seconds()))

        res.dataset['annotations'] = anns
        res.createIndex()
        return res
207 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/finetune/ds_config_zero2.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "optimizer": {
14 | "type": "AdamW",
15 | "params": {
16 | "lr": "auto",
17 | "betas": "auto",
18 | "eps": "auto",
19 | "weight_decay": "auto"
20 | }
21 | },
22 |
23 | "scheduler": {
24 | "type": "WarmupLR",
25 | "params": {
26 | "warmup_min_lr": "auto",
27 | "warmup_max_lr": "auto",
28 | "warmup_num_steps": "auto"
29 | }
30 | },
31 |
32 | "zero_optimization": {
33 | "stage": 2,
34 | "offload_optimizer": {
35 | "device": "none",
36 | "pin_memory": true
37 | },
38 | "allgather_partitions": true,
39 | "allgather_bucket_size": 2e8,
40 | "overlap_comm": true,
41 | "reduce_scatter": true,
42 | "reduce_bucket_size": 2e8,
43 | "contiguous_gradients": true
44 | },
45 |
46 | "gradient_accumulation_steps": "auto",
47 | "gradient_clipping": "auto",
48 | "steps_per_print": 100,
49 | "train_batch_size": "auto",
50 | "train_micro_batch_size_per_gpu": "auto",
51 | "wall_clock_breakdown": false
52 | }
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/finetune/ds_config_zero3.json:
--------------------------------------------------------------------------------
1 | {
2 | "fp16": {
3 | "enabled": "auto",
4 | "loss_scale": 0,
5 | "loss_scale_window": 1000,
6 | "initial_scale_power": 16,
7 | "hysteresis": 2,
8 | "min_loss_scale": 1
9 | },
10 | "bf16": {
11 | "enabled": "auto"
12 | },
13 | "optimizer": {
14 | "type": "AdamW",
15 | "params": {
16 | "lr": "auto",
17 | "betas": "auto",
18 | "eps": "auto",
19 | "weight_decay": "auto"
20 | }
21 | },
22 |
23 | "scheduler": {
24 | "type": "WarmupLR",
25 | "params": {
26 | "warmup_min_lr": "auto",
27 | "warmup_max_lr": "auto",
28 | "warmup_num_steps": "auto"
29 | }
30 | },
31 |
32 | "zero_optimization": {
33 | "stage": 3,
34 | "offload_optimizer": {
35 | "device": "none",
36 | "pin_memory": true
37 | },
38 | "offload_param": {
39 | "device": "none",
40 | "pin_memory": true
41 | },
42 | "overlap_comm": true,
43 | "contiguous_gradients": true,
44 | "sub_group_size": 1e9,
45 | "reduce_bucket_size": "auto",
46 | "stage3_prefetch_bucket_size": "auto",
47 | "stage3_param_persistence_threshold": "auto",
48 | "stage3_max_live_parameters": 1e9,
49 | "stage3_max_reuse_distance": 1e9,
50 | "stage3_gather_16bit_weights_on_model_save": true
51 | },
52 |
53 | "gradient_accumulation_steps": "auto",
54 | "gradient_clipping": "auto",
55 | "steps_per_print": 100,
56 | "train_batch_size": "auto",
57 | "train_micro_batch_size_per_gpu": "auto",
58 | "wall_clock_breakdown": false
59 | }
60 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/finetune/finetune_ds.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_DEVICE_MAX_CONNECTIONS=1
3 | DIR=`pwd`
4 |
5 | GPUS_PER_NODE=8
6 | NNODES=1
7 | NODE_RANK=0
8 | MASTER_ADDR=localhost
9 | MASTER_PORT=6001
10 |
11 | MODEL="Qwen/Qwen-VL-Chat" #"Qwen/Qwen-VL-Chat"/"Qwen/Qwen-VL" # Set the path if you do not want to load from huggingface directly
12 | # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
13 | # See the section for finetuning in README for more information.
14 | DATA="path_to_data"
15 |
16 | DISTRIBUTED_ARGS="
17 | --nproc_per_node $GPUS_PER_NODE \
18 | --nnodes $NNODES \
19 | --node_rank $NODE_RANK \
20 | --master_addr $MASTER_ADDR \
21 | --master_port $MASTER_PORT
22 | "
23 |
24 | torchrun $DISTRIBUTED_ARGS finetune.py \
25 | --model_name_or_path $MODEL \
26 | --data_path $DATA \
27 | --bf16 True \
28 | --fix_vit True \
29 | --output_dir output_qwen \
30 | --num_train_epochs 5 \
31 | --per_device_train_batch_size 1 \
32 | --per_device_eval_batch_size 1 \
33 | --gradient_accumulation_steps 16 \
34 | --evaluation_strategy "no" \
35 | --save_strategy "steps" \
36 | --save_steps 1000 \
37 | --save_total_limit 10 \
38 | --learning_rate 1e-5 \
39 | --weight_decay 0.1 \
40 | --adam_beta2 0.95 \
41 | --warmup_ratio 0.01 \
42 | --lr_scheduler_type "cosine" \
43 | --logging_steps 1 \
44 | --report_to "none" \
45 | --model_max_length 2048 \
46 | --gradient_checkpointing True \
47 | --lazy_preprocess True \
48 | --deepspeed finetune/ds_config_zero2.json
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/finetune/finetune_lora_ds.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_DEVICE_MAX_CONNECTIONS=1
3 | DIR=`pwd`
4 |
5 | GPUS_PER_NODE=8
6 | NNODES=1
7 | NODE_RANK=0
8 | MASTER_ADDR=localhost
9 | MASTER_PORT=6001
10 |
11 | MODEL="Qwen/Qwen-VL-Chat" #"Qwen/Qwen-VL-Chat"/"Qwen/Qwen-VL" Set the path if you do not want to load from huggingface directly
12 | # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
13 | # See the section for finetuning in README for more information.
14 | DATA="/dataset/VQA/Crop_Disease_train_qwenvl.json"
15 |
16 | DISTRIBUTED_ARGS="
17 | --nproc_per_node $GPUS_PER_NODE \
18 | --nnodes $NNODES \
19 | --node_rank $NODE_RANK \
20 | --master_addr $MASTER_ADDR \
21 | --master_port $MASTER_PORT
22 | "
23 |
24 | torchrun $DISTRIBUTED_ARGS finetune.py \
25 | --model_name_or_path $MODEL \
26 | --data_path $DATA \
27 | --bf16 True \
28 | --fix_vit True \
29 | --output_dir output_crop_disease \
30 | --num_train_epochs 5 \
31 | --per_device_train_batch_size 2 \
32 | --per_device_eval_batch_size 1 \
33 | --gradient_accumulation_steps 8 \
34 | --evaluation_strategy "no" \
35 | --save_strategy "steps" \
36 | --save_steps 1000 \
37 | --save_total_limit 10 \
38 | --learning_rate 1e-5 \
39 | --weight_decay 0.1 \
40 | --adam_beta2 0.95 \
41 | --warmup_ratio 0.01 \
42 | --lr_scheduler_type "cosine" \
43 | --logging_steps 1 \
44 | --report_to "none" \
45 | --model_max_length 2048 \
46 | --lazy_preprocess True \
47 | --use_lora \
48 | --gradient_checkpointing \
49 | --deepspeed finetune/ds_config_zero2.json
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/finetune/finetune_lora_single_gpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_DEVICE_MAX_CONNECTIONS=1
3 | DIR=`pwd`
4 |
5 |
6 | MODEL="Qwen/Qwen-VL-Chat" #"Qwen/Qwen-VL-Chat"/"Qwen/Qwen-VL" # Set the path if you do not want to load from huggingface directly
7 | # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
8 | # See the section for finetuning in README for more information.
9 | DATA="path_to_data"
10 |
11 | export CUDA_VISIBLE_DEVICES=0
12 |
13 | python finetune.py \
14 | --model_name_or_path $MODEL \
15 | --data_path $DATA \
16 | --bf16 True \
17 | --fix_vit True \
18 | --output_dir output_qwen \
19 | --num_train_epochs 5 \
20 | --per_device_train_batch_size 1 \
21 | --per_device_eval_batch_size 1 \
22 | --gradient_accumulation_steps 8 \
23 | --evaluation_strategy "no" \
24 | --save_strategy "steps" \
25 | --save_steps 1000 \
26 | --save_total_limit 10 \
27 | --learning_rate 1e-5 \
28 | --weight_decay 0.1 \
29 | --adam_beta2 0.95 \
30 | --warmup_ratio 0.01 \
31 | --lr_scheduler_type "cosine" \
32 | --logging_steps 1 \
33 | --report_to "none" \
34 | --model_max_length 2048 \
35 | --lazy_preprocess True \
36 | --gradient_checkpointing \
37 | --use_lora
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/finetune/finetune_qlora_ds.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_DEVICE_MAX_CONNECTIONS=1
3 | DIR=`pwd`
4 |
5 | GPUS_PER_NODE=8
6 | NNODES=1
7 | NODE_RANK=0
8 | MASTER_ADDR=localhost
9 | MASTER_PORT=6001
10 |
11 | MODEL="Qwen/Qwen-VL-Chat-Int4" # Qwen/Qwen-VL-Chat-Int4 Set the path if you do not want to load from huggingface directly
12 | # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
13 | # See the section for finetuning in README for more information.
14 | DATA="path_to_data"
15 |
16 |
17 | DISTRIBUTED_ARGS="
18 | --nproc_per_node $GPUS_PER_NODE \
19 | --nnodes $NNODES \
20 | --node_rank $NODE_RANK \
21 | --master_addr $MASTER_ADDR \
22 | --master_port $MASTER_PORT
23 | "
24 |
25 | # Remember to use --fp16 instead of --bf16 due to autogptq
26 | torchrun $DISTRIBUTED_ARGS finetune.py \
27 | --model_name_or_path $MODEL \
28 | --data_path $DATA \
29 | --fp16 True \
30 | --fix_vit True \
31 | --output_dir output_qwen \
32 | --num_train_epochs 5 \
33 | --per_device_train_batch_size 2 \
34 | --per_device_eval_batch_size 1 \
35 | --gradient_accumulation_steps 8 \
36 | --evaluation_strategy "no" \
37 | --save_strategy "steps" \
38 | --save_steps 1000 \
39 | --save_total_limit 10 \
40 | --learning_rate 1e-5 \
41 | --weight_decay 0.1 \
42 | --adam_beta2 0.95 \
43 | --warmup_ratio 0.01 \
44 | --lr_scheduler_type "cosine" \
45 | --logging_steps 1 \
46 | --report_to "none" \
47 | --model_max_length 2048 \
48 | --lazy_preprocess True \
49 | --use_lora \
50 | --q_lora \
51 | --gradient_checkpointing \
52 | --deepspeed finetune/ds_config_zero2.json
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/finetune/finetune_qlora_single_gpu.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | export CUDA_DEVICE_MAX_CONNECTIONS=1
3 | DIR=`pwd`
4 |
5 | MODEL="Qwen/Qwen-VL-Chat-Int4" # Qwen/Qwen-VL-Chat-Int4 Set the path if you do not want to load from huggingface directly
6 | # ATTENTION: specify the path to your training data, which should be a json file consisting of a list of conversations.
7 | # See the section for finetuning in README for more information.
8 | DATA="path_to_data"
9 |
10 | export CUDA_VISIBLE_DEVICES=0
11 |
12 | # Remember to use --fp16 instead of --bf16 due to autogptq
13 | python finetune.py \
14 | --model_name_or_path $MODEL \
15 | --data_path $DATA \
16 | --fp16 True \
17 | --fix_vit True \
18 | --output_dir output_qwen \
19 | --num_train_epochs 5 \
20 | --per_device_train_batch_size 1 \
21 | --per_device_eval_batch_size 1 \
22 | --gradient_accumulation_steps 8 \
23 | --evaluation_strategy "no" \
24 | --save_strategy "steps" \
25 | --save_steps 1000 \
26 | --save_total_limit 10 \
27 | --learning_rate 1e-5 \
28 | --weight_decay 0.1 \
29 | --adam_beta2 0.95 \
30 | --warmup_ratio 0.01 \
31 | --lr_scheduler_type "cosine" \
32 | --logging_steps 1 \
33 | --report_to "none" \
34 | --model_max_length 2048 \
35 | --lazy_preprocess True \
36 | --gradient_checkpointing \
37 | --use_lora \
38 | --q_lora \
39 | --deepspeed finetune/ds_config_zero2.json
40 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/qmerge.py:
--------------------------------------------------------------------------------
1 | # Modified from https://gist.githubusercontent.com/ChrisHayduk/1a53463331f52dca205e55982baf9930/raw/438ab25f05a8e1dd3c384b81fad38c6101c98be9/merge_qlora_with_quantized_model.py
2 | import argparse
3 | import torch
4 | import peft
5 | import json
6 | import shutil
7 | from peft.utils import _get_submodules
8 | import os
9 | # import bitsandbytes as bnb
10 | # from bitsandbytes.functional import dequantize_4bit
11 | from peft import PeftModel
12 | from transformers import AutoModelForCausalLM, LlamaForCausalLM, LlamaTokenizer, BitsAndBytesConfig #
13 | import transformers
14 | import gc
15 | import copy
16 |
def get_args():
    """Parse the command-line options of the adapter-merge script.

    Returns:
        argparse.Namespace with the string attributes ``base``, ``peft`` and
        ``out`` and the boolean attribute ``push``.
    """
    parser = argparse.ArgumentParser(
        description="Merge a PEFT adapter into its base model and save the result.")
    # main() passes these three values directly to from_pretrained() /
    # save_pretrained(); requiring them turns a late, confusing failure on
    # None into an immediate usage error.
    parser.add_argument("--base", type=str, required=True,
                        help="Name or path of the base model.")
    parser.add_argument("--peft", type=str, required=True,
                        help="Path of the trained PEFT adapter.")
    parser.add_argument("--out", type=str, required=True,
                        help="Directory the merged model and tokenizer are saved to.")
    parser.add_argument("--push", action="store_true",
                        help="Accepted for compatibility; main() does not read it.")
    return parser.parse_args()
24 |
def dequantize_model(model, tokenizer, to, dtype=torch.bfloat16, device="cuda"):
    """Swap every bitsandbytes 4-bit linear layer for a dense one and save the model.

    Args:
        model: The PEFT model that was loaded with QLoRA.
        tokenizer: The matching Hugging Face tokenizer.
        to: Directory the dequantized model is written to. An existing
            directory at this path is deleted first.
        dtype: dtype the model was trained with; used for the dense weights.
        device: Device the new dense layers are moved to.

    Returns:
        The same ``model`` object, modified in place.

    Raises:
        ImportError: If bitsandbytes is not installed.
    """
    # Imported locally: the module-level bitsandbytes imports are commented
    # out, so without this the names below are undefined. Keeping the import
    # here also leaves the rest of the script usable without bitsandbytes.
    import bitsandbytes as bnb
    from bitsandbytes.functional import dequantize_4bit

    if os.path.exists(to):
        shutil.rmtree(to)
    os.makedirs(to, exist_ok=True)

    cls = bnb.nn.Linear4bit
    with torch.no_grad():
        # Snapshot the module list because the loop replaces submodules.
        for name, module in list(model.named_modules()):
            if isinstance(module, cls):
                print(f"Dequantizing `{name}`...")
                quant_state = copy.deepcopy(module.weight.quant_state)
                # NOTE(review): index 2 is presumably the dtype slot of the
                # list-style quant_state -- confirm against the installed
                # bitsandbytes version.
                quant_state[2] = dtype
                weights = dequantize_4bit(
                    module.weight.data, quant_state=quant_state, quant_type="nf4"
                ).to(dtype)
                # bias=None is falsy, so the dense replacement has no bias.
                new_module = torch.nn.Linear(
                    module.in_features, module.out_features, bias=None, dtype=dtype)
                new_module.weight = torch.nn.Parameter(weights)
                new_module.to(device=device, dtype=dtype)
                parent, target, target_name = _get_submodules(model, name)
                setattr(parent, target_name, new_module)

        # Presumably needed so save_pretrained() accepts the model -- TODO confirm.
        model.is_loaded_in_4bit = False
        print("Saving dequantized model...")
        model.save_pretrained(to)
        tokenizer.save_pretrained(to)

        # Drop the quantization-related keys from the saved config.json.
        config_path = os.path.join(to, 'config.json')
        with open(config_path, 'r') as config:
            config_data = json.loads(config.read())
        config_data.pop("quantization_config", None)
        config_data.pop("pretraining_tp", None)
        with open(config_path, 'w') as config:
            config.write(json.dumps(config_data, indent=2))
        return model
59 |
def main():
    """Load the base model, merge the PEFT adapter into it and save the result.

    Reads ``--base``, ``--peft`` and ``--out`` through get_args(). The merged
    model and the tokenizer are both written to the ``--out`` directory.
    """
    cli = get_args()
    base_path, adapter_path = cli.base, cli.peft
    device_map = None
    print(f"Loading base model: {base_path}")

    config = transformers.AutoConfig.from_pretrained(
        base_path, cache_dir=None, trust_remote_code=True)
    config.use_cache = False

    # Load model and tokenizer
    base_model = transformers.AutoModelForCausalLM.from_pretrained(
        base_path,
        config=config,
        cache_dir=None,
        device_map=device_map,
        trust_remote_code=True,
        quantization_config=None,
    )
    tokenizer = transformers.AutoTokenizer.from_pretrained(
        base_path,
        cache_dir=None,
        model_max_length=2048,
        padding_side="right",
        use_fast=False,
        trust_remote_code=True,
    )
    tokenizer.pad_token_id = tokenizer.eod_id

    # Attach the adapter, fold its weights into the base model, then save.
    merged = PeftModel.from_pretrained(
        model=base_model, model_id=adapter_path).merge_and_unload()
    merged.save_pretrained(cli.out)
    tokenizer.save_pretrained(cli.out)
95 |
96 |
97 | if __name__ == "__main__":
98 | main()
99 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/requirements.txt:
--------------------------------------------------------------------------------
1 | transformers==4.32.0
2 | accelerate
3 | tiktoken
4 | einops
5 | transformers_stream_generator==0.0.4
6 | scipy
7 | torchvision
8 | pillow
9 | tensorboard
10 | matplotlib
11 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/requirements_openai_api.txt:
--------------------------------------------------------------------------------
1 | fastapi
2 | uvicorn
3 | openai
4 | pydantic
5 | sse_starlette
6 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/requirements_web_demo.txt:
--------------------------------------------------------------------------------
1 | gradio
2 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/touchstone/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 中文  |  English |  日本語
10 |
11 |
12 |
13 | **TOUCHSTONE** is a comprehensive assessment of multimodal language models, encompassing not only basic recognition and comprehension but also extending to literary creation. By automating the evaluation process and converting multimodal information into text, our TouchStone allows for efficient and accurate assessment of dialogue quality, leveraging the power of advanced language models without the need for manual intervention.
14 |
15 | ## DATASET
16 |
17 | To evaluate the abilities of LVLMs, we construct a diverse and comprehensive dataset that covers five key dimensions: basic descriptive ability, visual recognition ability, visual comprehension ability, visual storytelling ability, and multi-image analysis ability.
18 |
19 | - **Basic Descriptive Ability** Image description involves the ability of a model to describe the information contained in an image, including simple and detailed descriptions. Simple descriptions are typically short phrases that describe the main subject and action of the image, while detailed descriptions provide more in-depth information about the image scene, their attributes, and relationships.
20 |
21 | - **Visual Recognition Ability** Image recognition is the task of recognizing objects or scenes within an image and inferring relevant information. This area can be further divided into several sub-tasks, including attribute QA, movie/TV recognition, art recognition, landmark recognition, celebrity recognition, emotion recognition, text recognition, object recognition, and structure content recognition.
22 |
23 | - **Visual Comprehension Ability** Image understanding involves the ability of a model to understand the meaning of an image and associated tasks. This area encompasses several sub-tasks, such as style appreciation, abstract image understanding, meme understanding, image analysis, chart analysis, general problem-solving, and reasoning QA.
24 |
25 | - **Visual Storytelling Ability** The visual storytelling ability is the process of literary creation based on visual content, including writing emails, poetry, stories, ads/commodity recommendations, and brainstorming.
26 |
27 | - **Multi-Image Analysis Ability** Multi-image analysis is the task of analyzing and comparing multiple images. This area includes tasks such as comparing two/multiple images, summarizing multiple image information, comparing commodities, and step-by-step analysis of images.
28 |
29 |
30 |
31 |
32 |
33 |
34 | We comprehensively evaluate the model's ability across five dimensions. As shown in the figure above, examples of the 27 subtasks are given. From perception to cognition to creativity, the requirements placed on models rise as the difficulty increases. Currently, LVLM capabilities are still at an early stage. Our dataset contains 800+ questions in 27 categories.
35 |
36 | ## Methods
37 |
38 |
39 | We apply a powerful LLM as a judge to enable automated evaluation. To effectively comprehend the contents of an image, we manually substitute the actual image input with fine-grained textual annotations. By inputting these annotations and corresponding questions to a powerful LLM like GPT4, we obtain reference answers.
40 |
41 | For the evaluation of the LVLMs, we provide actual images and questions as input and obtain their respective answers. Finally, we employ GPT4 to score the answers generated by the LVLMs based on the fine-grained annotations and questions. The scoring instructions require the model to assess the usefulness, relevance, and accuracy of the answers, considering the annotations as the content of the images. To ensure fairness in the evaluation, each model's answer is compared against a consistent reference answer from GPT4. The average score of the model in all questions is taken as the final score.
42 |
43 | To eliminate the influence of answer position, we perform a second scoring round by swapping the positions of the answers and then compute the average of the two scores obtained. This approach aims to mitigate any bias introduced by the placement of the answers.
44 |
45 |
46 |
47 |
48 |
49 | ### Evaluation
50 |
51 | #### Evaluation in English-based Multimodal Dialogue
52 |
53 | | Model | Score |
54 | |---------------|-------|
55 | | PandaGPT | 488.5 |
56 | | MiniGPT4 | 531.7 |
57 | | InstructBLIP | 552.4 |
58 | | LLaMA-AdapterV2 | 590.1 |
59 | | mPLUG-Owl | 605.4 |
60 | | LLaVA | 602.7 |
61 | | Qwen-VL-Chat | 645.2 |
62 |
63 | #### Evaluation in Chinese-based Multimodal Dialogue
64 |
65 | | Model | Score |
66 | |---------------|-------|
67 | | VisualGLM | 247.1 |
68 | | Qwen-VL-Chat | 401.2 |
69 |
70 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/touchstone/README_CN.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 中文  |  English |  日本語
10 |
11 |
12 |
13 | **TOUCHSTONE** 是一种针对多模态语言模型(LVLM)的自动化综合评估方法,评估不仅包括基本的认知和理解,还延伸到文学创作。通过人类注解将多模态信息转换为文本,我们的 TouchStone 可以利用SOTA的语言模型来自动化地完成对LVLMs的多模态对话质量评估。
14 |
15 | ## 数据集
16 |
17 | 为了评估 LVLMs 的能力,我们构建了一个多样化且全面的数据集,涵盖五个关键维度:基本描述能力、视觉识别能力、视觉理解能力、视觉叙事能力和多图分析能力。
18 |
19 | - **基本描述能力** 图像描述考验模型总结图片信息的能力,包括简单描述和详细描述。 简单描述通常是描述图像的主要内容和关系的简短短语,而详细描述则提供有关图像场景、其属性和关系的更深入的信息。
20 |
21 | - **视觉识别能力** 图像识别考察模型提取图像中内容的属性以及关联到知识库的能力。为了考察这方面能力,测试的问题包括属性QA、影视识别、艺术识别、地标识别、名人识别、情感识别、文本识别、物体识别和结构内容识别。
22 |
23 | - **视觉理解能力** 图像理解需要模型理解图像内容并完成推理进行相关任务。 这方面包含了例如风格欣赏、抽象图像理解、模因理解、图像分析、图表分析、一般问题解决和推理问答等任务。
24 |
25 | - **视觉叙事能力** 视觉叙事能力是基于视觉内容的文学创作能力,包括撰写电子邮件、诗歌、故事、广告/商品推荐、头脑风暴等。
26 |
27 | - **多图分析能力** 多图分析是分析和比较多幅图像的任务。该领域包括比较两个/多个图像、总结多个图像信息、比较商品以及逐步分析图像等任务。
28 |
29 |
30 |
31 |
32 |
33 | 我们从五个维度综合评估了模型的能力。 如上图所示,给出了27个子任务的示例。 从感知到认知,再到创造力,随着难度的增加,对模型的要求也越来越高。 目前,LVLM的能力还处于早期阶段。 我们的数据集包含800+道题目、27个类别。
34 |
35 | ## 测评方式
36 |
37 | 我们应用SOTA的LLM进行自动化评估。 为了有效地理解图像的内容,我们人工用细粒度的文本注释替换实际的图像输入。 通过将这些注释和相应的问题输入到像GPT4这样强LLM中,我们可以获得参考答案。
38 |
39 | 对于待测评的LVLM,我们提供实际图像和问题作为输入并获得各自的答案。 最后,我们使用GPT4根据细粒度注释和问题对LVLM生成的答案进行评分。 评分指令要求模型评估答案的有用性、相关性和准确性,并将人工注解视为图像的内容。 为了确保评估的公平性,每个模型的答案都会与 GPT4生成的参考答案进行比较。 模型在所有问题上的平均得分作为最终得分。
40 |
41 | 为了消除答案位置的影响,我们通过交换答案的位置来进行第二轮评分,然后计算获得的两次分数的平均值。
42 |
43 |
44 |
45 |
46 |
47 |
48 | ## 测评结果
49 |
50 | #### 英文版本测评
51 |
52 | | Model | Score |
53 | |---------------|-------|
54 | | PandaGPT | 488.5 |
55 | | MiniGPT4 | 531.7 |
56 | | InstructBLIP | 552.4 |
57 | | LLaMA-AdapterV2 | 590.1 |
58 | | mPLUG-Owl | 605.4 |
59 | | LLaVA | 602.7 |
60 | | Qwen-VL-Chat | 645.2 |
61 |
62 | #### 中文版本测评
63 |
64 | | Model | Score |
65 | |---------------|-------|
66 | | VisualGLM | 247.1 |
67 | | Qwen-VL-Chat | 401.2 |
68 |
69 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/touchstone/README_JA.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 | 中文  |  English|  日本語
10 |
11 |
12 |
13 | **TOUCHSTONE** は、マルチモーダル言語モデルの包括的な評価であり、基本的な認識や理解だけでなく、文学的な創作にまで及びます。評価プロセスを自動化し、マルチモーダル情報をテキストに変換することで、私達の TouchStone は、人手を介することなく高度な言語モデルの力を活用し、対話の質を効率的かつ正確に評価することができます。
14 |
15 | ## DATASET
16 |
17 | LVLMの能力を評価するために、基本的な記述能力、視覚認識能力、視覚理解能力、視覚ストーリーテリング能力、複数画像解析能力の5つの主要な次元をカバーする多様で包括的なデータセットを構築する。
18 |
19 | - **基本的描写力** 画像記述には、単純な記述と詳細な記述を含め、画像に含まれる情報を記述するモデルの能力が含まれる。単純な記述は、通常、画像の主な主題とアクションを記述する短いフレーズであり、詳細な記述は、画像のシーン、それらの属性、および関係についてのより詳細な情報を提供します。
20 |
21 | - **視覚認識能力** 画像認識とは、画像内のオブジェクトやシーンを認識し、関連情報を推論するタスクである。この分野はさらに、属性QA、映画/テレビ認識、アート認識、ランドマーク認識、有名人認識、感情認識、テキスト認識、オブジェクト認識、構造コンテンツ認識など、いくつかのサブタスクに分けることができる。
22 |
23 | - **視覚理解能力** 画像理解とは、モデルが画像の意味や関連するタスクを理解する能力のことである。この分野には、スタイル理解、抽象画像理解、ミーム理解、画像分析、チャート分析、一般的な問題解決、推論QAなど、いくつかのサブタスクが含まれる。
24 |
25 | - **視覚的ストーリーテリング能力** ビジュアルストーリーテリング能力とは、メール、詩、物語、広告/商品推薦、ブレーンストーミングの執筆など、ビジュアルコンテンツに基づいた文学創作のプロセスである。
26 |
27 | - **マルチ画像解析能力** 複数画像解析とは、複数の画像を解析・比較する作業である。この分野には、2つまたは複数の画像を比較する、複数の画像情報を要約する、商品を比較する、画像を段階的に分析するなどのタスクが含まれます。
28 |
29 |
30 |
31 |
32 |
33 |
34 | モデルの能力を 5 つの次元から総合的に評価する。上図のように、27 のサブタスクの例を示す。知覚から認知、創造性まで、難易度が上がるにつれて、モデルに求められる要件もどんどん高くなっている。現在、LVLM の機能は初期段階にある。我々のデータセットには 800 以上の質問と 27 のカテゴリーが含まれている。
35 |
36 | ## 方法
37 |
38 |
39 | 自動評価を可能にするために、強力な LLM を判定器として適用する。画像の内容を効果的に理解するために、実際の画像入力をきめ細かいテキスト注釈に手動で置き換える。これらの注釈と対応する質問を GPT4 のような強力な LLM に入力することで、参照解答を得る。
40 |
41 | LVLMの評価には、実際の画像と質問を入力として与え、それぞれの回答を得る。最後に、GPT4を用いて、LVLMが生成した回答を、細かいアノテーションと質問に基づいてスコアリングする。スコアリングの指示は、注釈を画像の内容とみなして、回答の有用性、関連性、正確性を評価するようモデルに要求する。評価の公平性を確保するため、各モデルの回答はGPT4の一貫した参照回答と比較されます。全問題におけるモデルの平均スコアを最終スコアとする。
42 |
43 | 解答位置の影響を排除するために、解答位置を入れ替えて2回目の採点ラウンドを行い、得られた2つのスコアの平均を計算します。このアプローチは、解答の配置によって生じるバイアスを軽減することを目的としています。
44 |
45 |
46 |
47 |
48 | ### 評価
49 |
50 | #### 英語ベースのマルチモーダル対話における評価
51 |
52 | | Model | Score |
53 | |---------------|-------|
54 | | PandaGPT | 488.5 |
55 | | MiniGPT4 | 531.7 |
56 | | InstructBLIP | 552.4 |
57 | | LLaMA-AdapterV2 | 590.1 |
58 | | mPLUG-Owl | 605.4 |
59 | | LLaVA | 602.7 |
60 | | Qwen-VL-Chat | 645.2 |
61 |
62 | #### 中国語ベースのマルチモーダル対話における評価
63 |
64 | | Model | Score |
65 | |---------------|-------|
66 | | VisualGLM | 247.1 |
67 | | Qwen-VL-Chat | 401.2 |
68 |
69 |
--------------------------------------------------------------------------------
/CDDMBench/Qwen-VL/web_demo_mm.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Alibaba Cloud.
2 | #
3 | # This source code is licensed under the license found in the
4 | # LICENSE file in the root directory of this source tree.
5 |
6 | """A simple web interactive chat demo based on gradio."""
7 |
8 | from argparse import ArgumentParser
9 | from pathlib import Path
10 |
11 | import copy
12 | import gradio as gr
13 | import os
14 | import re
15 | import secrets
16 | import tempfile
17 | from transformers import AutoModelForCausalLM, AutoTokenizer
18 | from transformers.generation import GenerationConfig
19 |
DEFAULT_CKPT_PATH = 'Qwen/Qwen-VL-Chat'
# Matches one <box>...</box> span (non-greedy, may cross newlines) so that
# bounding-box markup can be removed from the text shown in the chat window.
BOX_TAG_PATTERN = r"<box>([\s\S]*?)</box>"
# Full-width / CJK punctuation plus the ASCII full stop. add_text() uses it to
# decide whether a single trailing punctuation mark is dropped from the query.
PUNCTUATION = "！？。＂＃＄％＆＇（）＊＋，－／：；＜＝＞＠［＼］＾＿｀｛｜｝～｟｠｢｣､、〃》「」『』【】〔〕〖〗〘〙〚〛〜〝〞〟〰〾〿–—‘’‛“”„‟…‧﹏."
23 |
24 |
def _get_args():
    """Build the demo's command-line parser and return the parsed arguments."""
    parser = ArgumentParser()
    add = parser.add_argument

    # Model selection and device.
    add("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH,
        help="Checkpoint name or path, default to %(default)r")
    add("--cpu-only", action="store_true", help="Run demo with CPU only")

    # Options for launching the web interface.
    add("--share", action="store_true", default=False,
        help="Create a publicly shareable link for the interface.")
    add("--inbrowser", action="store_true", default=False,
        help="Automatically launch the interface in a new tab on the default browser.")
    add("--server-port", type=int, default=8000,
        help="Demo server port.")
    add("--server-name", type=str, default="127.0.0.1",
        help="Demo server name.")

    return parser.parse_args()
42 |
43 |
def _load_model_tokenizer(args):
    """Load the tokenizer, the model (in eval mode) and its generation config.

    Args:
        args: Parsed arguments; ``checkpoint_path`` and ``cpu_only`` are read.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    checkpoint = args.checkpoint_path
    # Keyword arguments shared by all three from_pretrained() calls.
    hub_kwargs = {"trust_remote_code": True, "resume_download": True}

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, **hub_kwargs)

    device_map = "cpu" if args.cpu_only else "cuda"
    model = AutoModelForCausalLM.from_pretrained(
        checkpoint, device_map=device_map, **hub_kwargs
    ).eval()
    model.generation_config = GenerationConfig.from_pretrained(
        checkpoint, **hub_kwargs)

    return model, tokenizer
65 |
66 |
67 | def _parse_text(text):
68 | lines = text.split("\n")
69 | lines = [line for line in lines if line != ""]
70 | count = 0
71 | for i, line in enumerate(lines):
72 | if "```" in line:
73 | count += 1
74 | items = line.split("`")
75 | if count % 2 == 1:
76 | lines[i] = f'
'
77 | else:
78 | lines[i] = f"
"
79 | else:
80 | if i > 0:
81 | if count % 2 == 1:
82 | line = line.replace("`", r"\`")
83 | line = line.replace("<", "<")
84 | line = line.replace(">", ">")
85 | line = line.replace(" ", " ")
86 | line = line.replace("*", "*")
87 | line = line.replace("_", "_")
88 | line = line.replace("-", "-")
89 | line = line.replace(".", ".")
90 | line = line.replace("!", "!")
91 | line = line.replace("(", "(")
92 | line = line.replace(")", ")")
93 | line = line.replace("$", "$")
94 | lines[i] = "
" + line
95 | text = "".join(lines)
96 | return text
97 |
98 |
99 | def _launch_demo(args, model, tokenizer):
100 | uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
101 | Path(tempfile.gettempdir()) / "gradio"
102 | )
103 |
104 | def predict(_chatbot, task_history):
105 | chat_query = _chatbot[-1][0]
106 | query = task_history[-1][0]
107 | print("User: " + _parse_text(query))
108 | history_cp = copy.deepcopy(task_history)
109 | full_response = ""
110 |
111 | history_filter = []
112 | pic_idx = 1
113 | pre = ""
114 | for i, (q, a) in enumerate(history_cp):
115 | if isinstance(q, (tuple, list)):
116 | q = f'Picture {pic_idx}:
{q[0]}'
117 | pre += q + '\n'
118 | pic_idx += 1
119 | else:
120 | pre += q
121 | history_filter.append((pre, a))
122 | pre = ""
123 | history, message = history_filter[:-1], history_filter[-1][0]
124 | response, history = model.chat(tokenizer, message, history=history)
125 | image = tokenizer.draw_bbox_on_latest_picture(response, history)
126 | if image is not None:
127 | temp_dir = secrets.token_hex(20)
128 | temp_dir = Path(uploaded_file_dir) / temp_dir
129 | temp_dir.mkdir(exist_ok=True, parents=True)
130 | name = f"tmp{secrets.token_hex(5)}.jpg"
131 | filename = temp_dir / name
132 | image.save(str(filename))
133 | _chatbot[-1] = (_parse_text(chat_query), (str(filename),))
134 | chat_response = response.replace("[", "")
135 | chat_response = chat_response.replace(r"]", "")
136 | chat_response = re.sub(BOX_TAG_PATTERN, "", chat_response)
137 | if chat_response != "":
138 | _chatbot.append((None, chat_response))
139 | else:
140 | _chatbot[-1] = (_parse_text(chat_query), response)
141 | full_response = _parse_text(response)
142 |
143 | task_history[-1] = (query, full_response)
144 | print("Qwen-VL-Chat: " + _parse_text(full_response))
145 | return _chatbot
146 |
147 | def regenerate(_chatbot, task_history):
148 | if not task_history:
149 | return _chatbot
150 | item = task_history[-1]
151 | if item[1] is None:
152 | return _chatbot
153 | task_history[-1] = (item[0], None)
154 | chatbot_item = _chatbot.pop(-1)
155 | if chatbot_item[0] is None:
156 | _chatbot[-1] = (_chatbot[-1][0], None)
157 | else:
158 | _chatbot.append((chatbot_item[0], None))
159 | return predict(_chatbot, task_history)
160 |
161 | def add_text(history, task_history, text):
162 | task_text = text
163 | if len(text) >= 2 and text[-1] in PUNCTUATION and text[-2] not in PUNCTUATION:
164 | task_text = text[:-1]
165 | history = history + [(_parse_text(text), None)]
166 | task_history = task_history + [(task_text, None)]
167 | return history, task_history, ""
168 |
169 | def add_file(history, task_history, file):
170 | history = history + [((file.name,), None)]
171 | task_history = task_history + [((file.name,), None)]
172 | return history, task_history
173 |
    def reset_user_input():
        """Return a gradio update that sets the bound component's value to ""."""
        return gr.update(value="")
176 |
    def reset_state(task_history):
        """Empty ``task_history`` in place and return a new empty list."""
        task_history.clear()
        return []
180 |
181 | with gr.Blocks() as demo:
182 | gr.Markdown("""\
183 | 
""")
185 | gr.Markdown("""
Qwen-VL-Chat Bot""")
186 | gr.Markdown(
187 | """\
188 | This WebUI is based on Qwen-VL-Chat, developed by Alibaba Cloud. \
189 | (本WebUI基于Qwen-VL-Chat打造,实现聊天机器人功能。)""")
190 | gr.Markdown("""\
191 | Qwen-VL 🤖
192 | | 🤗  |
193 | Qwen-VL-Chat 🤖 |
194 | 🤗  |
195 |  Github""")
196 |
197 | chatbot = gr.Chatbot(label='Qwen-VL-Chat', elem_classes="control-height", height=750)
198 | query = gr.Textbox(lines=2, label='Input')
199 | task_history = gr.State([])
200 |
201 | with gr.Row():
202 | empty_bin = gr.Button("🧹 Clear History (清除历史)")
203 | submit_btn = gr.Button("🚀 Submit (发送)")
204 | regen_btn = gr.Button("🤔️ Regenerate (重试)")
205 | addfile_btn = gr.UploadButton("📁 Upload (上传文件)", file_types=["image"])
206 |
207 | submit_btn.click(add_text, [chatbot, task_history, query], [chatbot, task_history]).then(
208 | predict, [chatbot, task_history], [chatbot], show_progress=True
209 | )
210 | submit_btn.click(reset_user_input, [], [query])
211 | empty_bin.click(reset_state, [task_history], [chatbot], show_progress=True)
212 | regen_btn.click(regenerate, [chatbot, task_history], [chatbot], show_progress=True)
213 | addfile_btn.upload(add_file, [chatbot, task_history, addfile_btn], [chatbot, task_history], show_progress=True)
214 |
215 | gr.Markdown("""\
216 | Note: This demo is governed by the original license of Qwen-VL. \
217 | We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content, \
218 | including hate speech, violence, pornography, deception, etc. \
219 | (注:本演示受Qwen-VL的许可协议限制。我们强烈建议,用户不应传播及不应允许他人传播以下内容,\
220 | 包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。)""")
221 |
222 | demo.queue().launch(
223 | share=args.share,
224 | inbrowser=args.inbrowser,
225 | server_port=args.server_port,
226 | server_name=args.server_name,
227 | )
228 |
229 |
def main():
    """Parse the arguments, load the model and tokenizer, and launch the demo."""
    cli_args = _get_args()
    loaded_model, loaded_tokenizer = _load_model_tokenizer(cli_args)
    _launch_demo(cli_args, loaded_model, loaded_tokenizer)
236 |
237 |
238 | if __name__ == '__main__':
239 | main()
240 |
--------------------------------------------------------------------------------
/CDDMBench/README.md:
--------------------------------------------------------------------------------
1 | # A Multimodal Benchmark Dataset and Model for Crop Disease Diagnosis
2 |
3 | ## Introduction
4 | The CDDM dataset is a multimodal dataset for the crop disease domain, a pioneering resource designed to advance agricultural research through the application of multimodal learning techniques.
5 |
6 |
7 | ## CDDM dataset
8 | The CDDM dataset includes images and conversation data.
9 | ### CDDM images:
10 | Please download the CDDM images from one of the following links and extract them to the /dataset/ directory.
11 | - [Google Drive](https://drive.google.com/file/d/1kfB3zkittoef4BasOhwvAb8Cb66EPXst/view?usp=sharing)
12 | - [Baidu Yun Pan](https://pan.baidu.com/s/1CgmO2MyEKV6EE42eNS0sIw?pwd=ip1r) (extraction code: ip1r)
13 |
14 |
15 | ### CDDM conversation:
16 | We offer the conversation data in two formats suitable for training Qwen-VL and LLaVA models. The data covers crop disease diagnosis and knowledge.
17 |
18 | Please extract the conversation data to the /dataset/VQA/ directory.
19 | - [Qwen-VL training data](dataset/VQA/Crop_Disease_train_qwenvl.zip)
20 | - [LLaVA training data](dataset/VQA/Crop_Disease_train_llava.zip)
21 |
22 | ## Train
23 | ### Qwen-VL: To run on a machine with 8 GPUs:
24 | ```shell
25 | cd Qwen-VL
26 | sh finetune/finetune_lora_ds.sh
27 | ```
28 |
29 | ## Citation
30 | If you find our dataset or model useful, please cite our work:
31 |
32 | ```bibtex
33 | @InProceedings{10.1007/978-3-031-73016-0_10,
34 | author="Xiang, Liu
35 | and Zhaoxiang, Liu
36 | and Huan, Hu
37 | and Zezhou, Chen
38 | and Kohou, Wang
39 | and Kai, Wang
40 | and Shiguo, Lian",
41 | title="A Multimodal Benchmark Dataset and Model for Crop Disease Diagnosis",
42 | booktitle="Computer Vision -- ECCV 2024",
43 | year="2025",
44 | publisher="Springer Nature Switzerland",
45 | address="Cham",
46 | pages="157--170",
47 | isbn="978-3-031-73016-0"
48 | }
49 | ```
50 | ## Paper
51 | For more details, please refer to our paper: [ECCV 2024 Paper](https://www.ecva.net/papers/eccv_2024/papers_ECCV/papers/11606.pdf), [arXiv](https://arxiv.org/abs/2503.06973)
52 |
--------------------------------------------------------------------------------
/CDDMBench/dataset/VQA/Crop_Disease_train_llava.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/VQA/Crop_Disease_train_llava.zip
--------------------------------------------------------------------------------
/CDDMBench/dataset/VQA/Crop_Disease_train_qwenvl.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/VQA/Crop_Disease_train_qwenvl.zip
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Alternaria Blotch/plant_69422.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Alternaria Blotch/plant_69422.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Alternaria Blotch/plant_69423.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Alternaria Blotch/plant_69423.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Black Rot/plant_74765.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Black Rot/plant_74765.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Black Rot/plant_74766.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Black Rot/plant_74766.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Brown Spot/plant_75386.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Brown Spot/plant_75386.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Brown Spot/plant_75387.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Brown Spot/plant_75387.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Cedar Apple Rust/plant_81041.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Cedar Apple Rust/plant_81041.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Cedar Apple Rust/plant_81042.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Cedar Apple Rust/plant_81042.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Frog Eye Leaf Spot/plant_81316.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Frog Eye Leaf Spot/plant_81316.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Frog Eye Leaf Spot/plant_81317.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Frog Eye Leaf Spot/plant_81317.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Grey Spot/plant_84497.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Grey Spot/plant_84497.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Grey Spot/plant_84498.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Grey Spot/plant_84498.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Healthy/plant_89307.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Healthy/plant_89307.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Healthy/plant_89308.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Healthy/plant_89308.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Leaf Rust/plant_100000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Leaf Rust/plant_100000.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Leaf Rust/plant_100001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Leaf Rust/plant_100001.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Mosaic Virus/plant_103130.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Mosaic Virus/plant_103130.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Mosaic Virus/plant_103131.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Mosaic Virus/plant_103131.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Powdery Mildew/plant_108005.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Powdery Mildew/plant_108005.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Powdery Mildew/plant_108006.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Powdery Mildew/plant_108006.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Scab/plant_109265.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Scab/plant_109265.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Apple,Scab/plant_109549.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Apple,Scab/plant_109549.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Bell Pepper,Bacterial Spot/plant_127656.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Bell Pepper,Bacterial Spot/plant_127656.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Bell Pepper,Bacterial Spot/plant_127657.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Bell Pepper,Bacterial Spot/plant_127657.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Bell Pepper,Healthy/plant_128653.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Bell Pepper,Healthy/plant_128653.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Bell Pepper,Healthy/plant_128654.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Bell Pepper,Healthy/plant_128654.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Blueberry,Healthy/plant_62981.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Blueberry,Healthy/plant_62981.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Blueberry,Healthy/plant_62982.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Blueberry,Healthy/plant_62982.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Cherry,Healthy/plant_136288.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Cherry,Healthy/plant_136288.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Cherry,Healthy/plant_136289.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Cherry,Healthy/plant_136289.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Cherry,Powdery Mildew/plant_137142.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Cherry,Powdery Mildew/plant_137142.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Cherry,Powdery Mildew/plant_137143.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Cherry,Powdery Mildew/plant_137143.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Healthy/plant_138194.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Healthy/plant_138194.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Healthy/plant_138195.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Healthy/plant_138195.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Leaf Rust/plant_139356.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Leaf Rust/plant_139356.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Leaf Rust/plant_139357.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Leaf Rust/plant_139357.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Leaf Spot/plant_140548.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Leaf Spot/plant_140548.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Leaf Spot/plant_140549.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Leaf Spot/plant_140549.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Northern Leaf Blight/plant_141061.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Northern Leaf Blight/plant_141061.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Corn,Northern Leaf Blight/plant_141062.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Corn,Northern Leaf Blight/plant_141062.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Black Rot/plant_114645.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Black Rot/plant_114645.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Black Rot/plant_114646.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Black Rot/plant_114646.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Esca/plant_115825.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Esca/plant_115825.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Esca/plant_115826.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Esca/plant_115826.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Healthy/plant_117208.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Healthy/plant_117208.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Healthy/plant_117209.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Healthy/plant_117209.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Leaf Blight/plant_117631.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Leaf Blight/plant_117631.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Grape,Leaf Blight/plant_117632.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Grape,Leaf Blight/plant_117632.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Orange,Citrus Greening/plant_1567.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Orange,Citrus Greening/plant_1567.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Orange,Healthy/plant_145000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Orange,Healthy/plant_145000.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Orange,Healthy/plant_145001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Orange,Healthy/plant_145001.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Peach,Bacterial Spot/plant_124999.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Peach,Bacterial Spot/plant_124999.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Peach,Bacterial Spot/plant_125000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Peach,Bacterial Spot/plant_125000.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Peach,Healthy/plant_127296.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Peach,Healthy/plant_127296.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Peach,Healthy/plant_127297.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Peach,Healthy/plant_127297.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Potato,Early Blight/plant_64483.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Potato,Early Blight/plant_64483.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Potato,Early Blight/plant_64484.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Potato,Early Blight/plant_64484.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Potato,Healthy/plant_65483.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Potato,Healthy/plant_65483.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Potato,Healthy/plant_65484.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Potato,Healthy/plant_65484.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Potato,Late Blight/plant_65635.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Potato,Late Blight/plant_65635.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Potato,Late Blight/plant_65636.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Potato,Late Blight/plant_65636.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Pumpkin,Powdery Mildew/plant_67587.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Pumpkin,Powdery Mildew/plant_67587.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Pumpkin,Powdery Mildew/plant_67588.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Pumpkin,Powdery Mildew/plant_67588.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Raspberry,Healthy/plant_62610.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Raspberry,Healthy/plant_62610.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Raspberry,Healthy/plant_62611.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Raspberry,Healthy/plant_62611.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Bacterial Leaf Blight/plant_118707.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Bacterial Leaf Blight/plant_118707.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Bacterial Leaf Blight/plant_118708.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Bacterial Leaf Blight/plant_118708.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Blast/plant_120331.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Blast/plant_120331.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Blast/plant_120332.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Blast/plant_120332.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Brown Spot/plant_121851.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Brown Spot/plant_121851.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Brown Spot/plant_121852.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Brown Spot/plant_121852.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Leaf Blight/plant_123491.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Leaf Blight/plant_123491.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Leaf Blight/plant_123492.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Leaf Blight/plant_123492.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Leaf Smut/plant_123571.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Leaf Smut/plant_123571.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Leaf Smut/plant_123572.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Leaf Smut/plant_123572.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Tungro/plant_123611.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Tungro/plant_123611.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Rice,Tungro/plant_123612.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Rice,Tungro/plant_123612.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Soybean,Healthy/plant_10000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Soybean,Healthy/plant_10000.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Soybean,Healthy/plant_10001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Soybean,Healthy/plant_10001.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Strawberry,Healthy/plant_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Strawberry,Healthy/plant_1.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Strawberry,Healthy/plant_10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Strawberry,Healthy/plant_10.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Strawberry,Leaf Scorch/plant_1000.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Strawberry,Leaf Scorch/plant_1000.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Strawberry,Leaf Scorch/plant_1001.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Strawberry,Leaf Scorch/plant_1001.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Bacterial Spot/plant_12163.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Bacterial Spot/plant_12163.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Bacterial Spot/plant_12164.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Bacterial Spot/plant_12164.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Early Blight/plant_17947.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Early Blight/plant_17947.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Early Blight/plant_17948.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Early Blight/plant_17948.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Healthy/plant_22126.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Healthy/plant_22126.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Healthy/plant_22127.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Healthy/plant_22127.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Late Blight/plant_27633.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Late Blight/plant_27633.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Late Blight/plant_27634.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Late Blight/plant_27634.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Leaf Mold/plant_33556.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Leaf Mold/plant_33556.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Leaf Mold/plant_33557.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Leaf Mold/plant_33557.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Mosaic Virus/plant_37133.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Mosaic Virus/plant_37133.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Mosaic Virus/plant_37134.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Mosaic Virus/plant_37134.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Powdery Mildew/plant_40296.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Powdery Mildew/plant_40296.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Powdery Mildew/plant_40297.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Powdery Mildew/plant_40297.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Septoria Leaf Spot/plant_41552.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Septoria Leaf Spot/plant_41552.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Septoria Leaf Spot/plant_41553.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Septoria Leaf Spot/plant_41553.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Spider Mites/plant_47097.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Spider Mites/plant_47097.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Spider Mites/plant_47138.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Spider Mites/plant_47138.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Target Spot/plant_50956.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Target Spot/plant_50956.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Target Spot/plant_50964.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Target Spot/plant_50964.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Yellow Leaf Curl Virus/plant_54643.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Yellow Leaf Curl Virus/plant_54643.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Tomato,Yellow Leaf Curl Virus/plant_54644.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Tomato,Yellow Leaf Curl Virus/plant_54644.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Healthy/plant_131164.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Healthy/plant_131164.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Leaf Rust/plant_132688.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Leaf Rust/plant_132688.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Leaf Rust/plant_132689.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Leaf Rust/plant_132689.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Loose Smut/plant_134668.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Loose Smut/plant_134668.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Root Rot/plant_130134.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Root Rot/plant_130134.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Septoria Leaf Spot/plant_135607.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Septoria Leaf Spot/plant_135607.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Stem Rust/plant_135704.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Stem Rust/plant_135704.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Stem Rust/plant_135705.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Stem Rust/plant_135705.jpg
--------------------------------------------------------------------------------
/CDDMBench/dataset/images/Wheat,Stripe Rust/plant_136080.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CDDMBench/dataset/images/Wheat,Stripe Rust/plant_136080.jpg
--------------------------------------------------------------------------------
/CHiSafetyBench/README.md:
--------------------------------------------------------------------------------
1 | # CHiSafetyBench: A Chinese Hierarchical Safety Benchmark for Large Language Models
2 |
3 |
4 |
5 | ## Introduction
6 | CHiSafetyBench is a dedicated safety benchmark for evaluating LLMs’ capabilities in identifying risky content and refusing to answer risky questions in Chinese contexts. CHiSafetyBench incorporates a [dataset](./dataset) that covers a hierarchical Chinese safety taxonomy consisting of 5 risk areas and 31 categories. This dataset comprises two types of tasks: multiple-choice questions and question-answering, evaluating LLMs from the perspectives of risk content identification and the ability to refuse to answer risky questions, respectively.
7 |
8 | * We introduce CHiSafetyBench, a hierarchical safety evaluation benchmark tailored for Chinese safety scenarios. It encompasses a two-tiered structure, comprising 5 major areas and 31 categories. Additionally, we offer multi-level fine-grained evaluations tailored to this hierarchical structure, ensuring comprehensive assessment.
9 |
10 | * We create and open-source the first Chinese hierarchical risk assessment dataset, which comprises two types of task data: multiple-choice questions (MCQ) and question-answering (QA). The MCQ data primarily focuses on identifying risk content, while the QA data incorporates risky questions that must be refused. The integration of these two types of task data enhances the diversity of safety verification.
11 |
12 | * We innovatively introduce risk questions with conversational history into the QA task, in addition to single-turn risk questions. The inclusion of dialogue history better aligns with realistic interactions and poses more complex challenges for testing the security boundaries of LLMs.
13 |
14 | * We propose an LLM-based automatic evaluation method that rivals human assessment, facilitating cost-effective and efficient testing of model safety detection and defense capabilities, significantly reducing the burden of human evaluation.
15 |
16 | * We conduct a comprehensive safety evaluation of 10 SOTA Chinese LLMs. By comparing the performance of different models, we provide LLM developers with a safety reference for model usage.
17 | 
18 |
19 | ## Evaluation
20 |
21 | - **Risk Content Identification**
22 |
23 | The evaluation is conducted directly based on the correctness of the large model's selections. The evaluation metric is ACC (Accuracy).
24 |
25 | - **Refusal to Answer**
26 |
27 | The questions and the corresponding model responses are fed into the Qwen-72B model for automatic assessment. The evaluation metrics are:
28 |
29 | - Rejection Rate (RR-1): Measures the percentage of questions where the model refuses to answer.
30 | - Responsible Response Rate (RR-2): Evaluates the percentage of answers that are deemed responsible and appropriate.
31 | - Harmfulness Rate (HR): Calculates the percentage of responses that are considered harmful or inappropriate.
32 |
33 | -
34 |
35 | 
36 |
37 |
38 |
39 | ## Results
40 |
41 | - Risk Content Identification
42 |
43 | 
44 |
45 | - Refusal to Answer
46 |
47 | 
48 |
49 | - Performance comparison between risky questions with and without conversational history on the refusal-to-answer subset
50 |
51 | 
52 | ## Citation
53 |
54 |
55 |
56 | ```
57 | @misc{zhang2024chisafetybench,
58 | title={CHiSafetyBench: A Chinese Hierarchical Safety Benchmark for Large Language Models},
59 | author={Wenjing Zhang and Xuejiao Lei and Zhaoxiang Liu and Meijuan An and Bikun Yang and KaiKai Zhao and Kai Wang and Shiguo Lian},
60 | year={2024},
61 | eprint={2406.10311},
62 | archivePrefix={arXiv}
63 | }
64 | ```
65 |
66 |
--------------------------------------------------------------------------------
/CHiSafetyBench/images/figure1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CHiSafetyBench/images/figure1.png
--------------------------------------------------------------------------------
/CHiSafetyBench/images/figure2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CHiSafetyBench/images/figure2.png
--------------------------------------------------------------------------------
/CHiSafetyBench/images/figure3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CHiSafetyBench/images/figure3.png
--------------------------------------------------------------------------------
/CHiSafetyBench/images/figure4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CHiSafetyBench/images/figure4.png
--------------------------------------------------------------------------------
/CHiSafetyBench/images/figure5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/UnicomAI/UnicomBenchmark/d04b87973ef1361b06dd25faa9604e1449115eae/CHiSafetyBench/images/figure5.png
--------------------------------------------------------------------------------
/RAODBench/README.md:
--------------------------------------------------------------------------------
1 | # RAOD: A Benchmark for Road Abandoned Object Detection from Video Surveillance
2 |
3 | ## Introduction
4 | We introduce a large-scale Road Abandoned Object Detection (RAOD) benchmark derived from video surveillance, addressing the lack of abundant datasets in current research. This benchmark specifically targets highway scenarios, which have unique challenges not adequately covered by existing autonomous driving datasets due to differences in camera perspective and scope.
5 |
6 | We have collected a substantial amount of real-world video clips containing various potential abandoned object categories on highways from our commercial Intelligent Transportation Systems (ITS). The resulting dataset comprises 557 video sequences and 18,953 images, all with pixel-level manual annotations. This extensive dataset provides a rich resource for training and evaluating road abandoned object detection models.
7 |
8 | To demonstrate the effectiveness of different approaches, we conducted comprehensive evaluation experiments using a variety of baseline models from mainstream algorithms on our RAOD dataset. These experiments provide insights into the performance of different methods and serve as a benchmark for future research in this area.
9 |
10 | We propose a novel image segmentation framework that incorporates an area-aware attention mechanism. Our experimental results show that this method significantly outperforms the UNet-based model, achieving nearly a 9% improvement in Dice score. This advancement represents a step forward in the accuracy and reliability of road abandoned object detection.
11 |
12 | ## RAOD dataset
13 | The RAOD dataset comprises 557 video sequences and 18,953 images, all with pixel-level manual annotations.
14 |
15 | Please download RAOD dataset from the following link:
16 | - [Baidu Yun Pan](https://pan.baidu.com/s/1MdjOxZ2TQ-5PX_cB6PJQYg): 5tGb
17 | - [Google Drive](https://drive.google.com/file/d/1WsaBYKtHT55_bdx0JW2vohxsXjFYMdh9/view?usp=drive_link)
18 |
19 | ## Citation
20 | If you use our benchmark or dataset in your research, please cite our paper.
21 | ```bibtex
22 | @ARTICLE{10542978,
23 | author={Xu, Yajun and Hu, Huan and Zhu, Xiaoya and Nan, Yibing and Wang, Kai and Liu, Zhaoxiang and Lian, Shiguo},
24 | journal={IEEE Access},
25 | title={RAOD: A Benchmark for Road Abandoned Object Detection From Video Surveillance},
26 | year={2024},
27 | volume={12},
28 | number={},
29 | pages={123985-123994},
30 | keywords={Roads;Benchmark testing;Image segmentation;Video surveillance;Uncertainty;Object recognition;Training;Intelligent transportation systems;Road abandoned object detection;intelligent transportation system;video surveillance;area-aware attention mechanism},
31 | doi={10.1109/ACCESS.2024.3407955}
32 | }
33 |
34 |
35 | ```
36 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # UnicomAI Benchmark
2 |
3 | * [A-Eval](./A-Eval): A benchmark designed to evaluate Chat LLMs of various scales from a practical application perspective.
4 | * [CHiSafetyBench](./CHiSafetyBench): A benchmark for LLM safety, which is designed based on the standard "Basic security requirements for generative artificial intelligence service" issued by the Chinese government on February 29, 2024.
5 | * [CDDMBench](./CDDMBench): A multimodal benchmark dataset and model for crop disease diagnosis.
6 | * [RAODBench](./RAODBench): A Benchmark for Road Abandoned Object Detection from Video Surveillance.
7 | * [TADBench](./TADBench): A Large-scale Benchmark for Traffic Accidents Detection from Video Surveillance.
8 | ## News
9 | 2024.6 We released an application-driven benchmark A-Eval.
10 |
11 | 2024.6 We released a Chinese safety benchmark CHiSafetyBench.
12 |
13 | 2024.7 We released a multimodal benchmark dataset for crop disease diagnosis, CDDMBench.
14 |
15 | 2024.12 We released a benchmark for road abandoned object detection from video surveillance, RAODBench.
16 |
17 | 2025.1 We released a large-scale benchmark for traffic accidents detection from video surveillance, TADBench.
18 | ## Contact
19 | China Unicom AI Innovation Center, China United Network Communication Group Co., Ltd.
20 |
--------------------------------------------------------------------------------
/TADBench/README.md:
--------------------------------------------------------------------------------
1 | # TAD: A Large-scale Benchmark for Traffic Accidents Detection from Video Surveillance
2 |
3 | ## Introduction
4 | We introduced and developed a large-scale traffic accident dataset named TAD (Traffic Accident Detection). We sourced this dataset from video surveillance, focusing specifically on highway scenarios. By addressing the limitations of existing datasets—such as their small scale and limited scope—we aimed to enhance the effectiveness and applicability of traffic accident detection systems.
5 |
6 | We recognized that current traffic accident datasets are often not derived from surveillance cameras, not publicly available, or not tailored for highway situations. To meet the urgent need for an open-sourced dataset that encompasses various scenes from surveillance cameras, we created the TAD dataset. This addresses a significant gap in the field and provides practical importance.
7 |
8 | We conducted a range of experiments on image classification, video classification, and object detection tasks using public mainstream vision algorithms or frameworks. We did this to evaluate and demonstrate the performance of different methods, allowing us to assess and compare the effectiveness of various technologies in the context of traffic accident detection.
9 |
10 | We presented the proposed TAD dataset along with our experimental results as a new benchmark. We established this benchmark to advance research in computer vision, particularly in the application of Intelligent Transportation Systems (ITS), by providing a standardized platform for evaluating and improving detection methods.
11 |
12 | ## TAD dataset
13 | TAD contains serious traffic incidents caused by rain, vandalism or other factors, with a total of 344 videos covering 277 positive ones with traffic accidents and 127 negative ones without traffic accidents.
14 |
15 | We made the TAD dataset publicly available on GitHub. We ensured that researchers worldwide can access and utilize this valuable resource for further research and development, fostering collaboration and innovation in the field.
16 |
17 | Please download TAD dataset from the following link:
18 | - [Baidu Yun Pan](https://pan.baidu.com/s/1X8xRJWZ5izXuyUgGbGppjw): gi9f
19 | - [Google Drive](https://drive.google.com/file/d/14GNlNcWLzN-sbzvmrMuSbAg_rZZ5yd26/view?usp=drive_link)
20 |
21 | ## Citation
22 | If you use our benchmark or dataset in your research, please cite our paper.
23 | ```bibtex
24 | @ARTICLE{10815954,
25 | author={Xu, Yajun and Hu, Huan and Huang, Chuwen and Nan, Yibing and Liu, Yuyao and Wang, Kai and Liu, Zhaoxiang and Lian, Shiguo},
26 | journal={IEEE Access},
27 | title={TAD: A Large-Scale Benchmark for Traffic Accidents Detection From Video Surveillance},
28 | year={2025},
29 | volume={13},
30 | number={},
31 | pages={2018-2033},
32 | keywords={Accidents;Benchmark testing;Video surveillance;Feature extraction;Classification algorithms;YOLO;Visualization;Cameras;Traffic control;Prediction algorithms;Traffic accidents;large-scale;surveillance cameras;open-sourced},
33 | doi={10.1109/ACCESS.2024.3522384}
34 | }
35 |
36 | ```
37 |
--------------------------------------------------------------------------------