├── .gitignore ├── LICENSE ├── README.md ├── data_engine ├── all_seed │ ├── 00001_image_style.json │ ├── 00002_image_scene.json │ ├── 00003_image_emotion.json │ ├── 00004_image_quality.json │ ├── 00005_image_description.json │ ├── 00006_object_localization.json │ ├── 00007_attribute_recognition.json │ ├── 00008_celebrity_recognition.json │ ├── 00009_ocr.json │ ├── 00010_object_relation.json │ ├── 00011_image_comparison.json │ ├── 00012_structuralized_imagetext_understanding.json │ ├── 00013_commonsense_reasoning.json │ ├── 00014_complex_reasoning.json │ ├── 00015_social_relation.json │ ├── 00016_future_prediction.json │ ├── 00017_artwork.json │ ├── 00018_landmark.json │ ├── 00019_numerical_calculation.json │ ├── 00020_spatial_relationship.json │ ├── 00021_posters.json │ ├── 00022_meme_comprehension.json │ ├── 00023_writing.json │ ├── 00024_brand_recognition.json │ └── 00025_species_recognition.json ├── end_prompt.json ├── gpt35_qa.py ├── gpt4v_caption.py ├── image_retrieval_bing_spider.py └── image_retrieval_clip.py ├── figs ├── data-engine.png ├── example_in_domain.pdf └── example_in_domain.png └── train_dataset_for_llava.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__ 3 | *.pyc 4 | *.egg-info 5 | dist 6 | 7 | # Log 8 | *.log 9 | *.log.* 10 | 11 | # Data 12 | !**/alpaca-data-conversation.json 13 | 14 | # Editor 15 | .idea 16 | *.swp 17 | 18 | # Other 19 | .DS_Store 20 | wandb 21 | output 22 | 23 | checkpoints 24 | ckpts* 25 | 26 | .ipynb_checkpoints 27 | *.ipynb 28 | 29 | # DevContainer 30 | !.devcontainer/* 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 
8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MMInstruct 2 | 3 | The official implementation of the paper "[MMInstruct: A High-Quality Multi-Modal Instruction Tuning Dataset with Extensive Diversity](http://arxiv.org/abs/2407.15838)". 4 | 5 | The dataset is available on Hugging Face at [🤗 yuecao0119/MMInstruct](https://huggingface.co/datasets/yuecao0119/MMInstruct-GPT4V). 6 | 7 | ## 📣 News 8 | 9 | - **[Oct 14, 2024]** Our paper is accepted by SCIENCE CHINA Information Sciences! 10 | - **[Aug 6, 2024]** The dataset is already accessible on Hugging Face at [🤗 yuecao0119/MMInstruct](https://huggingface.co/datasets/yuecao0119/MMInstruct-GPT4V). 11 | - **[Jul 22, 2024]** The paper has been released on [arXiv](https://arxiv.org/abs/2407.15838)! 12 | - **[Jul 22, 2024]** Code has been released. 13 | 14 | ## Todo List 15 | 16 | - [x] Data Engine. 17 | - [x] Open Source Datasets. 18 | - [ ] Release the checkpoint. 
19 | 20 | ## Introduction 21 | 22 | Vision-language supervised fine-tuning effectively enhances VLLM performance, but existing visual instruction tuning datasets have limitations: 23 | 24 | 1. **Instruction Annotation Quality**: Despite strong performance, advanced VLLMs may generate instructions with inaccuracies, such as hallucinations. 25 | 2. **Instruction and Image Diversity**: Limited instruction types and lack of diverse image data impact the model's ability to generate varied and realistic outputs. 26 | 27 | 28 | ### MMInstruct Dataset 29 | 30 | To address these challenges, we created the MMInstruct dataset, featuring: 31 | - **973K instructions** from **24 domains** 32 | - Four instruction types: Judgement, Multiple-Choice, Long Visual Question Answering, and Short Visual Question Answering. 33 | 34 | image 35 | 36 | The open source datasets on Hugging Face [🤗 yuecao0119/MMInstruct](https://huggingface.co/datasets/yuecao0119/MMInstruct-GPT4V) include: 37 | 38 | * `caption_cn`: 144K Chinese detailed image caption data generated using *gpt-4-vision-preview*. 39 | * `caption_en`: 18.2K English detailed image caption data generated using *gpt-4-vision-preview*. 40 | * `qa_en`: 216K instruction data generated using *GPT-3.5-turbo*, including 161K multi-round long questions and answers and 55K manually corrected instruction data from 23 fields, as shown in the figure below. 
41 | 42 | We also expand MMInstruct with other open-source data, including: 43 | 44 | | Domain | Dataset | 45 | | -------------------- | ------------------------------------------------------------ | 46 | | mathematics datasets | [GEOS](https://aclanthology.org/D15-1171.pdf); [UniGeo](https://arxiv.org/abs/2212.02746); [GeoQA+](https://aclanthology.org/2022.coling-1.130/); [Geometry3k](https://arxiv.org/abs/2105.04165); [CLEVR-Math](https://arxiv.org/abs/2208.05358); [Super-CLEVR](https://openaccess.thecvf.com/content/CVPR2023/html/Li_Super-CLEVR_A_Virtual_Benchmark_To_Diagnose_Domain_Robustness_in_Visual_CVPR_2023_paper.html); [TabMWP](https://arxiv.org/abs/2209.14610) | 47 | | charts and plots | [DVQA (100K)](https://openaccess.thecvf.com/content_cvpr_2018/html/Kafle_DVQA_Understanding_Data_CVPR_2018_paper.html); [FigureQA](https://arxiv.org/abs/1710.07300) | 48 | | scientific figure | [TQA](https://openaccess.thecvf.com/content_cvpr_2017/html/Kembhavi_Are_You_Smarter_CVPR_2017_paper.html) | 49 | | map chart | [MapQA](https://arxiv.org/abs/2211.08545) | 50 | 51 | ### Data Engine 52 | 53 | We developed an instruction generation data engine leveraging GPT-4V, GPT-3.5, and manual correction. This engine allows semi-automatic, low-cost, multi-domain instruction generation at 1/6 the cost of manual construction. 54 | 55 | image 56 | 57 | As described in [our paper](http://arxiv.org/abs/2407.15838), we mainly proposed a semi-automatic and low-cost instruction generation data engine using GPT-4V, GPT-3.5 and manual correction. Our data engine consists of six steps: (a) image collection, (b) image caption generation, (c) seed question collection, (d) automatic instruction generation, (e) dataset expansion and (f) manual correction. 
58 | 59 | (a) First, we collect a large number of diverse images from various sources: starting from a set of selected source images, we retrieve additional images with web crawlers and CLIP-based retrieval, as shown in [image_retrieval_bing_spider.py](https://github.com/yuecao0119/MMInstruct/blob/main/data_engine/image_retrieval_bing_spider.py) and [image_retrieval_clip.py](https://github.com/yuecao0119/MMInstruct/blob/main/data_engine/image_retrieval_clip.py). 60 | 61 | (b) Then, we use GPT-4V to generate detailed image captions, as shown in [gpt4v_caption.py](https://github.com/yuecao0119/MMInstruct/blob/main/data_engine/gpt4v_caption.py). 62 | 63 | (c) Next, experts design corresponding [seed questions](https://github.com/yuecao0119/MMInstruct/tree/main/data_engine/all_seed) for different fields. 64 | 65 | (d) We use image captions and seed questions to automatically generate a rich and diverse set of instruction data through GPT-3.5, as shown in [gpt35_qa.py](https://github.com/yuecao0119/MMInstruct/blob/main/data_engine/gpt35_qa.py). 66 | 67 | (e), (f) In addition, we use various methods to expand our dataset. Finally, manual correction is performed to ensure data quality and accuracy. 68 | 69 | 70 | ### Performance 71 | 72 | image 73 | 74 | ## Citation 75 | 76 | If this work is helpful for your research, please consider citing the following BibTeX entry. 
77 | 78 | ``` 79 | @article{liu2024mminstruct, 80 | title={MMInstruct: A High-Quality Multi-Modal Instruction Tuning Dataset with Extensive Diversity}, 81 | author={Liu, Yangzhou and Cao, Yue and Gao, Zhangwei and Wang, Weiyun and Chen, Zhe and Wang, Wenhai and Tian, Hao and Lu, Lewei and Zhu, Xizhou and Lu, Tong and others}, 82 | journal={arXiv preprint arXiv:2407.15838}, 83 | year={2024} 84 | } 85 | ``` 86 | -------------------------------------------------------------------------------- /data_engine/all_seed/00001_image_style.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这张图片的艺术风格是?" 5 | ], 6 | "English": [ 7 | "Is the style of this image a ?" 8 | ] 9 | }, 10 | "choice":{ 11 | "Chinese": [ 12 | "这张图片展示了什么艺术风格?", 13 | "识别此图像的艺术风格。" 14 | ], 15 | "English": [ 16 | "What art style is showcased in this image?", 17 | "Identify the art style of this image." 18 | ] 19 | }, 20 | "sentence": { 21 | "Chinese": [ 22 | "这张图片展示了什么艺术风格?", 23 | "识别此图像的艺术风格。" 24 | ], 25 | "English": [ 26 | "What art style is showcased in this image?", 27 | "Identify the art style of this image." 28 | ] 29 | } 30 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00002_image_scene.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这张图片拍摄于吗?", 5 | "这张图像的场景类别是吗?", 6 | "这张图像的环境类型是吗?", 7 | "这张图像的季节是吗?", 8 | "这张图像的温度状态是吗?", 9 | "图片显示的是吗?" 10 | ], 11 | "English": [ 12 | "Was this picture taken in ?", 13 | "Is the scene category of this image ?", 14 | "Is the environment type of this image ?", 15 | "Is the season of this image ?", 16 | "Is the temperature status of this image ?", 17 | "Does the picture show ?" 18 | ] 19 | }, 20 | "choice": { 21 | "Chinese": [ 22 | "哪个场景类别与此图像最匹配?", 23 | "图中描绘了什么样的环境类型?", 24 | "图中描绘的是哪个季节?", 25 | "图片中描绘了什么样的温度状态?", 26 | "图片显示的是什么场景?" 
27 | ], 28 | "English": [ 29 | "Which scene category best matches this image?", 30 | "What type of environment is depicted in the image?", 31 | "What season is depicted in the picture?", 32 | "What temperature state is depicted in the picture?", 33 | "What scene does the picture show?" 34 | ] 35 | }, 36 | "sentence": { 37 | "Chinese": [ 38 | "请告诉我照片中拍摄的环境。", 39 | "解释此图中可见的环境类型。", 40 | "解释图中所示的温度状态。", 41 | "照片中显示的是哪个季节?", 42 | "图片显示的什么场景?" 43 | ], 44 | "English": [ 45 | "Please tell me about the environment in which the photos were taken.", 46 | "Explain the type of environment visible in this image.", 47 | "Explain the temperature conditions shown in the picture.", 48 | "Which season is shown in the photo?", 49 | "What scene is shown in the picture?" 50 | ] 51 | } 52 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00003_image_emotion.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "图片中的的情绪是积极的吗?", 5 | "图片中的的情绪是消极的吗?", 6 | "这张图片传达了的情绪?" 7 | ], 8 | "English": [ 9 | "Is the emotion of in the picture positive?", 10 | "Is the emotion of in the picture negative?", 11 | "Does this image convey the emotion of ?" 12 | ] 13 | }, 14 | "choice": { 15 | "Chinese": [ 16 | "这张照片描绘了什么样的情感?", 17 | "识别此图像中表达的情感。", 18 | "这张图片传达了什么样的情绪?" 19 | ], 20 | "English": [ 21 | "What emotion does this photo depict?", 22 | "Identify the emotion expressed in this image.", 23 | "Which mood does this image convey?" 24 | ] 25 | }, 26 | "sentence": { 27 | "Chinese": [ 28 | "这张照片描绘了什么样的情感?", 29 | "识别此图像中表达的情感。", 30 | "这张图片传达了什么样的情绪?" 31 | ], 32 | "English": [ 33 | "What emotion does this photo depict?", 34 | "Identify the emotion expressed in this image.", 35 | "Which mood does this image convey?" 
36 | ] 37 | } 38 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00004_image_quality.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "第一张图像的对比度是否高于第二张图像?", 5 | "第二个图像的亮度比第一个图像的低吗?", 6 | "第一个图像比第二个图像更清晰吗?", 7 | "第二张图像的对比度是否高于第一张图像?", 8 | "第一个图像的清晰度是否低于第二个图像?", 9 | "第一个图像比第二个图像暗吗?" 10 | ], 11 | "English": [ 12 | "Does the first image show a higher contrast than the second image?", 13 | "Is the brightness of the second image lower than that of the first image?", 14 | "Is the first image clearer than the second image?", 15 | "Does the second image show a higher contrast than the first image?", 16 | "Is the clarity of the first image lower than that of the second image?", 17 | "Is the first image darker than the second image?" 18 | ] 19 | }, 20 | "choice": { 21 | "Chinese": [ 22 | "哪个图像的亮度最<高/低>?", 23 | "这两张图像中哪个清晰度最<高/低>?", 24 | "在哪幅图像中,颜色对比度最<高/低>?" 25 | ], 26 | "English": [ 27 | "Which image has the highest brightness?", 28 | "Which image shows the highest sharpness?", 29 | "In which image do the colors stand out most from each other?" 30 | ] 31 | }, 32 | "sentence": { 33 | "Chinese": [ 34 | "哪个图像的亮度最<高/低>?", 35 | "这两张图像中哪个清晰度最<高/低>?", 36 | "在哪幅图像中,颜色对比度最<高/低>?", 37 | "描述第张图像的清晰度。", 38 | "描述第张图像的对比度。", 39 | "描述第张图像的亮度。" 40 | ], 41 | "English": [ 42 | "Which image has the highest brightness?", 43 | "Which image shows the highest sharpness?", 44 | "In which image do the colors stand out most from each other?", 45 | "Describe the clarity of the image.", 46 | "Describe the contrast of the image.", 47 | "Describe the brightness of the image." 
48 | ] 49 | } 50 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00005_image_description.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这张图片发生了的事件吗?", 5 | "关于这个图片的具体内容,是这样的吗?", 6 | "这个图片的主题是吗?", 7 | "照片中的场景是吗?", 8 | "这张照片中的关键元素是吗?" 9 | ], 10 | "English": [ 11 | "Did the incident of occur in this picture?", 12 | "Is the specific content of this picture like ?", 13 | "Is the theme of this picture ?", 14 | "Is the scene in the photo ?", 15 | "Is the key element in this photo ?" 16 | ] 17 | }, 18 | "choice": { 19 | "Chinese": [ 20 | "请描述此图片的详细内容。", 21 | "你能描述一下这张照片的焦点吗?", 22 | "适合这个图片的标题是什么?", 23 | "请描述一下图中的场景信息。", 24 | "请列出图片中的主要元素。", 25 | "你认为这张图片中发生了什么?" 26 | ], 27 | "English": [ 28 | "Please describe the details of this image.", 29 | "Can you describe the focus of this photo?", 30 | "What would be a suitable title for this image?", 31 | "Please describe the scene information in the picture.", 32 | "Please list the main elements in the image.", 33 | "What do you think is happening in this picture?" 34 | ] 35 | }, 36 | "sentence": { 37 | "Chinese": [ 38 | "请描述此图片的详细内容。", 39 | "你能描述一下这张照片的焦点吗?", 40 | "适合这个图片的标题是什么?", 41 | "请描述一下图中的场景信息。", 42 | "请列出图片中的主要元素。", 43 | "你认为这张图片中发生了什么?" 44 | ], 45 | "English": [ 46 | "Please describe the details of this image.", 47 | "Can you describe the focus of this photo?", 48 | "What would be a suitable title for this image?", 49 | "Please describe the scene information in the picture.", 50 | "Please list the main elements in the image.", 51 | "What do you think is happening in this picture?" 
52 | ] 53 | } 54 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00006_object_localization.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "在图片中的精确位置是吗?", 5 | "在这张图片中,朝向的是吗?", 6 | "这张图片中的可见数量是吗?" 7 | ], 8 | "English": [ 9 | "Is the precise position of in the picture ?", 10 | "In this picture, is facing ?", 11 | "Is the visible number of in this image ?" 12 | ] 13 | }, 14 | "choice": { 15 | "Chinese": [ 16 | "在图片中的精确位置是什么?", 17 | "在这张图片中,朝向哪个方向?", 18 | "请估算出这张图片中的可见数量?" 19 | ], 20 | "English": [ 21 | "What is the precise position of in the picture?", 22 | "In this picture, which direction does face?", 23 | "Please estimate the visible number of s in this image?" 24 | ] 25 | }, 26 | "sentence": { 27 | "Chinese": [ 28 | "在图片中的精确位置是什么?", 29 | "在这张图片中,朝向哪个方向?", 30 | "请估算出这张图片中的可见数量?" 31 | ], 32 | "English": [ 33 | "What is the precise position of in the picture?", 34 | "In this picture, which direction does face?", 35 | "Please estimate the visible number of s in this image?" 36 | ] 37 | } 38 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00007_attribute_recognition.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuecao0119/MMInstruct/eb6aacbcc64b1880092de22142eeec0ad7f2422d/data_engine/all_seed/00007_attribute_recognition.json -------------------------------------------------------------------------------- /data_engine/all_seed/00008_celebrity_recognition.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "照片上的这个人是吗?", 5 | "这个人的名字是吗?", 6 | "图片中这个人来自吗?", 7 | "照片上这个人的职业是吗?" 
8 | ], 9 | "English": [ 10 | "Is this person in the photo?", 11 | "Is this person's name ?", 12 | "Is the person in the picture from ?", 13 | "Is the occupation of the person in the photo ?" 14 | ] 15 | }, 16 | "choice": { 17 | "English": [ 18 | "Is this person in the photo ?", 19 | "What is the occupation of the person in the photo?", 20 | "Describe personal information about the person in the image.", 21 | "What is the name of the person in this photo?", 22 | "What country is the person in the picture from?" 23 | ], 24 | "Chinese": [ 25 | "照片上的这个人是吗?", 26 | "照片上这个人的职业是什么?", 27 | "描述图片中这个人的个人信息。", 28 | "这张照片中的人的名字是什么?", 29 | "图片中这个人来自哪个国家?" 30 | ] 31 | }, 32 | "sentence": { 33 | "Chinese": [ 34 | "照片上的这个人是吗?", 35 | "照片上这个人的职业是什么?", 36 | "描述图片中这个人的个人信息。", 37 | "这张照片中的人的名字是什么?", 38 | "图片中这个人来自哪个国家?" 39 | ], 40 | "English": [ 41 | "Is this person in the photo ?", 42 | "What is the occupation of the person in the photo?", 43 | "Describe personal information about the person in the image.", 44 | "What is the name of the person in this photo?", 45 | "What country is the person in the picture from?" 46 | ] 47 | } 48 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00009_ocr.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "图像是否包含<特定文本/字符>?", 5 | "图像中的<所有/上方/中间/下方/左边/右边等>文本信息是否是?", 6 | "图像中的文本信息的<中文/英文/日语等>翻译结果是否是?" 7 | ], 8 | "English": [ 9 | "Does the image contain ?", 10 | "Is the text information in the image ?", 11 | "Is the translation result of the text information in the image ?" 12 | ] 13 | }, 14 | "choice": { 15 | "Chinese": [ 16 | "识别图片中的<所有/上方/中间/下方/左边/右边等>文本信息。", 17 | "解释此图片中的文本信息。", 18 | "翻译此图片中的文本信息为<中文/英文/日语等>。" 19 | ], 20 | "English": [ 21 | "Recognize text information in the picture.", 22 | "Explain the textual information in this image.", 23 | "Translate the text information in this image to ." 
24 | ] 25 | }, 26 | "sentence": { 27 | "Chinese": [ 28 | "识别图片中的<所有/上方/中间/下方/左边/右边等>文本信息。", 29 | "解释此图片中的文本信息。", 30 | "翻译此图片中的文本信息为<中文/英文/日语等>。" 31 | ], 32 | "English": [ 33 | "Recognize text information in the picture.", 34 | "Explain the textual information in this image.", 35 | "Translate the text information in this image to ." 36 | ] 37 | } 38 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00010_object_relation.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "图中生物之间的自然关系是否是?", 5 | "图中所示的生物与人类的自然关系是否是", 6 | "图中所示的生物与的自然关系是否是?", 7 | "相对于的位置是吗?", 8 | "是否在?", 9 | "是否位于图像的?" 10 | ], 11 | "English": [ 12 | "Is the natural relationship between the creatures in the picture a ?", 13 | "Is the natural relationship between the creatures shown in the picture and humans a ", 14 | "Is the natural relationship between the creatures shown in the picture and a ?", 15 | "Is the position of relative to a ?", 16 | "Is in the of ?", 17 | "Is located in of the image?" 18 | ] 19 | }, 20 | "choice": { 21 | "Chinese": [ 22 | "图中生物之间存在什么样的自然关系?", 23 | "在自然界中,图中所示的生物与人类之间有什么关系?", 24 | "在自然界中,图中所示的生物与<其他特定生物>之间有什么关系?", 25 | "就二维平面上的角度而言,<物体1>相对于<物体2>的位置是什么?", 26 | "图中,<物体1>和<物体2>之间的相对位置是什么?", 27 | "<地点1>在<地点2>的什么方向?", 28 | "哪个<地点>位于<东部/南部/西部/北部等>?" 29 | ], 30 | "English": [ 31 | "What kind of natural relationships exist between creatures in the picture?", 32 | "What is the relationship between the creatures shown in the picture and humans in nature?", 33 | "What is the relationship between the creatures shown in the figure and in nature?", 34 | "In terms of angles on a two-dimensional plane, what is the position of relative to ?", 35 | "What is the relative position between and in the figure?", 36 | "What direction is in ?", 37 | "Which is located in ?" 
38 | ] 39 | }, 40 | "sentence": { 41 | "Chinese": [ 42 | "图中生物之间存在什么样的自然关系?", 43 | "在自然界中,图中所示的生物与人类之间有什么关系?", 44 | "在自然界中,图中所示的生物与<其他特定生物>之间有什么关系?", 45 | "就二维平面上的角度而言,<物体1>相对于<物体2>的位置是什么?", 46 | "图中,<物体1>和<物体2>之间的相对位置是什么?", 47 | "<地点1>在<地点2>的什么方向?", 48 | "哪个<地点>位于<东部/南部/西部/北部等>?" 49 | ], 50 | "English": [ 51 | "What kind of natural relationships exist between creatures in the picture?", 52 | "What is the relationship between the creatures shown in the picture and humans in nature?", 53 | "What is the relationship between the creatures shown in the figure and in nature?", 54 | "In terms of angles on a two-dimensional plane, what is the position of relative to ?", 55 | "What is the relative position between and in the figure?", 56 | "What direction is in ?", 57 | "Which is located in ?" 58 | ] 59 | } 60 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00011_image_comparison.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "图中的<物体>一样大吗?", 5 | "图中的<物体>颜色一样吗?", 6 | "图中的商品<特定商品属性>一样吗?", 7 | "图中的商品适用场合一样吗?", 8 | "图中的商品适用人群一样吗?", 9 | "图片中是否阐明了这样的道理?" 10 | ], 11 | "English": [ 12 | "Are the in the picture the same size?", 13 | "Are the in the picture the same color?", 14 | "Are the products in the picture the same?", 15 | "Are the products in the picture suitable for the same situations?", 16 | "Are the products in the picture suitable for the same people?", 17 | "Does the picture illustrate ?" 
18 | ] 19 | }, 20 | "choice": { 21 | "Chinese": [ 22 | "图中的<物体>一样大吗?", 23 | "图中的<物体>颜色一样吗?", 24 | "对比图中<物体>的<特定属性>。", 25 | "图中的商品<特定商品属性>一样吗?", 26 | "图中的商品适用场合一样吗?", 27 | "图中的商品适用人群一样吗?", 28 | "请详细描述画面,并告诉我图片中发生了什么事情。", 29 | "请详细描述图片,并告诉我图片阐明了什么道理。", 30 | "请详细解释图片,并说明图片想表达的核心思想。" 31 | ], 32 | "English": [ 33 | "Is the <object> in the picture the same size?", 34 | "Is the color of the <object> in the picture the same?", 35 | "Compare the <specific attribute> of the <object> in the figure.", 36 | "Is the <specific product attribute> of the product in the picture the same?", 37 | "Is the product in the picture suitable for the same occasion?", 38 | "Is the product in the picture suitable for the same audience?", 39 | "Please describe the scene in detail and tell me what happened in the picture.", 40 | "Please provide a detailed description of the image and tell me what it illustrates.", 41 | "Please provide a detailed explanation of the image and explain the core idea it intends to convey." 42 | ] 43 | }, 44 | "sentence": { 45 | "Chinese": [ 46 | "图中的<物体>一样大吗?", 47 | "图中的<物体>颜色一样吗?", 48 | "对比图中<物体>的<特定属性>。", 49 | "图中的商品<特定商品属性>一样吗?", 50 | "图中的商品适用场合一样吗?", 51 | "图中的商品适用人群一样吗?", 52 | "请详细描述画面,并告诉我图片中发生了什么事情。", 53 | "请详细描述图片,并告诉我图片阐明了什么道理。", 54 | "请详细解释图片,并说明图片想表达的核心思想。" 55 | ], 56 | "English": [ 57 | "Is the <object> in the picture the same size?", 58 | "Is the color of the <object> in the picture the same?", 59 | "Compare the <specific attribute> of the <object> in the figure.", 60 | "Is the <specific product attribute> of the product in the picture the same?", 61 | "Is the product in the picture suitable for the same occasion?", 62 | "Is the product in the picture suitable for the same audience?", 63 | "Please describe the scene in detail and tell me what happened in the picture.", 64 | "Please provide a detailed description of the image and tell me what it illustrates.", 65 | "Please provide a detailed explanation of the image and explain the core idea it intends to convey."
66 | ] 67 | } 68 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00012_structuralized_imagetext_understanding.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuecao0119/MMInstruct/eb6aacbcc64b1880092de22142eeec0ad7f2422d/data_engine/all_seed/00012_structuralized_imagetext_understanding.json -------------------------------------------------------------------------------- /data_engine/all_seed/00013_commonsense_reasoning.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuecao0119/MMInstruct/eb6aacbcc64b1880092de22142eeec0ad7f2422d/data_engine/all_seed/00013_commonsense_reasoning.json -------------------------------------------------------------------------------- /data_engine/all_seed/00014_complex_reasoning.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yuecao0119/MMInstruct/eb6aacbcc64b1880092de22142eeec0ad7f2422d/data_engine/all_seed/00014_complex_reasoning.json -------------------------------------------------------------------------------- /data_engine/all_seed/00015_social_relation.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这两位人物可能是合作伙伴吗?", 5 | "他们之间的联系可能是家庭关系吗?", 6 | "他们可能是情侣吗?", 7 | "这两人可能是同事吗?", 8 | "这两位人物可能是同一家公司的员工吗?", 9 | "他们之间可能存在师生关系吗?", 10 | "这两人可能是一对兄弟姐妹吗?", 11 | "在这个环境中,他们可能是陌生人吗?", 12 | "他们可能是彼此的竞争对手吗?", 13 | "你认为这两人可能是一对亲子吗?", 14 | "你认为这两人可能是邻居吗?", 15 | "你认为这两人可能是一对旅行伴侣吗?", 16 | "这两位人物之间有可能是亲戚吗?", 17 | "这张图片中的两位人物可能是朋友吗?" 
18 | ], 19 | "English": [ 20 | "Could these two individuals be partners?", 21 | "Is the connection between them possibly a family relationship?", 22 | "Could they be a couple?", 23 | "Are these two individuals possibly colleagues?", 24 | "Could these two characters be employees of the same company?", 25 | "Is there a possibility of a teacher-student relationship between them?", 26 | "Could these two people be siblings?", 27 | "In this setting, could they be strangers to each other?", 28 | "Is it possible that they are competitors?", 29 | "Do you think these two individuals could be parent and child?", 30 | "Do you think these two people could be neighbors?", 31 | "Do you think these two individuals could be travel companions?", 32 | "Is there a possibility of them being relatives?", 33 | "Could the two individuals in this picture be friends?" 34 | ] 35 | }, 36 | "choice": { 37 | "Chinese": [ 38 | "这张图片中的人物之间存在什么社会关系?", 39 | "你觉得这几个人之间的亲密度是什么样的?", 40 | "你认为这几个人是因为什么而相识的?", 41 | "在这张图片中,他们之间可能存在着怎样的互动?", 42 | "你认为这几人之间的互动可能有何特点?", 43 | "这张图片中的人物之间信任程度是怎么样的?" 44 | ], 45 | "English": [ 46 | "What social relationships exist between the people in this picture?", 47 | "What do you think the intimacy between these people is like?", 48 | "Why do you think these people got to know each other?", 49 | "What kind of interaction might there be between them in this picture?", 50 | "What do you think might be the characteristics of the interaction between these people?", 51 | "What is the level of trust between the people in this picture?" 52 | ] 53 | }, 54 | "sentence": { 55 | "Chinese": [ 56 | "这张图片中的人物之间存在什么社会关系?", 57 | "你觉得这几个人之间的亲密度是什么样的?", 58 | "你认为这几个人是因为什么而相识的?", 59 | "在这张图片中,他们之间可能存在着怎样的互动?", 60 | "你认为这几人之间的互动可能有何特点?", 61 | "这张图片中的人物之间信任程度是怎么样的?" 
62 | ], 63 | "English": [ 64 | "What social relationships exist between the people in this picture?", 65 | "What do you think the intimacy between these people is like?", 66 | "Why do you think these people got to know each other?", 67 | "What kind of interaction might there be between them in this picture?", 68 | "What do you think might be the characteristics of the interaction between these people?", 69 | "What is the level of trust between the people in this picture?" 70 | ] 71 | } 72 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00016_future_prediction.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "根据图像,时,是他可能面临的一个关键问题吗?", 5 | "根据这张图,接下来可能会发生这样的事吗?", 6 | "这张图片的未来结果可能是吗?", 7 | "这张图片未来可能会是的积极结果吗?", 8 | "这张图片未来可能会是的不幸结果吗?", 9 | "这张图片未来的天气可能会是吗?" 10 | ], 11 | "English": [ 12 | "Based on the image, is one key issue that may face when ?", 13 | "Based on this picture, is it possible that next?", 14 | "Is the expected result of this image ?", 15 | "Is the expected positive result of this image ?", 16 | "Is the expected unfortunate outcome of this image ?", 17 | "Will the weather in this picture be next?" 18 | ] 19 | }, 20 | "choice": { 21 | "Chinese": [ 22 | "根据图像,时可能面临的一个关键问题是什么?", 23 | "根据这张图片,请预测接下来会发生什么?", 24 | "这张图片的预期结果是什么?", 25 | "这张图片预期的积极结果是什么?", 26 | "这张图片预期的不幸结果是什么?", 27 | "预测这张图片中的天气之后会是怎么样的。" 28 | ], 29 | "English": [ 30 | "Based on the image, what is one key issue that might face when ?", 31 | "Based on this image, please predict what will happen next?", 32 | "What is the intended outcome in this image?", 33 | "What is the positive result in this image?", 34 | "What is the unfortunate outcome in this image?", 35 | "Predict what the weather in this picture will be like next." 
36 | ] 37 | }, 38 | "sentence": { 39 | "Chinese": [ 40 | "根据图像,时可能面临的一个关键问题是什么?", 41 | "根据这张图片,请预测接下来会发生什么?", 42 | "这张图片的预期结果是什么?", 43 | "这张图片预期的积极结果是什么?", 44 | "这张图片预期的不幸结果是什么?", 45 | "预测这张图片中的天气之后会是怎么样的。" 46 | ], 47 | "English": [ 48 | "Based on the image, what is one key issue that might face when ?", 49 | "Based on this image, please predict what will happen next?", 50 | "What is the intended outcome in this image?", 51 | "What is the positive result in this image?", 52 | "What is the unfortunate outcome in this image?", 53 | "Predict what the weather in this picture will be like next." 54 | ] 55 | } 56 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00017_artwork.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这个图片是不是描述了?", 5 | "。这段对于这张图片的赏析是否恰当?", 6 | "“”这首诗是否适合这幅画?", 7 | "这件艺术品属于体裁吗?", 8 | "这件艺术品是以的形式存在的吗?", 9 | "这东西看起来像吗?", 10 | "这件艺术品是创作的吗?", 11 | "这件艺术品是否曾在展览?", 12 | "这件艺术品的标题是吗?", 13 | "这幅画的构图是否具有<characteristics>这样的特征?", 14 | "这幅画属于<style>的艺术风格吗?", 15 | "这个<artwork>中的<object>代表了<meaning>的含义吗?", 16 | "这张图看起来像<object/sentence>吗?", 17 | "这个<object>的设计风格是<style>这样的吗?", 18 | "这幅画的主题是<theme>吗?", 19 | "你对这件艺术品的色彩组合的印象是<impression>的吗?", 20 | "这张图片中的<object>属于<style>的风格吗?" 21 | ], 22 | "English": [ 23 | "Does this image depict <content>?", 24 | "<content>. 
Is this appreciation of this picture appropriate?", 25 | "Is the poem '<poem>' suitable for this painting?", 26 | "Does this artwork belong to the type of <genre>?", 27 | "Does this artwork exist in the form of <category>?", 28 | "Does this thing look like <something>?", 29 | "Is this artwork created by <artist>?", 30 | "Is this artwork displayed in <location>?", 31 | "Is this artwork titled <title>?", 32 | "Is the composition of this painting characterized by <characteristics>?", 33 | "Does this painting belong to the art style of <style>?", 34 | "Does the <object> in this <artwork> represent the meaning of <meaning>?", 35 | "Does this image look like <object/sentence>?", 36 | "Is the design style of this <object> like <style>?", 37 | "Is the theme of this painting <theme>?", 38 | "Is your impression of the color combination of this artwork <impression>?", 39 | "Does the <object> in this picture belong to the style of <style>?" 40 | ] 41 | }, 42 | "choice": { 43 | "Chinese": [ 44 | "请对图中艺术品进行简要的描述。", 45 | "赏析这个艺术品。", 46 | "根据这幅画,你能把它写成诗歌吗?", 47 | "这个艺术品是<绘画、雕塑、还是其他>什么存在形式?", 48 | "这个东西看起来像什么?", 49 | "这件艺术品是谁创作的?", 50 | "这件艺术品曾在哪里展览?", 51 | "这件艺术品的标题是什么?", 52 | "这幅画的构图有何特点?", 53 | "这幅画属于什么艺术风格?", 54 | "这个艺术品中的<object>代表什么?", 55 | "这张图看起来像什么?", 56 | "这个<object>的设计风格是什么?", 57 | "这幅画的主题是什么?", 58 | "你对这件艺术品的色彩组合有什么印象?" 
59 | ], 60 | "English": [ 61 | "Please provide a brief description of the artwork pictured.", 62 | "Appreciate this work of art.", 63 | "Based on this painting, can you write it as a poem?", 64 | "Is this artwork <a painting, a sculpture, or some other form> of existence?", 65 | "What does this object look like?", 66 | "Who created this artwork?", 67 | "Where has this artwork been exhibited?", 68 | "What is the title of this artwork?", 69 | "What are the characteristics of the composition of this painting?", 70 | "What artistic style does this painting belong to?", 71 | "What does <object> in this artwork represent?", 72 | "What does this picture look like?", 73 | "What is the design style of this <object>?", 74 | "What is the theme of this painting?", 75 | "What are your impressions of the color combinations in this artwork?" 76 | ] 77 | }, 78 | "sentence": { 79 | "Chinese": [ 80 | "请对图中艺术品进行简要的描述。", 81 | "赏析这个艺术品。", 82 | "根据这幅画,你能把它写成诗歌吗?", 83 | "这个艺术品是<绘画、雕塑、还是其他>什么存在形式?", 84 | "这个东西看起来像什么?", 85 | "这件艺术品是谁创作的?", 86 | "这件艺术品曾在哪里展览?", 87 | "这件艺术品的标题是什么?", 88 | "这幅画的构图有何特点?", 89 | "这幅画属于什么艺术风格?", 90 | "这个艺术品中的<object>代表什么?", 91 | "这张图看起来像什么?", 92 | "这个<object>的设计风格是什么?", 93 | "这幅画的主题是什么?", 94 | "你对这件艺术品的色彩组合有什么印象?" 
95 | ], 96 | "English": [ 97 | "Please provide a brief description of the artwork pictured.", 98 | "Appreciate this work of art.", 99 | "Based on this painting, can you write it as a poem?", 100 | "Is this artwork <a painting, a sculpture, or some other form> of existence?", 101 | "What does this object look like?", 102 | "Who created this artwork?", 103 | "Where has this artwork been exhibited?", 104 | "What is the title of this artwork?", 105 | "What are the characteristics of the composition of this painting?", 106 | "What artistic style does this painting belong to?", 107 | "What does <object> in this artwork represent?", 108 | "What does this picture look like?", 109 | "What is the design style of this <object>?", 110 | "What is the theme of this painting?", 111 | "What are your impressions of the color combinations in this artwork?" 112 | ] 113 | } 114 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00018_landmark.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这是<landmark_building/attraction>的照片吗?", 5 | "图片的<above, below, left or right>的建筑物是<landmark_building/attraction>吗?", 6 | "图片中的建筑是<landmark_building/attraction>吗?", 7 | "这张图中的地标属于<city/country>吗?" 8 | ], 9 | "English": [ 10 | "Is this a photo of <landmark_building/attraction>?", 11 | "Is the building <above, below, left or right> in the picture <landmark_building/attraction>?", 12 | "Is the building in the picture <landmark_building/attraction>?", 13 | "Does the landmark in this picture belong to <city/country>?" 
14 | ] 15 | }, 16 | "choice": { 17 | "Chinese": [ 18 | "这个照片显示的是哪个<标志性建筑/景点>?", 19 | "请指定此处显示的<标志性建筑/景点>的名称。", 20 | "位于图片<above, below, left or right of the image>的<标志性建筑/景点>是什么?", 21 | "这张图中的<标志性建筑/景点>属于哪个<城市/国家>?", 22 | "请简要介绍一下图中的<标志性建筑/景点>。" 23 | ], 24 | "English": [ 25 | "Which <landmark_building/attraction> does this photo show?", 26 | "Please specify the name of the <landmark_building/attraction> shown here.", 27 | "What is the <landmark_building/attraction> located <above, below, left or right of the image>?", 28 | "Which <city/country> does the <landmark_building/attraction> in this picture belong to?", 29 | "Please briefly introduce the <landmark_building/attraction> in the picture." 30 | ] 31 | }, 32 | "sentence": { 33 | "Chinese": [ 34 | "这个照片显示的是哪个<标志性建筑/景点>?", 35 | "请指定此处显示的<标志性建筑/景点>的名称。", 36 | "位于图片<above, below, left or right of the image>的<标志性建筑/景点>是什么?", 37 | "这张图中的<标志性建筑/景点>属于哪个<城市/国家>?", 38 | "请简要介绍一下图中的<标志性建筑/景点>。" 39 | ], 40 | "English": [ 41 | "Which <landmark_building/attraction> does this photo show?", 42 | "Please specify the name of the <landmark_building/attraction> shown here.", 43 | "What is the <landmark_building/attraction> located <above, below, left or right of the image>?", 44 | "Which <city/country> does the <landmark_building/attraction> in this picture belong to?", 45 | "Please briefly introduce the <landmark_building/attraction> in the picture." 46 | ] 47 | } 48 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00019_numerical_calculation.json: -------------------------------------------------------------------------------- 1 | 2 | 3 | { 4 | "judge": { 5 | "Chinese": [ 6 | "图片中的计算过程正确吗?", 7 | "图中的公式按照下面步骤计算是否正确?<calculation process> ", 8 | "图中直角三角形的斜边长度是<result>吗?", 9 | "使用图中所示的字母计算图中电路的功率。图中电路的功率是<result>吗?", 10 | "图片中<the geometric shapes>的面积是否等于<result>?", 11 | "图片中<the name of the variable>的值是否应等于<result>?" 
12 | ], 13 | "English": [ 14 | "Is the calculation process in the picture correct?", 15 | "Is the formula in the figure calculated correctly according to the following steps? <calculation process>", 16 | "Is the length of the hypotenuse of the right triangle in the figure <result>?", 17 | "Calculate the power of the circuit in the diagram using the letters indicated in the diagram. Is the power of the circuit in the diagram <result>?", 18 | "Is the area of the <the geometric shapes> in the picture equal to <result>?", 19 | "Should the value of <the name of the variable> in the picture equal <result>?" 20 | ] 21 | }, 22 | "choice": { 23 | "Chinese": [ 24 | "图片中的计算过程正确吗?为什么?", 25 | "计算图中的公式。", 26 | "图中的操作是否恰当?", 27 | "使用图中所示的字母计算图中电路的功率。", 28 | "图片中<the geometric shapes>的面积等于多少?", 29 | "图片中<the name of the variable>的值应该等于多少?" 30 | ], 31 | "English": [ 32 | "Is the calculation process in the picture correct? Why?", 33 | "Calculate the formulas in the picture.", 34 | "Is the operation in the picture appropriate?", 35 | "Calculate the power of the circuit in the picture using the letters shown in the picture.", 36 | "What is the area of <the geometric shapes> in the picture?", 37 | "What should the value of <the name of the variable> in the picture be equal to?" 38 | ] 39 | }, 40 | "sentence": { 41 | "Chinese": [ 42 | "图片中的计算过程正确吗?为什么?", 43 | "计算图中的公式。", 44 | "图中的操作是否恰当?", 45 | "使用图中所示的字母计算图中电路的功率。", 46 | "图片中<the geometric shapes>的面积等于多少?", 47 | "图片中<the name of the variable>的值应该等于多少?" 48 | ], 49 | "English": [ 50 | "Is the calculation process in the picture correct? Why?", 51 | "Calculate the formulas in the picture.", 52 | "Is the operation in the picture appropriate?", 53 | "Calculate the power of the circuit in the picture using the letters shown in the picture.", 54 | "What is the area of <the geometric shapes> in the picture?", 55 | "What should the value of <the name of the variable> in the picture be equal to?"
56 | ] 57 | } 58 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00020_spatial_relationship.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "judge": { 4 | "Chinese": [ 5 | "<object1>是否在图像中<object2>的左侧?", 6 | "<object1>是否在图像中<object2>的右侧?", 7 | "<object1>是否在图像中<object2>的上方?", 8 | "<object1>是否在图像中<object2>的下方?", 9 | "<object1>是否在图像中<object2>的前面?", 10 | "<object1>是否在图像中<object2>的后面?", 11 | "<object1>是否在图像中<object2>的内部?", 12 | "<object1>是否在图像中<object2>的外部?", 13 | "图像中<object2>中间部位是否有<object1>?" 14 | ], 15 | "English": [ 16 | "Is the <object1> on the left of the <object2> in the image?", 17 | "Is the <object1> on the right of the <object2> in the image?", 18 | "Is the <object1> on the top of the <object2> in the image?", 19 | "Is the <object1> under the <object2> in the image?", 20 | "Is the <object1> in front of the <object2> in the image?", 21 | "Is the <object1> behind the <object2> in the image?", 22 | "Is the <object1> inside the <object2> in the image?", 23 | "Is the <object1> outside the <object2> in the image?", 24 | "Is there <object1> in the middle of <object2> in the image?" 25 | ] 26 | }, 27 | "choice": { 28 | "Chinese": [ 29 | "在图像中<object>的左侧的是什么东西?", 30 | "在图像中<object>的右侧的是什么东西?", 31 | "在图像中<object>的上方的是什么东西?", 32 | "在图像中<object>的下方的是什么东西?", 33 | "在图像中<object>的前面的是什么东西?", 34 | "在图像中<object>的后面的是什么东西?", 35 | "图像中,<object>的内部装有什么东西?", 36 | "图像中,<object>在什么东西的内部?", 37 | "在图像中<object>中间部位的是什么东西?", 38 | "图像中<object1>在<object2>的什么方位?"
39 | ], 40 | "English": [ 41 | "What is on the left side of <object2> in the image?", 42 | "What is on the right side of <object2> in the image?", 43 | "What is on the top of <object2> in the image?", 44 | "What is under <object2> in the image?", 45 | "What is in front of <object2> in the image?", 46 | "What is behind <object2> in the image?", 47 | "What is inside <object2> in the image?", 48 | "What is outside <object2> in the image?", 49 | "What is in the middle of <object2> in the image?", 50 | "In the image, where is <object1> located relative to <object2>?" 51 | ] 52 | }, 53 | "sentence": { 54 | "Chinese": [ 55 | "在图像中<object>的左侧的是什么东西?", 56 | "在图像中<object>的右侧的是什么东西?", 57 | "在图像中<object>的上方的是什么东西?", 58 | "在图像中<object>的下方的是什么东西?", 59 | "在图像中<object>的前面的是什么东西?", 60 | "在图像中<object>的后面的是什么东西?", 61 | "图像中,<object>的内部装有什么东西?", 62 | "图像中,<object>在什么东西的内部?", 63 | "在图像中<object>中间部位的是什么东西?", 64 | "图像中<object1>在<object2>的什么方位?" 65 | ], 66 | "English": [ 67 | "What is on the left side of <object2> in the image?", 68 | "What is on the right side of <object2> in the image?", 69 | "What is on the top of <object2> in the image?", 70 | "What is under <object2> in the image?", 71 | "What is in front of <object2> in the image?", 72 | "What is behind <object2> in the image?", 73 | "What is inside <object2> in the image?", 74 | "What is outside <object2> in the image?", 75 | "What is in the middle of <object2> in the image?", 76 | "In the image, where is <object1> located relative to <object2>?"
77 | ] 78 | } 79 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00021_posters.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这部<movie/TV series/animation/game, etc.>的导演是<specific name>吗?", 5 | "这部<movie/TV series/animation/game, etc.>的标题是<specific title>吗?", 6 | "这部<movie/TV series/animation/game, etc.>来自<specific country or region>吗?", 7 | "这个人是<specific movie/TV series/animation/game, etc.>的角色吗?", 8 | "这张图片中的角色是在这部<movie/TV series/animation/game, etc.>里的名字是<specific name>吗?", 9 | "这张<海报/照片/插图等>描述的是<specific movie/TV series/animation/game, etc.>吗?", 10 | "这幅插图是<specific movie/TV series/animation/game, etc.>的宣传图片?", 11 | "这个场景属于<specific movie/TV series/animation/game, etc.>的吗?", 12 | "图片中<movie/TV series/animation/game, etc.>的主要角色是<specific name>吗?" 13 | ], 14 | "English": [ 15 | "Is the director of this <movie/TV series/animation/game, etc.> <specific name>?", 16 | "Is the title of this <movie/TV series/animation/game, etc.> <specific title>?", 17 | "Does this <movie/TV series/animation/game, etc.> come from <specific country or region>?", 18 | "Is this person the character of <specific movie/TV series/animation/game, etc.>?", 19 | "Is the character in this picture named <specific name> in this <movie/TV series/animation/game, etc.>?", 20 | "Does this <poster/photo/illustration> describe <specific movie/TV series/animation/game, etc.>?", 21 | "Is this illustration a promotional image for <specific movie/TV series/animation/game, etc.>?", 22 | "Does this scene belong to <specific movie/TV series/animation/game, etc.>?", 23 | "Is the main character of <movie/TV series/animation/game, etc.> in the picture <specific name>?"
24 | ] 25 | }, 26 | "choice": { 27 | "Chinese": [ 28 | "这部<movie/TV series/animation/game, etc.>的导演是谁?", 29 | "这部<movie/TV series/animation/game, etc.>的标题是什么?", 30 | "这部<movie/TV series/animation/game, etc.>来自哪个国家或地区?", 31 | "这个人是哪个<movie/TV series/animation/game, etc.>角色?", 32 | "这张图片中的角色是在这部<movie/TV series/animation/game, etc.>里的名字是什么?", 33 | "这张<海报/照片/插图等>描述的是哪部<movie/TV series/animation/game, etc.>?", 34 | "这张<海报/照片/插图等>是从哪里获取的?", 35 | "这幅插图是什么<movie/TV series/animation/game, etc.>的宣传图片?", 36 | "这个场景属于哪部<movie/TV series/animation/game, etc.>?", 37 | "这是哪部<movie/TV series/animation/game, etc.>的剧照?", 38 | "图片中<movie/TV series/animation/game, etc.>的主要角色是什么?" 39 | ], 40 | "English": [ 41 | "Who is the director of this <movie/TV series/animation/game, etc.>?", 42 | "What is the title of this <movie/TV series/animation/game, etc.>?", 43 | "Which country or region does this <movie/TV series/animation/game, etc.> come from?", 44 | "Which <movie/TV series/animation/game, etc.> character is this person?", 45 | "What is the name of the character in this picture in this <movie/TV series/animation/game, etc.>?", 46 | "Which <movie/TV series/animation/game, etc.> does this <poster/photo/illustration, etc.> describe?", 47 | "Where can I get this <poster/photo/illustration, etc.>?", 48 | "What <movie/TV series/animation/game, etc.> promotional image is this illustration?", 49 | "Which <movie/TV series/animation/game, etc.> does this scene belong to?", 50 | "What is the main character in the <movie/TV series/animation/game, etc.> in the picture?"
51 | ] 52 | }, 53 | "sentence": { 54 | "Chinese": [ 55 | "这部<movie/TV series/animation/game, etc.>的导演是谁?", 56 | "这部<movie/TV series/animation/game, etc.>的标题是什么?", 57 | "这部<movie/TV series/animation/game, etc.>来自哪个国家或地区?", 58 | "这个人是哪个<movie/TV series/animation/game, etc.>角色?", 59 | "这张图片中的角色是在这部<movie/TV series/animation/game, etc.>里的名字是什么?", 60 | "这张<海报/照片/插图等>描述的是哪部<movie/TV series/animation/game, etc.>?", 61 | "这张<海报/照片/插图等>是从哪里获取的?", 62 | "这幅插图是什么<movie/TV series/animation/game, etc.>的宣传图片?", 63 | "这个场景属于哪部<movie/TV series/animation/game, etc.>?", 64 | "这是哪部<movie/TV series/animation/game, etc.>的剧照?", 65 | "图片中<movie/TV series/animation/game, etc.>的主要角色是什么?" 66 | ], 67 | "English": [ 68 | "Who is the director of this <movie/TV series/animation/game, etc.>?", 69 | "What is the title of this <movie/TV series/animation/game, etc.>?", 70 | "Which country or region does this <movie/TV series/animation/game, etc.> come from?", 71 | "Which <movie/TV series/animation/game, etc.> character is this person?", 72 | "What is the name of the character in this picture in this <movie/TV series/animation/game, etc.>?", 73 | "Which <movie/TV series/animation/game, etc.> does this <poster/photo/illustration, etc.> describe?", 74 | "Where can I get this <poster/photo/illustration, etc.>?", 75 | "What <movie/TV series/animation/game, etc.> promotional image is this illustration?", 76 | "Which <movie/TV series/animation/game, etc.> does this scene belong to?", 77 | "What is the main character in the <movie/TV series/animation/game, etc.> in the picture?"
78 | ] 79 | } 80 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00022_meme_comprehension.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这个<emoticon/screenshot/picture>之所以<Funny, interesting, thought-provoking or sad, etc>是因为<Specific reasons>吗?", 5 | "这个<emoticon/screenshot/picture>背后的内涵是<Specific connotations>吗?", 6 | "这个<emoticon/screenshot/picture>的故事与<Specific Story>有关吗?", 7 | "这个<emoticon/screenshot/picture>传达了<specific information>的信息吗?", 8 | "这个<emoticon/screenshot/picture>背后的故事与<Specific stories/people, etc>相关联吗?", 9 | "<specific characteristic>是否是这个<emoticon/screenshot/picture>的一个显著特征?", 10 | "这个<emoticon/screenshot/picture>通常被用来表达<Funny, interesting, thought-provoking or sad, etc>吗?", 11 | "<Specific stories/people, etc>是否与这个<emoticon/screenshot/picture>有关联?" 12 | ], 13 | "English": [ 14 | "Is this <emoticon/screenshot/picture> <Funny, interesting, thought-provoking or sad, etc> because of <Specific reasons>?", 15 | "Is the connotation behind this <emoticon/screenshot/picture> <Specific connotations>?", 16 | "Is the story of this <emoticon/screenshot/picture> related to <Specific Story>?", 17 | "Does this <emoticon/screenshot/picture> convey the message of <specific information>?", 18 | "Is the story behind this <emoticon/screenshot/picture> related to <Specific stories/people, etc.>?", 19 | "Is <specific characteristic> a prominent feature of this <emoticon/screenshot/picture>?", 20 | "Is this <emoticon/screenshot/picture> commonly used to express <Funny, interesting, thought-provoking or sad, etc.>?", 21 | "Is <Specific stories/people, etc.> related to this <emoticon/screenshot/picture>?"
22 | ] 23 | }, 24 | "choice": { 25 | "Chinese": [ 26 | "这个<emoticon/screenshot/picture>传达了什么信息?", 27 | "这个<emoticon/screenshot/picture>通常用于表达什么?", 28 | "这个<emoticon/screenshot/picture>的出处是什么?", 29 | "这个<emoticon/screenshot/picture>讲述了什么样的故事?", 30 | "这个<emoticon/screenshot/picture>的主要目的是什么?", 31 | "如何理解这个<emoticon/screenshot/picture>?", 32 | "这个<emoticon/screenshot/picture>背后有什么重要含义?", 33 | "这个<emoticon/screenshot/picture>中的梗是什么?", 34 | "这个<emoticon/screenshot/picture>有什么<好笑/悲伤/引人深思>之处?", 35 | "这个<emoticon/screenshot/picture>有什么独特之处?" 36 | ], 37 | "English": [ 38 | "What message does this <emoticon/screenshot/picture> convey?", 39 | "What is this <emoticon/screenshot/picture> usually used to express?", 40 | "What is the source of this <emoticon/screenshot/picture>?", 41 | "What story does this <emoticon/screenshot/picture> tell?", 42 | "What is the main purpose of this <emoticon/screenshot/picture>?", 43 | "How to understand this <emoticon/screenshot/picture>?", 44 | "What is the important meaning behind this <emoticon/screenshot/picture>?", 45 | "What is the meme in this <emoticon/screenshot/picture>?", 46 | "What's <funny/sad/thought-provoking> about this <emoticon/screenshot/picture>?", 47 | "What's unique about this <emoticon/screenshot/picture>?" 48 | ] 49 | }, 50 | "sentence": { 51 | "Chinese": [ 52 | "这个<emoticon/screenshot/picture>传达了什么信息?", 53 | "这个<emoticon/screenshot/picture>通常用于表达什么?", 54 | "这个<emoticon/screenshot/picture>的出处是什么?", 55 | "这个<emoticon/screenshot/picture>讲述了什么样的故事?", 56 | "这个<emoticon/screenshot/picture>的主要目的是什么?", 57 | "如何理解这个<emoticon/screenshot/picture>?", 58 | "这个<emoticon/screenshot/picture>背后有什么重要含义?", 59 | "这个<emoticon/screenshot/picture>中的梗是什么?", 60 | "这个<emoticon/screenshot/picture>有什么<好笑/悲伤/引人深思>之处?", 61 | "这个<emoticon/screenshot/picture>有什么独特之处?" 
62 | ], 63 | "English": [ 64 | "What message does this <emoticon/screenshot/picture> convey?", 65 | "What is this <emoticon/screenshot/picture> usually used to express?", 66 | "What is the source of this <emoticon/screenshot/picture>?", 67 | "What story does this <emoticon/screenshot/picture> tell?", 68 | "What is the main purpose of this <emoticon/screenshot/picture>?", 69 | "How to understand this <emoticon/screenshot/picture>?", 70 | "What is the important meaning behind this <emoticon/screenshot/picture>?", 71 | "What is the meme in this <emoticon/screenshot/picture>?", 72 | "What's <funny/sad/thought-provoking> about this <emoticon/screenshot/picture>?", 73 | "What's unique about this <emoticon/screenshot/picture>?" 74 | ] 75 | } 76 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00023_writing.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这个<img>是否适合在<Specific Purpose/Scenario>下使用?", 5 | "使用<标语/宣传口号/广告>来描述这个<img>恰当吗?", 6 | "这个<img>是否与<标语/宣传口号/广告>的描述相符合?", 7 | "<标语/宣传口号/广告>是否适合用来表达这个<img>的特点?", 8 | "这首古诗是否适合用来这个<img>?", 9 | "<故事/童话/科普介绍/推荐信/电子邮件>是否适合用来这个<img>?" 10 | ], 11 | "English": [ 12 | "Is this <img> suitable for use under <Specific Purpose/Scenario>?", 13 | "Is it appropriate to use <slogan/promotional slogan/advertisement> to describe this <img>?", 14 | "Does this <img> match the description of the <slogan/promotional slogan/advertisement>?", 15 | "Is the <slogan/promotional slogan/advertisement> suitable for expressing the characteristics of this <img>?", 16 | "Is this ancient poem suitable for this <img>?", 17 | "Is <Story/Fairy Tale/Science Popularization Introduction/Recommendation Letter/Email> suitable for this <img>?" 
18 | ] 19 | }, 20 | "choice": { 21 | "Chinese": [ 22 | "根据这张<img>,写一篇<故事/童话/科普介绍/推荐信/电子邮件>。", 23 | "关于<img>中的<object>,写一篇<text_type>。", 24 | "撰写一篇<text_type>,推荐图中的<object>。", 25 | "撰写一首描述<img>中<object>的诗。", 26 | "为这个<object>撰写一则推荐语。", 27 | "推荐在<img>中看到的<object>,并提供购买原因。", 28 | "给<somebody>写封电子邮件,介绍图片中的内容。", 29 | "根据<img>内容,我想与朋友分享这个<img>,该如何推荐。", 30 | "这个<img>适用于什么<用途/场景>?", 31 | "为图中产品写一个符合产品特点的<标语/宣传口号/广告>?" 32 | ], 33 | "English": [ 34 | "Based on this <img>, write a <story/fairy_tale/popular_science_introduction/recommendation_letter/email>.", 35 | "Write a <text_type> about the <object> in <img>.", 36 | "Write a <text_type> recommending the <object> in the picture.", 37 | "Write a poem describing the <object> in <img>.", 38 | "Write a recommendation for this <object>.", 39 | "Recommend the <object> seen in <img> and provide a reason for purchase.", 40 | "Write an email to <somebody> describing the content in the image.", 41 | "Based on the content of <img>, I want to share this <img> with my friends. How should I recommend it?", 42 | "What <purpose/scenario> is this <img> suitable for?", 43 | "Write a <slogan/promotional slogan/advertisement> for the product in the picture that fits the characteristics of the product." 44 | ] 45 | }, 46 | "sentence": { 47 | "Chinese": [ 48 | "根据这张<img>,写一篇<故事/童话/科普介绍/推荐信/电子邮件>。", 49 | "关于<img>中的<object>,写一篇<text_type>。", 50 | "撰写一篇<text_type>,推荐图中的<object>。", 51 | "撰写一首描述<img>中<object>的诗。", 52 | "为这个<object>撰写一则推荐语。", 53 | "推荐在<img>中看到的<object>,并提供购买原因。", 54 | "给<somebody>写封电子邮件,介绍图片中的内容。", 55 | "根据<img>内容,我想与朋友分享这个<img>,该如何推荐。", 56 | "这个<img>适用于什么<用途/场景>?", 57 | "为图中产品写一个符合产品特点的<标语/宣传口号/广告>?" 58 | ], 59 | "English": [ 60 | "Based on this <img>, write a <story/fairy_tale/popular_science_introduction/recommendation_letter/email>.", 61 | "Write a <text_type> about the <object> in <img>.",
62 | "Write a <text_type> recommending the <object> in the picture.", 63 | "Write a poem describing the <object> in <img>.", 64 | "Write a recommendation for this <object>.", 65 | "Recommend the <object> seen in <img> and provide a reason for purchase.", 66 | "Write an email to <somebody> describing the content in the image.", 67 | "Based on the content of <img>, I want to share this <img> with my friends. How should I recommend it?", 68 | "What <purpose/scenario> is this <img> suitable for?", 69 | "Write a <slogan/promotional slogan/advertisement> for the product in the picture that fits the characteristics of the product." 70 | ] 71 | } 72 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00024_brand_recognition.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这个<object>来自<Specific brand>吗?", 5 | "这张图片中<object>的品牌是<Specific brand>吗?", 6 | "这个<object>的制造商是<Specific producer>吗?", 7 | "这个<object>的公司是<Specific company>吗?", 8 | "这个<object>的国家是<Specific country>吗?" 9 | ], 10 | "English": [ 11 | "Does this <object> come from <Specific brand>?", 12 | "Is the brand of <object> in this picture <Specific brand>?", 13 | "Is the manufacturer of this <object> <Specific producer>?", 14 | "Is the company of this <object> <Specific company>?", 15 | "Is the country of this <object> <specific country>?" 16 | ] 17 | }, 18 | "choice": { 19 | "Chinese": [ 20 | "您能识别这张图片中<object>的品牌吗?", 21 | "这张图片中<object>的品牌是什么?", 22 | "这个<object>的制造商是谁?", 23 | "哪家公司生产的这个<object>?", 24 | "这张图片中<object>是哪个国家的产品?" 25 | ], 26 | "English": [ 27 | "Can you identify the brand of <object> in this picture?", 28 | "What is the brand of <object> in this picture?", 29 | "Who is the producer of this <object>?", 30 | "Which company produces this <object>?", 31 | "Which country's product is the <object> in this picture?"
32 | ] 33 | }, 34 | "sentence": { 35 | "Chinese": [ 36 | "您能识别这张图片中<object>的品牌吗?", 37 | "这张图片中<object>的品牌是什么?", 38 | "这个<object>的制造商是谁?", 39 | "哪家公司生产的这个<object>?", 40 | "这张图片中<object>是哪个国家的产品?" 41 | ], 42 | "English": [ 43 | "Can you identify the brand of <object> in this picture?", 44 | "What is the brand of <object> in this picture?", 45 | "Who is the producer of this <object>?", 46 | "Which company produces this <object>?", 47 | "Which country's product is the <object> in this picture?" 48 | ] 49 | } 50 | } -------------------------------------------------------------------------------- /data_engine/all_seed/00025_species_recognition.json: -------------------------------------------------------------------------------- 1 | { 2 | "judge": { 3 | "Chinese": [ 4 | "这是<Specific species>吗?", 5 | "<Specific species>是否在这张<img>中?", 6 | "这个<object>的俗名是<name>吗?", 7 | "这个<object>的学名是<name>吗?", 8 | "在<img>中的<object>物种是<Specific types>吗?", 9 | "在<img>中的<object>品种是<Specific varieties>吗?" 10 | ], 11 | "English": [ 12 | "Is this <Specific species>?", 13 | "Is the <Specific species> in this <img>?", 14 | "Is the common name of this <object> <name>?", 15 | "Is the scientific name of this <object> <name>?", 16 | "Is the <object> species in <img> <specific types>?", 17 | "Is the <object> variety in <img> <Specific varieties>?" 18 | ] 19 | }, 20 | "choice": { 21 | "Chinese": [ 22 | "这张图中的主要物种是什么?", 23 | "在这张图中中有哪些物种?", 24 | "识别图中里的物种。", 25 | "这个物种的俗名是什么?", 26 | "这个物种的学名是什么?", 27 | "这是一个<object>,属于哪个物种?", 28 | "这张图片中的<object>属于哪个品种?" 29 | ], 30 | "English": [ 31 | "What are the main species in this picture?", 32 | "What species are there in this picture?", 33 | "Identify the species in the picture.", 34 | "What is the common name of this species?", 35 | "What is the scientific name of this species?", 36 | "This is an <object>, which species does it belong to?", 37 | "Which variety does the <object> in this picture belong to?" 
38 | ] 39 | }, 40 | "sentence": { 41 | "Chinese": [ 42 | "这张图中的主要物种是什么?", 43 | "在这张图中中有哪些物种?", 44 | "识别图中里的物种。", 45 | "这个物种的俗名是什么?", 46 | "这个物种的学名是什么?", 47 | "这是一个<object>,属于哪个物种?", 48 | "这张图片中的<object>属于哪个品种?" 49 | ], 50 | "English": [ 51 | "What are the main species in this picture?", 52 | "What species are there in this picture?", 53 | "Identify the species in the picture.", 54 | "What is the common name of this species?", 55 | "What is the scientific name of this species?", 56 | "This is an <object>, which species does it belong to?", 57 | "Which variety does the <object> in this picture belong to?" 58 | ] 59 | } 60 | } -------------------------------------------------------------------------------- /data_engine/end_prompt.json: -------------------------------------------------------------------------------- 1 | { 2 | "longsentence": { 3 | "Chinese": [ 4 | "请确保你的回答尽可能详细。", 5 | "提供详细的解释。", 6 | "你的回答需要尽可能详细。", 7 | "确保你的回答充分详细。", 8 | "在你的答案中深入探讨。", 9 | "在你的回答中详细阐述。", 10 | "务必提供详细的解答。", 11 | "提供详尽的阐释。", 12 | "详细解释一下。", 13 | "详细阐述你的回答。" 14 | ], 15 | "English": [ 16 | " Provide an extensive response.", 17 | " Give a comprehensive reply.", 18 | " Present a detailed explanation.", 19 | " Offer a detailed explanation.", 20 | " Be as detailed as possible in your response.", 21 | " Be as thorough as you can in your response.", 22 | " Go into great depth in your answer.", 23 | " Please ensure that your answer is as detailed as possible.", 24 | " Your response needs to be as detailed as possible.", 25 | " Make sure to provide a detailed answer." 
26 | ] 27 | }, 28 | "shortsentence": { 29 | "Chinese": [ 30 | "请尽可能简明扼要地回答问题。", 31 | "保持你的回复简短明了。", 32 | "简明扼要地回答。", 33 | "请简明扼要地回答问题。", 34 | "你的回答需要尽可能简短。", 35 | "在传达必要信息的同时,简明扼要地回应。", 36 | "尽可能保持简洁。", 37 | "你的答案尽量保持简洁。", 38 | "回答需简洁明了。", 39 | "请用简明扼要的语言回答。" 40 | ], 41 | "English": [ 42 | " Please answer the question as concisely as possible.", 43 | " Keep your reply short and clear.", 44 | " Keep your reply as brief and clear as possible.", 45 | " Provide a brief and clear answer, please.", 46 | " Answer concisely.", 47 | " Be as brief as you can in your response.", 48 | " Respond with brevity while conveying the necessary information.", 49 | " Keep it as concise as possible.", 50 | " Give a concise and straightforward reply.", 51 | " Please keep your answer concise and to the point." 52 | ] 53 | }, 54 | "judge": { 55 | "Chinese": [ 56 | "我可以要求以'是'或'否'的形式做出回应吗?", 57 | "你的回答应该限于'是'还是'否。", 58 | "你需要回答'是'或'否'。", 59 | "你需要用'是'还是'否'来回答。", 60 | "请用'是'或'否'来回答", 61 | "回答时请使用'是'或'否'。", 62 | "请以'是'或'否'的方式作答。", 63 | "回答时请使用'是'或'否'。", 64 | "请简单地用'是'或'否'回答。", 65 | "在回答时,只需选择'是'或'否'。" 66 | ], 67 | "English": [ 68 | " Please answer with 'Yes' or 'No'.", 69 | " Use 'Yes' or 'No' to reply.", 70 | " Indicate your answer by choosing 'Yes' or 'No'.", 71 | " Use 'Yes' or 'No' for your response, thank you.", 72 | " Your answer should be either 'Yes' or 'No'.", 73 | " Provide your response with a simple 'Yes' or 'No'.", 74 | " May I ask for a response in the form of 'Yes' or 'No'?", 75 | " Should your reply be limited to 'Yes' or 'No'?", 76 | " Will you kindly respond with either 'Yes' or 'No'?", 77 | " Are you able to provide a response in either 'Yes' or 'No'?" 
def get_answer(prompt):
    """Send ``prompt`` to the chat model and return the reply text.

    Up to three attempts are made. After every failed attempt the
    module-level ``key_id`` is advanced (wrapping at ``MAX_API_RETRY``) so the
    next attempt uses the next entry of ``gpt_keys``.

    Args:
        prompt: Full user prompt, passed unchanged to ``one_ask``.

    Returns:
        The message content of the first completion choice, or the literal
        string ``"error"`` when every attempt failed. Exceptions are logged
        and never propagated to the caller.
    """
    global key_id
    max_attempts = 3
    # Read the module-level proxy setting directly: assigning to a name
    # called ``proxy_url`` inside this function would make it a local and
    # raise UnboundLocalError before any request is sent.
    proxies = {
        "http://": f"{proxy_url}",
        "https://": f"{proxy_url}",
    }
    for attempt in range(1, max_attempts + 1):
        try:
            api_key = gpt_keys[key_id]['key']
            # The context manager closes the HTTP client after each attempt.
            with httpx.Client(proxies=proxies) as http_c:
                client = OpenAI(api_key=api_key, http_client=http_c)
                response = one_ask(client, prompt)
                return response.message.content
        except Exception as e:
            # Rotate to the next API key before retrying.
            key_id = (key_id + 1) % MAX_API_RETRY
            logger.info(e)
            if attempt < max_attempts:
                time.sleep(2)
    logger.info(f"Failed after {max_attempts} attempts.")
    return "error"
\n \ 65 | You MUST output the generated question, choices and answer in the following format:\n\ 66 | <Q1> {the generated question 1} </Q1> <C1> {the choices you give} </C1> <A1> {the right choice of the question 1} </A1>\n\ 67 | <Q2> {the generated question 2} </Q2> <C2> {the choices you give} </C2> <A2> {the right choice of the question 2} </A2>\n\ 68 | <Q3> {the generated question 3} </Q3> <C3> {the choices you give} </C3> <A3> {the right choice of the question 3} </A3>\n' 69 | 70 | choice_prompt = '给出图像的描述和问题列表,你需要设计三个与<domain>相关的中文单项选择问题。\n\ 71 | 对于每个样本,生成的问题的含义必须与提供的问题列表中的问题相似,并且你需要输出四个选项作为候选者。\n\ 72 | 并且只有一个选择是问题的正确答案,这个正确答案应该根据图像的描述生成。\n\ 73 | 这些选择应该通过A、B、C、D四个大写字母进行索引。\n\ 74 | 图像相关信息("Empty"表示没有信息):<prior_knowledge> \n\ 75 | 描述:<caption>\n\n问题:<question_templates>\n\ 76 | 我给你的问题<>里的内容是占位符,你只需要选择一个最合适的即可,不需要保留两个或者更多。\ 77 | 最后,一定保证你生成的问题符合主题,一定不要生成一些和我提供给你的问题列表中含义差别很大的问题。\ 78 | 你必须以以下格式输出生成的问题、选项和答案:\n\ 79 | <Q1> {the generated question 1} </Q1> <C1> {the choices you give: A. xxx B. xxx C. xxx D. xxx} </C1> <A1> {the right choice of the question 1} </A1>\n\ 80 | <Q2> {the generated question 2} </Q2> <C2> {the choices you give: A. xxx B. xxx C. xxx D. xxx} </C2> <A2> {the right choice of the question 2} </A2>\n\ 81 | <Q3> {the generated question 3} </Q3> <C3> {the choices you give: A. xxx B. xxx C. xxx D. 
def generate_choice(domain, begin_ix=0):
    """Generate multiple-choice QA records for every caption of ``domain``.

    Reads ``{root_path}/{domain}/{domain}_caption.jsonl`` line by line, fills
    ``choice_prompt`` with the caption, the Bing tag and up to three randomly
    sampled Chinese "choice" seed questions, sends it through ``get_answer``
    and appends one JSON line per caption to ``{domain}_choice.jsonl``.
    Captions that cannot be processed are appended, with an ``err`` field, to
    ``{domain}_choice_err.jsonl`` so the results file only holds results.

    Args:
        domain: Domain folder name and seed-file stem
            (e.g. ``"00021_posters"``).
        begin_ix: 1-based caption line to resume from; earlier lines are
            skipped. ``0`` and ``1`` both process the whole file.

    Returns:
        None. Returns early, after logging, when the seed questions cannot
        be loaded.
    """
    captions_path = f'{root_path}/{domain}/{domain}_caption.jsonl'
    generated_queations_path = f'{root_path}/{domain}/{domain}_choice.jsonl'
    error_dest_path = f'{root_path}/{domain}/{domain}_choice_err.jsonl'
    seed_json = f'{root_path}/all_seed/{domain}.json'

    # The seed files are keyed by "judge" / "choice" / "sentence"; the
    # multiple-choice templates live under "choice".
    try:
        with open(seed_json, "r", encoding='utf-8') as file:
            questions_model = json.load(file)["choice"]["Chinese"]
    except (OSError, ValueError, KeyError, TypeError) as e:
        logger.info('读取问题种子失败')
        logger.info("error info:" + str(repr(e)))
        return

    # Drop a numeric "NNNNN_" prefix for the prompt. Names without such a
    # prefix are used as they are instead of being truncated to nothing.
    prefix, sep, rest = domain.partition('_')
    domain_name = rest if sep and prefix.isdigit() else domain

    generated = 0
    with open(captions_path, 'r', encoding='utf-8') as f:
        for ix, line in enumerate(f, start=1):
            if ix < begin_ix or not line.strip():
                continue

            caption_dict = None
            try:
                caption_dict = json.loads(line)
                questions_model_list = random.sample(
                    questions_model, min(3, len(questions_model)))

                prior_knowledge = str(caption_dict.get("bing_tag", 'Empty'))
                if prior_knowledge == "":
                    prior_knowledge = "Empty"

                prompt = choice_prompt
                prompt = prompt.replace("<domain>", domain_name)
                prompt = prompt.replace('<prior_knowledge>', prior_knowledge)
                prompt = prompt.replace(
                    "<caption>",
                    caption_dict['gpt4v_caption_interface'].replace("\n\n", "\n"))
                prompt = prompt.replace('<question_templates>', str(questions_model_list))

                out = get_answer(prompt)
                # get_answer reports exhausted retries with the sentinel
                # "error"; treat that as a failure, not as a QA result.
                if out == "error":
                    raise RuntimeError("get_answer returned no usable output")
                logger.info("[prompt]\n" + prompt)
                logger.info("[image_path]:" + caption_dict['image_path'] + "\n[GPT OUT]: \n" + str(out))

                question_dict = {
                    "image_path": caption_dict['image_path'],
                    "qa_raw": str(out),
                    "gpt_prompt": prompt
                }
                with open(generated_queations_path, 'a', encoding='utf-8') as dest:
                    dest.write(json.dumps(question_dict, ensure_ascii=False) + '\n')
                generated += 1

            except Exception as e:
                logger.info(str(ix) + " [ERROR]")
                logger.info("error info:" + str(repr(e)))
                # Keep whatever was parsed; fall back to the raw line when
                # the record itself could not be decoded as an object.
                if isinstance(caption_dict, dict):
                    error_record = dict(caption_dict)
                else:
                    error_record = {"raw_line": line}
                error_record['err'] = str(repr(e))
                logger.info("error image path:" + str(error_record.get('image_path')))
                with open(error_dest_path, 'a', encoding='utf-8') as dest:
                    dest.write(json.dumps(error_record, ensure_ascii=False) + '\n')

    logger.info('****done****')
    logger.info("total generate " + str(generated) + " choice records. ")
generated_queations_path = f'{root_path}/{domain}/{domain}_lqa.jsonl' 167 | error_dest_path = f'{root_path}/{domain}/{domain}_lqa_err.jsonl' 168 | seed_json = f'{root_path}/all_seed/{domain}.json' 169 | 170 | questions_model = [] 171 | with open(seed_json, "r", encoding='utf-8') as file: 172 | try: 173 | json_data = json.load(file) 174 | questions_model = json_data["select"]["Chinese"] 175 | except: 176 | print('读取问题种子失败') 177 | 178 | ix = 0 179 | with open(captions_path, 'r', encoding='utf-8') as f: 180 | for line in f: 181 | ix += 1 182 | if ix < begin_ix: 183 | continue 184 | 185 | questions_model_list = random.sample(questions_model, min(3, len(questions_model))) 186 | print("questions_model_list: " + str(questions_model_list)) 187 | 188 | caption_dict = json.loads(line) 189 | prompt = lqa_prompt 190 | prompt = prompt.replace("<domain>", domain) 191 | prompt = prompt.replace('<prior_knowledge>', str(caption_dict.get("prior", 'Empty'))) 192 | prompt = prompt.replace("<caption>", caption_dict['caption']) 193 | prompt = prompt.replace('<question_templates>', str(questions_model_list)) 194 | try: 195 | out = get_answer(prompt) 196 | print("[image_path]: \n" + caption_dict['image_path'] + "\n\n[GPT OUT]: \n" + str(out)) 197 | question_dict = { 198 | "image_path": caption_dict['image_path'], 199 | "qa_raw": str(out), 200 | "gpt_prompt": prompt 201 | } 202 | open(generated_queations_path, 'a', encoding='utf-8').write( 203 | json.dumps(question_dict, ensure_ascii=False)+'\n' 204 | ) 205 | except Exception as e: 206 | print(str(ix) + " [ERROR]") 207 | print("error info:" + str(repr(e))) 208 | caption_dict['err'] = str(repr(e)) 209 | print("error image path:" + caption_dict['image_path']) 210 | open(error_dest_path, 'a', encoding='utf-8').write( 211 | json.dumps(caption_dict, ensure_ascii=False)+'\n' 212 | ) 213 | 214 | print('****done****') 215 | print("total generate " + str(ix) + " pairs. 
") 216 | 217 | 218 | sqa_prompt = 'Provide a description of an image and a list of multiple questions, you need to desigin three short question answering questions related to the <domain>.\n\ 219 | For each sample, the meaning of generated question MUST be similar to the question in the provided question list, and you need to output a few words or short sentences as a short answer to the question.\n\ 220 | The answer to this question should be generated based on the description of the image.\n\ 221 | The description of the image and question list for you are as follows:\n\ 222 | Description: <caption>. \n Question: <original_question_list>. \n \ 223 | You MUST output the generated questions and answers in the following format:\n\ 224 | <Q1> {the generated question 1} </Q1> <A1> {the short answer of the question 1} </A1>\n\ 225 | <Q2> {the generated question 2} </Q2> <A2> {the short answer of the question 2} </A2>\n\ 226 | <Q3> {the generated question 3} </Q3> <A3> {the short answer of the question 3} </A3>\n' 227 | 228 | sqa_prompt = '给出图像的描述和问题列表,你需要设计三个与<domain>相关的中文短问答问题。\n\ 229 | 对于每个样本,生成的问题的含义必须与提供的问题列表中的问题相似,并且你需要输出几个单词或短句作为问题的简短答案。\n\ 230 | 这个问题的短答案应该根据图像的描述生成。\n\ 231 | 图像相关信息("Empty"表示没有信息):<prior_knowledge> \n\ 232 | 描述:<caption>\n 问题:<question_templates>\n\ 233 | 你必须以以下格式输出生成的问题和答案:\n\ 234 | <Q1> {the generated question 1} </Q1> <A1> {the short answer of the question 1} </A1>\n\ 235 | <Q2> {the generated question 2} </Q2> <A2> {the short answer of the question 2} </A2>\n\ 236 | <Q3> {the generated question 3} </Q3> <A3> {the short answer of the question 3} </A3>\n' 237 | 238 | def generate_short_qa(domain, begin_ix=0): 239 | print("\n\n****start sqa and answer working****\n\n") 240 | captions_path = f'{root_path}/{domain}/{domain}_caption.jsonl' 241 | generated_queations_path = f'{root_path}/{domain}/{domain}_sqa.jsonl' 242 | error_dest_path = f'{root_path}/{domain}/{domain}_sqa_err.jsonl' 243 | seed_json = f'{root_path}/all_seed/{domain}.json' 244 | 245 
| questions_model = [] 246 | with open(seed_json, "r", encoding='utf-8') as file: 247 | try: 248 | json_data = json.load(file) 249 | questions_model = json_data["select"]["Chinese"] 250 | except: 251 | print('读取问题种子失败') 252 | 253 | ix = 0 254 | with open(captions_path, 'r', encoding='utf-8') as f: 255 | for line in f: 256 | ix += 1 257 | if ix < begin_ix: 258 | continue 259 | 260 | questions_model_list = random.sample(questions_model, min(3, len(questions_model))) 261 | caption_dict = json.loads(line) 262 | 263 | prompt = sqa_prompt 264 | prompt = prompt.replace("<domain>", domain) 265 | prompt = prompt.replace('<prior_knowledge>', str(caption_dict.get("prior", 'Empty'))) 266 | prompt = prompt.replace("<caption>", caption_dict['caption']) 267 | prompt = prompt.replace('<question_templates>', str(questions_model_list)) 268 | try: 269 | out = get_answer(prompt) 270 | print("[image_path]: \n" + caption_dict['image_path'] + "\n\n[GPT OUT]: \n" + str(out)) 271 | question_dict = { 272 | "image_path": caption_dict['image_path'], 273 | "qa_raw": str(out), 274 | "gpt_prompt": prompt 275 | } 276 | open(generated_queations_path, 'a', encoding='utf-8').write( 277 | json.dumps(question_dict, ensure_ascii=False)+'\n' 278 | ) 279 | except Exception as e: 280 | print(str(ix) + " [ERROR]") 281 | print("error info:" + str(repr(e))) 282 | caption_dict['err'] = str(repr(e)) 283 | print("error image path:" + caption_dict['image_path']) 284 | open(error_dest_path, 'a', encoding='utf-8').write( 285 | json.dumps(caption_dict, ensure_ascii=False)+'\n' 286 | ) 287 | 288 | print('****done****') 289 | print("total generate " + str(ix) + " pairs. 
") 290 | 291 | 292 | 293 | judge_prompt = 'Provide a description of an image and a list of multiple questions, you need to desigin three true or false questions related to the <domain>.\n\ 294 | For each sample, the meaning of generated question MUST be similar to the question in the provided question list, and you need to output "Yes" or "No" as the answer to the question.\n\ 295 | The answer to this question should be generated based on the description of the image.\n\ 296 | The description of the image and question list for you are as follows:\n\ 297 | Description: <caption>. \n Question: <original_question_list>. \n \ 298 | You MUST output the generated questions and answers in the following format:\n\ 299 | <Q1> {the generated question 1} </Q1> <C1> {"Yes", "No"} </C1> <A1> {the right choice of the question 1} </A1>\n\ 300 | <Q2> {the generated question 2} </Q2> <C2> {"Yes", "No"} </C2> <A2> {the right choice of the question 2} </A2>\n\ 301 | <Q3> {the generated question 3} </Q3> <C3> {"Yes", "No"} </C3> <A3> {the right choice of the question 3} </A3>\n' 302 | 303 | judge_prompt = '给出图像的描述和问题列表,你需要设计四个与<domain>相关的中文判断题。\n\ 304 | 对于每个样本,生成的问题的含义必须与提供的问题列表中的问题相似,并且你需要输出“是”或“否”作为问题的答案。\n\ 305 | 注意答案只能是“是”或“否”的其中之一,这个正确答案应该根据图像的描述生成。\n\ 306 | 图像相关信息("Empty"表示没有信息):<prior_knowledge> \n\ 307 | 描述:<caption>\n 问题:<question_templates>\n\ 308 | 我给你的问题<>里的内容是占位符,你需要进行根据图像相关信息和描述来生成。\n\ 309 | 你生成的四个判断题题目,应该保证其根据图像的描述生成的对应正确答案中的两个为“是”,另外两个为“否”。\n\ 310 | 答案为“否”的判断题题目,你可以随机生成一些错误但与图像相关信息和描述相关的词语。\n\ 311 | 最后,一定保证你生成的问题逻辑通顺、符合主题且与图像相关信息和描述相关。\n\ 312 | 你必须以下格式输出生成的问题、选项和答案:\n\ 313 | <Q1> {the generated question 1} </Q1> <C1> {"是", "否"} </C1> <A1> {"是" or "否"} </A1>\n\ 314 | <Q2> {the generated question 2} </Q2> <C2> {"是", "否"} </C2> <A2> {"是" or "否"} </A2>\n\ 315 | <Q3> {the generated question 3} </Q3> <C3> {"是", "否"} </C3> <A3> {"是" or "否"} </A3>\n\ 316 | <Q4> {the generated question 3} </Q4> <C4> {"是", "否"} </C4> <A4> {"是" or "否"} </A4>\n\ 317 | ' 318 | 319 | def 
generate_judge(domain, begin_ix=0): 320 | print("\n\n****start judge and answer working****\n\n") 321 | captions_path = f'{root_path}/{domain}/{domain}_caption.jsonl' 322 | generated_queations_path = f'{root_path}/{domain}/{domain}_judge.jsonl' 323 | seed_json = f'{root_path}/all_seed/{domain}.json' 324 | 325 | questions_model = [] 326 | with open(seed_json, "r", encoding='utf-8') as file: 327 | try: 328 | json_data = json.load(file) 329 | questions_model = json_data["judge"]["Chinese"] 330 | except: 331 | logger.info('读取问题种子失败') 332 | return 333 | 334 | ix = 0 335 | with open(captions_path, 'r', encoding='utf-8') as f: 336 | for line in f: 337 | ix += 1 338 | if ix < begin_ix: 339 | continue 340 | 341 | questions_model_list = random.sample(questions_model, min(3, len(questions_model))) 342 | caption_dict = json.loads(line) 343 | 344 | prompt = judge_prompt 345 | prompt = prompt.replace("<domain>", domain[6:]) 346 | prior_knowledge = str(caption_dict.get("bing_tag", 'Empty')) 347 | if prior_knowledge == "": 348 | prior_knowledge = "Empty" 349 | prompt = prompt.replace('<prior_knowledge>', prior_knowledge) 350 | prompt = prompt.replace("<caption>", caption_dict['gpt4v_caption_interface'].replace("\n\n","\n")) 351 | prompt = prompt.replace('<question_templates>', str(questions_model_list)) 352 | try: 353 | out = get_answer(prompt) 354 | question_dict = { 355 | "image_path": caption_dict['image_path'], 356 | "qa_raw": str(out), 357 | "gpt_prompt": prompt 358 | } 359 | open(generated_queations_path, 'a', encoding='utf-8').write( 360 | json.dumps(question_dict, ensure_ascii=False)+'\n' 361 | ) 362 | 363 | except Exception as e: 364 | logger.info(str(ix) + " [ERROR]") 365 | logger.info("error info:" + str(repr(e))) 366 | caption_dict['err'] = str(repr(e)) 367 | logger.info("error image path:" + caption_dict['image_path']) 368 | open(generated_queations_path, 'a', encoding='utf-8').write( 369 | json.dumps(caption_dict, ensure_ascii=False)+'\n' 370 | ) 371 | 372 | 
def one_ask(text, image_paths, image_size=(512, 512), detail='low'):
    """Ask the vision model about ``text`` plus the given images.

    Each image is downscaled to fit inside ``image_size`` when larger,
    re-encoded and embedded as a base64 data URL. The request is attempted
    up to ``MAX_API_RETRY`` times; the module-level ``key_id`` is advanced
    after every attempt (successful or not) so API keys are used in rotation.

    Args:
        text: Prompt text sent as the first content part.
        image_paths: Iterable of image file paths to attach.
        image_size: ``(width, height)`` bound passed to ``Image.thumbnail``.
        detail: Value of the ``detail`` field of every image part.

    Returns:
        The reply text of the first choice, or the literal string
        ``"error"`` when the images cannot be encoded or every attempt
        failed. Exceptions are logged and never propagated.
    """
    global key_id

    # Build the request payload once; it is identical for every retry.
    try:
        content = [{"type": "text", "text": text}]
        for image in image_paths:
            with Image.open(image) as img:
                # imghdr may not recognise the file; fall back to the format
                # Pillow detected so save() and the data URL stay valid.
                image_type = imghdr.what(image) or (img.format or 'jpeg').lower()
                # Shrink oversized images in place before encoding.
                if img.size[0] > image_size[0] or img.size[1] > image_size[1]:
                    img.thumbnail(image_size, Image.LANCZOS)
                byte_stream = io.BytesIO()
                img.save(byte_stream, format=image_type)
            encoded_string = base64.b64encode(byte_stream.getvalue()).decode('utf-8')
            img_src_attr_value = f'data:image/{image_type};base64,{encoded_string}'
            content.append({"type": "image_url", "image_url": {"url": img_src_attr_value, "detail": detail}})
    except Exception as e:
        logger.error('[error in one ask]:' + repr(e))
        return "error"

    # Read the module-level proxy setting directly: assigning to a name
    # called ``proxy_url`` inside this function would make it a local and
    # raise UnboundLocalError on every attempt.
    proxies = {
        "http://": f"{proxy_url}",
        "https://": f"{proxy_url}",
    }
    for _ in range(MAX_API_RETRY):
        try:
            api_key = gpt_keys[key_id]['key']
            # The context manager closes the HTTP client after each attempt.
            with httpx.Client(proxies=proxies) as http_c:
                client = OpenAI(api_key=api_key, http_client=http_c)
                response = client.chat.completions.create(
                    model="gpt-4-vision-preview",
                    messages=[{"role": "user", "content": content}],
                    max_tokens=4096,
                )
            reply = response.choices[0].message.content
            logger.info(text)
            logger.info(reply)
            key_id = (key_id + 1) % MAX_API_RETRY
            return reply
        except Exception as e:
            key_id = (key_id + 1) % MAX_API_RETRY
            logger.error('[error in one ask]:' + repr(e))
            time.sleep(1.5)
    logger.error(f"Failed after {MAX_API_RETRY} retries.")
    return "error"
def _load_caption_sources(source_path):
    """Return the source records from a JSON array file or a JSON Lines file."""
    with open(source_path, 'r', encoding='utf-8') as f:
        raw = f.read()
    try:
        loaded = json.loads(raw)
    except ValueError:
        # Not one JSON document: parse it as one JSON object per line.
        return [json.loads(line) for line in raw.splitlines() if line.strip()]
    # A JSON Lines file holding a single record parses as one object.
    return [loaded] if isinstance(loaded, dict) else loaded


def get_gpt4v_caption(img_folder, source_path, dest_path, begin_ix=0):
    """Caption every source image and append the results to ``dest_path``.

    For each record at index ``begin_ix`` or later, a prompt is chosen
    (``caption_prompt_text`` when the record carries non-empty ``text``,
    otherwise ``caption_prompt``), ``one_ask`` is called with the image, and
    the record is written as one JSON line with the added keys
    ``gpt4v_caption_interface`` and ``gpt4v_prompt``.

    Args:
        img_folder: Directory that ``image_path`` values are relative to.
        source_path: JSON array file or JSON Lines file of source records.
        dest_path: JSON Lines file the captioned records are appended to.
        begin_ix: 0-based index of the first record to process.

    Returns:
        None. Per-record failures are logged and skipped.

    Raises:
        OSError: If ``source_path`` cannot be read.
        ValueError: If ``source_path`` is neither valid JSON nor JSON Lines.
    """
    source_json = _load_caption_sources(source_path)
    total = len(source_json)

    for ix, data in enumerate(source_json):
        if ix < begin_ix:
            continue

        logger.info("processing " + str(ix) + " total " + str(total))
        try:
            prompt = caption_prompt
            # Records without OCR output simply have no 'text' entry.
            ocr_text = data.get('text') or []
            if len(ocr_text) >= 1:
                # Text was found in the image: use the OCR-aware prompt.
                prompt = caption_prompt_text.replace("<ocr_text>", list_to_str(ocr_text))

            image_file = os.path.join(img_folder, data['image_path'])
            logger.info(image_file)
            gptout = one_ask(prompt, [image_file])
            # one_ask signals exhausted retries with the sentinel "error";
            # do not store that as a caption.
            if gptout == "error":
                logger.error("[error]: no caption generated for " + image_file)
                continue

            new_item = data.copy()
            new_item['gpt4v_caption_interface'] = gptout
            new_item['gpt4v_prompt'] = prompt

            with open(dest_path, 'a', encoding='utf-8') as dest:
                dest.write(json.dumps(new_item, ensure_ascii=False) + '\n')
            time.sleep(1.5)
        except Exception as e:
            logger.error("[error]: " + str(repr(e)))
(Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.83 Safari/537.36' 19 | } 20 | bing_image_url_pattern = 'https://www.bing.com/images/async?q={}&first={}&count={}&mmasync=1' 21 | 22 | def __init__(self, domain, keywords, amount, save_url, json_path): 23 | self.domain = domain 24 | self.json_path = json_path 25 | self.keywords = keywords 26 | self.keyword = None 27 | self.amount = amount 28 | self.path = save_url 29 | self.item_list = [] 30 | self.thread_pool = Pool(self.thread_amount) 31 | 32 | def __del__(self): 33 | self.thread_pool.close() 34 | self.thread_pool.join() 35 | 36 | def request_homepage(self, url): 37 | return requests.get(url, headers=self.headers) 38 | 39 | def parse_homepage_response(self, response): 40 | tree = etree.HTML(response.text) 41 | m_list = tree.xpath('//*[@class="imgpt"]/a/@m') 42 | 43 | info_list = [] 44 | for m in m_list: 45 | dic = json.loads(m) 46 | image_title = dic['t'] 47 | for char in self.ignore_chars: 48 | image_title = image_title.replace(char, ' ') 49 | image_title = image_title.replace( 50 | " ", " ").replace(" ", " ").strip() 51 | 52 | image_type = dic['murl'].split('.')[-1] 53 | if image_type not in self.image_types: 54 | image_type = 'jpg' 55 | 56 | info = dict() 57 | info['image_title'] = image_title 58 | info['image_type'] = image_type 59 | info['image_md5'] = dic['md5'] 60 | info['image_url'] = dic['murl'] 61 | 62 | info_list.append(info) 63 | return info_list 64 | 65 | def request_and_save_image(self, info): 66 | try: 67 | bing_tag = info['image_title'] 68 | filename = '{}.{}'.format( 69 | self.domain + '_' + str(int(time.time() * 1e6))[-10:], info['image_type']) 70 | filepath = os.path.join(self.path, filename) 71 | 72 | response = requests.get(info['image_url'], headers=self.headers, timeout=1.5) 73 | with open(filepath, 'wb') as fp: 74 | fp.write(response.content) 75 | 76 | self.count += 1 77 | self.success_count += 1 78 | self.item_list.append({ 79 | 'image_path': filename, 80 
| 'bing_tag': bing_tag, 81 | 'retrieval_keyword': self.keyword, 82 | 'source': "bing", 83 | }) 84 | 85 | except Exception as e: 86 | self.count += 1 87 | 88 | def deduplication(self, info_list): 89 | result = [] 90 | md5_set = set() 91 | for info in info_list: 92 | if info['image_md5'] not in md5_set: 93 | result.append(info) 94 | md5_set.add(info['image_md5']) 95 | return result 96 | 97 | def run_all(self): 98 | print("*** spider ***") 99 | if not os.path.exists(self.path): 100 | os.mkdir(self.path) 101 | 102 | self.keyword = None 103 | self.item_list = [] 104 | for keyword in self.keywords: 105 | self.keyword = keyword 106 | print(f'keyword: {keyword}') 107 | self.run() 108 | time.sleep(5) 109 | 110 | print('done, save total ' + 111 | str(len(self.item_list)) + ' images.') 112 | with open(self.json_path, 'a', encoding='utf-8') as output_file: 113 | for item in self.item_list: 114 | output_file.write(json.dumps(item, ensure_ascii=False) + '\n') 115 | 116 | def run(self): 117 | homepage_urls = [] 118 | for i in range(int(self.amount/self.per_page_images * 3) + 1): 119 | url = self.bing_image_url_pattern.format( 120 | self.keyword, i*self.per_page_images, self.per_page_images) 121 | homepage_urls.append(url) 122 | print('homepage_urls len {}'.format(len(homepage_urls))) 123 | 124 | homepage_responses = self.thread_pool.map( 125 | self.request_homepage, homepage_urls) 126 | 127 | info_list = [] 128 | for response in homepage_responses: 129 | try: 130 | result = self.parse_homepage_response(response) 131 | info_list += result 132 | except Exception as e: 133 | pass 134 | print('info amount before deduplication', len(info_list)) 135 | 136 | info_list = self.deduplication(info_list) 137 | print('info amount after deduplication', len(info_list)) 138 | info_list = info_list[: self.amount] 139 | print('info amount after split', len(info_list)) 140 | 141 | self.thread_pool.map(self.request_and_save_image, info_list) 142 | 143 | print('{} done. 
def read_keywords(file_path):
    """Read search keywords from a text file, one keyword per line.

    Surrounding whitespace is stripped, blank lines are skipped and duplicates
    are dropped while keeping first-occurrence order.

    Args:
        file_path: Path of a UTF-8 encoded text file.

    Returns:
        list[str]: The unique, non-empty keywords.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        stripped = (line.strip() for line in file)
        # dict.fromkeys de-duplicates like set() but keeps a stable order, and
        # the filter stops blank lines from becoming an empty search keyword.
        return list(dict.fromkeys(keyword for keyword in stripped if keyword))


def remove_broken(image_path):
    """Delete every file in ``image_path`` that cannot be read as an image.

    Args:
        image_path: Directory holding the downloaded images.
    """
    removed = 0
    for image_name in os.listdir(image_path):
        full_path = os.path.join(image_path, image_name)
        if not os.path.isfile(full_path):
            # os.remove cannot delete directories; leave them alone.
            continue
        try:
            # The context manager closes the file handle; verify() also
            # catches files that open fine but have a corrupt body.
            with Image.open(full_path) as image:
                image.verify()
        except Exception:
            removed += 1
            os.remove(full_path)
    print("remove ", removed, " images")


if __name__ == "__main__":
    root_path = 'MMInstruct'
    domain_list = ["poster"]
    os.makedirs(os.path.join(root_path, 'bing_images'), exist_ok=True)
    os.makedirs(os.path.join(root_path, 'bing_images/json'), exist_ok=True)
    for domain in domain_list:
        # os.path.join supplies the separator the old string concatenation
        # dropped ("MMInstructkeywords/...").
        keywords = read_keywords(os.path.join(root_path, 'keywords', domain + '.txt'))
        print(f'keywords: {keywords}')
        count = 15
        save_path = os.path.join(root_path, 'bing_images', domain)
        os.makedirs(save_path, exist_ok=True)
        json_path = os.path.join(root_path, 'bing_images', 'json', domain + '.jsonl')
        spider = BingImagesSpider(domain, keywords, count, save_path, json_path)
        spider.run_all()

        remove_broken(save_path)

    print('done all.')
# Metadata directory for the per-domain JSONL files; makedirs also creates the
# parent "clip_retrieval_images" directory when it is missing.
os.makedirs(root_path + '/clip_retrieval_images/json', exist_ok=True)

# For every domain: query the CLIP retrieval service with each seed image,
# download the returned neighbours and log one JSONL record per seed image.
# NOTE(review): domain_list is built from "<root>/images" while the seed images
# are read from "<root>/source_domain/<domain>/images" - confirm both layouts
# list the same domains.
for ix, domain in enumerate(domain_list):
    in_images_path = os.path.join(root_path, "source_domain", domain, "images")
    in_images_list = [name for name in os.listdir(in_images_path)
                      if name.endswith(('.jpg', '.png'))]
    out_images_dir = os.path.join(root_path, "clip_retrieval_images", domain)
    out_json_path = os.path.join(root_path, "clip_retrieval_images/json", domain + ".jsonl")
    err_json_path = os.path.join(root_path, "clip_retrieval_images/json", domain + "_err.jsonl")

    client = ClipClient(url="https://knn.laion.ai/knn-service", indice_name="laion5B-L-14", num_images=200)

    if not os.path.exists(out_images_dir):
        os.makedirs(out_images_dir)
        message = f"The directory '{out_images_dir}' has been created."
    else:
        message = f"The directory '{out_images_dir}' already exists."
    print(message)

    for image_index, image_name in enumerate(tqdm.tqdm(in_images_list)):
        # istart_list[ix] allows resuming a domain from a given seed image.
        if image_index < istart_list[ix]:
            continue

        aug_item = {'image_path': image_name}
        image_path = os.path.join(in_images_path, image_name)
        aug_item["retrieval"] = []

        try:
            results = client.query(image=image_path)
        except Exception as e:
            print(repr(e))
            # Remember the failed seed image so it can be retried later.
            with open(err_json_path, 'a', encoding='utf-8') as err_file:
                err_file.write(json.dumps(aug_item, ensure_ascii=False) + '\n')
            continue

        count = 0
        # A separate loop variable keeps the seed-image index intact.
        for item in results:
            try:
                file_path = out_images_dir + '/{}.jpg'.format(item['id'])
                record = {"image_path": file_path,
                          "caption": item['caption'],
                          "similarity": item['similarity']}
                response = requests.get(item['url'], timeout=5)
            except Exception:
                # "except Exception" (not a bare except) so Ctrl-C still stops
                # the script; presumably items are dicts - confirm.
                print("Skip fig {}".format(item.get('id') if isinstance(item, dict) else item))
                continue
            if response.status_code == 200:
                with open(file_path, 'wb') as file:
                    file.write(response.content)
                print(f'download {file_path}')
                aug_item["retrieval"].append(record)
                count += 1
            else:
                print('HTTP Error:', response.status_code)
        aug_item["count"] = count
        with open(out_json_path, 'a', encoding='utf-8') as out_file:
            out_file.write(json.dumps(aug_item, ensure_ascii=False) + '\n')
# Rank of this process; rank0_print only prints when it equals 0. It is None
# here and is presumably assigned by the training entry point - confirm.
local_rank = None


def rank0_print(*args):
    """Print ``args`` only when the module-level ``local_rank`` is 0."""
    if local_rank == 0:
        print(*args)


@dataclass
class DataArguments:
    """Command-line options describing the training data."""

    data_path: str = field(default=None,
                           metadata={"help": "Path to the training data."})
    lazy_preprocess: bool = False
    is_multimodal: bool = False
    image_folder: Optional[str] = field(default=None)
    image_aspect_ratio: str = 'square'
    # Extra data to append: a single .json file or a directory searched
    # recursively for .json files.
    more_data: Optional[str] = field(default=None)  # new add


class LazySupervisedDataset(Dataset):
    """Dataset for supervised fine-tuning.

    Samples are dicts with a ``conversations`` list and an optional ``image``
    path relative to ``data_args.image_folder``. Tokenisation and image
    loading happen lazily in ``__getitem__``.
    """

    # Upper bound on consecutive failed samples before __getitem__ gives up.
    _MAX_FETCH_ATTEMPTS = 100

    def __init__(self, data_path: str,
                 tokenizer: "transformers.PreTrainedTokenizer",
                 data_args: DataArguments):
        """Load the sample list from ``data_path`` plus ``data_args.more_data``.

        Args:
            data_path: JSON file holding a list of samples.
            tokenizer: Tokenizer handed to ``preprocess`` (hint is quoted so
                it is not evaluated when the class is defined).
            data_args: Data options; ``more_data`` optionally names extra data.
        """
        super(LazySupervisedDataset, self).__init__()
        with open(data_path, "r", encoding="utf-8") as data_file:
            list_data_dict = json.load(data_file)
        rank0_print(f"Total count of list_data_dict load from {data_path}: {len(list_data_dict)}")

        # new add
        if data_args.more_data:
            rank0_print("Append more data.")
            more_data_dict = self.load_self_defined_data(data_args.more_data)
            list_data_dict += more_data_dict
            rank0_print(f"Total count of list_data_dict after append data.: {len(list_data_dict)}")

        rank0_print("Formatting inputs...Skip in lazy mode")

        self.tokenizer = tokenizer
        self.list_data_dict = list_data_dict
        self.data_args = data_args

    def __len__(self):
        """Return the number of samples."""
        return len(self.list_data_dict)

    @property
    def lengths(self):
        """Whitespace-token count per sample, plus 128 for image samples."""
        length_list = []
        for sample in self.list_data_dict:
            img_tokens = 128 if 'image' in sample else 0
            length_list.append(sum(len(conv['value'].split()) for conv in sample['conversations']) + img_tokens)
        return length_list

    @property
    def modality_lengths(self):
        """Whitespace-token count per sample; negative for text-only samples."""
        length_list = []
        for sample in self.list_data_dict:
            try:
                cur_len = sum(len(conv['value'].split()) for conv in sample['conversations'])
                cur_len = cur_len if 'image' in sample else -cur_len
                length_list.append(cur_len)
            except Exception as e:
                # Show the offending sample before re-raising.
                rank0_print(f'modality_lengths line 701 {repr(e)}')
                rank0_print(sample)
                raise e

        return length_list

    @staticmethod
    def _expand2square(pil_img, background_color):
        """Pad ``pil_img`` to a centred square filled with ``background_color``."""
        width, height = pil_img.size
        if width == height:
            return pil_img
        side = max(width, height)
        result = Image.new(pil_img.mode, (side, side), background_color)
        result.paste(pil_img, ((side - width) // 2, (side - height) // 2))
        return result

    def __getitem__(self, i) -> Dict[str, torch.Tensor]:
        """Build the training example for index ``i``.

        If a sample cannot be built (for example its image cannot be opened),
        a random other sample is tried instead.

        Returns:
            dict with ``input_ids`` and ``labels``, plus ``image`` for image
            samples or, for a multimodal model, a zero tensor.

        Raises:
            RuntimeError: when ``_MAX_FETCH_ATTEMPTS`` samples in a row fail,
                instead of retrying forever.
        """
        last_error = None
        for _ in range(self._MAX_FETCH_ATTEMPTS):
            # Reset per attempt so the log line refers to the current sample.
            image_folder = None
            image_file = None
            try:
                sample = self.list_data_dict[i]
                is_single = isinstance(i, int)
                sources = [sample] if is_single else sample
                assert len(sources) == 1, "Don't know why it is wrapped to a list"  # FIXME
                if 'image' in sources[0]:
                    image_file = sample['image']
                    image_folder = self.data_args.image_folder
                    processor = self.data_args.image_processor
                    # Close the file handle as soon as the pixels are copied.
                    with Image.open(os.path.join(image_folder, image_file)) as raw_image:
                        image = raw_image.convert('RGB')
                    if self.data_args.image_aspect_ratio == 'pad':
                        image = self._expand2square(image, tuple(int(x * 255) for x in processor.image_mean))
                    image = processor.preprocess(image, return_tensors='pt')['pixel_values'][0]
                    sources = preprocess_multimodal(
                        copy.deepcopy([e["conversations"] for e in sources]),
                        self.data_args
                    )
                else:
                    sources = copy.deepcopy([e["conversations"] for e in sources])

                data_dict = preprocess(
                    sources,
                    self.tokenizer,
                    has_image=('image' in sample)
                )
                if is_single:
                    data_dict = dict(
                        input_ids=data_dict["input_ids"][0],
                        labels=data_dict["labels"][0]
                    )

                # image exist in the data
                if 'image' in sample:
                    data_dict['image'] = image
                elif self.data_args.is_multimodal:
                    # image does not exist in the data, but the model is multimodal
                    crop_size = self.data_args.image_processor.crop_size
                    data_dict['image'] = torch.zeros(3, crop_size['height'], crop_size['width'])

                return data_dict
            except Exception as e:
                last_error = e
                rank0_print(f"{repr(e)} image file can't open {image_folder} {image_file}")
                i = random.randint(0, len(self.list_data_dict) - 1)

        raise RuntimeError(
            f"giving up after {self._MAX_FETCH_ATTEMPTS} consecutive samples failed to load"
        ) from last_error

    def get_json_files(self, data_dir):
        """Return the sorted paths of all ``.json`` files under ``data_dir``."""
        json_files = []
        # Walk the directory recursively; followlinks=True also descends into
        # symlinked directories.
        for root, dirs, files in os.walk(data_dir, followlinks=True):
            for file in files:
                if file.endswith('.json'):
                    json_files.append(os.path.join(root, file))
        # os.walk order depends on the filesystem; sorting keeps the sample
        # order identical on every process and machine.
        json_files.sort()
        return json_files

    def load_self_defined_data(self, data_dir):
        """Load and concatenate the sample lists of the extra JSON file(s).

        Args:
            data_dir: A single ``.json`` file, or a directory searched
                recursively for ``.json`` files.

        Returns:
            list: All samples, in file order; empty when nothing was found.
        """
        if data_dir.endswith('.json'):
            json_files = [data_dir]
        else:
            json_files = self.get_json_files(data_dir)

        more_data_dict = []
        for more_data_path in json_files:
            with open(more_data_path, "r", encoding="utf-8") as more_data_file:
                more_data = json.load(more_data_file)
            rank0_print(f"Count of {more_data_path}: {len(more_data)}")

            more_data_dict += more_data

        rank0_print(f"Total json file {len(json_files)}")
        rank0_print(f"Total Count {len(more_data_dict)}")
        rank0_print(type(more_data_dict))
        # Guard the preview: indexing an empty list would raise IndexError.
        if more_data_dict:
            rank0_print(more_data_dict[0])
        return more_data_dict