├── __init__.py ├── figures ├── Workflow.png └── Capabilities.png ├── utils ├── __init__.py ├── testf │ ├── __init__.py │ ├── test_types.py │ └── functions.py └── types.py ├── methods ├── __init__.py ├── generator.py ├── agent.py ├── evaluator.py ├── collect_result.py ├── evaluate.py └── metric.py ├── setup.py ├── prompts ├── prompt.py ├── __init__.py ├── refresh_table.txt ├── map_text.txt ├── refresh_text.txt ├── map_table.txt ├── add_text.txt ├── add_table.txt ├── add_vector.txt ├── revise_text.txt ├── delete_vector.txt ├── map_vector.txt ├── refresh_vector.txt └── revise_table.txt ├── requirements.txt ├── LICENSE ├── evaluation.py ├── .gitignore └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /figures/Workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eason-Li-AIS/DrafterBench/HEAD/figures/Workflow.png -------------------------------------------------------------------------------- /figures/Capabilities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Eason-Li-AIS/DrafterBench/HEAD/figures/Capabilities.png -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | # ./testf stores the dual functions designed to record factual operations when executing the generated codes 2 | -------------------------------------------------------------------------------- /methods/__init__.py: -------------------------------------------------------------------------------- 1 | task_sets = [ 2 | "add_table", 3 | "revise_table", 4 | "map_table", 5 | "refresh_table", 6 | "add_text", 7 | "revise_text", 8 | "map_text", 9 | "refresh_text", 10 | "add_vector", 11 | "delete_vector", 12 | "map_vector", 13 | "refresh_vector", 14 | ] 15 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | with open("requirements.txt") as f: 4 | requirements = f.read().splitlines() 5 | 6 | setup( 7 | name="DrafterBench", 8 | version="0.1.0", 9 | description="A benchmark evaluates LLMs' performance in automating drawing revision tasks.", 10 | author="Yinsheng Li, Zhen Dong, Yi Shao", 11 | author_email="yinsheng.li@mail.mcgill.ca", 12 | packages=find_packages(), 13 | python_requires=">=3.11,<3.12", 14 | install_requires=requirements, 15 | ) 16 | -------------------------------------------------------------------------------- /prompts/prompt.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.append(os.path.abspath("../..")) 5 | 6 | from DrafterBench.prompts import Backend_prompt 7 | 8 | 9 | class Prompt: 10 | def __init__( 11 | self, 12 | task_index: str, 13 | user_instruction: str, 14 | ): 15 | self.system = Backend_prompt[task_index] 16 | self.user_instruction = user_instruction 17 | 18 | def message(self): 19 | return [ 20 | {"role": "system", "content": self.system}, 21 | {"role": "user", "content": self.user_instruction}, 22 | ] 23 | -------------------------------------------------------------------------------- /utils/testf/__init__.py: -------------------------------------------------------------------------------- 1 | # Dual functions/tools 2 | 3 | import sys 4 | import os 5 | 6 | sys.path.append(os.path.abspath('../..')) 7 | 8 | from DrafterBench.utils.testf.test_types import ( 9 | fileobject, 10 | TEXT_ALIGN_LEFT, 11 | TEXT_ALIGN_RIGHT, 12 | TEXT_ALIGN_JUSTIFY, 13 | TEXT_ALIGN_CENTER, 14 | ) 15 | from DrafterBench.utils.testf.functions import ( 16 | open, 17 | extractanno, 18 | selector, 19 | select_from_drawings, 20 | manipulate_text, 21 | extract_table, 22 | manipulate_table, 23 | draw_drawer, 24 | delete, 25 | repairer, 26 | manipulate_draw, 27 | recorder, 28 | Projector, 29 | project_draw, 30 | ) 31 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pymupdf==1.24.3 2 | numpy 3 | argparse 4 | sentence-transformers==4.1.0 5 | multiprocess 6 | datasets==3.6.0 7 | litellm==1.72.2 8 | frontend==0.0.3 9 | huggingface-hub==0.32.4 10 | transformers==v4.52.4 11 | aiohttp==3.12.11 12 | openai==1.81.0 13 | python-dotenv==1.1.0 14 | tiktoken 15 | importlib-metadata==7.0 16 | importlib_resources==6.4.0 17 | cohere 18 | redis==5.2.1 19 | redisvl==0.4.1 20 | anthropic==0.53.0 21 | orjson==3.10.12 22 | pydantic==2.10.2 23 | pydantic-settings==2.9.1 24 | google-cloud-aiplatform==1.43.0 25 | fastapi-sso==0.16.0 26 | uvloop==0.21.0 27 | mcp==1.5.0 28 | packaging==23.2 29 | torch==2.2.1 30 | torchvision==0.17.1 31 | tokenizers==0.21.1 32 | oauthlib==3.2.2 33 | hf-xet==1.1.3 34 | -------------------------------------------------------------------------------- /prompts/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | def open_file(filepath): 5 | with open(filepath, "r", encoding="utf-8") as file: 6 | content = file.read() 7 | return content 8 | 9 | 10 | prefix = os.path.abspath(".") 11 | 12 | Backend_prompt = { 13 | "1": open_file(prefix + "/prompts/add_table.txt"), 14 | "2": open_file(prefix + "/prompts/revise_table.txt"), 15 | "3": open_file(prefix + "/prompts/map_table.txt"), 16 | "4": open_file(prefix + "/prompts/refresh_table.txt"), 17 | "5": open_file(prefix + "/prompts/add_text.txt"), 18 | "6": open_file(prefix + "/prompts/revise_text.txt"), 19 | "7": open_file(prefix + "/prompts/map_text.txt"), 20 | "8": open_file(prefix + "/prompts/refresh_text.txt"), 21 | "9": open_file(prefix + "/prompts/add_vector.txt"), 22 | "10": open_file(prefix + "/prompts/delete_vector.txt"), 23 | "11": open_file(prefix + "/prompts/map_vector.txt"), 24 | "12": open_file(prefix + "/prompts/refresh_vector.txt"), 25 | } 26 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Eason-Li-AIS 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /methods/generator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import json 3 | import copy 4 | import sys 5 | import os 6 | 7 | sys.path.append(os.path.abspath("..")) 8 | 9 | from DrafterBench.methods import task_sets 10 | from DrafterBench.methods.agent import Drafter_agent 11 | from DrafterBench.prompts.prompt import Prompt 12 | 13 | 14 | def openfile(file): 15 | f = open(file, "r", encoding="utf-8") 16 | content = json.load(f) 17 | return content 18 | 19 | 20 | def savedate(data, jsonpath): 21 | with open(jsonpath, "w", encoding="utf-8") as w: 22 | json.dump(data, w, indent=4) 23 | 24 | 25 | def generator(model, model_provider, temperature, vllm_url, max_length, response_results, result_path, task): 26 | if len(response_results) <= max_length: 27 | agent = Drafter_agent( 28 | model, model_provider, temperature, vllm_url 29 | ) 30 | indx = task_sets.index(task["Tasktype"]) + 1 31 | prompt = Prompt(str(indx), task["Instruction"]) 32 | pre_code = agent.get_response(messages=prompt.message()) 33 | ground_code = task["Groundtruth"] 34 | response = copy.deepcopy(task) 35 | response.update({"Response_code": pre_code}) 36 | response_results.append(copy.deepcopy(response)) 37 | savedate(list(response_results), result_path) 38 | return response_results 39 | -------------------------------------------------------------------------------- /utils/types.py: -------------------------------------------------------------------------------- 1 | class Score_builder: 2 | def ground_fill(self, ground_details): 3 | self.total_score = 0 4 | ( 5 | self.total_arguments, 6 | self.total_variables_transfer, 7 | self.total_function_calls, 8 | self.total_single_tool_calls, 9 | self.total_multi_tool_calls, 10 | self.total_plan_execution, 11 | ) = ground_details 12 | return self 13 | 14 | def result(self, point: float, code_details, plan_details): 15 | self.total_score = point 16 | self.arguments_count = code_details[0] 17 | self.variables_transfer_count = code_details[1] 18 | self.function_calls_count = code_details[2] 19 | self.single_tool_calls_count = code_details[3] 20 | self.multi_tool_calls_count = code_details[4] 21 | self.intersected_plan_count = code_details[5] 22 | self.all_plan = plan_details[5] 23 | return self 24 | 25 | def fail(self): 26 | self.total_score = 0 27 | self.arguments_count = 0 28 | self.variables_transfer_count = 0 29 | self.function_calls_count = 0 30 | self.single_tool_calls_count = 0 31 | self.multi_tool_calls_count = 0 32 | self.intersected_plan_count = 0 33 | self.all_plan = self.total_plan_execution 34 | return self 35 | -------------------------------------------------------------------------------- /methods/agent.py: -------------------------------------------------------------------------------- 1 | import re 2 | import litellm 3 | from litellm import completion 4 | from typing import List, Dict, Any 5 | from openai import OpenAI 6 | 7 | # litellm._turn_on_debug() 8 | 9 | class Drafter_agent: 10 | def __init__( 11 | self, 12 | model: str, 13 | provider: str, 14 | temperature: float = 0.0, 15 | vllm_url: str = None, 16 | ): 17 | self.model = model 18 | self.provider = provider 19 | self.temperature = temperature 20 | self.vllm_url = vllm_url 21 | 22 | def get_response(self, messages: List[Dict[str, Any]]): 23 | generated_code = None 24 | trail = 0 25 | while not generated_code and trail < 10: 26 | if not self.vllm_url: 27 | res = completion( 28 | model=self.model, 29 | messages=messages, 30 | temperature=self.temperature, 31 | ) 32 | else: 33 | client = OpenAI( 34 | base_url=self.vllm_url # litellm-proxy-base url 35 | ) 36 | 37 | res = client.chat.completions.create( 38 | model=self.model, 39 | messages=messages, 40 | max_tokens=2500, 41 | temperature=self.temperature 42 | ) 43 | response = res.choices[0].message.content 44 | code_search = re.search(r"`python\s*([^`]+)`", response) 45 | generated_code = code_search.group(1) if code_search else None 46 | trail += 1 47 | return generated_code if generated_code else "Fail to generate code" 48 | -------------------------------------------------------------------------------- /methods/evaluator.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import json 3 | import sys 4 | import os 5 | 6 | sys.path.append(os.path.abspath("..")) 7 | 8 | from DrafterBench.methods import metric 9 | from DrafterBench.utils.types import Score_builder 10 | from DrafterBench.methods.collect_result import collect_result, process_code, execute_code 11 | 12 | def openfile(file): 13 | f = open(file, "r", encoding="utf-8") 14 | content = json.load(f) 15 | return content 16 | 17 | 18 | def savedate(data, jsonpath): 19 | with open(jsonpath, "w", encoding="utf-8") as w: 20 | json.dump(data, w, ensure_ascii=False, indent=4) 21 | 22 | 23 | def evaluator(result_path, eval_result, response_result): 24 | test_code = response_result["Response_code"] 25 | test_code = process_code(test_code) 26 | test_info = execute_code(test_code) 27 | ground_code = response_result["Groundtruth"] 28 | ground_code = process_code(ground_code) 29 | ground_info = execute_code(ground_code) 30 | ground_details = metric.ground_check(ground_info) 31 | prompt_score = ( 32 | Score_builder() 33 | .ground_fill(ground_details) 34 | .result( 35 | *metric.cross_check( 36 | ground_info, 37 | test_info, 38 | ( 39 | "precise" 40 | if response_result["Precise|Vague"] == "Precise" 41 | else "vaguely" 42 | ), 43 | ) 44 | ) 45 | if test_info 46 | else Score_builder().ground_fill(ground_details).fail() 47 | ) 48 | response_result.update(collect_result(prompt_score)) 49 | eval_result.append(response_result) 50 | eval_data = list(eval_result) 51 | savedate(eval_data, result_path.replace(".json", "_score.json")) 52 | return eval_result 53 | -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import time 4 | import sys 5 | from litellm import provider_list 6 | from methods.evaluate import evaluate 7 | 8 | sys.path.append(os.path.abspath("..")) 9 | 10 | def parse_args(): 11 | parser = argparse.ArgumentParser() 12 | parser.add_argument( 13 | "--model", 14 | type=str, 15 | help="The model to use for the agent", 16 | ) 17 | parser.add_argument( 18 | "--model-provider", 19 | type=str, 20 | choices=provider_list, 21 | help="The model provider for the agent", 22 | ) 23 | parser.add_argument( 24 | "--temperature", 25 | type=float, 26 | default=0.0, 27 | help="The temperature for the agent", 28 | ) 29 | parser.add_argument("--exp_name", type=str, default=f"default_name") 30 | parser.add_argument("--result_dir", type=str, default="results/") 31 | parser.add_argument("--proc_num", type=int, default=16) 32 | parser.add_argument("--debug", type=bool, default=False) 33 | parser.add_argument( 34 | "--task_group", 35 | type=str, 36 | choices=[ 37 | "Structured", 38 | "Unstructured", 39 | "Precise", 40 | "Vague", 41 | "Complete", 42 | "Error", 43 | "Single_Object", 44 | "Multiple_Objects", 45 | "Single_Operation", 46 | "Multiple_Operations", 47 | "All", 48 | ], 49 | default="All", 50 | ) 51 | parser.add_argument("--huggingface_user_name", type=str, default=None) 52 | parser.add_argument("--huggingface_private", type=bool, default=True) 53 | parser.add_argument("--vllm_url", type=str, default=None) 54 | parser.add_argument("--resume_from", type=str, default=None) 55 | parser.add_argument("--auto_resume", type=bool, default=False) 56 | args = parser.parse_args() 57 | 58 | return args 59 | 60 | 61 | def main(): 62 | starttime = time.time() 63 | args = parse_args() 64 | evaluate(args) 65 | endtime = time.time() 66 | timespan = endtime - starttime 67 | print(f"Benchmark finsihed in {timespan} s.") 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /methods/collect_result.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import re 4 | import signal 5 | import copy 6 | 7 | sys.path.append(os.path.abspath("..")) 8 | 9 | from DrafterBench.utils.types import Score_builder 10 | from DrafterBench.utils import testf 11 | 12 | 13 | def timeout_handler(signum, frame): 14 | raise TimeoutError("Code execution timed out.") 15 | 16 | 17 | def execute_code(code_string): 18 | try: 19 | testf.functions.taskinformation.clear() 20 | variables = {} 21 | signal.signal(signal.SIGALRM, timeout_handler) 22 | signal.alarm(5) 23 | exec(code_string, variables) 24 | signal.alarm(0) 25 | code_information = copy.deepcopy(variables["testf"].functions.taskinformation) 26 | except Exception as e: 27 | code_information = [] 28 | return code_information 29 | 30 | 31 | def process_code(pre_code): 32 | test_code = re.sub( 33 | "import PDFbf|import fitz", 34 | "from DrafterBench.utils import testf", 35 | pre_code, 36 | ) 37 | test_code = re.sub("PDFbf|fitz", "testf", test_code) 38 | return test_code 39 | 40 | 41 | def collect_result( 42 | code_quality: Score_builder, 43 | ): 44 | return { 45 | "Task_score": { 46 | "Task_score": code_quality.total_score, 47 | "Success_arguments_define": code_quality.arguments_count, 48 | "Total_arguments_define": code_quality.total_arguments, 49 | "Success_variable_transfer": code_quality.variables_transfer_count, 50 | "Total_variable_transfer": code_quality.total_variables_transfer, 51 | "Success_function_calling": code_quality.function_calls_count, 52 | "Total_function_calling": code_quality.total_function_calls, 53 | "Success_single_tool_selection": code_quality.single_tool_calls_count, 54 | "Total_single_tool_selection": code_quality.total_single_tool_calls, 55 | "Success_multi_tool_selection": code_quality.multi_tool_calls_count, 56 | "Total_multi_tool_selection": code_quality.total_multi_tool_calls, 57 | "Intersected_plan_execution": code_quality.intersected_plan_count, 58 | "Total_plans_appeared": code_quality.all_plan, 59 | "Ground_plan_execution": code_quality.total_plan_execution, 60 | }, 61 | } 62 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # UV 98 | # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | #uv.lock 102 | 103 | # poetry 104 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 105 | # This is especially recommended for binary packages to ensure reproducibility, and is more 106 | # commonly ignored for libraries. 107 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 108 | #poetry.lock 109 | 110 | # pdm 111 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 112 | #pdm.lock 113 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 114 | # in version control. 115 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 116 | .pdm.toml 117 | .pdm-python 118 | .pdm-build/ 119 | 120 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 121 | __pypackages__/ 122 | 123 | # Celery stuff 124 | celerybeat-schedule 125 | celerybeat.pid 126 | 127 | # SageMath parsed files 128 | *.sage.py 129 | 130 | # Environments 131 | .env 132 | .venv 133 | env/ 134 | venv/ 135 | ENV/ 136 | env.bak/ 137 | venv.bak/ 138 | 139 | # Spyder project settings 140 | .spyderproject 141 | .spyproject 142 | 143 | # Rope project settings 144 | .ropeproject 145 | 146 | # mkdocs documentation 147 | /site 148 | 149 | # mypy 150 | .mypy_cache/ 151 | .dmypy.json 152 | dmypy.json 153 | 154 | # Pyre type checker 155 | .pyre/ 156 | 157 | # pytype static type analyzer 158 | .pytype/ 159 | 160 | # Cython debug symbols 161 | cython_debug/ 162 | 163 | # PyCharm 164 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 165 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 166 | # and can be added to the global gitignore or merged into this file. For a more nuclear 167 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 168 | #.idea/ 169 | 170 | # Ruff stuff: 171 | .ruff_cache/ 172 | 173 | # PyPI configuration file 174 | .pypirc 175 | 176 | result/ 177 | -------------------------------------------------------------------------------- /utils/testf/test_types.py: -------------------------------------------------------------------------------- 1 | import string 2 | import random 3 | import pymupdf 4 | from collections import defaultdict 5 | 6 | fontlist = [ 7 | "Lucida Console", 8 | "lucida console", 9 | "Tahoma", 10 | "tahoma", 11 | "georgia", 12 | "Georgia", 13 | "Arial", 14 | "verdana", 15 | "Verdana", 16 | "arial", 17 | "helv", 18 | "Times New Roman", 19 | "Times-roman", 20 | "times new roman", 21 | "courier", 22 | "Courier", 23 | "courier-oblique", 24 | "Courier-Oblique", 25 | "courier-bold", 26 | "Courier-Bold", 27 | "courier-boldoblique", 28 | "Courier-BoldOblique", 29 | "helvetica", 30 | "Helvetica", 31 | "helvetica-oblique", 32 | "Helvetica-Oblique", 33 | "helvetica-bold", 34 | "Helvetica-Bold", 35 | "helvetica-boldoblique", 36 | "Helvetica-BoldOblique", 37 | "times-roman", 38 | "Times-Roman", 39 | "times-italic", 40 | "Times-Italic", 41 | "times-bold", 42 | "Times-Bold", 43 | "times-bolditalic", 44 | "Times-BoldItalic", 45 | "symbol", 46 | "Symbol", 47 | "zapfdingbats", 48 | "ZapfDingbats", 49 | "helv", 50 | "Helvetica", 51 | "heit", 52 | "Helvetica-Oblique", 53 | "hebo", 54 | "Helvetica-Bold", 55 | "hebi", 56 | "Helvetica-BoldOblique", 57 | "cour", 58 | "Courier", 59 | "coit", 60 | "Courier-Oblique", 61 | "cobo", 62 | "Courier-Bold", 63 | "cobi", 64 | "Courier-BoldOblique", 65 | "tiro", 66 | "Times-Roman", 67 | "tibo", 68 | "Times-Bold", 69 | "tiit", 70 | "Times-Italic", 71 | "tibi", 72 | "Times-BoldItalic", 73 | "symb", 74 | "Symbol", 75 | "zadb", 76 | "ZapfDingbats", 77 | "Calibri", 78 | "calibri", 79 | "helv-bold", 80 | "helv-light", 81 | "Arial Bold", 82 | "Courier New", 83 | ] 84 | 85 | TEXT_ALIGN_CENTER = pymupdf.TEXT_ALIGN_CENTER 86 | TEXT_ALIGN_RIGHT = pymupdf.TEXT_ALIGN_RIGHT 87 | TEXT_ALIGN_JUSTIFY = pymupdf.TEXT_ALIGN_JUSTIFY 88 | TEXT_ALIGN_LEFT = pymupdf.TEXT_ALIGN_LEFT 89 | 90 | 91 | def generate_random_string(length): 92 | all_chars = string.ascii_letters + string.digits 93 | random_string = "".join(random.choice(all_chars) for _ in range(length)) 94 | return random_string 95 | 96 | 97 | def file_format(filepath: any): 98 | if filepath in ["missing", "Missing"]: 99 | validity = True 100 | elif type(filepath) is str and filepath[-4:] == ".pdf": 101 | validity = True 102 | else: 103 | validity = False 104 | fileformat = { 105 | "filepath": (filepath, validity), 106 | } 107 | return defaultdict(list, fileformat) 108 | 109 | 110 | class fileobject: 111 | def __init__(self, filepath: any = "Unknown"): 112 | self.fileblock = file_format(filepath) 113 | 114 | def save(self, new_path: any = "Unkown"): 115 | # default frame 116 | save_format = { 117 | "save_path": ( 118 | new_path, 119 | True if type(new_path) is str and new_path[-4:] == ".pdf" else False, 120 | ), 121 | } 122 | 123 | self.fileblock["save"].append(save_format) 124 | # save actions 125 | if save_format["save_path"][1]: 126 | if self.fileblock["change_maker"]: 127 | for a, _ in enumerate(self.fileblock["change_maker"]): 128 | try: 129 | self.fileblock["change_maker"][a]["save"] = True 130 | for tool in self.fileblock["change_maker"][a]["tool_callings"]: 131 | tool["save"] = True 132 | except Exception as e: 133 | continue 134 | for a, _ in enumerate(self.fileblock["post_change_maker"]): 135 | try: 136 | self.fileblock["post_change_maker"][a]["save"] = True 137 | for tool in self.fileblock["post_change_maker"][a][ 138 | "tool_callings" 139 | ]: 140 | tool["save"] = True 141 | except Exception as e: 142 | continue 143 | for a, _ in enumerate(self.fileblock["recorder"]): 144 | try: 145 | self.fileblock["recorder"][a]["save"] = True 146 | for tool in self.fileblock["recorder"][a]["tool_callings"]: 147 | tool["save"] = True 148 | except Exception as e: 149 | continue 150 | 151 | 152 | class extrracted_anno: 153 | def __init__( 154 | self, 155 | doc: tuple = ("Unknown", False), 156 | page: tuple = ("Unknown", False), 157 | order_or_annocolor: tuple = ("Unknown", False), 158 | ): 159 | self.doc = doc 160 | self.page = page 161 | self.order_or_annocolor = order_or_annocolor 162 | -------------------------------------------------------------------------------- /prompts/refresh_table.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter wants to update the table format on the target area boxed by rectangle markups. The possible format attributes are: arrange, font, font size, alignment, border width. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing atte least one of the format attributs) from high to low. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks. Don't assume the value of missing necessary information (only the value of mentioned format attributes can be assumed), but identify tasks lacking necessary information, record the missing details, and execute the task that all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for updating text format by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Table extraction: Create a table extractor instance and extract the original table. 13 | 6. Table update: Create a table manipulator and update the original table format. 14 | 7. Save the file. 15 | 16 | Available tools: 17 | Two constant defined by the user: 18 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 19 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 20 | 21 | Argument collection: 22 | Extract the argument values from the instruction and store them as a dictionary for the table format update task. Here is an example dictionary including all possible attributes: 23 | {“clip”: a rectangle refers to the region of text should be dealing with. “arrange”: a list of row height and column width, general form:[[row height],[column width]], if only row height is defined, it is noted as [[row height], None], and vice versa as [None,[column width]]; “font”: a string refers to the font of the text; “fontsize”: an int refers to the size of text, “borderwidth”: an int refers to the width of the table border, “align”: a string refers to the alignment of the data in the table, which can be one of “center”, “left”, “right”, "justify";} 24 | 25 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 26 | "import fitz 27 | import PDFbf" 28 | 29 | File open: 30 | You can open the file by calling the following function: 31 | "doc = fitz.open("filepath")" Open the file with the file path. 32 | 33 | Save the updated file: 34 | You can save the file with an updated name by calling the following function: 35 | " 36 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 37 | doc.save(updated_file) 38 | " 39 | 40 | Target position extraction: 41 | An annotation extractor is instantiated by providing the essential arguments document(doc): 42 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 43 | Get a rectangle and its reference points: 44 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 45 | 46 | Table extracttion: 47 | A table extractor can be instantiated by defining the document(doc), page number(pagenumber), clip like this: 48 | “table_extractor = PDFbf.extract_table(doc=fitz.document, pagenumber=int, clip=rectangle)” 49 | You can get a list of each row date of the table by calling following function: 50 | “table_data = table_extractor.data” 51 | 52 | Table update: 53 | A table manipulator can be instantiated by defining the necessary arguments (document(doc), page number(pagenumber), clip, data) and optionally mentioned arguments (arrange, font, fontsize, borderwidth, align) like this: 54 | “Table_manipulator = PDFbf.manipulate_table(doc=fitz.document, pagenumber=int, clip=rectangle, data=list, arrange=list, font=string, fontsize=int, borderwidth=int, align=string)” 55 | You can get a doc with the table updated by calling the following functions: 56 | “doc = Table_manipulator.addtable()” 57 | 58 | Incomplete instruction recording: 59 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 60 | "recorder = PDFbf.recorder(missing_information=str)" 61 | Where "missing_information" is a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no related part) or related part in the original instruction following with an "incomplete" in brackets (if there is some related description but some necessary information is missing) '''. 62 | The incomplete instruction can be recorded by calling the following function: 63 | "recorder.recording()" 64 | 65 | Here is an example. You should respond in the same way to execute the user's instructions. 66 | User: The file name is “2anipusdflate_table.pdf”. For the table in the third box on page two, change the align to the right. For the table on page one, change the font to "Times-Roman". 67 | Response: 68 | ```python 69 | 70 | import fitz 71 | import PDFbf 72 | 73 | #Task 1 74 | #file 1 75 | filepath = “2anipusdflate_table.pdf” 76 | doc = fitz.open(filepath) 77 | 78 | annoextractor = PDFbf.extractanno(doc=doc) 79 | 80 | #page 2 81 | pagenumber0=1 82 | #rectangle 3 83 | rectangleorder0=2 84 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 85 | #table 1 86 | argument0 = dict(clip=rectangle0,align="right") 87 | table_extractor0 = PDFbf.extract_table(doc=doc,pagenumber=pagenumber0,clip=argument0.get("clip")) 88 | table_data0 = table_extractor0.data 89 | Table_manipulator0 = PDFbf.manipulate_table(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), data=table_data0, align=argument0.get("align")) 90 | doc = Table_manipulator0.addtable() 91 | 92 | #task 2 record the missing information 93 | missing_information1 = '''file-level:2anipusdflate_table.pdf, page-level: 1, order-level: missing, base-level: change the font to "Times-Roman"''' 94 | recorder1 = PDFbf.recorder(missing_information=missing_information1) 95 | recorder1.recording() 96 | 97 | updatedfilepath = filepath[:-4] + "_updated.pdf" 98 | doc.save(updatedfilepath) 99 | ``` 100 | -------------------------------------------------------------------------------- /prompts/map_text.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter wants to change the position of text on the target area boxed by rectangle markups. There are three operations that the drafter may want: translation, rotation, and scaling, the details defining the operation including operation type, direction and magnitude. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the details for at least one of the three operations) from high to low. What needs to be made more clear is that the parts of the details defining each type of operation that can be considered necessary information are pairs of information consisting of the type of operation and the direction, translation-direction, rotation-direction, and scale-factor or at least bigger or smaller than the original text, respectively. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks. Don't assume the value of missing necessary information (only the magnitude of operation can be assumed), but identify tasks lacking necessary information, record the missing details, and execute the task with all essential information gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for changing a text position by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Changing position: Create a text projector instance and change position. 13 | 6. Save the file. 14 | 15 | Available tools: 16 | Two constant defined by the user: 17 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 18 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 19 | 20 | Argument collection: 21 | Extract the argument values from the instruction and store them as a dictionary for the text position-changing task. Here is an example dictionary including all possible attributes: 22 | {"clip": a rectangle boxes the text that need to be dealt with; "move": a list refers to translation details, general form: [move in its right direction (positive value if move right, negative value if move left), move in its upper direction (positive value if move up, negative value if move down), reference_point], "rotation": a list refers to the rotation details, general form: ['r', rotation degrees in the clockwise direction (positive value in clockwise direction, negative value in counterclockwise direction),reference_point], if the direction of rotation is not specified, clockwise is considered; "scal": a list refers to the scaling, general form: ['sc', [zoom factor X direction, zoom factor Y direction], reference_point]} 23 | 24 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 25 | "import fitz 26 | import PDFbf" 27 | 28 | File open: 29 | You can open the file by calling the following function: 30 | "doc = fitz.open("filepath")" Open the file with the file path. 31 | 32 | Save the updated file: 33 | You can save the file with an updated name by calling the following function: 34 | " 35 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 36 | doc.save(updated_file) 37 | " 38 | 39 | Target position extraction: 40 | An annotation extractor is instantiated by providing the essential arguments document(doc): 41 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 42 | Get a rectangle and its reference points: 43 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 44 | 45 | Changing position: 46 | A projector is instantiated by specifying the arguments (document(doc), page number(pagenumber), clip) and 1-3(at least one) optional arguments (translation (move), rotation(rotation) and scaling(scal)) on demand like this: 47 | "text_projector=PDFbf.Projector(doc=fitz.document,pagenumber=int,clip=rectangle,move=[move_in_right_direction, move_in_up_direction, reference_point],rotation=['r', rotation degrees in clockwise direction,reference_point],scal=['sc',[zoom factor in X direction, zoom factor in Y direction], reference_point])" 48 | You can get a doc with the text mapped by calling the following function: 49 | "doc = text_projector.project()" 50 | 51 | Incomplete instruction recording: 52 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 53 | "recorder = PDFbf.recorder(missing_information=str)" 54 | Where "missing_information" a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no related part) or related part in the original instruction following with an "incomplete" in brackets (if there is some related description but some necessary information is missing) '''. 55 | The incomplete instruction can be recorded by calling the following function: 56 | "recorder.recording()" 57 | 58 | Here is an example. You should respond in the same way. 59 | 60 | User: Move the text in the second box on the tenth page. For the file "47_mapqweqping_text.pdf", move the text in the first box on page 5 20 units left,30 units up. rotate the text 45 degrees clockwise. 61 | Response: 62 | ```python 63 | import fitz 64 | import PDFbf 65 | 66 | # Task 1 67 | # Recording the missing information for Task 1 (not enough details) 68 | missing_information0 = '''file-level:incomplete, page-level: 10, order-level: 2, base-level: Move the text in the second box on the tenth page.(incomplete)''' 69 | recorder0 = PDFbf.recorder(missing_information=missing_information0) 70 | recorder0.recording() 71 | 72 | 73 | 74 | # Task 2 75 | # file 2 76 | filepath1 = "47_mapqweqping_text.pdf" 77 | doc = fitz.open(filepath1) 78 | 79 | annoextractor1 = PDFbf.extractanno(doc=doc) 80 | 81 | # page 2 82 | pagenumber1 = 4 83 | 84 | # rectangle 2 85 | rectangleorder1 = 0 86 | rectangle1, rfpoint1 = annoextractor1.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 87 | argument1 = dict(clip=rectangle1, move=[-20, 30, rfpoint1], rotation=['r', 45, rfpoint1]) 88 | text_project1 = PDFbf.Projector(doc=doc, pagenumber=pagenumber1, clip=argument1.get("clip"), move=argument1.get("move"), rotation=argument1.get("rotation")) 89 | doc = text_project1.project() 90 | 91 | updatedfilepath = filepath1[:-4] + "_updated.pdf" 92 | doc.save(updatedfilepath) 93 | ``` 94 | -------------------------------------------------------------------------------- /prompts/refresh_text.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter wants to update the text format on the target area boxed by rectangle markups. The possible format attributes are: font, font size, alignment, text color. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing at least one of the format attributes) from high to low. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks to complete the execution of the instruction. Don't assume the value of missing necessary information (if a format attribute is mentioned without specifying the value clearly, you can only assume that value), but identify tasks lacking necessary information, record the missing details, and execute the task only once all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for updating text format by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Update format: create a text manipulator instance, extract original text, refresh the text formate. 13 | 6. Save the file. 14 | 15 | Available tools: 16 | Two constant defined by the user: 17 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 18 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 19 | 20 | Argument collection: 21 | Extract the argument values from the instruction and store them as a dictionary for the text position-changing task. Here is an example dictionary including all possible attributes: 22 | {"clip": a rectangle boxes the text that needed to be updated; "font": a string refers to the font of the text; "fontsize":an int refers to the font size; "align": a constant get by ```fitz.TEXT_ALIGN_RIGHT``` or ```fitz.TEXT_ALIGN_LEFT``` or ```fitz.TEXT_ALIGN_CENTER``` or ```fitz.TEXT_ALIGN_JUSTIFY``` refers to the alignment of the text in box;"rotate": an int in 0, or 90 refers to the direction of the text, 0 means horizontal text, 90 means vertical text, if the direction of the text is not mentioned, the text is horizontal text; "textcolor": a string refers to the color of the text} 23 | 24 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 25 | "import fitz 26 | import PDFbf" 27 | 28 | File open: 29 | You can open the file by calling the following function: 30 | "doc = fitz.open("filepath")" Open the file with the file path. 31 | 32 | Save the updated file: 33 | You can save the file with an updated name by calling the following function: 34 | " 35 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 36 | doc.save(updated_file) 37 | " 38 | 39 | Target position extraction: 40 | An annotation extractor is instantiated by providing the essential arguments document(doc): 41 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 42 | Get a rectangle and its reference points: 43 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 44 | 45 | 46 | Update format: 47 | A text manipulator is instantiated by specifying arguments (document(doc), page number(pagenumber), clip) and optional arguments (font, fontsize, textcolor, rotate, align) on demand like this: "Text_manipulator=PDFbf.manipulate_text(doc=fitz.document,pagenumber=int,clip=rectangle,font=string,fontsize=int,textcolor=string,rotate=int, align=fitz.TEXT_ALIGN_LEFT)" 48 | You can extract the original horizontal text by calling the following function: 49 | "Text_manipulator.text = Text_manipulator.gethortext()" 50 | You can get a string of original vertical text by calling the following function: 51 | "Text_manipulator.text = Text_manipulator.getvertext()" 52 | After extracting the original text, you can get a doc with the original text covered by refreshed new text added by calling the following functions: 53 | "doc = Text_manipulator.addtext()" 54 | 55 | Incomplete instruction recording: 56 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 57 | "recorder = PDFbf.recorder(missing_information=str)" 58 | Where "missing_information" is a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no related part) or related part in the original instruction following with an "incomplete" in a brace (if there is some related description but some necessary information is missing) '''. 59 | The incomplete instruction can be recorded by calling the following function: 60 | "recorder.recording()" 61 | 62 | 63 | Here is an example. You should respond in the same way. 64 | User: The file name is "234_Manipdfgwulate_text.pdf". For the vertical text in the second box of the fifth page, change the words color to green, center align. For the text in the fourth box of the eighth page, change the font to "times-roman" and the font size to 14. Update the text in the sixth box of the twelfth page. 65 | Response: 66 | ```python 67 | 68 | import fitz 69 | import PDFbf 70 | 71 | #task 1 72 | #file 1 73 | filepath = "234_Manipdfgwulate_text.pdf" 74 | doc = fitz.open(filepath) 75 | 76 | annoextractor = PDFbf.extractanno(doc=doc) 77 | 78 | #page 1 79 | pagenumber0 = 4 80 | #rectangle 1 81 | rectangleorder0 = 1 82 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 83 | argument0 = dict(clip=rectangle0, rotate=90, textcolor="green", align=fitz.TEXT_ALIGN_CENTER) 84 | Text_manipulator0 = PDFbf.manipulate_text(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), rotate=argument0.get("rotate"), textcolor=argument0.get("textcolor"), align=argument0.get("align")) 85 | Text_manipulator0.text = Text_manipulator0.getvertext() 86 | doc = Text_manipulator0.addtext() 87 | 88 | #task 2 89 | #page 2 90 | pagenumber1 = 7 91 | #rectangle 2 92 | rectangleorder1 = 3 93 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 94 | argument1 = dict(clip=rectangle1, font="times-roman", fontsize=14) 95 | Text_manipulator1 = PDFbf.manipulate_text(doc=doc, pagenumber=pagenumber1, clip=argument1.get("clip"), font=argument1.get("font"), fontsize=argument1.get("fontsize")) 96 | Text_manipulator1.text = Text_manipulator1.gethortext() 97 | doc = Text_manipulator1.addtext() 98 | 99 | #task 3 100 | missing_information1 = '''file-level:234_Manipdfgwulate_text.pdf, page-level: 12, order-level: 6, base-level: missing''' 101 | recorder2 = PDFbf.recorder(missing_information=missing_information1) 102 | recorder2.recording() 103 | 104 | updatedfilepath = filepath[:-4] + "_updated.pdf" 105 | doc.save(updatedfilepath) 106 | ``` 107 | -------------------------------------------------------------------------------- /prompts/map_table.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter wants to change the position of a table on the target area boxed by rectangle markups. There are three operations that the drafter may want: translation, rotation, and scaling. The details defining the operation include operation type, direction, and magnitude. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the details for at least one of the three operations) from high to low. What needs to be made more clear is that the details defining each type of operation that can be considered as necessary information are: pairs of information consisting of the type of operation and the direction, translation-direction, rotation-direction, and scale-bigger or smaller, respectively. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks to complete the execution of the instruction. Don't assume the value of missing necessary information (only the magnitude of operation can be assumed), but identify tasks lacking necessary information, record the missing details, and execute the task that all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for changing a table position by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Changing position: Create a table projector instance and change position. 13 | 6. Save the file. 14 | 15 | Available tools: 16 | Two constant defined by the user: 17 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 18 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 19 | 20 | Argument collection: 21 | Extract the argument values from the instruction and store them as a dictionary for the table position-changing task. Here is an example dictionary including all possible attributes: 22 | {"clip": a rectangle boxes the text that need to be dealt with; "move": a list refers to translation details, general form: [move in its right direction (positive value if move right, negative value if move left), move in its upper direction (positive value if move up, negative value if move down), reference_point], "rotation": a list refers to the rotation details, general form: ['r', rotation degrees in the clockwise direction (positive value in clockwise direction, negative value in counterclockwise direction),reference_point], if the direction of rotation is not specified, clockwise is considered; "scal": a list refers to the scaling, general form: [‘sc’, [zoom factor X direction, zoom factor Y direction], reference_point]} 23 | 24 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 25 | "import fitz 26 | import PDFbf" 27 | 28 | File open: 29 | You can open the file by calling the following function: 30 | "doc = fitz.open("filepath")" Open the file with the file path. 31 | 32 | Save the updated file: 33 | You can save the file with an updated name by calling the following function: 34 | " 35 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 36 | doc.save(updated_file) 37 | " 38 | 39 | Target position extraction: 40 | An annotation extractor is instantiated by providing the essential arguments document(doc): 41 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 42 | Get a rectangle and its reference points: 43 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 44 | 45 | Changing position: 46 | A projector is instantiated by specifying the essential arguments (document(doc), page number(pagenumber), clip) and optional arguments (movement of translation (move), rotation(rotation) and scaling(scal)) like this: 47 | "table_projector=PDFbf.Projector(doc=fitz.document,pagenumber=int,clip=rectangle,move=[move_in_right_direction, move_in_up_direction, reference_point],rotation=['r', rotation degrees in clockwise direction,reference_point],scal=['sc',[zoom factor in X direction, zoom factor in Y direction], reference_point])" 48 | You can get a doc with a table mapped by calling following functions: 49 | "doc = table_projector.project()" 50 | 51 | Incomplete instruction recording: 52 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 53 | "recorder = PDFbf.recorder(missing_information=str)" 54 | Where "missing_information" is a string indicating the missing information, the standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: the relevant part of the original instruction, missing if there is no such part, fill in the relevant part of the original instruction following an 'incomplete' in brackets if there is a relevant part but not enough necessary information'''. 55 | The incomplete instruction can be recorded by calling the following function: 56 | "recorder.recording()" 57 | 58 | Here is an examples. You should respond in the same way. 59 | 60 | User: The file name is "47_mapqweqping_table.pdf". Move the table in the first box on page 5 20 units left,30 units up. Rotate another table 45 degrees clockwise. Move the table in the second box of the tenth page 10 units left, 20 units down. 61 | Response: 62 | ```python 63 | import fitz 64 | import PDFbf 65 | 66 | #Task 1 67 | #File 1 68 | filepath = "47_mapqweqping_table.pdf" 69 | doc = fitz.open(filepath) 70 | 71 | annoextractor = PDFbf.extractanno(doc=doc) 72 | 73 | 74 | # page 5 75 | pagenumber0 = 4 76 | #rectangle 1 77 | rectangleorder0 = 0 78 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 79 | #Mapping 1 80 | argument0 = dict(clip=rectangle0, move=[-20, 30, rfpoint0]) 81 | table_project0 = PDFbf.Projector(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), move=argument0.get("move")) 82 | doc = table_project0.project() 83 | 84 | #Task 2 85 | # Recording the missing information for Task 2 (not enough details for rotating a table) 86 | missing_information1 = '''file-level:47_mapqweqping_table.pdf, page-level: missing, order-level: missing, base-level: rotate 45 degrees clockwise''' 87 | 88 | recorder1 = PDFbf.recorder( 89 | missing_information=missing_information1 90 | ) 91 | recorder1.recording() 92 | 93 | #Task 3 94 | #Page 10 95 | pagenumber1 = 9 96 | #rectangle 2 97 | rectangleorder1 = 1 98 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 99 | # Mapping 3 100 | argument2 = dict(clip=rectangle1, move=[-10, -20, rfpoint1]) 101 | table_project2 = PDFbf.Projector(doc=doc, pagenumber=pagenumber1, clip=argument1.get("clip"), move=argument1.get("move")) 102 | doc = table_project2.project() 103 | 104 | updatedfilepath = filepath[:-4] + "_updated.pdf" 105 | doc.save(updatedfilepath) 106 | ``` 107 | -------------------------------------------------------------------------------- /prompts/add_text.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter wants to insert text on the target position boxed by rectangle markups. One string of text is seen as one task. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the text string) from high to low. The high-level information can correspond to multiple low-level information. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks to complete the execution of the instruction. Don't make assumptions if the previous four necessary information are missing, but you can assume a rational value according to your knowledge if other information is missing. Don't assume the value of necessary information, but identify tasks lacking necessary information, record the missing details, and execute the task only once all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | 6 | Task plan: 7 | The following is a standard process for completing a task: 8 | 9 | 0. Incomplete information recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the remaining operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information can be found. 10 | 1. Import libraries 11 | 2. Open the file. 12 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 13 | 4. Argument collection: Prepare parameters for adding a text string by modifying the argument's value specified by the drafter's instructions into a dictionary. 14 | 5. Text adding: Create a text manipulator instance and add text. 15 | 6. Save the file you have opened as a new file with an updated name. 16 | 17 | Available tools: 18 | Two constant defined by the user: 19 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 20 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 21 | 22 | Argument collection: 23 | Extract the argument values from the instruction and store them as a dictionary for the text manipulator instance. Here is an example dictionary including all possible attributes: 24 | argument = {"Clip": extracted target rectangle; "text": a string of new texts, "font": a string refers to the font of the text; "fontsize": an int refers to the font size; "textcolor": a string refers to the color of the text; "rotate": an int in 90, 180, 270, refers to the rotation of the text, "align": a fitz_constant selected from ```fitz.TEXT_ALIGN_RIGHT``` or ```fitz.TEXT_ALIGN_LEFT``` or ```fitz.TEXT_ALIGN_CENTER``` or ```fitz.TEXT_ALIGN_JUSTIFY``` refers to the alignment of the text} 25 | The drafter will selectively specify parameters such as font, font size, text color, rotation, alignment, etc. as needed, so please create a dictionary for each task that matches the description. 26 | 27 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 28 | "import fitz 29 | import PDFbf" 30 | 31 | File open: 32 | You can open the file by calling the following function: 33 | "doc = fitz.open("filepath")" Open the file with the file path. 34 | 35 | Save the updated file: 36 | You can save the file with an updated name by calling the following function: 37 | " 38 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 39 | doc.save(updated_file) 40 | " 41 | 42 | Target position extraction: 43 | An annotation extractor is instantiated by providing the essential arguments document(doc): 44 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 45 | Get a rectangle and its reference points: 46 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 47 | 48 | 49 | Text adding: 50 | A text manipulator is instantiated by specifying the essential arguments (document(doc), pagenumber, clip, text) and mentioned optional arguments (font, fontsize, text color, rotate, align). Here is an example with all possible arguments are mentioned: 51 | "Text_manipulator=PDFbf.manipulate_text(doc=fitz.document, pagenumber=int, clip=rectangle, text=string, font=string, fontsize=int, textcolor=string, rotate=int, align=fitz_constant)" 52 | You can get a doc with texts added by calling the following function: 53 | "doc=Text_manipulator.addtext()" 54 | 55 | Incomplete instruction recording: 56 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 57 | "recorder = PDFbf.recorder(missing_information=str)" 58 | Where "missing_information" is a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no realted part) or related part in the original instruction following with an "incomplete" in brack (if there is some related description but some necessary information is missing) '''. 59 | The incomplete instruction can be recorded by calling the following function: 60 | "recorder.recording()" 61 | 62 | Here is an example and you should respond in a similar way. 63 | User: The file name is "a123gfsdfd_text.pdf". In the second box of page three, add the text "Project Alpha" with font "times-roman", align left, and add text "Phase 1" with font "helv", rotation 0. In the fourth box of page five, add text with font courier and add the text "Approved" with text color blue, font size 14, align justify. 64 | 65 | Response: 66 | ```python 67 | 68 | import fitz 69 | import PDFbf 70 | 71 | #file 1 72 | filepath = "a123gfsdfd_text.pdf" 73 | doc = fitz.open(filepath) 74 | 75 | annoextractor = PDFbf.extractanno(doc=doc) 76 | 77 | # page 3 78 | pagenumber0=2 79 | 80 | # rectangle 2 81 | rectangleorder0=1 82 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 83 | # text string 1 84 | argument0 = dict(clip=rectangle0, text="Project Alpha", font="times-roman", align=fitz.TEXT_ALIGN_LEFT) 85 | Text_manipulator0 = PDFbf.manipulate_text(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), text=argument0.get("text"), font=argument0.get("font"), align=argument0.get("align")) 86 | doc = Text_manipulator0.addtext() 87 | # text string 2 88 | argument1 = dict(clip=rectangle0, text="Phase 1", font="helv", rotate=0) 89 | Text_manipulator1 = PDFbf.manipulate_text(doc=doc, pagenumber=pagenumber0, clip=argument1.get("clip"), text=argument1.get("text"), font=argument1.get("font"), rotate=argument1.get("rotate"), align=argument1.get("align")) 90 | doc = Text_manipulator1.addtext() 91 | 92 | # Page 5 93 | pagenumber1=4 94 | # rectangle 4 95 | rectangleorder1=3 96 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 97 | 98 | # text string 3 base_level missing 99 | missing_inf2 = '''file-level: a123gfsdfd_text.pdf, page-level: 5, order-level: 4, base-level: add text with font courier(incomplete)''' 100 | recorder2 = PDFbf.recorder(missing_information=missing_inf2) 101 | recorder2.recording() 102 | 103 | # text string 4 104 | argument3 = dict(clip=rectangle1, text="Approved", textcolor="blue", fontsize=14, align=fitz.TEXT_ALIGN_JUSTIFY) 105 | Text_manipulator3 = PDFbf.manipulate_text(doc=doc, pagenumber=pagenumber1, clip=argument3.get("clip"), text=argument3.get("text"), textcolor=argument3.get("textcolor"), fontsize=argument3.get("fontsize"), align=argument3.get("align")) 106 | doc = Text_manipulator3.addtext() 107 | 108 | updatedfilepath = filepath[:-4] + "_updated.pdf" 109 | doc.save(updatedfilepath) 110 | ``` 111 | -------------------------------------------------------------------------------- /prompts/add_table.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter is trying to insert a table on the target area boxed by rectangle markups. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the content of a table) from high to low. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks to complete the execution of the instruction. Don't assume the value of missing necessary information, but identify tasks lacking necessary information, record the missing details, and execute the task that all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 0.1 Import libraries 9 | 1. Open the file. 10 | 2. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 3. Modify the task instructions into dictionaries. Each dictionary contains the arguments for a table-adding mission. 12 | 4. Argument collection: Prepare parameters for adding a table by modifying the argument's value specified by the drafter's instructions into a dictionary. 13 | 5. Table adding: Create a table manipulator instance and add the table. 14 | 6. Save the file you have opened as a new file with an updated name. 15 | 16 | 17 | Available tools: 18 | Two constants defined by the user: 19 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 20 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 21 | 22 | 23 | Argument collection: 24 | Extract the argument values from the instruction and store them as a dictionary for the text manipulator instance. Here is an example dictionary including all possible attributes: 25 | argument = {“clip”: a rectangle refers to the target position; “data”: a list contains the content of each row, general form: [[row1 content],[row2 content], ..., [ith row content]]; “arrange”: a list of row height and column width, general form:[[row height],[column width]], if only row height is defined, it is noted as [[row height], None], and vice versa as [None,[column width]].; “font”: a string refers to the font of the text; “fontsize”: an int refers to the size of text } 26 | The drafter will selectively specify parameters such as font, font size etc. as needed, so please create a dictionary for each task that matches the description. 27 | 28 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 29 | "import fitz 30 | import PDFbf" 31 | 32 | File open: 33 | You can open the file by calling the following function: 34 | "doc = fitz.open("filepath")" Open the file with the file path. 35 | 36 | Save the updated file: 37 | You can save the file with an updated name by calling the following function: 38 | " 39 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 40 | doc.save(updated_file) 41 | " 42 | 43 | Target position extraction: 44 | An annotation extractor is instantiated by providing the essential arguments document(doc): 45 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 46 | Get a rectangle and its reference points: 47 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 48 | 49 | Table adding: 50 | A table adder is instantiated by specifying the essential arguments (document(doc), page number(pagenumber), clip, data) and mentioned optional arguments (arrange, font, fontsize) on demand like this: 51 | “Table_adder = PDFbf.manipulate_table(doc=fitz.document, pagenumber=int, clip=rectangle, data=list, arrange=list, font=string, fontsize=int)” 52 | You can get a doc with a table added by calling the following function: 53 | “doc = Table_adder.addtable()” 54 | 55 | Incomplete instruction recording: 56 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 57 | "recorder = PDFbf.recorder(missing_information=str)" 58 | Where "missing_information" is a string indicating the missing information, the standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: the relevant part of the original instruction, missing if there is no such part, fill in the relevant part of the original instruction and mark it as 'incomplete' in brackets if there is a relevant part but not enough necessary information'''. 59 | The incomplete instruction can be recorded by calling the following function: 60 | "recorder.recording()" 61 | 62 | 63 | Here is an example: 64 | 65 | User: The file name is “af35dsdd_table.pdf”. Add tables in the first box on the second page. The first table: table content is: first row: "A, B, C", second row: "1, 2, 3", third row: "X, Y, Z". Font size 12. Row height: [30, 18, 18].The second table: table content is: first row: "Name, Age, Gender", second row: "Alice, 30, F", third row: "Bob, 25, M". Row height: [20, 15, 15]. Column width 50. 66 | Add tables in the second box on page 4. The first table content is: first row: "Item, Price", second row: "Apple, 1.0", third row: "Banana, 0.5". Font size 14. Column width: [20, 20]. The second table content is: first row: "Date, Time", second row: "2023-01-01, 12:00", third row: "2023-01-02, 13:00". Row height 20, column width: 60. The third table content is: 67 | 68 | Response: 69 | ```python 70 | 71 | import fitz 72 | import PDFbf 73 | 74 | #Task 1 75 | #file 1 76 | filepath = "af35dsdd_table.pdf" 77 | doc = fitz.open(filepath) 78 | 79 | annoextractor = PDFbf.extractanno(doc=doc) 80 | 81 | #page 1 82 | pagenumber0 = 1 83 | #rectangle 1 84 | rectangleorder0 = 0 85 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 86 | #table 1 87 | argument0 = dict(clip=rectangle0, data=[["A", "B", "C"], ["1", "2", "3"], ["X", "Y", "Z"]], arrange=[[30, 18, 18], None], fontsize=12) 88 | table_adder0 = PDFbf.manipulate_table(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), data=argument0.get("data"), arrange=argument0.get("arrange"), fontsize=argument0.get("fontsize")) 89 | doc = table_adder0.addtable() 90 | #Task 2 91 | #table 2 92 | argument1 = dict(clip=rectangle0, data=[["Name", "Age", "Gender"], ["Alice", "30", "F"], ["Bob", "25", "M"]], arrange=[[20, 15, 15], [50, 50, 50]]) 93 | table_adder1 = PDFbf.manipulate_table(doc=doc, pagenumber=pagenumber0, clip=argument1.get("clip"), data=argument1.get("data"), arrange=argument1.get("arrange")) 94 | doc = table_adder1.addtable() 95 | 96 | #Task 3 97 | #page 2 98 | pagenumber1 = 3 99 | #rectangle 2 100 | rectangleorder1 = 1 101 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 102 | #table 3 103 | argument2 = dict(clip=rectangle1, data=[["Item", "Price"], ["Apple", "1.0"], ["Banana", "0.5"]], arrange=[None,[20, 20]], fontsize=14) 104 | table_adder2 = PDFbf.manipulate_table(doc=doc, pagenumber=pagenumber1, clip=argument2.get("clip"), data=argument2.get("data"), arrange=argument2.get("arrange"), fontsize=argument2.get("fontsize")) 105 | doc = table_adder2.addtable() 106 | #Task 4 107 | #table 4 108 | argument3 = dict(clip=rectangle1, data=[["Date", "Time"], ["2023-01-01", "12:00"], ["2023-01-02", "13:00"]], arrange=[[20, 20, 20], [60, 60]]) 109 | table_adder3 = PDFbf.manipulate_table(doc=doc, pagenumber=pagenumber1, clip=argument3.get("clip"), data=argument3.get("data"), arrange=argument3.get("arrange")) 110 | doc = table_adder3.addtable() 111 | 112 | # Task 5 113 | # Recording the missing information for Task 5 (not enough details for table adding) 114 | missing_information1 = '''file-level:af35dsdd_table.pdf, page-level: 4, order-level: 2, base-level: missing''' 115 | 116 | recorder1 = PDFbf.recorder( 117 | missing_information=missing_information1 118 | ) 119 | recorder1.recording() 120 | 121 | updatedfilepath = filepath[:-4] + "_updated.pdf" 122 | doc.save(updatedfilepath) 123 | ``` 124 | -------------------------------------------------------------------------------- /prompts/add_vector.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter is trying to add the vector graphics drawn manually as standard drawings. The vector graphics have a parent class called "drawings" and three son classes, which are "rebar," "pillar/column," "line". Add a class of vector graphics is seen as a task. The user will specify the target class he wants to deal with. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level(providing the annotation color) and base level(providing the vector class and format of new vectors) from high to low. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. What needs to be made more clear is that, the details defining a parent class that can be considered necessary information are: the color of annotation it is. You need to find the four level information corresponding to each task from an instruction that may contain multiple tasks. Don't assume its value for necessary information if the command doesn't mention it or describe it vaguely. For all the tasks in commands, execute the tasks where all essential information is completely specified, while for tasks lacking necessary information, record the missing details. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Argument collection: Modify the value of the argument specified by user instructions into dictionaries. Each dictionary contains the arguments for each stroke-adding action. 11 | 4. Get target class of vector graphics 12 | 4.1 Parent Class Extraction: If a parent class("drawings") is asked to be added, you need to extract the parent class ("drawings") by creating a parent class extractor and then extract the parent class ("drawings"). 13 | 4.2 Select Son Class: If a son class ("rebar", "pillar/column", "line", "rebars and columns") is asked to be added, you need to extract the parent class ("drawings") by creating a parent class extractor and then extract the parent class ("drawings") with the same "pagenumber" and "annocolor" as a prerequisite class. And then, create a son class selector, and select the target son classes ("rebar", "pillar/column", "line", "rebars and columns") from the prerequisite class. 14 | 5. Strokes adding: Create a stroke manipulator with the target class of vector graphics and add them as standard strokes with the stroke manipulator for each action. 15 | 6. Save the file. 16 | 17 | Available tools: 18 | Two constant defined by the user: 19 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 20 | 21 | Argument collection: 22 | Extract the argument values from the instruction and store them as a dictionary for the vector graphics adding task. Here is an example dictionary including all possible attributes: 23 | {"annocolor": a string indicating the color of the annotation needed to be extracted; "drwcolor": a string indicating the color of the strokes when redrawing them as standard strokes; "width": an int indicating the line width of strokes when redrawing them as standard strokes} 24 | 25 | The target class of vector graphics should be named in this format: specified color + page number + class name: For example "C_4_rebars" means the rebar in color C on page 5. "A_3_columns" means the columns in color A on page 4. "T_0_lines" means the lines in color T on page 0. 26 | 27 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 28 | "import fitz 29 | import PDFbf" 30 | You have to import the library befor you using the following tools provided. 31 | 32 | File open: 33 | You can open the file by calling the following function: 34 | "doc = fitz.open("filepath")" Open the file with the file path. 35 | 36 | Save the updated file: 37 | You can save the file with an updated name by calling the following function: 38 | " 39 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 40 | doc.save(updated_file) 41 | " 42 | 43 | Parent Class Extraction: 44 | A parent class extractor can be instantiated by specifying the document(doc), page number(pagenumber), and annotation color(annocolor) like this: 45 | "anno_extractor = PDFbf.extractanno (doc=fitz.document, pagenumber=int, annocolor=string)" 46 | You can extract a list of the parent class ("drawings") vector graphics in the annotation by calling the following function: 47 | "color_pagenumber_drawings = anno_extractor.anno()"; 48 | 49 | Select Son Class: 50 | A son classes selector can be instantiated by specifying the document(doc), page number(pagenumber), and the parent class with the same specified color and pagenumber of target son class(cdrawings) to extract a son classes ("rebars", "columns", "lines") with specified color and pagenumber, like this: 51 | "color_pagenumber_son_classes_selector=PDFbf.select_from_drawings(doc=fitz.document, pagenumber=int, cdrawings=color_pagenumber_drawings)" 52 | You can get a son classes ("rebars", "columns", "lines") with specified color on specified page from the parent class ("drawings") with the same specified color and page by calling the following functions: 53 | "color_pagenumber_rebars=color_pagenumber_son_classes_selector.mode2_rebars_Cross_Touch_Intersect()", get a "rebars" son class with specified color on a specified page; 54 | "color_pagenumber_columns=color_pagenumber_son_classes_selector.mode2_columns_Cross_Touch_Intersect()", get a "columns" son class with specified color on a specified page; 55 | "color_pagenumber_lines=color_pagenumber_son_classes_selector.mode2_lines_Cross_Touch_Intersect()", get a "lines" son class with specified color on the specified page. 56 | "color_pagenumber_rebar_and_column=color_pagenumber_son_classes_selector.get_rebar_column()", get a "rebars and columns" son class with specified color on the specified page. 57 | 58 | Strokes adding: 59 | A stoke manipulator can be instantiated by specifying the document(doc), page number(pagenumber), a list of the target class of vector graphics(sel_drawings) and optional arguments(stroke colors(drwcolor), line width of stroke(width)) like this: 60 | "drawings_manipulator=PDFbf.manipulate_draw(doc=fitz.document, pagenumber=int, sel_drawings=list, drwcolor=string, width=int)" 61 | You can get a doc with target vector graphics added by calling the following function: 62 | "doc=drawings_manipulator.add_standrawing()" 63 | 64 | Incomplete instruction recording: 65 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 66 | "recorder = PDFbf.recorder(missing_information=str)" 67 | Where "missing_information" a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no realted part) or related part in the original instruction following with an "incompleted" in brack (if there is some related description but some necessary information is missing) '''. 68 | The incomplete instruction can be recorded by calling the following function: 69 | "recorder.recording()" 70 | 71 | Here is an example of executing the user's instructions. You should respond in the same way. 72 | 73 | User: The file name is "sdh.pdf". On the first page, add rebars marked by the red annotation, line width 3; Add drawings marked by blue annotation. The line color is green, line width is 2. On the third page, add columns. Line width is 4. 74 | Response: 75 | ```python 76 | 77 | import fitz 78 | import PDFbf 79 | 80 | #Task 1 81 | #File 1 82 | filepath = "sdh.pdf" 83 | doc = fitz.open(filepath) 84 | #Page 1 85 | pagenumber0 = 0 86 | #Target class 1 87 | argument0 = dict(annocolor="red", width=3) 88 | annoextractor0 = PDFbf.extractanno(doc=doc, pagenumber=pagenumber0, annocolor=argument0.get("annocolor")) 89 | red_0_drawings = annoextractor0.anno() 90 | red_0_son_classes_selector=PDFbf.select_from_drawings(doc=doc, pagenumber=pagenumber0, cdrawings=red_0_drawings) 91 | red_0_rebars=red_0_son_classes_selector.mode2_rebars_Cross_Touch_Intersect() 92 | drawings_manipulator0 = PDFbf.manipulate_draw(doc=doc, pagenumber=pagenumber0, sel_drawings=red_0_rebars, width=argument0.get("width")) 93 | doc = drawings_manipulator0.add_standrawing() 94 | 95 | #Task 2 96 | #Target class 2 97 | argument1 = dict(annocolor="blue", drwcolor="green", width=2) 98 | annoextractor1 = PDFbf.extractanno(doc=doc, pagenumber=pagenumber0, annocolor=argument1.get("annocolor")) 99 | blue_0_drawings = annoextractor1.anno() 100 | drawings_manipulator1 = PDFbf.manipulate_draw(doc=doc, pagenumber=pagenumber0, sel_drawings=blue_0_drawings, drwcolor=argument1.get("drwcolor"), width=argument1.get("width")) 101 | doc = drawings_manipulator1.add_standrawing() 102 | 103 | #Task 3 104 | #Lacking necessary information 105 | missing_information1='''file-level: sdh.pdf, page-level:3, order-level: missing, base-level: add columns. Line width is 4''' 106 | recorder1 = PDFbf.recorder(missing_information=missing_information1) 107 | recorder1.recording() 108 | 109 | updatedfilepath = filepath[:-4] + "_updated.pdf" 110 | doc.save(updatedfilepath) 111 | ``` 112 | -------------------------------------------------------------------------------- /prompts/revise_text.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter wants to edit the text content on the target area boxed by rectangle markups. Editing text on a target position is seen as a task. There are two possible operations must be select one: deleting and replacing. There are also format attributes that can be optionally specified on demand: the font, font size, alignment, text color, and rotation. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the necessary information of the operation) from high to low. The first three levels of information determine the target position, while the base level describes the operation. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. You need to find the four level information corresponding to each task from an instruction that may contain multiple tasks. Don't assume its value for necessary information if the command doesn't mention it or describe it vaguely. For all the tasks in commands, execute the tasks where all essential information is completely specified, while for tasks lacking necessary information, record the missing details. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | 6 | Task plan: 7 | The following is a standard process for completing a task: 8 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 9 | 1. Import libraries 10 | 2. Open the file. 11 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 12 | 4. Argument collection: Prepare parameters for updating text content by modifying the argument's value specified by the drafter's instructions into a dictionary. 13 | 5. Update text content: Create a text manipulator instance for each text editing task, then extract the original text, revise the text content, and add new text to cover the original text. 14 | 6. Save the file. 15 | 16 | Available tools: 17 | Two constant defined by the user: 18 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 19 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 20 | 21 | Argument collection: 22 | Extract the argument values from the instruction and store them as a dictionary for the text editing task. Here is an example dictionary including all possible attributes: 23 | {"clip": a rectangle boxes the text that needed to be revised; "font": a string refers to the font of the text; "fontsize": an int refers to the font size; "align": a constant get by ```fitz.TEXT_ALIGN_RIGHT``` or ```fitz.TEXT_ALIGN_LEFT``` or ```fitz.TEXT_ALIGN_CENTER``` or ```fitz.TEXT_ALIGN_JUSTIFY``` refers to the alignment of the text in box; "rotate": an int in 0, or 90 refers to the direction of the original text to be edited, 0 means horizontal text, 90 means vertical text, if the direction of the text is not mentioned, the text is horizontal;} 24 | 25 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 26 | "import fitz 27 | import PDFbf" 28 | 29 | File open: 30 | You can open the file by calling the following function: 31 | "doc = fitz.open("filepath")" Open the file with the file path. 32 | 33 | Save the updated file: 34 | You can save the file with an updated name by calling the following function: 35 | " 36 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 37 | doc.save(updated_file) 38 | " 39 | Target position extraction: 40 | An annotation extractor is instantiated by providing the essential arguments document(doc): 41 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 42 | Get a rectangle and its reference points: 43 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 44 | 45 | 46 | Update text content: 47 | A text manipulator is instantiated by specifying arguments (document(doc), page number(pagenumber), clip) and optional arguments (font, fontsize, text color, rotate, align) that mentioned like this: 48 | "Text_manipulator=PDFbf.manipulate_text(doc=fitz.document,pagenumber=int,clip=rectangle,font=string,fontsize=int,textcolor=string,rotate=int,align=fitz.TEXT_ALIGN_LEFT)" 49 | You can extract the original horizontal text by calling the following function: 50 | "Text_manipulator.text=Text_manipulator.gethortext()" 51 | You can get a string of original vertical text by calling the following function: 52 | "Text_manipulator.text=Text_manipulator.getvertext()" 53 | 54 | Here are some arguments that should be specified when you want to delete or replace specified texts from the original texts: 55 | "deltex": a string refers to the text the user wants to delete from the original text; 56 | "retext": a string refers to the text the user wants to replace from the original text; 57 | "totext": a string refers to the text the user wants to use to replace the original text; 58 | 59 | You can delete specified text from the string of original text by defining the text to delete(deltex), and calling the following function: 60 | "Text_manipulator.text = Text_manipulator.deletetext(deltex=string)" 61 | "Text_manipulator.text = Text_manipulator.deletetext(deltex="target text")" delete the text "target text". 62 | "Text_manipulator.text = Text_manipulator.deletetext()" delete all the text if a delete action is required with no deltext specified. 63 | 64 | You can replace specified text with certain text for the string of original text by defining the text to replace(retext), text used to replace(totext) and calling the following function: 65 | "Text_manipulator.text = Text_manipulator.replacetext(retext=string, totext=string)" 66 | "Text_manipulator.text = Text_manipulator.replacetext(retext="old", totext="new")" replace the text "old" using text "new". 67 | "Text_manipulator.text = Text_manipulator.replacetext(totext="1236")" replace all the text using text "1236" if a replacement action is required with no retext specificed. 68 | You can get a doc with original text covered by manipulated new text added by calling the following functions: 69 | "doc = Text_manipulator.addtext()" 70 | 71 | Incomplete instruction recording: 72 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 73 | "recorder = PDFbf.recorder(missing_information=str)" 74 | Where "missing_information" is a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no related part) or related part in the original instruction following with an "incomplete" in a brace (if there is some related description but some necessary information is missing) '''. 75 | The incomplete instruction can be recorded by calling the following function: 76 | "recorder.recording()" 77 | 78 | Here is an example of executing the user's instructions. You should respond in the same way to execute the user's new instructions. 79 | 80 | User: The file name is "42Del9iutete_text.pdf". For the text in the second box of page 5, delete "temporary and text". Replace the text with "assignment". Use left align. Font size 8. 81 | For the text in the fourth box of page 12, replace "manager" with "supervisor". Delete "completed" and "new". 82 | For the text in page 18, replace "due" with "scheduled". Delete "soon". Font size 12. 83 | 84 | Response: 85 | ```python 86 | 87 | import fitz 88 | import PDFbf 89 | 90 | #Task 1 91 | #File 1 92 | filepath = "42Del9iutete_text.pdf" 93 | doc = fitz.open(filepath) 94 | annoextractor = PDFbf.extractanno(doc=doc) 95 | #Page 1 96 | pagenumber0 = 4 97 | #Rectangle 1 98 | rectangleorder0 = 1 99 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 100 | #Operation 1 101 | argument0 = dict(clip=rectangle0, align=fitz.TEXT_ALIGN_LEFT, fontsize=8) 102 | Text_manipulator0 = PDFbf.manipulate_text(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), align=argument0.get("align"), fontsize=argument0.get("fontsize")) 103 | Text_manipulator0.text = Text_manipulator0.gethortext() 104 | Text_manipulator0.text = Text_manipulator0.deletetext(deltex="temporary and text") 105 | Text_manipulator0.text = Text_manipulator0.replacetext(totext="assignment") 106 | doc = Text_manipulator0.addtext() 107 | 108 | #Task 2 109 | #Page 2 110 | pagenumber1 = 11 111 | #Rectangle 2 112 | rectangleorder1 = 3 113 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 114 | #Operation 2 115 | argument1 = dict(clip=rectangle1) 116 | Text_manipulator1 = PDFbf.manipulate_text(doc=doc, pagenumber=pagenumber1, clip=argument1.get("clip")) 117 | Text_manipulator1.text = Text_manipulator1.gethortext() 118 | Text_manipulator1.text = Text_manipulator1.replacetext(retext="manager", totext="supervisor") 119 | Text_manipulator1.text = Text_manipulator1.deletetext(deltex="completed") 120 | Text_manipulator1.text = Text_manipulator1.deletetext(deltex="new") 121 | doc = Text_manipulator1.addtext() 122 | 123 | #Task 3 124 | #Lacking necessary information 125 | missing_information2='''file-level:42Del9iutete_text.pdf,page-level:18,order-level:missing,base-level:replace "due" with "scheduled". Delete "soon". Font size 12.''' 126 | recorder2 = PDFbf.recorder(missing_information=missing_information2) 127 | recorder2.recording() 128 | 129 | updatedfilepath = filepath[:-4] + "_updated.pdf" 130 | doc.save(updatedfilepath) 131 | ``` 132 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # :wrench: DrafterBench 2 | This repository is the official implementation of DrafterBench. We provide evaluation data, codes, and a brief introduction. 3 | 4 | ![Static Badge](https://img.shields.io/badge/Code_License-MIT_License-blue) ![Static Badge](https://img.shields.io/badge/Linux_%2F_OSX-Passing-green) ![Static Badge](https://img.shields.io/badge/Window-Failing-red) ![Static Badge](https://img.shields.io/badge/python-3.11-purple) 5 | 6 | 7 | --- 8 | 9 | ## :star: Introducing DrafterBench 10 | 11 | The DrafterBench is designed to evaluate large language models (LLMs) as an agent to automate monotonous, low-tech, and high-labor-intensity tasks in industry. Our initiative is drawing revision, which is a representation task in civil engineering that urgently needs to be automated. We took the following workflow to simulate the working scenario and evaluate the strengths and limitations of LLMs as automation agents. 12 | 13 | ![Automation Workflow](/figures/Workflow.png "Automation Workflow") 14 | 15 | After the stage of preprocessing, the drawing revision tasks (summarized from the real world, totalling 1920 across 12 types) are converted into natural language processing (NLP) tasks to evaluate complex function calls instructed by intricate and lengthy content commands. We designed over 40 drawing revision tools and provided them to LLMs, which play different functions. Some of them aim to make visible changes to drawings, while the others serve necessary preparations for them (e.g., opening the file or transferring critical arguments). It's difficult to determine whether the tools called are effective and functioning properly from the revised drawings, especially when checking if there are redundant or duplicated invisible tools. Therefore, to accurately evaluate the models‘ performance, we score their responses based on the operation chains rather than the revised drawing results. 16 | 17 | To record the operation chains, we prepared dual functions for the tools provided to the LLMs. Each dual function has the same name, input, and output type as the original tools, and its function is to capture the operations and valuable data in a well-structured JSON format (e.g., argument value, data type, etc.). During the working of the benchmark, the original tools called by the models will be replaced with dual functions to record the operation chains and help the final assessment. 18 | 19 | There are four essential capabilities evaluated by DrafterBench: 20 | - **Structured data understanding** 21 | - **Function execution** 22 | - **Instruction following** 23 | - **Critical reasoning** 24 | 25 | ![Capabilities Illustration](/figures/Capabilities.png "Capabilities Illustration") 26 | 27 | ## :ski: Table of Contents 28 | 29 | - [Dataset Summary](#dataset-summary) 30 | - [Quick Start](#quick-start) 31 | - [LeaderBoard](#leaderboard) 32 | 33 | --- 34 | 35 | ## :clipboard: Dataset Summary 36 | 37 | The DrafterBench is constructed on tasks over three object elements, four operations, and six complexity controllers. 38 | 39 | | Elements | Operations | Complexity Controllers | Capacities Investigated by Various Complexity | 40 | |------------------|-------------------------|----------------------------------------------|-------------------------------------------------------| 41 | | Text | Add new content |Language style (Structured/Unstructured) |Structured data understanding | 42 | | Table | Revise content |Task categories |Function execution | 43 | | Vector entity | Change position |Objects per instruction (Single/Multiple) |Instruction following | 44 | | | Update format |Operations per object (Single/Multiple) |Instruction following | 45 | | | |Instruction completeness (Complete/Incomplete)|Critical reasoning | 46 | | | |Detail ambiguity (Precise/Vague) |Critical reasoning | 47 | 48 | The dataset is [available here](https://huggingface.co/datasets/Eason666/DrafterBenchmark) on Huggingface. 49 | 50 | ## :fire: Quick Start 51 | 52 | ### Preparation 53 | 54 | First, configure an environment with Python 3.11 and download the repositories. 55 | 56 | ```shell 57 | git clone https://github.com/Eason-Li-AIS/DrafterBench.git 58 | cd DrafterBench 59 | ``` 60 | 61 | Then, install the dependencies. 62 | 63 | ```shell 64 | pip install -e . 65 | ``` 66 | 67 | ### Serve Model 68 | - For API calling, set up your OpenAI / Anthropic / Google / Mistral / Deepinfra / AnyScale or other API keys as environment variables. 69 | 70 | ```shell 71 | OPENAI_API_KEY=... 72 | ANTHROPIC_API_KEY=... 73 | GOOGLE_API_KEY=... 74 | MISTRAL_API_KEY=... 75 | DEEPINFRA_API_KEY=... 76 | HUGGINGFACE_TOKEN=... 77 | ``` 78 | - For customized model, provide your vllm url when running evaluation.py 79 | 80 | ```shell 81 | --vllm_url http://xx.xx.xx.xx:8000/v1 82 | ``` 83 | 84 | ### Run evaluation 85 | Specify the --model and --model-provider flags to run DrafterBench. The supported models and providers are [available here](https://docs.litellm.ai/docs/providers). You can name your experiment with the --exp_name flag, or it will be set as "model+time+task_group" by default. 86 | ```shell 87 | python evaluation.py --model gpt-4o-2024-08-06 --model-provider openai --temperature 0.0 88 | ``` 89 | 90 | - To run tasks of a specific set, use the --task_group flag. You can choose each set in ["Structured", "Unstructured", "Precise", "Vague", "Complete", "Error", "Single_Object", "Multiple_Objects", "Single_Operation", "Multiple_Operations"]. For example: 91 | 92 | ```shell 93 | python evaluation.py --model gpt-4o-2024-08-06 --model-provider openai --task_group Structured 94 | ``` 95 | This command will run only the tasks in a structured language. The default task group is "All" tasks. 96 | 97 | - To have a clear view of the result, you can set up your huggingface token, 98 | ```shell 99 | HUGGINGFACE_TOKEN=... 100 | ``` 101 | then use the --huggingface_user_name flag to provide your Huggingface user name. Our benchmark will create a new dataset repository with the --exp_name and push the results to it. This repository is private by default, you can create a public repository by setting the --huggingface_private flag to False. 102 | ```shell 103 | python evaluation.py --model gpt-4o-2024-08-06 --model-provider openai --task_group Structured --huggingface_user_name XXXXX(Replace "XXXXX" with your Huggingface username) 104 | ``` 105 | - The default prompts for 12 tasks can be found in ./prompts. You are encouraged to develop your own prompts to achieve a higher score. To do so, simply replace the default prompts in .txt file with your new prompts. 106 | 107 | - In case the evaluation is unexpectedly interrupted, DrafterBench supports resuming from existing results. You can specify the result file for resuming in the --resume_from flag. Alternatively, you can set the --auto_resume flag to True, and DrafterBench will automatically search the result directory for the latest file that matches the model name and task group, and resume the remaining evaluation process. 108 | ```shell 109 | python evaluation.py --model gpt-4o-2024-08-06 --model-provider openai --task_group Structured --resume_from *****.json 110 | ``` 111 | ```shell 112 | python evaluation.py --model gpt-4o-2024-08-06 --model-provider openai --task_group Structured --auto_resume True 113 | ``` 114 | 115 | ## :mortar_board: LeaderBoard 116 | 117 | |Metric|o3-2025-04-16 (Mean/Var)|o4-mini-2025-04-16 (Mean/Var)|gpt-4.1-2025-0414 (Mean/Var)|gpt-4o-mini (Mean/Var)|o1-2024-12-17 (Mean/Var)|gpt-4o-2024-08-06 (Mean / Var)|claude-3.5-sonnet-2024-1022 (Mean / Var)|DeepSeek-V3-0324 (Mean / Var)|Qwen2.5-72B-Instruct (Mean / Var)|LLaMA3-70B-Instruct (Mean / Var)| 118 | | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: | 119 | |Structured language|85\.90/0.10|79\.99/0.44|79\.96/0.84|69\.22/0.02|80\.26/1.33|74\.95/1.85|74\.69/0.95|74\.52/0.70|73\.04/1.09|68\.82/2.36| 120 | |Unstructured language|86\.16/0.03|79\.87/0.07|80\.36/0.87|69\.24/0.02|80\.73/1.77|74\.95/2.04|76\.70/1.68|74\.51/0.25|72\.67/0.14|68\.23/2.03| 121 | |Precise detail|91\.27/0.01|88\.36/0.21|87\.07/0.49|73\.90/0.01|89\.86/0.40|80\.63/4.07|82\.75/1.99|78\.24/0.17|75\.03/0.05|71\.33/4.34| 122 | |Vague detail|80\.79/0.13|71\.50/0.37|73\.25/1.33|64\.57/0.02|70\.45/1.84|69\.27/0.55|69\.64/2.84|70\.79/0.58|70\.66/0.70|65\.68/0.93| 123 | |Complete instruction|87\.76/0.15|80\.14/0.66|81\.68/1.06|72\.70/0.04|79\.01/2.92|80\.06/1.99|84\.78/0.86|86\.16/0.79|87\.44/0.57|83\.64/5.82| 124 | |Incomplete (error) instruction|84\.31/0.07|79\.72/0.09|78\.64/0.74|65\.76/0.10|81\.97/0.59|71\.01/6.98|66\.86/2.91|62\.87/3.06|58\.26/0.49|53\.41/0.27| 125 | |Single object|87\.02/0.02|81\.07/0.31|80\.98/0.13|69\.79/0.02|81\.83/1.48|74\.53/6.06|73\.81/1.04|74\.05/0.73|73\.30/0.46|67\.28/2.97| 126 | |Multiple objects|85\.04/0.14|78\.79/0.06|79\.34/2.19|68\.67/0.14|79\.15/1.60|75\.37/0.20|78\.10/0.21|74\.98/0.52|72\.41/0.06|69\.77/1.31| 127 | |Single operation|86\.11/0.06|80\.01/0.20|81\.80/0.62|69\.88/0.01|81\.35/0.91|75\.79/1.91|75\.91/0.91|76\.73/0.15|75\.16/0.27|70\.85/2.17| 128 | |Multiple operations|85\.84/0.07|79\.75/0.69|76\.17/1.57|67\.66/0.14|78\.14/2.17|73\.00/1.81|75\.41/0.20|69\.33/2.69|67\.53/1.18|63\.14/1.75| 129 | |Average tasks|86\.03/0.05|79\.93/0.15|80\.16/0.85|69\.23/0.01|80\.49/1.53|74\.95/1.81|75\.85/0.36|74\.69/0.83|72\.85/0.16|68\.52/2.02| 130 | |Comprehensive rewards|84\.04/0.05|76\.80/0.23|77\.88/1.09|65\.42/0.02|78\.06/2.55|72\.24/2.33|73\.39/0.45|71\.74/0.81|69\.94/0.20|64\.96/2.44| 131 | 132 | Note: We have recently upgraded DrafterBench to be more challenging. Although the trend of models' ability is very consistent with the above leaderboard, some models may score lower than the records. 133 | The score on the leaderboard is the average of three independent runs. 134 | 135 | ## Citation 136 | 137 | If you use DrafterBench in your research, please consider citing our paper: 138 | 139 | ```bibtex 140 | @article{drafterbench, 141 | title={DrafterBenchmark: Benchmarking Large Language Models for Tasks Automation in Civil Engineering}, 142 | author={Yinsheng Li, Zhen Dong, Yi Shao.}, 143 | year={2025}, 144 | url={https://arxiv.org/abs/2507.11527}, 145 | } 146 | -------------------------------------------------------------------------------- /prompts/delete_vector.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter is trying to delete the vector graphics on the target area boxed by rectangle markups. The vector graphics have a parent class called "drawings" and three son classes, which are "rebar," "column," "line". There are two selection mode to select target class vectors, which are: 1. Window/Cover/Enclosure mode: the vector will be selected only if all vertices and boundaries of the graphic are covered within the rectangular checkbox, 2 Cross/Touch/Intersect mode: the vector will be selected if any of the vertices and a boundary of the vector are within the rectangular checkbox. Deleting vectors on a target position is seen as a task. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(the class(es) to be deleted) from high to low. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. What needs to be made more clear is that, the details defining a target class that can be considered necessary information are: the class and the selection mode. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks to complete the execution of the instruction. Don't assume the value of missing necessary information, but identify tasks lacking necessary information, record the missing details, and execute the task only once all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for deleting vectors on a target position by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Target class(es) selection: Create a drawing selector and select the specified vector graphics(drawings, lines, rebars, columns) with the specified selection mode by the selector. 13 | 6. Delete vectors: Create a drawing manipulator or cleaner instance and delete the target vectors. 14 | 7. Repair: Create a repairer instance if the repair action is asked for a target position, and repair vectors that should not be affected by the cleaner or manipulator; otherwise, skip this step. 15 | 8. Save the file. 16 | 17 | Available tools: 18 | Two constant defined by the user: 19 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 20 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 21 | 22 | Argument collection: 23 | Extract the argument values from the instruction and store them as a dictionary for the vector deleting task. Here is an example dictionary including all possible attributes: 24 | {"clip": a rectangle boxes the drawings that needed to be dealt with} 25 | 26 | 27 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 28 | "import fitz 29 | import PDFbf" 30 | You have to import the library befor you using the following tools provided. 31 | 32 | Target position extraction: 33 | An annotation extractor is instantiated by providing the essential arguments document(doc): 34 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 35 | Get a rectangle and its reference points: 36 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 37 | 38 | 39 | Target class(es) selection: 40 | A drawing selector can be instantiated by specifying the document(doc), page number(pagenumber), and clip like this: 41 | "selector = PDFbf.selector(doc=fitz.document, pagenumber=int, clip=rectangle)" 42 | The selector is to select a class with two optional selection modes. The class can be described by the type of vector graphics ("drawings", "lines", "rebars", or "columns"). There are two selection modes: 1. Window/Cover/Enclosure mode: the vector will be selected only if all vertices and boundaries of the graphic are covered within the rectangular checkbox, 2 Cross/Touch/Intersect mode: the vector will be selected if any of the vertices and a boundary of the vector are within the rectangular checkbox. If the selection mode is not mentioned, take selection mode 2 as default. 43 | Choose the selection specified or closest to the description of the subgroup. 44 | You can get a list of a subgroup with a specified selection mode by calling these functions: 45 | "covered_drawings = selector.mode1_drawings_Window_Cover_Enclosure()" select drawings with selection mode 1; 46 | "intersected_drawings = selector.mode2_drawings_Cross_Touch_Intersect()" select drawings with selection mode 2; 47 | "covered_lines = selector.mode1_lines_Window_Cover_Enclosure()" select lines with selection mode 1; 48 | "intersected_lines = selector.mode2_lines_Cross_Touch_Intersect()" select lines with selection mode 2; 49 | "covered_rebars= selector.mode1_rebars_Window_Cover_Enclosure()" select rebars with selection mode 1; 50 | "intersected_rebars = selector.mode2_rebars_Cross_Touch_Intersect()" select rebars with selection mode 2; 51 | "covered_columns = selector.mode1_columns_Window_Cover_Enclosure()" select columns with selection mode 1; 52 | "intersected_columns = selector.mode2_columns_Cross_Touch_Intersect()" select columns with selection mode 2; 53 | 54 | 55 | Delete vectors: 56 | 1. Delete selected class: 57 | A drawing manipulator is instantiated by specfying the document(doc), page number(pagenumber), and list of drawings(listofcdraw) like this: 58 | "drawing_manipulator = PDFbf.draw_drawer(doc= fitz.document, pagenumber=int, listofcdraw=list)" 59 | You can get a doc with the list of drawings deleted by calling following functions: 60 | "doc = drawing_manipulator.delete_draw()" 61 | 62 | 2. Delete indiscriminately: 63 | An indiscriminate cleaner is instantiated by specifying the document(doc), page number(pagenumber), and clip like this: 64 | "cleaner=PDFbf.delete(doc=fitz.document, pagenumber=int, clip=rectangle)" 65 | You can get a doc with everything in the clip covered by calling the following function: 66 | "doc = cleaner.applydelete()" 67 | 68 | Repair: 69 | A repairer is instantiated by specifying the document(doc), page number(pagenumber), clip, cdrawings(cdrawings) and optional specify selected drawing(drawings) like this: 70 | "repairer=PDFbf.repairer(doc= fitz.document,pagenumber=int,clip=rectangle, cdrawings=list, sel_drawings=list)" 71 | "cdrawings" can be obtained by calling the selector like this "selector.selected_lines", "sel_drawings" can be specified as None to repair the affect by an indiscriminate cleaner. 72 | You can get a lines repaired doc by calling this function: 73 | "doc = repairer.del_repair()" 74 | Selected drawings should be the total selected drawings. 75 | 76 | Incomplete instruction recording: 77 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 78 | "recorder = PDFbf.recorder(missing_information=str)" 79 | Where "missing_information" is a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no realted part) or related part in the original instruction following with an "incompleted" in brack (if there is some related description but some necessary information is missing) '''. 80 | The incomplete instruction can be recorded by calling the following function: 81 | "recorder.recording()" 82 | 83 | Here is an example. You should respond in the same way. 84 | 85 | User: The file name is "sdh.pdf". For the first box on page three, delete the drawings covered by the box, delete the rebars intersected with the box, and delete the columns covered by the box. For the fourth box on page eleven, delete the lines intersecting with the box. For the eighth box on page one, delete the lines covered by the box and delete the rebars covered by the box. For the second box on page 18, delete. Repair all the drawings. 86 | 87 | Response: 88 | ```python 89 | 90 | import fitz 91 | import PDFbf 92 | 93 | filepath = "sdh.pdf" 94 | doc = fitz.open(filepath) 95 | 96 | annoextractor = PDFbf.extractanno(doc=doc) 97 | 98 | 99 | # Mission 1 100 | pagenumber0 = 2 101 | rectangleorder0 = 0 102 | 103 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 104 | argument0 = dict(clip=rectangle0) 105 | selector0 = PDFbf.selector(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip")) 106 | covered_drawings0 = selector0.mode1_drawings_Window_Cover_Enclosure() 107 | intersected_rebars0 = selector0.mode2_rebars_Cross_Touch_Intersect() 108 | covered_columns0 = selector0.mode1_columns_Window_Cover_Enclosure() 109 | sel_drawings0 = covered_drawings0 + intersected_rebars0 + covered_columns0 110 | drawing_manipulator0 = PDFbf.draw_drawer(doc=doc, pagenumber=pagenumber0, listofcdraw=sel_drawings0) 111 | doc = drawing_manipulator0.delete_draw() 112 | repairer0 = PDFbf.repairer(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), cdrawings=selector0.selected_lines, sel_drawings=sel_drawings0) 113 | doc = repairer0.del_repair() 114 | 115 | # Mission 2 116 | pagenumber1 = 10 117 | rectangleorder1 = 3 118 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 119 | argument1 = dict(clip=rectangle1) 120 | selector1 = PDFbf.selector(doc=doc, pagenumber=pagenumber1, clip=argument1.get("clip")) 121 | intersected_lines1 = selector1.mode2_lines_Cross_Touch_Intersect() 122 | drawing_manipulator1 = PDFbf.draw_drawer(doc=doc, pagenumber=pagenumber1, listofcdraw=intersected_lines1) 123 | doc = drawing_manipulator1.delete_draw() 124 | repairer1 = PDFbf.repairer(doc=doc, pagenumber=pagenumber1, clip=argument1.get("clip"), cdrawings=selector1.selected_lines, sel_drawings=intersected_lines1) 125 | doc = repairer1.del_repair() 126 | 127 | # Mission 3 128 | pagenumber2=0 129 | rectangleorder2=7 130 | rectangle2, rfpoint2 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder2) 131 | argument2 = dict(clip=rectangle2) 132 | selector2=PDFbf.selector(doc=doc,pagenumber=pagenumber2,clip=argument2.get("clip")) 133 | covered_lines2=selector2.mode1_lines_Window_Cover_Enclosure() 134 | covered_rebars2 = selector2.mode1_rebars_Window_Cover_Enclosure() 135 | sel_drawings2 = covered_lines2 + covered_rebars2 136 | drawing_manipulator2 = PDFbf.draw_drawer(doc=doc,pagenumber=pagenumber2,listofcdraw=sel_drawings2) 137 | doc = drawing_manipulator2.delete_draw() 138 | repairer2 = PDFbf.repairer(doc=doc, pagenumber=pagenumber2, clip=argument2.get("clip"), cdrawings=selector2.selected_lines, sel_drawings=sel_drawings2) 139 | doc = repairer2.del_repair() 140 | 141 | # Mission 4 142 | missing_information3='''file-level: sdh.pdf, page-level:18, order-level:2, base-level: delete(incomplete)''' 143 | recorder3 = PDFbf.recorder(missing_information=missing_information3) 144 | recorder3.recording() 145 | 146 | updatedfilepath = filepath[:-4] + "_updated.pdf" 147 | doc.save(updatedfilepath) 148 | ``` 149 | -------------------------------------------------------------------------------- /prompts/map_vector.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a high-level programmer. You are writing code to help a drafter implement construction drawings editing tasks. 3 | Now, the drafter is trying to map the vector graphics on the target area boxed by rectangle markups. The vector graphics have a parent class called "drawings" and three son classes, which are "rebar," "column," "line". There are two selection mode to select target class vectors, which are: 1. Window/Cover/Enclosure mode: the vector will be selected only if all vertices and boundaries of the graphic are covered within the rectangular checkbox, 2 Cross/Touch/Intersect mode: the vector will be selected if any of the vertices and a boundary of the vector are within the rectangular checkbox. There are three operations that the drafter may want: translation, rotation, and scaling, the details defining the operation including operation type, direction and magnitude. Deleting a group of vectors on a target position with a corresponding group of operations is seen as a task. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the classes of vectors to be mapped and providing the details for at least one of the three operations correspondingly) from high to low. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. What needs to be made more clear is that, the details defining a target class that can be considered necessary information are: the class and the selection mode. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks to complete the execution of the instruction. Don't assume the value of missing necessary information (only the magnitude of operation can be assumed), but identify tasks lacking necessary information, record the missing details, and execute the task only once all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for mapping vectors on a target position by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Target class(es) selection: Create a drawing selector and select the specified vector graphics(drawings, lines, rebars, columns) with the specified selection mode by the selector. 13 | 6. Mapping vectors: Create a vectors projector instance and map vectors. 14 | 7. Save the file. 15 | 16 | Available tools: 17 | Two constant defined by the user: 18 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 19 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 20 | 21 | 22 | Argument collection: 23 | Extract the argument values from the instruction and store them as a dictionary for the table position-changing task. Here is an example dictionary including all possible attributes: 24 | {"clip": a rectangle boxes the text that need to be dealt with; "move": a list refers to translation details, general form: [move in its right direction (positive value if move right, negative value if move left), move in its upper direction (positive value if move up, negative value if move down), reference_point], "rotation": a list refers to the rotation details, general form: ['r', rotation degrees in the clockwise direction (positive value in clockwise direction, negative value in counterclockwise direction),reference_point], if the direction of rotation is not specified, clockwise is considered; "scal": a list refers to the scaling, general form: ['sc', [zoom factor X direction, zoom factor Y direction], reference_point]} 25 | 26 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 27 | "import fitz 28 | import PDFbf" 29 | You have to import the library befor you using the following tools provided. 30 | 31 | File open: 32 | You can open the file by calling the following function: 33 | "doc = fitz.open("filepath")" Open the file with the file path. 34 | 35 | Save the updated file: 36 | You can save the file with an updated name by calling the following function: 37 | " 38 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 39 | doc.save(updated_file) 40 | " 41 | 42 | Target position extraction: 43 | An annotation extractor is instantiated by providing the essential arguments document(doc): 44 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 45 | Get a rectangle and its reference points 46 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 47 | 48 | Target class(es) selection: 49 | A vectors selector can be instantiated by specifying the document(doc), page number(pagenumber), and clip like this: 50 | "selector = PDFbf.selector(doc=fitz.document, pagenumber=int, clip=rectangle)" 51 | The selector is to select a subgroup from drawings with two optional selection modes. The subgroup can be described by the type of vector graphics ("drawings", "lines", "rebars", or "columns"). There are two selection modes: 1. Window/Cover/Enclosure mode: the vector will be selected only if all vertices and boundaries of the graphic are completely within the rectangular checkbox, 2 Cross/Touch/Intersect mode: the vector will be selected if any of the vertices and a boundary of the vector are within the rectangular checkbox. 52 | Choose the selection specified or closest to the description of the subgroup. 53 | You can get a list of a subgroup with a specified selection mode by calling these functions: 54 | "covered_drawings = selector.mode1_drawings_Window_Cover_Enclosure()" select drawings with selection mode 1; 55 | "intersected_drawings = selector.mode2_drawings_Cross_Touch_Intersect()" select drawings with selection mode 2; 56 | "covered_lines = selector.mode1_lines_Window_Cover-Enclosure()" select lines with selection mode 1; 57 | "intersected_lines = selector.mode2_lines_Cross_Touch_Intersect()" select lines with selection mode 2; 58 | "covered_rebars= selector.mode1_rebars_Window_Cover_Enclosure()" select rebars with selection mode 1; 59 | "intersected_rebars = selector.mode2_rebars_Cross_Touch_Intersect()" select rebars with selection mode 2; 60 | "covered_columns = selector.mode1_columns_Window_Cover_Enclosure()" select columns with selection mode 1; 61 | "intersected_columns = selector.mode2_columns_Cross_Touch_Intersect()" select columns with selection mode 2; 62 | 63 | Mapping vectors: 64 | A vector projector is instantiated by specifying the essential arguments (document(doc), page number(pagenumber), clip, selected drawings(sel_drawings),cdrawings(cdrawings)) and optional arguments (movement of translation (move), rotation(rotation) and scaling(scal)) on demand like this: 65 | "projector=PDFbf.project_draw(doc=fitz.document,pagenumber=int,clip=rectangle,sel_drawings=list, cdrawings=list, move=[move_in_right_direction, move_in_up_direction, reference_point],rotation=['r', rotation degrees in clockwise direction,reference_point],scal=['sc',[zoom factor X direction, zoom factor Y direction], reference_point])" 66 | You can get a doc with the drawings mapped by calling the following function: 67 | "doc = projector.project()" 68 | "cdrawings" can be obtained by calling the selector like this "selector.selected_lines", 69 | 70 | Incomplete instruction recording: 71 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 72 | "recorder = PDFbf.recorder(missing_information=str)" 73 | Where "missing_information" a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no realted part) or related part in the original instruction following with an "incompleted" in brackets (if there is some related description but some necessary information is missing) '''. 74 | The incomplete instruction can be recorded by calling the following function: 75 | "recorder.recording()" 76 | 77 | Here is an example. You should respond in the same way. 78 | 79 | User: The file name is "fjhgdf.pdf". For the eighth box on page one, rotate the drawing covered by the box 45 degrees clockwise; move the rebars intersected with the box 50 units left, 20 units down. For the second box on page five, scale the columns intersected with the box 0.8 times in both x and y directions; rotate the rebars covered by the box 45 degrees counterclockwise and move it 30 units left, 20 units up and scal it 0.8 times in x direction, 1.2 times in y firection. For the first box on page seven, scale the rebars covered by the box 1.5 times in both x and y directions; move the lines covered by the box 20 units right, 10 units up, and rotate it. 80 | 81 | Response: 82 | ```python 83 | 84 | import fitz 85 | import PDFbf 86 | 87 | filepath = "fjhgdf.pdf" 88 | doc = fitz.open(filepath) 89 | 90 | annoextractor = PDFbf.extractanno(doc=doc) 91 | 92 | pagenumber0=0 93 | rectangleorder0=7 94 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 95 | argument0=dict(clip=rectangle0, rotation=['r', 45, rfpoint0]) 96 | selector0 = PDFbf.selector(doc=doc,pagenumber=pagenumber0,clip=argument0.get("clip")) 97 | covered_drawings0 = selector0.mode1_drawings_Window_Cover_Enclosure() 98 | projector0 = PDFbf.project_draw(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), sel_drawings=covered_drawings0, cdrawings=selector0.selected_lines, move=argument0.get("rotation")) 99 | doc = projector0.project() 100 | 101 | argument1=dict(clip=rectangle0, move=[-50,-20, rfpoint0]) 102 | intersected_rebars1 = selector0.mode2_rebars_Cross_Touch_Intersect() 103 | projector1 = PDFbf.project_draw(doc=doc, pagenumber=pagenumber0, clip=argument1.get("clip"), sel_drawings=intersected_rebars1, cdrawings=selector0.selected_lines, rotation=argument1.get("move")) 104 | doc = projector1.project() 105 | 106 | 107 | pagenumber1 = 4 108 | rectangleorder1 = 1 109 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1,rectangleorder=rectangleorder1) 110 | argument2 = dict(clip=rectangle1, scal=['sc', [0.8, 0.8], rfpoint1]) 111 | selector1 = PDFbf.selector(doc=doc, pagenumber=pagenumber1, clip=argument2.get("clip")) 112 | intersected_columns2 = selector1.mode2_columns_Cross_Touch_Intersect() 113 | projector2 = PDFbf.project_draw(doc=doc, pagenumber=pagenumber1, clip=argument2.get("clip"), sel_drawings=intersected_columns2, cdrawings=selector1.selected_lines, scal=argument2.get("scal")) 114 | doc = projector2.project() 115 | 116 | argument3 = dict(clip=rectangle1, move=[-30, 20, rfpoint1], rotation=['r', -45, rfpoint1], scal=['sc', [0.8, 1.2], rfpoint1]) 117 | covered_rebars3 = selector1.mode1_rebars_Window_Cover_Enclosure() 118 | projector3 = PDFbf.project_draw(doc=doc, pagenumber=pagenumber1, clip=argument3.get("clip"), sel_drawings=covered_rebars3, cdrawings=selector1.selected_lines, rotation=argument3.get("rotation"), move=argument3.get("move"), scal=argument3.get("scal")) 119 | doc = projector3.project() 120 | 121 | pagenumber2 = 6 122 | rectangleorder2 = 0 123 | rectangle2, rfpoint2 = annoextractor.getclip_rfpoint(pagenumber=pagenumber2,rectangleorder=rectangleorder2) 124 | argument4 = dict(clip=rectangle2, scal=['sc', [1.5, 1.5], rfpoint2]) 125 | selector2 = PDFbf.selector(doc=doc, pagenumber=pagenumber2, clip=argument4.get("clip")) 126 | covered_rebars4 = selector2.mode1_rebars_Window_Cover_Enclosure() 127 | projector4 = PDFbf.project_draw(doc=doc, pagenumber=pagenumber2, clip=argument4.get("clip"), sel_drawings=covered_rebars4, cdrawings=selector2.selected_lines, scal=argument4.get("scal")) 128 | doc = projector4.project() 129 | 130 | missing_information5='''file-level: fjhgdf.pdf, page-level:7, order-level:1, base-level: move the lines covered by the box 20 units right, 10 units up, and rotate it (incomplete)''' 131 | recorder5 = PDFbf.recorder(missing_information=missing_information5) 132 | recorder5.recording() 133 | 134 | updatedfilepath = filepath[:-4] + "_updated.pdf" 135 | doc.save(updatedfilepath) 136 | ``` 137 | -------------------------------------------------------------------------------- /prompts/refresh_vector.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a high-level programmer. You are writing code to help a drafter implement construction drawings editing tasks. 3 | Now, the drafter is trying to update the vector graphics format on the target area boxed by rectangle markups. The vector graphics have a parent class called "drawings" and three son classes, which are "rebar," "column," "line". There are two selection modes to select target class vectors, which are: 1. Window/Cover/Enclosure mode: the vector will be selected only if all vertices and boundaries of the graphic are covered within the rectangular checkbox, 2 Cross/Touch/Intersect mode: the vector will be selected if any of the vertices and a boundary of the vector are within the rectangular checkbox. There are all possible format attributes that the drafter may want to change: stroke color, line type (dash or straight), close path, fill color, joint type, cap type, and stroke width. Updating a group of vectors on a target position with a corresponding group of format attributes is seen as a task. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the classes of vectors to be updated and providing the details for at least one of the format attributes correspondingly) from high to low. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. What needs to be clarified is that the details defining a target class that can be considered necessary information are the class and the selection mode. You need to find the necessary information corresponding to each task from an instruction that may contain multiple tasks to complete the execution of the instruction. Don't assume the value of missing necessary information (only the value of the format attribute mentioned but not specified value explicitly can be assumed), but identify tasks lacking necessary information, record the missing details, and execute the task only once all essential information is gathered. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information(file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for updating vectors on a target position by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Target class(es) selection: Create a drawing selector and select the specified vector graphics(drawings, lines, rebars, columns) with the specified selection mode by the selector. 13 | 6. Updating vectors: Create a vector manipulating instance and update vectors. 14 | 7. Save the file. 15 | 16 | Available tools: 17 | Two constant defined by the user: 18 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 19 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 20 | 21 | Argument collection: 22 | Extract the argument values from the instruction and store them as a dictionary for the vector updating task. Here is an example dictionary including all possible attributes: 23 | {"clip": a rectangle boxes the drawings that needed to be dealt with; "fillcolor": a string refers to the fill color; "drwcolor": a string refers to the color of the drawing; "dashes": a sting refers to the dashed line, the general format is "[n m] 0", n is an int indicting the dash length, m (optional) is an int indicting the subsequent gap length, and 0 is an int specifying no pixels should be skipped before the dashing starts, if a dashed line is required, but no parameters are specified, take "[3 3]0", if a straight line is required, the parameters should be "[0 0]0"; "closePath": a bool indicating whether the path is a close path; "lineJoin": an int controlling the way how line connections look like. This may be either as a sharp join (0), a rounded join (1), or a cut-off join(2);" lineCap": an int controlling the look of line ends. the options are sharp ends(0), semi-circle ends(1), and semi-square ends(2); "width": an int refers to the line width of drawings} 24 | 25 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 26 | "import fitz 27 | import PDFbf" 28 | You have to import the library befor you using the following tools provided. 29 | 30 | File open: 31 | You can open the file by calling the following function: 32 | "doc = fitz.open("filepath")" Open the file with the file path. 33 | 34 | Save the updated file: 35 | You can save the file with an updated name by calling the following function: 36 | " 37 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 38 | doc.save(updated_file) 39 | " 40 | 41 | Target position extraction: 42 | An annotation extractor is instantiated by providing the essential arguments document(doc): 43 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 44 | Get a rectangle and its reference points: 45 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 46 | 47 | 48 | Target class(es) selection: 49 | A vectors selector can be instantiated by specifying the document(doc), page number(pagenumber), and clip like this: 50 | "selector = PDFbf.selector(doc=fitz.document, pagenumber=int, clip=rectangle)" 51 | The selector is to select a subgroup from drawings with two optional selection modes. The subgroup can be described by the type of vector graphics ("drawings", "lines", "rebars", or "columns"). There are two selection modes: 1. Window/Cover/Enclosure mode: the vector will be selected only if all vertices and boundaries of the graphic are completely within the rectangular checkbox, 2 Cross/Touch/Intersect mode: the vector will be selected if any of the vertices and a boundary of the vector are within the rectangular checkbox. 52 | Choose the selection specified or closest to the description of the subgroup. 53 | You can get a list of a subgroup with a specified selection mode by calling these functions: 54 | "covered_drawings = selector.mode1_drawings_Window_Cover_Enclosure()" select drawings with selection mode 1; 55 | "intersected_drawings = selector.mode2_drawings_Cross_Touch_Intersect()" select drawings with selection mode 2; 56 | "covered_lines = selector.mode1_lines_Window_Cover_Enclosure()" select lines with selection mode 1; 57 | "intersected_lines = selector.mode2_lines_Cross_Touch_Intersect()" select lines with selection mode 2; 58 | "covered_rebars= selector.mode1_rebars_Window_Cover_Enclosure()" select rebars with selection mode 1; 59 | "intersected_rebars = selector.mode2_rebars_Cross_Touch_Intersect()" select rebars with selection mode 2; 60 | "covered_columns = selector.mode1_columns_Window_Cover_Enclosure()" select columns with selection mode 1; 61 | "intersected_columns = selector.mode2_columns_Cross_Touch_Intersect()" select columns with selection mode 2; 62 | 63 | 64 | Updating vectors: 65 | A vector manipulator is instantiated by specifying the essential arguments (document(doc), page number(pagenumber), selected drawings(sel_drawings)) and optional arguments (fill color (fillcolor), drawing color(drwcolor), dashes line (dashes), close path or not (closePath), line joint shape (lineJoin), line end shape (lineCap), line width (width)) on demand like this: 66 | "manipulator=PDFbf.manipulate_draw(doc=doc,pagenumber=int,sel_drawings=list,fillcolor=string,drwcolor=string,dashes=string,closePath=bool,lineJoin=int,lineCap=int,width=int)" 67 | You can get a doc with the list of drawings refreshed by calling the following function: 68 | "doc = manipulator.update_draw()" 69 | 70 | Incomplete instruction recording: 71 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(incomplete_instruction, missing_information): 72 | "recorder = PDFbf.recorder(missing_information=str)" 73 | Where "missing_information" is a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no realted part) or related part in the original instruction following with an "incompleted" in brackets (if there is some related description but some necessary information is missing) '''. 74 | The incomplete instruction can be recorded by calling the following function: 75 | "recorder.recording()" 76 | 77 | Here is an example. You should respond in the same way. 78 | 79 | User: The file name is "fjhgdf.pdf". For the third box on page two, change the columns intersecting with the box to red, dash lines with rounded line joints; change the line covered by the box to a dashed line and line width 3. For the eighth box on page one, change the lines intersecting with the box to yellow filling, close path, semi-square line end; change the rebars covered by the box to blue with a dashed line and line width 3. For the fifth box on page ten, change the columns covered by the box to green with a semi-square line end and line width 4; change the rebars intersecting with the box. 80 | 81 | Response: 82 | ```python 83 | 84 | import fitz 85 | import PDFbf 86 | filepath = "fjhgdf.pdf" 87 | doc = fitz.open(filepath) 88 | 89 | annoextractor = PDFbf.extractanno(doc) 90 | 91 | # Mission 1 92 | pagenumber0=1 93 | rectangleorder0=2 94 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0, rectangleorder=rectangleorder0) 95 | argument0=dict(clip=rectangle0,drwcolor="red",dashes="[3,3]0",lineJoin=1) 96 | selector0 = PDFbf.selector(doc=doc,pagenumber=pagenumber0,clip=argument0.get("clip")) 97 | intersected_columns0 = selector0.mode2_columns_Cross_Touch_Intersect() 98 | manipulator0=PDFbf.manipulate_draw(doc=doc,pagenumber=pagenumber0,sel_drawings=intersected_columns0,drwcolor=argument0.get("drwcolor"),dashes=argument0.get("dashes"),lineJoin=argument0.get("lineJoin")) 99 | doc = manipulator0.update_draw() 100 | 101 | argument1=dict(clip=rectangle0,dashes="[3,3]0",width=3) 102 | covered_lines1 = selector0.mode1_lines_Window_Cover_Enclosure() 103 | manipulator1=PDFbf.manipulate_draw(doc=doc,pagenumber=pagenumber0,sel_drawings=covered_lines1,dashes=argument1.get("dashes"),width=argument1.get("width")) 104 | doc = manipulator1.update_draw() 105 | 106 | pagenumber1=0 107 | rectangleorder1=7 108 | rectangle1, rfpoint1 = annoextractor.getclip_rfpoint(pagenumber=pagenumber1, rectangleorder=rectangleorder1) 109 | argument2=dict(clip=rectangle1,fillcolor="yellow",closePath=True,lineCap=2) 110 | selector1 = PDFbf.selector(doc=doc,pagenumber=pagenumber1,clip=argument2.get("clip")) 111 | intersected_lines2 = selector1.mode2_lines_Cross_Touch_Intersect() 112 | manipulator2=PDFbf.manipulate_draw(doc=doc, pagenumber=pagenumber1, sel_drawings=intersected_lines2, fillcolor =argument2.get("fillcolor"), closePath=argument2.get("closePath"), lineCap=argument2.get("lineCap")) 113 | doc = manipulator2.update_draw() 114 | 115 | argument3 = dict(clip=rectangle1, drwcolor="blue", dashes="[3 3]0", width=3) 116 | covered_rebars3 = selector1.mode1_rebars_Window_Cover_Enclosure() 117 | manipulator3 = PDFbf.manipulate_draw(doc=doc, pagenumber=pagenumber1, sel_drawings=covered_rebars3, drwcolor=argument3.get("drwcolor"), dashes=argument3.get("dashes"), width=argument3.get("width")) 118 | doc = manipulator3.update_draw() 119 | 120 | # Mission 3 121 | pagenumber2 = 9 122 | rectangleorder2 = 4 123 | rectangle2, rfpoint2 = annoextractor.getclip_rfpoint(pagenumber=pagenumber2, rectangleorder=rectangleorder2) 124 | argument4 = dict(clip=rectangle2, drwcolor="green", lineCap=2, width=4) 125 | selector2 = PDFbf.selector(doc=doc, pagenumber=pagenumber2, clip=argument4.get("clip")) 126 | covered_columns4 = selector2.mode1_columns_Window_Cover_Enclosure() 127 | manipulator4 = PDFbf.manipulate_draw(doc=doc, pagenumber=pagenumber2, sel_drawings=covered_columns4, drwcolor=argument4.get("drwcolor"), lineCap=argument4.get("lineCap"), width=argument4.get("width")) 128 | doc = manipulator4.update_draw() 129 | 130 | missing_information5='''file-level: fjhgdf.pdf, page-level:10, order-level:5, base-level: change the rebars intersecting with the box (incomplete)''' 131 | recorder5 = PDFbf.recorder(missing_information=missing_information5) 132 | recorder5.recording() 133 | 134 | updatedfilepath = filepath[:-4] + "_updated.pdf" 135 | doc.save(updatedfilepath) 136 | ``` 137 | -------------------------------------------------------------------------------- /prompts/revise_table.txt: -------------------------------------------------------------------------------- 1 | Task background: 2 | You're a worker using tools provided by the drafter to implement his instruction on technical drawing editing tasks. 3 | Now, the drafter is trying to edit the table content on the target area boxed by rectangle markups. Editing a table on a target position is seen as a task. There are three possible operations that must be selected from: deleting, clearing, and replacing. There are also format attributes that can be optionally specified on demand: the font, font size, alignment, and border width. The necessary information for such a task has four levels, which are file-level (providing the file path), page-level (providing page number), order-level (providing rectangle order on the page), and base-level(providing the necessary information of the operation) from high to low. The first three levels of information determine the target position, while the base level describes the operation. A lower level information can correspond to only one higher level information, while higher level information can correspond to multiple lower-level information. You need to find the four level information corresponding to each task from an instruction that may contain multiple tasks. Don't assume its value for necessary information if the command doesn't mention it or describe it vaguely. For all the tasks in commands, execute the tasks where all essential information is completely specified, while for tasks lacking necessary information, record the missing details. Don't repeat the operations related to the same higher-level information when dealing with lower-level operations. Avoid using loops. 4 | 5 | Task plan: 6 | The following is a standard process for completing a task: 7 | 0. Inexecutable task recording: If you think a level of necessary information (file-level, page-level, order-level, base-level) for a task is missing in the instruction, don't assume the value but skip all the operations of that task and create a recorder to record the incomplete instruction, then continue on next task. Otherwise, ignore this step if all four levels of necessary information of a task can be found. 8 | 1. Import libraries 9 | 2. Open the file. 10 | 3. Target position extraction: Crate an annotation extractor and extract the rectangles on the target page and order. 11 | 4. Argument collection: Prepare parameters for updating text content by modifying the argument's value specified by the drafter's instructions into a dictionary. 12 | 5. Extract original table: Create a table extractor instance and extract the original table. 13 | 6. Update table: Create a table manipulator instance, revise the table content, and add the new table. 14 | 7. Save the file. 15 | 16 | Available tools: 17 | Two constant defined by the user: 18 | "pagenumber": an int refers to the page number, pagenumber=0 indicates page one; 19 | "rectangleorder" an int refers to the order of rectangles within the page, rectangleorder=0 indicates the first rectangle; 20 | 21 | Argument collection: 22 | Extract the argument values from the instruction and store them as a dictionary for the table editing task. Here is an example dictionary including all possible attributes: 23 | {"clip": a rectangle boxes the table that needed to be revised; "font": a string refers to the font of the text in table; "fontsize":an int refers to the size of text in table; "borderwidth": an int refers to the width of the table border, "align": a string refers to the alignment of the text in table, which can be one of "center", "left", "right"} 24 | 25 | Popular PDF editing library "PyMuPDF" and a user-tailored library "PDFbf" are provided. You can import the two libraries by: 26 | "import fitz 27 | import PDFbf" 28 | 29 | File open: 30 | You can open the file by calling the following function: 31 | "doc = fitz.open("filepath")" Open the file with the file path. 32 | 33 | Save the updated file: 34 | You can save the file with an updated name by calling the following function: 35 | " 36 | updated_file = filepath[:-4] + "_updated.pdf" # update file path 37 | doc.save(updated_file) 38 | " 39 | 40 | 41 | Target position extraction: 42 | An annotation extractor is instantiated by providing the essential arguments document(doc): 43 | "annoextractor = PDFbf.extractanno(doc=fitz.document)" 44 | Get a rectangle and its reference points: 45 | "rectangle, rfpoint = annoextractor.getclip_rfpoint(pagenumber=int,rectangleorder=int)" 46 | 47 | 48 | Extract the original table: 49 | A table extractor can be instantiated by specifying the document(doc), page number(pagenumber), clip like this: 50 | "table_extractor = PDFbf.extract_table(doc=fitz.document, pagenumber=int, clip=rectangle)" 51 | You can get the date of the table by calling the following function: 52 | "table_data = table_extractor.data" 53 | 54 | Update table: 55 | A table manipulator can be instantiated by specifying the essential arguments (document(doc), page number(pagenumber), clip, data) and optional arguments (font, fontsize, borderwidth, align) on demand like this: 56 | "table_manipulator=PDFbf.manipulate_table(doc=fitz.document, pagenumber=int, clip=rectangle, data=list, font=string, fontsize=int, borderwidth=int, align=string)" 57 | 58 | Here are some arguments that should be specified when you want to delete specified contents from the original tables: 59 | "delcolumn": a list of columns that need to be deleted, [1] for the first column; 60 | "delrow": similar to "delcolumn", a list of rows that need to be deleted, [5] for the fifth row; 61 | 62 | You can get the revised table data by deleting the whole table or specified row or column from the original table data by specifying rows to delete(delrow), columns to delete(delcolumn) and calling the following function: 63 | "table_manipulator.data = table_manipulator.cuttable(delrow=list,delcolumn=list)" 64 | "table_manipulator.data = table_manipulator.cuttable()" delete the whole table if the delete table action is required with no target rows and columns specified. 65 | "table_manipulator.data = table_manipulator.cuttable(delrow=[3,4,5,6])" delete the 3rd to 6th rows from the original table. 66 | "table_manipulator.data = table_manipulator.cuttable(delrow=[3],delcolumn=[2,4])" Delete the 3rd row and delete the 2nd and 4th columns from the original table. 67 | 68 | Here are some arguments that should be specified when you want to clear or replace specified contents from the original tables: 69 | "startcell": a list refers to the position of the start cell to be cleared or replaced in the data list, general form: [row number, column number]; 70 | "endcell": a list refers to the position of the end cell to be cleared or replaced in the data list, general form: [row number, column number]; 71 | "repdata": a list of data to replace the specified content from original table, general form: [[row1 content],[row2 content],...,[ith row content]]; 72 | 73 | You can get the revised table data by clearing the whole table or the data of specified cells of the original table by defining the start cell(startcell), end cell(endcell) and calling the following function: 74 | "table_manipulator.data = table_manipulator.emptytable(startcell=list,endcell=list)" 75 | "table_manipulator.data = table_manipulator.emptytable()" clear the whole table if the clear table action is required with no start cell(startcell) and end cell(endcell) specified. 76 | "table_manipulator.data = table_manipulator.emptytable(startcell=[2,3],endcell=[5,7])" clear the content of cells starting from the 2nd row and 3rd column, end at the 5th row and 7th column from the original table. 77 | “table_manipulator.data = table_manipulator.emptytable(startcell=[“all”,3])” clear the column 3. 78 | “table_manipulator.data = table_manipulator.emptytable(startcell=[7,“all”])” clear the row 7. 79 | "table_manipulator.data = table_manipulator.emptytable(startcell=[8,9])" clear the data of one cell at the 8th row and 9th column from the original table if clear table action is required with only one cell specified. 80 | 81 | You can get the revised table data by replacing the whole table or the data of specified cells from the original table by defining the data used to replace(repdata), start cell(startcell), end cell(endcell) and calling the following function: 82 | "table_manipulator.data = table_manipulator.modifytable(repdata=replace_data, startcell=list, endcell=list)" 83 | "table_manipulator.data = table_manipulator.modifytable(repdata=replace_data)" Replace the whole table with the "replace data" if no start cell(startcell) and end cell(endcell) are specified. 84 | "table_manipulator.data = table_manipulator.modifytable(repdata=replace_data, startcell=[7,9], endcell=[12,13])" Replace the data of cells starting from 7th row and 9th column, end at the 12th row and 13th column from the original table with the "replace data". 85 | “table_manipulator.data = table_manipulator.emptytable(repdata=replace_data, startcell=[“all”,3])” replace the column 3. 86 | “table_manipulator.data = table_manipulator.emptytable(repdata=replace_data, startcell=[7,“all”])” replace the row 7. 87 | "table_manipulator.data = table_manipulator.modifytable(repdata=replace_data, startcell=[2,3])" Replace the data of the cell at 2nd row and 3rd column from the original table with the "replace data" if replace table action is required with only one cell specified. 88 | 89 | You can get a doc with a revised table added by calling the following function: 90 | "doc = table_manipulator.addtable()" 91 | 92 | Incomplete instruction recording: 93 | A missing key attribute recorder can be instantiated to record the incomplete instruction by providing the essential arguments(missing_information): 94 | "recorder = PDFbf.recorder(missing_information=str)" 95 | Where "missing_information" is a string indicating the highest-level of missing information, a standard format is '''file-level: value or missing, page-level: value or missing, order-level: value or missing, base-level: related part in the original instruction or missing(if no related part) or related part in the original instruction following with an "incomplete" in brackets (if there is some related description but some necessary information is missing)'''. 96 | The incomplete instruction can be recorded by calling the following function: 97 | "recorder.recording()" 98 | 99 | 100 | Here is an example. You should respond in the same way. 101 | 102 | User: The file name is "De222lsfqweete_table.pdf". 103 | For the table in the first box of page 4, clear the cells from the second row and third column to the fourth row and fifth column. Replace the cells from the third row and first column to the fourth row and second column with the data "first row: A, B; second row: C, D". Then delete the first row and the second to fifth columns. 104 | For the table on page 7, delete the second column. Clear the table. 105 | For the seventh box on page eight, replace the table the data "first row: 10,11, second row: 25,26, third row: 31,32", clear the cell at 3rd row and 4th column, delete the 1-7th row and 10th columns. 106 | 107 | Response: 108 | ```python 109 | 110 | import fitz 111 | import PDFbf 112 | 113 | 114 | #Task 1 115 | #File 1 116 | filepath = "De222lsfqweete_table.pdf" 117 | doc = fitz.open(filepath) 118 | annoextractor = PDFbf.extractanno(doc=doc) 119 | 120 | #Page 1 121 | pagenumber0 = 3 122 | #Rectangle 1 123 | rectangleorder0 = 0 124 | rectangle0, rfpoint0 = annoextractor.getclip_rfpoint(pagenumber=pagenumber0,rectangleorder=rectangleorder0) 125 | #Operation 1 126 | argument0 = dict(clip=rectangle0) 127 | table_extractor0 = PDFbf.extract_table(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip")) 128 | table_data0 = table_extractor0.data 129 | table_manipulator0 = PDFbf.manipulate_table(doc=doc, pagenumber=pagenumber0, clip=argument0.get("clip"), data=table_data0) 130 | table_manipulator0.data = table_manipulator0.emptytable(startcell=[2, 3], endcell=[4, 5]) 131 | repdata0 = [["A", "B"], ["C", "D"]] 132 | table_manipulator0.data = table_manipulator0.modifytable(repdata=repdata0, startcell=[3, 1], endcell=[4, 2]) 133 | table_manipulator0.data = table_manipulator0.cuttable(delrow=[1], delcolumn=[2,3,4,5]) 134 | doc = table_manipulator0.addtable() 135 | 136 | #Task 2 137 | #Lacking necessary information 138 | missing_information1='''file-level: De222lsfqweete_table.pdf, page-level:7, order-level: missing, base-level: delete the second column. Clear the table.''' 139 | recorder1 = PDFbf.recorder(missing_information=missing_information1) 140 | recorder1.recording() 141 | 142 | 143 | #Task 3 144 | #Page 3 145 | pagenumber2=7 146 | #Rectangle 3 147 | rectangleorder2=6 148 | rectangle2, rfpoint2 = annoextractor.getclip_rfpoint(pagenumber=pagenumber2,rectangleorder=rectangleorder2) 149 | argument2 = dict(clip=rectangle2) 150 | table_extractor2 = PDFbf.extract_table(doc=doc,pagenumber=pagenumber2,clip=argument2.get("clip")) 151 | table_data2 = table_extractor2.data 152 | table_manipulator2 = PDFbf.manipulate_table(doc=doc, pagenumber=pagenumber2, clip=argument2.get("clip"), data=table_data2) 153 | repdata2 = [[10,11],[25,26],[31,32]] 154 | table_manipulator2.data=table_manipulator2.modifytable(repdata=repdata2) 155 | table_manipulator2.data = table_manipulator2.emptytable(startcell=[3,4]) 156 | table_manipulator2.data = table_manipulator2.cuttable(delrow=[1,2,3,4,5,6,7],delcolumn=[10]) 157 | doc = table_manipulator2.addtable() 158 | 159 | updatedfilepath = filepath[:-4] + "_updated.pdf" 160 | doc.save(updatedfilepath) 161 | ``` 162 | -------------------------------------------------------------------------------- /methods/evaluate.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import copy 3 | import multiprocessing 4 | import numpy as np 5 | import datetime 6 | import sys 7 | import os 8 | import json 9 | import glob 10 | from tqdm import tqdm 11 | from functools import partial 12 | from datasets import DatasetDict, Dataset, load_dataset 13 | from multiprocessing import Manager, Lock 14 | from . import task_sets 15 | 16 | sys.path.append(os.path.abspath("..")) 17 | sys.path.append(os.path.abspath("../..")) 18 | 19 | from DrafterBench.methods.generator import generator 20 | 21 | def score_format(da): 22 | def calculate_score(success_count, total_count): 23 | return success_count / total_count if total_count else 1 24 | 25 | format = { 26 | "Tasktype": da["Tasktype"], 27 | "Task_id": da["Id"], 28 | "Define_arguments": calculate_score( 29 | da["Task_score"]["Success_arguments_define"], 30 | da["Task_score"]["Total_arguments_define"], 31 | ), 32 | "Transfer_variable": calculate_score( 33 | da["Task_score"]["Success_variable_transfer"], 34 | da["Task_score"]["Total_variable_transfer"], 35 | ), 36 | "Call_function": calculate_score( 37 | da["Task_score"]["Success_function_calling"], 38 | da["Task_score"]["Total_function_calling"], 39 | ), 40 | "Single_tool": calculate_score( 41 | da["Task_score"]["Success_single_tool_selection"], 42 | da["Task_score"]["Total_single_tool_selection"], 43 | ), 44 | "Multi-tool": calculate_score( 45 | da["Task_score"]["Success_multi_tool_selection"], 46 | da["Task_score"]["Total_multi_tool_selection"], 47 | ), 48 | "Execute_plan": calculate_score( 49 | da["Task_score"]["Intersected_plan_execution"], 50 | da["Task_score"]["Total_plans_appeared"], 51 | ), 52 | "Task_score": da["Task_score"]["Task_score"], 53 | } 54 | return format 55 | 56 | 57 | def evaluate(args): 58 | print(f"The benchmark is runing with following arguments:") 59 | print(f"{args}") 60 | print(f"Running tasks in {args.task_group} set(s)") 61 | os.makedirs(f"{args.result_dir}/{args.model.replace('/', '_')}", exist_ok=True) 62 | if args.exp_name == "default_name": 63 | args.exp_name = f"{args.model.replace('/', '_')}_{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')}_{args.task_group}" 64 | 65 | 66 | response_results = Manager().list() 67 | max_length = 1920 68 | resume_path = None 69 | if args.resume_from: 70 | resume_path = args.resume_from 71 | elif args.auto_resume: 72 | result_dir = f"{args.result_dir}/{args.model.replace('/', '_')}" 73 | pattern = os.path.join(result_dir, f"*_{args.task_group}.json") 74 | files = glob.glob(pattern) 75 | if not files: 76 | print("No saved results found in {{args.result_dir}/{args.model.replace('/', '_')}}") 77 | user_input = input("Would you like to run a new assessment? (Yes/No):") 78 | if user_input == "Yes": 79 | print("Run a new assessment") 80 | result_path = f"{args.result_dir}/{args.model.replace('/', '_')}/{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')}_{args.task_group}.json" 81 | else: 82 | print("Stop running DrafterBench") 83 | else: 84 | files.sort(key=lambda p: os.path.getmtime(p), reverse=True) 85 | resume_path = files[0] 86 | print(f"Find existing results archive: {resume_path}") 87 | else: 88 | result_path = f"{args.result_dir}/{args.model.replace('/', '_')}/{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')}_{args.task_group}.json" 89 | 90 | if resume_path: 91 | try: 92 | with open(resume_path, "r", encoding="utf-8") as f: 93 | results_saved = json.load(f) 94 | except Exception as e: 95 | print(f"[Error] Faile to load saved results: {e}") 96 | return 97 | response_results.extend(results_saved) 98 | result_path = resume_path 99 | print(f"Resumed {len(results_saved)} items from {resume_path} ...") 100 | 101 | task_messages = load_dataset("Eason666/DrafterBench", "drafter_tasks") 102 | specified_instructions = [] 103 | for task_set in task_sets: 104 | if args.debug: 105 | specified_instructions.extend( 106 | [task_messages[task_set][i] for i in [1, 2, 3]] 107 | ) 108 | else: 109 | if args.task_group == "All": 110 | specified_instructions.extend( 111 | [ 112 | task_messages[task_set][i] 113 | for i in range(len(task_messages[task_set])) 114 | ] 115 | ) 116 | else: 117 | for i in range(len(task_messages[task_set])): 118 | task_parameters = [ 119 | task_messages[task_set][i]["Precise|Vague"], 120 | task_messages[task_set][i]["Complete|Incomplete"], 121 | task_messages[task_set][i]["Single|Multiple_objects"], 122 | task_messages[task_set][i]["Single|Multiple_operations"], 123 | task_messages[task_set][i]["Structured/Unstructured"], 124 | ] 125 | if args.task_group in task_parameters: 126 | specified_instructions.append(task_messages[task_set][i]) 127 | 128 | completed_ids = { 129 | (item["Tasktype"], item["Id"]) for item in response_results 130 | } 131 | remaining = len(specified_instructions) - len(completed_ids) 132 | if not remaining: 133 | print("[Resume] No remaining tasks. Skipping generation.") 134 | else: 135 | print(f"[Gen] Total tasks: {len(specified_instructions)} | Remaining: {remaining}") 136 | 137 | 138 | 139 | ctx = multiprocessing.get_context("spawn") 140 | if remaining: 141 | specified_instructions = [ 142 | task for task in specified_instructions 143 | if (task["Tasktype"], task["Id"]) not in completed_ids 144 | ] 145 | pool1 = ctx.Pool(processes=args.proc_num) 146 | print("Getting agent responses:") 147 | generator_partial = partial( 148 | generator, args.model, args.model_provider, args.temperature, args.vllm_url, max_length, response_results, result_path 149 | ) 150 | r = list( 151 | tqdm( 152 | pool1.imap_unordered(generator_partial, specified_instructions), 153 | total=len(specified_instructions), 154 | ) 155 | ) 156 | pool1.close() 157 | pool1.join() 158 | 159 | from DrafterBench.methods.evaluator import evaluator 160 | 161 | eval_results = Manager().list() 162 | pool2 = ctx.Pool(processes=args.proc_num) 163 | print("Evaluating agent responses:") 164 | evaluator_partial = partial(evaluator, result_path, eval_results) 165 | responses = copy.deepcopy(list(response_results)) 166 | p = list(tqdm(pool2.imap_unordered(evaluator_partial, responses), total=len(responses))) 167 | pool2.close() 168 | pool2.join() 169 | 170 | eval_list = list(eval_results) 171 | task_rewards = {} 172 | for task in task_sets: 173 | rewards = [ 174 | x["Task_score"]["Task_score"] for x in eval_list if x["Tasktype"] == task 175 | ] 176 | task_rewards.update({task: np.average(rewards)}) 177 | average_task_rewards = np.average([task_rewards[x] for x in task_sets]) 178 | comprehensive_rewards = ( 179 | average_task_rewards - (100 - min([task_rewards[x] for x in task_sets])) / 12 180 | ) 181 | structured_rewards = [ 182 | x["Task_score"]["Task_score"] 183 | for x in eval_list 184 | if x["Structured/Unstructured"] == "Structured" 185 | ] 186 | unstrctured_rewards = [ 187 | x["Task_score"]["Task_score"] 188 | for x in eval_list 189 | if x["Structured/Unstructured"] == "Unstructured" 190 | ] 191 | precise_rewards = [ 192 | x["Task_score"]["Task_score"] 193 | for x in eval_list 194 | if x["Precise|Vague"] == "Precise" 195 | ] 196 | vague_rewards = [ 197 | x["Task_score"]["Task_score"] 198 | for x in eval_list 199 | if x["Precise|Vague"] == "Vague" 200 | ] 201 | complete_rewards = [ 202 | x["Task_score"]["Task_score"] 203 | for x in eval_list 204 | if x["Complete|Incomplete"] == "Complete" 205 | ] 206 | error_rewards = [ 207 | x["Task_score"]["Task_score"] 208 | for x in eval_list 209 | if x["Complete|Incomplete"] == "Error" 210 | ] 211 | single_OB_rewards = [ 212 | x["Task_score"]["Task_score"] 213 | for x in eval_list 214 | if x["Single|Multiple_objects"] == "Single_Object" 215 | ] 216 | multiple_OB_rewards = [ 217 | x["Task_score"]["Task_score"] 218 | for x in eval_list 219 | if x["Single|Multiple_objects"] == "Multiple_Objects" 220 | ] 221 | single_OP_rewards = [ 222 | x["Task_score"]["Task_score"] 223 | for x in eval_list 224 | if x["Single|Multiple_operations"] == "Single_Operation" 225 | ] 226 | multiple_OP_rewards = [ 227 | x["Task_score"]["Task_score"] 228 | for x in eval_list 229 | if x["Single|Multiple_operations"] == "Multiple_Operations" 230 | ] 231 | average_structured_rewards = ( 232 | np.average(structured_rewards) if structured_rewards else "NaN" 233 | ) 234 | average_unstrctured_rewards = ( 235 | np.average(unstrctured_rewards) if unstrctured_rewards else "NaN" 236 | ) 237 | average_precise_rewards = np.average(precise_rewards) if precise_rewards else "NaN" 238 | average_vague_rewards = np.average(vague_rewards) if vague_rewards else "NaN" 239 | average_complete_rewards = ( 240 | np.average(complete_rewards) if complete_rewards else "NaN" 241 | ) 242 | average_error_rewards = np.average(error_rewards) if error_rewards else "NaN" 243 | average_single_OB_rewards = ( 244 | np.average(single_OB_rewards) if single_OB_rewards else "NaN" 245 | ) 246 | average_multiple_OB_rewards = ( 247 | np.average(multiple_OB_rewards) if multiple_OB_rewards else "NaN" 248 | ) 249 | average_single_OP_rewards = ( 250 | np.average(single_OP_rewards) if single_OP_rewards else "NaN" 251 | ) 252 | average_multiple_OP_rewards = ( 253 | np.average(multiple_OP_rewards) if multiple_OP_rewards else "NaN" 254 | ) 255 | 256 | reward_matrix = { 257 | "Structured": f"Structured language: {average_structured_rewards}\n", 258 | "Unstructured": f"Unstructured language: {average_unstrctured_rewards}\n", 259 | "Precise": f"Precise detail: {average_precise_rewards}\n", 260 | "Vague": f"Vague detail: {average_vague_rewards}\n", 261 | "Complete": f"Completed instruction: {average_complete_rewards}\n", 262 | "Error": f"Incomplete (error) instruction: {average_error_rewards}\n", 263 | "Single_Object": f"Single object: {average_single_OB_rewards}\n", 264 | "Multiple_Objects": f"Multiple objects: {average_multiple_OB_rewards}\n", 265 | "Single_Operation": f"Single operation: {average_single_OP_rewards}\n", 266 | "Multiple_Operations": f"Multiple operations: {average_multiple_OP_rewards}\n" 267 | } 268 | 269 | if args.task_group == 'All': 270 | print("Average reward in different metrics:\n") 271 | reward = ( 272 | f"Structured language: {average_structured_rewards}\n" 273 | f"Unstructured language: {average_unstrctured_rewards}\n" 274 | f"Precise detail: {average_precise_rewards}\n" 275 | f"Vague detail: {average_vague_rewards}\n" 276 | f"Complete instruction: {average_complete_rewards}\n" 277 | f"Incomplete (error) instruction: {average_error_rewards}\n" 278 | f"Single object: {average_single_OB_rewards}\n" 279 | f"Multiple objects: {average_multiple_OB_rewards}\n" 280 | f"Single operation: {average_single_OP_rewards}\n" 281 | f"Multiple operations: {average_multiple_OP_rewards}\n" 282 | f"Average tasks: {average_task_rewards}\n" 283 | f"Comprehensive rewards: {comprehensive_rewards}" 284 | ) 285 | else: 286 | print("Average reward of the task group you specified:\n") 287 | reward = reward_matrix[args.task_group] 288 | print(reward) 289 | text_result_path = f"{args.result_dir}/{args.model.replace('/', '_')}/{datetime.datetime.now().strftime('%Y-%m-%d-%H-%M')}_{args.task_group}.txt" 290 | with open(text_result_path, "w", encoding="utf-8") as w: 291 | w.write(reward) 292 | print(f"The experiment result has been saved in {args.result_dir}.") 293 | 294 | organized_data = [score_format(x) for x in eval_list] 295 | if args.huggingface_user_name: 296 | dataset = Dataset.from_list(organized_data) 297 | dataet_dict = DatasetDict({"result": dataset}) 298 | dataet_dict.push_to_hub( 299 | f"{args.huggingface_user_name}/{args.exp_name}", 300 | token=os.getenv("HUGGINGFACE_TOKEN"), 301 | private=args.huggingface_private, 302 | ) 303 | print( 304 | f"Dataset successfully pushed to the hub at https://huggingface.co/{args.huggingface_user_name}/{args.exp_name}" 305 | ) 306 | -------------------------------------------------------------------------------- /methods/metric.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | from sentence_transformers import SentenceTransformer, util 4 | 5 | # 6 | model = SentenceTransformer("all-MiniLM-L6-v2", device="cuda") 7 | # 8 | 9 | def similarity(str1, str2): 10 | str1_embedding = model.encode(str1, convert_to_tensor=True) 11 | str2_embedding = model.encode(str2, convert_to_tensor=True) 12 | similarity_score = util.pytorch_cos_sim(str2_embedding, str1_embedding) 13 | return similarity_score if similarity_score < 0.90 else 1 14 | 15 | 16 | def ground_check(informationmatrix): # ground truth extraction & self-test 17 | total_define_argument = 0 18 | total_transfer_var = 0 19 | total_function_calling = 0 20 | total_single_tool = 0 21 | total_multiple_tool = 0 22 | total_plan_execution = 0 23 | 24 | def argu_check(func_dict: dict): 25 | nonlocal total_define_argument, total_transfer_var 26 | for argu in func_dict["arguments_value"]: 27 | if argu[1][1]: 28 | if argu[0] in func_dict["first_defined_arguments"]: 29 | total_define_argument += 1 30 | if argu[0] in func_dict["transfered_arguments"]: 31 | total_transfer_var += 1 32 | 33 | for file in informationmatrix: 34 | steps = list(file.keys()) 35 | if "filepath" in steps: 36 | if file["filepath"][0] != "missing": 37 | if file["change_maker"]: 38 | total_define_argument += 1 39 | total_function_calling += 1 40 | steps.pop(steps.index("filepath")) 41 | if "save" in steps and file["save"]: 42 | total_define_argument += 1 43 | total_function_calling += 1 44 | steps.pop(steps.index("save")) 45 | for step in steps: 46 | if step in ["pages", "annotation"]: 47 | continue 48 | for call in file[step]: 49 | total_function_calling += 1 50 | argu_check(call) 51 | if len(call["tool_callings"]) == 1: 52 | total_function_calling += 1 53 | total_single_tool += 1 54 | elif len(call["tool_callings"]) > 1: 55 | total_function_calling += len(call["tool_callings"]) 56 | total_multiple_tool += 1 57 | for tool in call["tool_callings"]: 58 | argu_check(tool) 59 | if step in ["change_maker", "recorder"]: 60 | total_plan_execution += len(file[step]) 61 | 62 | return [ 63 | total_define_argument, 64 | total_transfer_var, 65 | total_function_calling, 66 | total_single_tool, 67 | total_multiple_tool, 68 | total_plan_execution, 69 | ] 70 | 71 | 72 | def executable_check(inform_matrix): 73 | executabe = True 74 | for file in inform_matrix: 75 | if not file["filepath"][1]: 76 | executabe = False 77 | break 78 | update_file_check = [x["save_path"][1] for x in file["save"]] 79 | if update_file_check and not all(update_file_check): 80 | executabe = False 81 | break 82 | steps = list(file.keys()) 83 | for step in steps: 84 | if step in ["filepath", "save", "pages", "annotation"]: 85 | continue 86 | for call in file[step]: 87 | argu_check = [ 88 | False if x[1][0] and not x[1][1] else True 89 | for x in call["arguments_value"] 90 | ] 91 | if not all(argu_check): 92 | executabe = False 93 | break 94 | for tool in call["tool_callings"]: 95 | if tool["arguments_value"]: 96 | tool_argu_check = [ 97 | False if x[1][0] and not x[1][1] else True 98 | for x in tool["arguments_value"] 99 | ] 100 | if not any(tool_argu_check): 101 | executabe = False 102 | break 103 | return executabe 104 | 105 | 106 | def cross_check(groundtruth: list, reaction: list, mode: str): 107 | totalcount = ground_check(groundtruth) 108 | execute_score = 30 if executable_check(reaction) else 0 109 | 110 | task_detail_score, totalcount = file_compare( 111 | groundtruth, totalcount, reaction, mode 112 | ) 113 | task_score = [ 114 | max(x / y, 0) if y > 0 else 1 for x, y in zip(task_detail_score, totalcount) 115 | ] 116 | final_score = execute_score + sum(task_score) * 70 / 6 117 | return final_score, task_detail_score, totalcount 118 | 119 | 120 | def file_compare(groundtruth: list, totalcount: list, reaction: list, mode: str): 121 | sub_task_details = np.zeros(6) 122 | ground_list = copy.deepcopy(groundtruth) 123 | react_list = copy.deepcopy(reaction) 124 | for file in ground_list: 125 | possible_score = [] 126 | score_details = [] 127 | steps = list(file.keys()) 128 | temp_totalcount = [] 129 | if react_list: 130 | for pre_file in react_list: 131 | sub_task_score = np.zeros(6) 132 | if pre_file["change_maker"]: 133 | if ( 134 | pre_file["filepath"] == file["filepath"] 135 | and file["filepath"][0] != "missing" 136 | ): 137 | sub_task_score[0] += 1 138 | if pre_file["filepath"][1] and pre_file["filepath"][0] != "missing": 139 | sub_task_score[2] += 1 140 | if file["save"]: 141 | save_value = [ 142 | ( 143 | True 144 | if x 145 | and x["save_path"][0] == file["save"][0]["save_path"][0] 146 | else False 147 | ) 148 | for x in pre_file["save"] 149 | ] 150 | if any(save_value): 151 | sub_task_score[0] += 1 152 | save_validity = [x["save_path"][1] for x in file["save"]] 153 | sub_task_score[2] += min(save_validity.count(True), 1) 154 | sub_task_score[2] -= save_validity.count(False) 155 | else: 156 | save_value = [True if x else None for x in pre_file["save"]] 157 | if any(save_value): 158 | sub_task_score[0] -= len(save_value) 159 | sub_task_score[2] -= len(save_value) 160 | for step in steps: 161 | if step in ["filepath", "save", "pages", "annotation"]: 162 | continue 163 | sub_task_score += call_compare( 164 | file[step], totalcount, pre_file[step], mode 165 | ) 166 | delta_task_score, tem_count = plan_compare( 167 | file, totalcount, pre_file, mode 168 | ) 169 | sub_task_score += delta_task_score 170 | temp_totalcount.append(copy.deepcopy(tem_count)) 171 | comprehensive_score = task_grade(tem_count, list(sub_task_score)) 172 | score_details.append(copy.deepcopy(sub_task_score)) 173 | possible_score.append(copy.deepcopy(comprehensive_score)) 174 | index = possible_score.index(max(possible_score)) 175 | sub_task_details = sub_task_details + score_details[index] 176 | react_list.pop(index) 177 | totalcount = temp_totalcount[index] 178 | if react_list: 179 | sub_task_details -= ground_check(react_list) 180 | return sub_task_details, totalcount 181 | 182 | 183 | def task_grade(totalcount: list, sub_task_score: list): 184 | scores = [y / x if x != 0 else 1 for x, y in zip(totalcount, sub_task_score)] 185 | comprehensive_score = sum(scores) * 70 / 6 186 | return max(comprehensive_score, 0) 187 | 188 | 189 | def fun_validity_check(call): 190 | function_validity = True 191 | if call["arguments_value"]: 192 | for x in call["arguments_value"]: 193 | if x[1][0] and not x[1][1]: 194 | function_validity = False 195 | return function_validity 196 | 197 | 198 | def call_compare(ground_sequence, totalcount, pre_sequence, mode): 199 | ground_list = copy.deepcopy(ground_sequence) 200 | pre_list = copy.deepcopy(pre_sequence) 201 | sub_task_details = np.zeros(6) 202 | if ground_list: 203 | for ground in ground_list: 204 | possible_score = [] 205 | score_details = [] 206 | if pre_list: 207 | for pre in pre_list: 208 | sub_task_score = np.zeros(6) 209 | for x, y in zip(ground["arguments_value"], pre["arguments_value"]): 210 | if x[1][1]: 211 | if x[0] == "action": 212 | sub_task_score[0] += similarity(x[1][0], y[1][0]) 213 | argu_value = False 214 | elif mode == "precise": 215 | argu_value = True if x[1][0] == y[1][0] else False 216 | else: 217 | if x[0] in ground["vaguly_defined_arguments"]: 218 | argu_value = True if y[1][1] else False 219 | else: 220 | argu_value = True if x[1][0] == y[1][0] else False 221 | if x[0] in ground["first_defined_arguments"]: 222 | sub_task_score[0] += 1 if argu_value else 0 223 | if x[0] in ground["transfered_arguments"]: 224 | sub_task_score[1] += 1 if argu_value else 0 225 | else: 226 | if x[0] in ground["first_defined_arguments"]: 227 | sub_task_score[0] -= 1 if y[1][0] else 0 228 | if x[0] in ground["transfered_arguments"]: 229 | sub_task_score[1] += 1 if y[1][0] else 0 230 | sub_task_score[2] += 1 if fun_validity_check(pre) else 0 231 | sub_task_score += tool_compare( 232 | ground["tool_callings"], pre["tool_callings"] 233 | ) 234 | comprehensive_score = task_grade(totalcount, list(sub_task_score)) 235 | score_details.append(sub_task_score) 236 | possible_score.append(comprehensive_score) 237 | index = possible_score.index(max(possible_score)) 238 | sub_task_details = sub_task_details + score_details[index] 239 | pre_list.pop(index) 240 | if pre_list: 241 | for last_pre in pre_list: 242 | sub_task_details[2] -= 1 243 | for argu in last_pre["arguments_value"]: 244 | if argu[1][1]: 245 | if argu[0] in last_pre["first_defined_arguments"]: 246 | sub_task_details[0] -= 1 247 | if argu[0] in last_pre["transfered_arguments"]: 248 | sub_task_details[1] -= 1 249 | if len(last_pre["tool_callings"]) == 1: 250 | sub_task_details[2] -= 1 251 | sub_task_details[3] -= 1 252 | elif len(last_pre["tool_callings"]) > 1: 253 | sub_task_details[2] -= len(last_pre["tool_callings"]) 254 | sub_task_details[4] -= 1 255 | return sub_task_details 256 | 257 | 258 | def tool_compare(ground_tool: list, pre_tool: list): 259 | ground_list = copy.deepcopy(ground_tool) 260 | pre_list = copy.deepcopy(pre_tool) 261 | sub_task_details = np.zeros(6) 262 | if len(ground_list) == 1: 263 | if len(pre_list) == 1: 264 | sub_task_details[3] += ( 265 | 1 if ground_list[0]["tool_name"] == pre_list[0]["tool_name"] else 0 266 | ) 267 | if ground_list[0]["arguments_value"]: 268 | for arg in ground_list[0]["arguments_value"]: 269 | if arg[1][1] and arg in pre_list[0]["arguments_value"]: 270 | sub_task_details[0] += 1 271 | sub_task_details[2] += 1 if fun_validity_check(pre_list[0]) else 0 272 | else: 273 | if ground_list[0] in pre_list: 274 | if ground_list[0]["arguments_value"]: 275 | for arg in ground_list[0]["arguments_value"]: 276 | if arg[1][1]: 277 | sub_task_details[0] += 1 278 | pre_list.pop(pre_list.index(ground_list[0])) 279 | sub_task_details[2] -= len(pre_list) 280 | else: 281 | if ground_list: 282 | ground_tool_name = [x["tool_name"] for x in ground_list] 283 | pre_tool_name = [x["tool_name"] for x in pre_list] 284 | if ground_tool_name == pre_tool_name: 285 | sub_task_details[4] += 1 286 | for ground in ground_list: 287 | if ground in pre_list: 288 | if ground["arguments_value"]: 289 | for arg in ground["arguments_value"]: 290 | if arg[1][1]: 291 | sub_task_details[0] += 1 292 | validity_tool_calling = [fun_validity_check(x) for x in pre_list] 293 | sub_task_details[2] += min(validity_tool_calling.count(True), len(ground_list)) 294 | sub_task_details[2] -= max( 295 | validity_tool_calling.count(True) - len(ground_list), 0 296 | ) 297 | return sub_task_details 298 | 299 | 300 | def plan_compare(file: dict, totalcount: list, pre_file: dict, mode: str): 301 | ground_change_maker = copy.deepcopy(file["change_maker"]) 302 | ground_post_maker = copy.deepcopy(file["post_change_maker"]) 303 | pre_change_maker = copy.deepcopy(pre_file["change_maker"]) 304 | pre_post_maker = copy.deepcopy(pre_file["post_change_maker"]) 305 | ground_recorder = copy.deepcopy(file["recorder"]) 306 | pre_recorder = copy.deepcopy(pre_file["recorder"]) 307 | sub_task_details = np.zeros(6) 308 | if len(pre_change_maker) > len(ground_change_maker): 309 | totalcount[5] += len(pre_change_maker) - len(ground_change_maker) 310 | if len(pre_recorder) > len(ground_recorder): 311 | totalcount[5] += len(pre_recorder) - len(ground_recorder) 312 | 313 | def estimate_plan(ground_list, pre_list, is_recorder=False): 314 | for ground in ground_list: 315 | if pre_list: 316 | for pre in pre_list: 317 | plan_check = True 318 | for x, y in zip(ground["arguments_value"], pre["arguments_value"]): 319 | if mode == "precise": 320 | if x[1][0] != y[1][0]: 321 | plan_check = False 322 | break 323 | else: 324 | if x[0] in ground["vaguly_defined_arguments"]: 325 | if x[1][1] != y[1][1]: 326 | plan_check = False 327 | break 328 | else: 329 | if x[1][0] != y[1][0]: 330 | plan_check = False 331 | break 332 | if ground["tool_callings"] != pre["tool_callings"]: 333 | plan_check = False 334 | if plan_check: 335 | sub_task_details[5] += 0.5 if not is_recorder else 1 336 | pre_list.pop(pre_list.index(pre)) 337 | break 338 | return pre_list 339 | 340 | if ground_change_maker: 341 | estimate_plan(ground_change_maker, pre_change_maker) 342 | if ground_post_maker: 343 | pre_post_maker = estimate_plan(ground_post_maker, pre_post_maker) 344 | post_count = len(ground_change_maker) - len(ground_post_maker) 345 | sub_task_details[5] += max((post_count - len(pre_post_maker)), 0) * 0.5 346 | if ground_recorder: 347 | estimate_plan(ground_recorder, pre_recorder, True) 348 | return sub_task_details, totalcount 349 | -------------------------------------------------------------------------------- /utils/testf/functions.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import copy 3 | import re 4 | import fitz 5 | import sys 6 | import os 7 | 8 | sys.path.append(os.path.abspath('../..')) 9 | 10 | from DrafterBench.utils.testf.test_types import ( 11 | extrracted_anno, 12 | fontlist, 13 | TEXT_ALIGN_CENTER, 14 | TEXT_ALIGN_JUSTIFY, 15 | TEXT_ALIGN_RIGHT, 16 | TEXT_ALIGN_LEFT, 17 | fileobject, 18 | ) 19 | 20 | 21 | align_list = [ 22 | TEXT_ALIGN_CENTER, 23 | TEXT_ALIGN_LEFT, 24 | TEXT_ALIGN_RIGHT, 25 | TEXT_ALIGN_JUSTIFY, 26 | fitz.TEXT_ALIGN_CENTER, 27 | fitz.TEXT_ALIGN_JUSTIFY, 28 | fitz.TEXT_ALIGN_LEFT, 29 | fitz.TEXT_ALIGN_RIGHT, 30 | 0, 31 | 1, 32 | 2, 33 | 3, 34 | "center", 35 | "left", 36 | "right", 37 | "justify", 38 | ] 39 | 40 | global taskinformation 41 | taskinformation = [] 42 | 43 | tool_calling_format = { 44 | "tool_name": "tool_name", 45 | "arguments_value": [], 46 | "necessary_arguments": [], 47 | "transfered_arguments": [], 48 | "first_defined_arguments": [], 49 | "vaguly_defined_arguments": [], 50 | "save": False, 51 | } 52 | 53 | 54 | def convert_num(num_in_str): 55 | try: 56 | d = ast.literal_eval(num_in_str) 57 | except Exception as e: 58 | d = num_in_str 59 | return d 60 | 61 | 62 | def validity_check(argu: any, tar_type: list): 63 | return True if type(argu) in tar_type else False 64 | 65 | 66 | def doctrack(doc: any) -> fileobject: 67 | if not isinstance(doc, fileobject): 68 | doc = fileobject() 69 | filepath = doc.fileblock["filepath"] 70 | global taskinformation 71 | exsistfile = [x["filepath"] for x in taskinformation] 72 | if filepath not in exsistfile: 73 | taskinformation.append(doc.fileblock) 74 | return doc 75 | 76 | 77 | def page_track(doc: fileobject, pagenumber: any = "Unknown"): 78 | exsist_pages = doc.fileblock["pages"] 79 | page = ( 80 | pagenumber, 81 | ( 82 | True 83 | if type(pagenumber) is int or pagenumber in ["missing", "Missing"] 84 | else False 85 | ), 86 | ) 87 | if page in exsist_pages: 88 | page_index = exsist_pages.index(page) 89 | else: 90 | doc.fileblock["pages"].append(page) 91 | page_index = len(doc.fileblock["pages"]) - 1 92 | return page, page_index 93 | 94 | 95 | def rect_track(doc: fileobject, clip: any = "Unknown"): 96 | if isinstance(clip, extrracted_anno): 97 | rect = clip 98 | rect_infro = ( 99 | str(clip.doc[0]) 100 | + "-" 101 | + str(clip.page[0]) 102 | + "-" 103 | + str(clip.order_or_annocolor[0]), 104 | ( 105 | True 106 | if all([clip.doc[1], clip.page[1], clip.order_or_annocolor[1]]) 107 | else False 108 | ), 109 | ) 110 | elif ( 111 | type(clip) is list 112 | and type(clip[0]) is dict 113 | and list(clip[0].keys()) == ["data_name", "position_arguments"] 114 | ): 115 | rect = ( 116 | clip[0]["position_arguments"][0][1], 117 | clip[0]["position_arguments"][1][1], 118 | clip[0]["position_arguments"][2][1], 119 | ) 120 | rect_infro = ( 121 | str(rect[0][0]) + "-" + str(rect[1][0]) + "-" + str(rect[2][0]), 122 | True if all([rect[0][1], rect[1][1], rect[2][1]]) else False, 123 | ) 124 | else: 125 | rect = extrracted_anno() 126 | rect_infro = ( 127 | extrracted_anno().doc[0] 128 | + "-" 129 | + extrracted_anno().page[0] 130 | + "-" 131 | + extrracted_anno().order_or_annocolor[0], 132 | False, 133 | ) 134 | if rect in doc.fileblock["annotation"]: 135 | rect_index = doc.fileblock["annotation"].index(rect) 136 | else: 137 | doc.fileblock["annotation"].append(rect) 138 | rect_index = len(doc.fileblock["annotation"]) - 1 139 | return rect_infro, rect_index 140 | 141 | 142 | def class_track(doc: fileobject, class_block: dict, instance: str): 143 | exsist_instances = [ 144 | {k: v for k, v in x.items() if k != "tool_callings"} 145 | for x in doc.fileblock[instance] 146 | if type(x) is dict 147 | ] 148 | block = {k: v for k, v in class_block.items() if k != "tool_callings"} 149 | if block in exsist_instances: 150 | tracked_index = exsist_instances.index(block) 151 | else: 152 | doc.fileblock[instance].append(class_block) 153 | tracked_index = len(doc.fileblock[instance]) - 1 154 | return doc.fileblock[instance][tracked_index] 155 | 156 | 157 | def tool_track(class_block: dict, tool_block: dict): 158 | exsist_tools = class_block["tool_callings"] 159 | if tool_block in exsist_tools: 160 | tracked_index = class_block["tool_callings"].index(tool_block) 161 | else: 162 | class_block["tool_callings"].append(tool_block) 163 | tracked_index = len(class_block["tool_callings"]) - 1 164 | return class_block["tool_callings"][tracked_index] 165 | 166 | 167 | def tool_calling_update(tool_name: str, class_block: dict, tool_block=None): 168 | tool_block = copy.deepcopy(tool_calling_format) if not tool_block else tool_block 169 | tool_block["tool_name"] = tool_name 170 | tool_track(class_block, tool_block) 171 | 172 | 173 | def open(filepath: any = "Unknow"): 174 | doc = fileobject(filepath) 175 | taskinformation.append(doc.fileblock) 176 | return doc 177 | 178 | 179 | class basic: 180 | def __init__( 181 | self, 182 | doc: any = "Unknown", 183 | pagenumber: any = "Unknown", 184 | ): 185 | self.doc = doctrack(doc) 186 | self.page, self.page_index = page_track(self.doc, pagenumber) 187 | 188 | 189 | class extractanno: 190 | def __init__( 191 | self, doc: any = "Unknow", pagenumber: any = "Unknow", annocolor: any = "Unknow" 192 | ): 193 | self.doc = doctrack(doc) 194 | self.page = (pagenumber, True if type(pagenumber) in [int] else False) 195 | self.annoclolor = (annocolor, True if type(annocolor) is str else False) 196 | self.extractor_block = { 197 | "operation": "instantiate_extractor", 198 | "arguments_value": [("doc", self.doc.fileblock["filepath"])], 199 | "necessary_arguments": ["doc"], 200 | "transfered_arguments": ["doc"], 201 | "first_defined_arguments": [], 202 | "vaguly_defined_arguments": [], 203 | "tool_callings": [], 204 | } 205 | self.extractor_block = class_track(self.doc, self.extractor_block, "extractor") 206 | 207 | def getclip_rfpoint( 208 | self, pagenumber: any = "Unknow", rectangleorder: any = "Unknow" 209 | ): 210 | page = (pagenumber, True if type(pagenumber) is int else False) 211 | rect = (rectangleorder, True if type(rectangleorder) is int else False) 212 | tool_block = copy.deepcopy(tool_calling_format) 213 | key_list = ["page", "order"] 214 | argulist = [("page", page), ("order", rect)] 215 | tool_block["arguments_value"] = argulist 216 | tool_block["first_defined_arguments"].extend(key_list) 217 | tool_calling_update("exract_rect", self.extractor_block, tool_block) 218 | extracted_rectangle_rfpoint = extrracted_anno( 219 | self.doc.fileblock["filepath"], page, rect 220 | ) 221 | return extracted_rectangle_rfpoint, extracted_rectangle_rfpoint 222 | 223 | def anno(self): 224 | tool_block = copy.deepcopy(tool_calling_format) 225 | key_list = ["page", "order"] 226 | argulist = [("page", self.page), ("order", self.annoclolor)] 227 | tool_block["arguments_value"] = argulist 228 | tool_block["first_defined_arguments"].extend(key_list) 229 | tool_calling_update("exract_anno", self.extractor_block, tool_block) 230 | extracteddrawing = extrracted_anno( 231 | self.doc.fileblock["filepath"], self.page, self.annoclolor 232 | ) 233 | selected_vectors = [ 234 | { 235 | "data_name": "vectors", 236 | "position_arguments": [ 237 | ("doc", self.doc.fileblock["filepath"]), 238 | ("page", self.page), 239 | ("clip", self.annoclolor), 240 | ], 241 | } 242 | ] 243 | selected_vectors[0]["data_name"] = "drawings" 244 | return selected_vectors 245 | 246 | 247 | class basic_selector(basic): 248 | def __init__( 249 | self, doc: any = "Unknown", pagenumber: any = "Unknow", clip: any = "Uknown" 250 | ): 251 | super().__init__(doc, pagenumber) 252 | self.rect, self.rect_index = rect_track(self.doc, clip) 253 | self.selector_block = { 254 | "operation": "instantiate_selector", 255 | "arguments_value": [ 256 | ("doc", self.doc.fileblock["filepath"]), 257 | ("page", self.page), 258 | ("clip", self.rect), 259 | ], 260 | "necessary_arguments": ["doc", "page", "clip"], 261 | "transfered_arguments": ["doc", "page", "clip"], 262 | "first_defined_arguments": [], 263 | "vaguly_defined_arguments": [], 264 | "tool_callings": [], 265 | } 266 | self.selected_vectors = [ 267 | { 268 | "data_name": "all_lines", 269 | "position_arguments": [ 270 | ("doc", self.doc.fileblock["filepath"]), 271 | ("page", self.page), 272 | ("clip", self.rect), 273 | ], 274 | } 275 | ] 276 | 277 | def mode1_drawings_Window_Cover_Enclosure(self): 278 | tool_calling_update("select_mode1_drawings", self.selector_block) 279 | selected_vectors = copy.deepcopy(self.selected_vectors) 280 | selected_vectors[0]["data_name"] = "mode1_drawings" 281 | return selected_vectors 282 | 283 | def mode2_drawings_Cross_Touch_Intersect(self): 284 | tool_calling_update("select_mode2_drawings", self.selector_block) 285 | selected_vectors = copy.deepcopy(self.selected_vectors) 286 | selected_vectors[0]["data_name"] = "mode2_drawings" 287 | return selected_vectors 288 | 289 | def mode1_lines_Window_Cover_Enclosure(self): 290 | tool_calling_update("select_mode1_lines", self.selector_block) 291 | selected_vectors = copy.deepcopy(self.selected_vectors) 292 | selected_vectors[0]["data_name"] = "mode1_lines" 293 | return selected_vectors 294 | 295 | def mode2_lines_Cross_Touch_Intersect(self): 296 | tool_calling_update("select_mode2_lines", self.selector_block) 297 | selected_vectors = copy.deepcopy(self.selected_vectors) 298 | selected_vectors[0]["data_name"] = "mode2_lines" 299 | return selected_vectors 300 | 301 | def mode1_rebars_Window_Cover_Enclosure(self): 302 | tool_calling_update("select_mode1_rebars", self.selector_block) 303 | selected_vectors = copy.deepcopy(self.selected_vectors) 304 | selected_vectors[0]["data_name"] = "mode1_rebars" 305 | return selected_vectors 306 | 307 | def mode2_rebars_Cross_Touch_Intersect(self): 308 | tool_calling_update("select_mode2_rebars", self.selector_block) 309 | selected_vectors = copy.deepcopy(self.selected_vectors) 310 | selected_vectors[0]["data_name"] = "mode2_rebars" 311 | return selected_vectors 312 | 313 | def mode1_columns_Window_Cover_Enclosure(self): 314 | tool_calling_update("select_mode1_columns", self.selector_block) 315 | selected_vectors = copy.deepcopy(self.selected_vectors) 316 | selected_vectors[0]["data_name"] = "mode1_columns" 317 | return selected_vectors 318 | 319 | def mode2_columns_Cross_Touch_Intersect(self): 320 | tool_calling_update("select_mode2_columns", self.selector_block) 321 | selected_vectors = copy.deepcopy(self.selected_vectors) 322 | selected_vectors[0]["data_name"] = "mode2_columns" 323 | return selected_vectors 324 | 325 | 326 | class selector(basic_selector): 327 | def __init__( 328 | self, doc: any = "Unknown", pagenumber: any = "Unknow", clip: any = "Uknown" 329 | ): 330 | super().__init__(doc, pagenumber, clip) 331 | self.selector_block = class_track(self.doc, self.selector_block, "selector") 332 | self.selected_lines = self.selected_vectors 333 | 334 | 335 | class select_from_drawings(basic_selector): 336 | def __init__( 337 | self, 338 | doc: any = "Unknown", 339 | pagenumber: any = "Unknown", 340 | cdrawings: any = "Unknown", 341 | ): 342 | super().__init__(doc, pagenumber, cdrawings) 343 | self.anno, self.anno_index = rect_track(self.doc, cdrawings) 344 | self.selector_block = { 345 | "operation": "instantiate_selector", 346 | "arguments_value": [ 347 | ("doc", self.doc.fileblock["filepath"]), 348 | ("page", self.page), 349 | ("cdrawings", self.anno), 350 | ], 351 | "necessary_arguments": ["doc", "page", "cdrawings"], 352 | "transfered_arguments": ["doc", "page", "cdrawings"], 353 | "first_defined_arguments": [], 354 | "vaguly_defined_arguments": [], 355 | "tool_callings": [], 356 | } 357 | self.selected_vectors = [ 358 | { 359 | "data_name": "vectors", 360 | "position_arguments": [ 361 | ("doc", self.doc.fileblock["filepath"]), 362 | ("page", self.page), 363 | ("clip", self.anno), 364 | ], 365 | } 366 | ] 367 | self.selector_block = class_track( 368 | self.doc, self.selector_block, "select_from_drawings" 369 | ) 370 | 371 | def get_rebar_column(self): 372 | tool_calling_update("select_rebar_and_columns", self.selector_block) 373 | selected_vectors = copy.deepcopy(self.selected_vectors) 374 | selected_vectors[0]["data_name"] = "rebars_and_columns" 375 | return selected_vectors 376 | 377 | 378 | class manipulate_text(basic): 379 | def __init__( 380 | self, 381 | doc="Unknown", 382 | pagenumber="Unknown", 383 | clip="Unknown", 384 | text="Unknown", 385 | font=None, 386 | fontsize=None, 387 | textcolor=None, 388 | fill=None, 389 | rotate=None, 390 | align=None, 391 | ): 392 | super().__init__(doc, pagenumber) 393 | self.rect, _ = rect_track(self.doc, clip) 394 | self._text = (text, validity_check(text, [str])) 395 | self.font = (font, True if font in fontlist else False) 396 | self.fontsize = (fontsize, validity_check(fontsize, [int])) 397 | self.textcolor = (textcolor, validity_check(textcolor, [str])) 398 | self.fill = (fill, validity_check(fill, [str])) 399 | self.rotate = (rotate, validity_check(rotate, [int, float])) 400 | self.align = (align, True if align in align_list else False) 401 | self.text_manipulator_block = { 402 | "operation": "instantiate_text_manipulator", 403 | "arguments_value": [ 404 | ("doc", self.doc.fileblock["filepath"]), 405 | ("page", self.page), 406 | ("clip", self.rect), 407 | ("text", self._text), 408 | ("font", self.font), 409 | ("fontsize", self.fontsize), 410 | ("textcolor", self.textcolor), 411 | ("fill", self.fill), 412 | ("rotate", self.rotate), 413 | ("align", self.align), 414 | ], 415 | "necessary_arguments": ["doc", "page", "clip", "text"], 416 | "transfered_arguments": ["doc", "page", "clip"], 417 | "first_defined_arguments": [ 418 | "text", 419 | "font", 420 | "fontsize", 421 | "textcolor", 422 | "fill", 423 | "rotate", 424 | "align", 425 | ], 426 | "vaguly_defined_arguments": [ 427 | "font", 428 | "fontsize", 429 | "textcolor", 430 | "fill", 431 | "rotate", 432 | "align", 433 | ], 434 | "tool_callings": [], 435 | } 436 | self.text_manipulator_block = class_track( 437 | self.doc, self.text_manipulator_block, "change_maker" 438 | ) 439 | 440 | @property 441 | def text(self): 442 | return self._text 443 | 444 | @text.setter 445 | def text(self, te): 446 | self._text = (te, validity_check(te, [str])) 447 | self._update() 448 | 449 | def _update(self): 450 | updated_text = ("text", self._text) 451 | self.text_manipulator_block["arguments_value"] = [ 452 | updated_text if x[0] == "text" else x 453 | for x in self.text_manipulator_block["arguments_value"] 454 | ] 455 | 456 | def addtext(self): 457 | tool_calling_update("add_text", self.text_manipulator_block) 458 | return self.doc 459 | 460 | def gethortext(self): 461 | tool_calling_update("get_hor_text", self.text_manipulator_block) 462 | return "horizontal_text" 463 | 464 | def getvertext(self): 465 | tool_calling_update("get_ver_text", self.text_manipulator_block) 466 | return "vertical_text" 467 | 468 | def deletetext(self, deltex=None): 469 | tool_block = copy.deepcopy(tool_calling_format) 470 | argulist = [ 471 | ("deltex", (deltex, True if not deltex or type(deltex) is str else False)) 472 | ] 473 | tool_block["arguments_value"] = argulist 474 | tool_block["first_defined_arguments"].append("deltex") 475 | tool_calling_update("delete_text", self.text_manipulator_block, tool_block) 476 | return self.text[0] 477 | 478 | def replacetext(self, retext=None, totext=None): 479 | tool_block = copy.deepcopy(tool_calling_format) 480 | key_list = ["retext", "totext"] 481 | argulist = [ 482 | ("retext", (retext, validity_check(retext, [str]))), 483 | ("totext", (totext, validity_check(totext, [str]))), 484 | ] 485 | tool_block["arguments_value"] = argulist 486 | tool_block["necessary_arguments"].append("totext") 487 | tool_block["first_defined_arguments"].extend(key_list) 488 | tool_calling_update("replace_text", self.text_manipulator_block, tool_block) 489 | return self.text[0] 490 | 491 | 492 | class extract_table(basic): 493 | def __init__( 494 | self, doc: any = "Unknown", pagenumber: any = "Unknow", clip: any = "Uknown" 495 | ): 496 | super().__init__(doc, pagenumber) 497 | self.rect, self.rect_index = rect_track(self.doc, clip) 498 | self.table_extractor_block = { 499 | "operation": "instantiate_table_extractor", 500 | "arguments_value": [ 501 | ("doc", self.doc.fileblock["filepath"]), 502 | ("page", self.page), 503 | ("clip", self.rect), 504 | ], 505 | "necessary_arguments": ["doc", "page", "clip"], 506 | "transfered_arguments": ["doc", "page", "clip"], 507 | "first_defined_arguments": [], 508 | "vaguly_defined_arguments": [], 509 | "tool_callings": [], 510 | } 511 | self.data = [ 512 | [ 513 | { 514 | "data_name": "table_data", 515 | "position_arguments": [ 516 | ("doc", self.doc.fileblock["filepath"]), 517 | ("page", self.page), 518 | ("clip", self.rect), 519 | ], 520 | } 521 | ] 522 | ] 523 | self.table_extractor_block = class_track( 524 | self.doc, self.table_extractor_block, "table_extractor" 525 | ) 526 | 527 | 528 | def data_arrange_check(data): 529 | if data: 530 | format_check = ( 531 | [True if not x or type(x) is list else False for x in data] 532 | if type(data) is list 533 | else [False] 534 | ) 535 | value_check = ( 536 | [[convert_num(y) for y in x] if type(x) is list else x for x in data] 537 | if type(data) is list 538 | else [False] 539 | ) 540 | else: 541 | format_check = [None] 542 | value_check = None 543 | return (value_check, True if all(format_check) else False) 544 | 545 | 546 | class manipulate_table(basic): 547 | def __init__( 548 | self, 549 | doc="Unknown", 550 | pagenumber="Unknown", 551 | clip="Unknown", 552 | data="Unknown", 553 | arrange=None, 554 | font=None, 555 | fontsize=None, 556 | borderwidth=None, 557 | align=None, 558 | ): 559 | super().__init__(doc, pagenumber) 560 | self.rect, self.rect_index = rect_track(self.doc, clip) 561 | self.data = data_arrange_check(data) 562 | self.arrange = data_arrange_check(arrange) 563 | self.font = (font, True if font in fontlist else False) 564 | self.font_size = (fontsize, validity_check(fontsize, [int])) 565 | self.border_width = (borderwidth, validity_check(borderwidth, [int, float])) 566 | self.align = ( 567 | align, 568 | True if align in ["center", "left", "right", "justify"] else False, 569 | ) 570 | self.table_manipulator_block = { 571 | "operation": "instantiate_table_manipulator", 572 | "arguments_value": [ 573 | ("doc", self.doc.fileblock["filepath"]), 574 | ("page", self.page), 575 | ("clip", self.rect), 576 | ("data", self.data), 577 | ("arrange", self.arrange), 578 | ("font", self.font), 579 | ("font_size", self.font_size), 580 | ("border_width", self.border_width), 581 | ("align", self.align), 582 | ], 583 | "necessary_arguments": ["doc", "page", "clip", "data"], 584 | "transfered_arguments": ["doc", "page", "clip"], 585 | "first_defined_arguments": [ 586 | "arrange", 587 | "font", 588 | "font_size", 589 | "border_width", 590 | "align", 591 | ], 592 | "vaguly_defined_arguments": ["font", "font_size", "border_width", "align"], 593 | "tool_callings": [], 594 | } 595 | try: 596 | if type(self.data[0][0]) is dict: 597 | self.table_manipulator_block["transfered_arguments"].append("data") 598 | else: 599 | self.table_manipulator_block["first_defined_arguments"].append("data") 600 | except Exception as e: 601 | self.table_manipulator_block["first_defined_arguments"].append("data") 602 | self.table_manipulator_block = class_track( 603 | self.doc, self.table_manipulator_block, "change_maker" 604 | ) 605 | 606 | def addtable(self): 607 | tool_calling_update("add_table", self.table_manipulator_block) 608 | return self.doc 609 | 610 | def cuttable(self, delrow=None, delcolumn=None): 611 | tool_block = copy.deepcopy(tool_calling_format) 612 | key_list = ["delrow", "delcolumn"] 613 | argulist = [ 614 | ("delrow", (delrow, validity_check(delrow, [list]))), 615 | ("delcolumn", (delcolumn, validity_check(delcolumn, [list]))), 616 | ] 617 | tool_block["arguments_value"] = argulist 618 | tool_block["first_defined_arguments"].extend(key_list) 619 | tool_calling_update("cut_table", self.table_manipulator_block, tool_block) 620 | return self.data 621 | 622 | def emptytable(self, startcell=None, endcell=None): 623 | tool_block = copy.deepcopy(tool_calling_format) 624 | key_list = ["start_cell", "end_cell"] 625 | argulist = [ 626 | ("start_cell", (startcell, validity_check(startcell, [list]))), 627 | ("end_cell", (endcell, validity_check(endcell, [list]))), 628 | ] 629 | tool_block["arguments_value"] = argulist 630 | tool_block["first_defined_arguments"].extend(key_list) 631 | tool_calling_update("empty_table", self.table_manipulator_block, tool_block) 632 | return self.data 633 | 634 | def modifytable(self, startcell=None, endcell=None, repdata="Unknown"): 635 | tool_block = copy.deepcopy(tool_calling_format) 636 | key_list = ["start_cell", "end_cell", "rep_data"] 637 | argulist = [ 638 | ("start_cell", (startcell, validity_check(startcell, [list]))), 639 | ("end_cell", (endcell, validity_check(endcell, [list]))), 640 | ("rep_data", (repdata, validity_check(repdata, [list]))), 641 | ] 642 | tool_block["arguments_value"] = argulist 643 | tool_block["necessary_arguments"].append("rep_data") 644 | tool_block["first_defined_arguments"].extend(key_list) 645 | tool_calling_update("modify_table", self.table_manipulator_block, tool_block) 646 | return self.data 647 | 648 | 649 | def drawing_list_check(list_of_draw): 650 | draw_chcek = ( 651 | [ 652 | ( 653 | True 654 | if type(x) is dict 655 | and list(x.keys()) == ["data_name", "position_arguments"] 656 | else False 657 | ) 658 | for x in list_of_draw 659 | ] 660 | if type(list_of_draw) is list 661 | else [False] 662 | ) 663 | return (copy.deepcopy(list_of_draw), True if all(draw_chcek) else False) 664 | 665 | 666 | class draw_drawer(basic): 667 | def __init__( 668 | self, 669 | doc: any = "Unknown", 670 | pagenumber: any = "Unknown", 671 | listofcdraw: any = "Unknown", 672 | ): 673 | super().__init__(doc, pagenumber) 674 | self.listofdraw = drawing_list_check(listofcdraw) 675 | self.drawer_block = { 676 | "operation": "instantiate_drawer", 677 | "arguments_value": [ 678 | ("doc", self.doc.fileblock["filepath"]), 679 | ("page", self.page), 680 | ("listofdraw", self.listofdraw), 681 | ], 682 | "necessary_arguments": ["doc", "page", "listofdraw"], 683 | "transfered_arguments": ["doc", "page", "listofdraw"], 684 | "first_defined_arguments": [], 685 | "vaguly_defined_arguments": [], 686 | "tool_callings": [], 687 | } 688 | self.drawer_block = class_track(self.doc, self.drawer_block, "change_maker") 689 | 690 | def delete_draw(self): 691 | tool_calling_update("delete_drawings", self.drawer_block) 692 | return self.doc 693 | 694 | 695 | class delete(basic): 696 | def __init__( 697 | self, doc: any = "Unknown", pagenumber: any = "Unknown", clip: any = "Unknown" 698 | ): 699 | super().__init__(doc, pagenumber) 700 | self.rect, self.rect_index = rect_track(self.doc, clip) 701 | self.cleaner_block = { 702 | "operation": "instantiate_cleaner", 703 | "arguments_value": [ 704 | ("doc", self.doc.fileblock["filepath"]), 705 | ("page", self.page), 706 | ("clip", self.rect), 707 | ], 708 | "necessary_arguments": ["doc", "page", "clip"], 709 | "transfered_arguments": ["doc", "page", "clip"], 710 | "first_defined_arguments": [], 711 | "vaguly_defined_arguments": [], 712 | "tool_callings": [], 713 | } 714 | self.cleaner_block = class_track(self.doc, self.cleaner_block, "change_maker") 715 | 716 | def applydelete(self): 717 | tool_calling_update("clean_drawings", self.cleaner_block) 718 | return self.doc 719 | 720 | 721 | class repairer(basic): 722 | def __init__( 723 | self, 724 | doc: any = "Unknown", 725 | pagenumber: any = "Unknown", 726 | clip: any = "Unknown", 727 | sel_drawings: any = "Unknown", 728 | cdrawings: any = "Unknown", 729 | ): 730 | super().__init__(doc, pagenumber) 731 | self.rect, self.rect_index = rect_track(self.doc, clip) 732 | self.sel_drawings = drawing_list_check(sel_drawings) 733 | self.cdrawings = drawing_list_check(cdrawings) 734 | self.repairer_block = { 735 | "operation": "instantiate_repairer", 736 | "arguments_value": [ 737 | ("doc", self.doc.fileblock["filepath"]), 738 | ("page", self.page), 739 | ("clip", self.rect), 740 | ("sel_drawings", self.sel_drawings), 741 | ("cdrawings", self.cdrawings), 742 | ], 743 | "necessary_arguments": ["doc", "page", "clip", "sel_drawings", "cdrawings"], 744 | "transfered_arguments": [ 745 | "doc", 746 | "page", 747 | "clip", 748 | "sel_drawings", 749 | "cdrawings", 750 | ], 751 | "first_defined_arguments": [], 752 | "vaguly_defined_arguments": [], 753 | "tool_callings": [], 754 | } 755 | self.repairer_block = class_track( 756 | self.doc, self.repairer_block, "post_change_maker" 757 | ) 758 | 759 | def del_repair(self): 760 | tool_calling_update("repair_drawings", self.repairer_block) 761 | return self.doc 762 | 763 | 764 | def dashes_check(dashes: any) -> tuple: 765 | dashchecker = re.compile(r"\s*(\[\s*\d*\s+\d*\s*\]\s*\d*)") 766 | dashvalue = dashchecker.search(dashes) if dashes else None 767 | dash = dashvalue.group(0) if dashvalue else None 768 | return (dash, True if dash else False) 769 | 770 | 771 | class manipulate_draw(basic): 772 | def __init__( 773 | self, 774 | doc: any = "Unknown", 775 | pagenumber: any = "Unknown", 776 | sel_drawings: any = "Unknown", 777 | fillcolor=None, 778 | drwcolor=None, 779 | dashes=None, 780 | closePath=None, 781 | lineJoin=None, 782 | lineCap=None, 783 | width=None, 784 | ): 785 | super().__init__(doc, pagenumber) 786 | self.sel_drawings = drawing_list_check(sel_drawings) 787 | self.fillcolor = (fillcolor, validity_check(fillcolor, [str])) 788 | self.drwcolor = (drwcolor, validity_check(drwcolor, [str])) 789 | self.dashes = dashes_check(dashes) 790 | self.closePath = (closePath, validity_check(closePath, [bool])) 791 | self.lineJoin = (lineJoin, True if lineJoin in [0, 1, 2] else False) 792 | self.lineCap = (lineCap, True if lineCap in [0, 1, 2] else False) 793 | self.width = (width, validity_check(width, [int, float])) 794 | self.drawing_manipulator_block = { 795 | "operation": "instantiate_drawing_manipulator", 796 | "arguments_value": [ 797 | ("doc", self.doc.fileblock["filepath"]), 798 | ("page", self.page), 799 | ("sel_drawings", self.sel_drawings), 800 | ("fillcolor", self.fillcolor), 801 | ("drwcolor", self.drwcolor), 802 | ("dashes", self.dashes), 803 | ("closePath", self.closePath), 804 | ("lineJoin", self.lineJoin), 805 | ("lineCap", self.lineCap), 806 | ("width", self.width), 807 | ], 808 | "necessary_arguments": ["doc", "page", "sel_drawings"], 809 | "transfered_arguments": ["doc", "page", "sel_drawings"], 810 | "first_defined_arguments": [ 811 | "fillcolor", 812 | "drwcolor", 813 | "dashes", 814 | "closePath", 815 | "lineJoin", 816 | "lineCap", 817 | "width", 818 | ], 819 | "vaguly_defined_arguments": [ 820 | "fillcolor", 821 | "drwcolor", 822 | "dashes", 823 | "closePath", 824 | "lineJoin", 825 | "lineCap", 826 | "width", 827 | ], 828 | "tool_callings": [], 829 | } 830 | self.drawing_manipulator_block = class_track( 831 | self.doc, self.drawing_manipulator_block, "change_maker" 832 | ) 833 | 834 | def update_draw(self): 835 | tool_calling_update("update_drawings", self.drawing_manipulator_block) 836 | return self.doc 837 | 838 | def add_standrawing(self): 839 | tool_calling_update("add_standrawings", self.drawing_manipulator_block) 840 | return self.doc 841 | 842 | 843 | def missing_structure(missing_information): 844 | file = re.compile("\s*file-level\s*:\s*(?P[^,\s]*)").search( 845 | missing_information 846 | ) 847 | page = re.compile("\s*page-level\s*:\s*(?P[^,\s]*)").search( 848 | missing_information 849 | ) 850 | order = re.compile("\s*order-level\s*:\s*(?P[^,\s]*)").search( 851 | missing_information 852 | ) 853 | base = re.compile("\s*base-level\s*:\s*(?P.*)").search(missing_information) 854 | complete = re.compile("\(incomplete\)").search(missing_information) 855 | file_information = file.group("filename") if file else "Unknown" 856 | page_information = convert_num(page.group("pagenumber")) if page else "Unknown" 857 | order_information = ( 858 | ( 859 | convert_num(order.group("order")), 860 | ( 861 | True 862 | if order.group("order") == "missing" 863 | or type(convert_num(order.group("order"))) in [int, str] 864 | else False 865 | ), 866 | ) 867 | if order 868 | else "Unknown" 869 | ) 870 | action_information = (base.group("base"), True) if base else ("Unknow", False) 871 | complete = ("non_complete", True) if complete else (None, False) 872 | return ( 873 | file_information, 874 | page_information, 875 | order_information, 876 | action_information, 877 | complete, 878 | ) 879 | 880 | 881 | class recorder: 882 | def __init__(self, missing_information): 883 | """file-level: K3R1M8F.pdf, page-level: 5, order-level: 1, base-level: add a new text with rotation of 270 degrees and font size 12.(incomplete)""" 884 | self.file, self.page, self.order, self.action, self.complete = ( 885 | missing_structure(missing_information) 886 | ) 887 | self.doc = open(self.file) 888 | self.page, self.page_index = page_track(self.doc, self.page) 889 | self.rect, self.rect_index = rect_track( 890 | self.doc, 891 | extrracted_anno(self.doc.fileblock["filepath"], self.page, self.order), 892 | ) 893 | self.recorder_block = { 894 | "operation": "instantiate_recorder", 895 | "arguments_value": [ 896 | ("doc", self.doc.fileblock["filepath"]), 897 | ("page", self.page), 898 | ("clip", self.rect), 899 | ("action", self.action), 900 | ("complete", self.complete), 901 | ], 902 | "necessary_arguments": ["doc", "page", "clip", "action", "complete"], 903 | "transfered_arguments": [], 904 | "first_defined_arguments": ["doc", "page", "clip", "action", "complete"], 905 | "vaguly_defined_arguments": [], 906 | "tool_callings": [], 907 | } 908 | self.recorder_block = class_track(self.doc, self.recorder_block, "recorder") 909 | 910 | def recording(self): 911 | tool_calling_update("record", self.recorder_block) 912 | 913 | 914 | def move_check(move): 915 | if type(move) == list and len(move) == 3: 916 | value_check = [True if type(x) in [int, float] else False for x in move[:2]] 917 | value_check.append(True if type(move[2]) == extrracted_anno else False) 918 | else: 919 | value_check = [False] 920 | if value_check[-1]: 921 | move[2] = ( 922 | str(move[2].doc[0]) 923 | + "-" 924 | + str(move[2].page[0]) 925 | + "-" 926 | + str(move[2].order_or_annocolor[0]) 927 | ) 928 | return (move, True if all(value_check) else False) 929 | 930 | 931 | def rotate_check(rotate): 932 | if type(rotate) == list and len(rotate) == 3: 933 | value_check = [True] if rotate[0] == "r" else [False] 934 | value_check.append(True if type(rotate[1]) in [int, float] else False) 935 | value_check.append(True if type(rotate[2]) == extrracted_anno else False) 936 | else: 937 | value_check = [False] 938 | if value_check[-1]: 939 | rotate[2] = ( 940 | str(rotate[2].doc[0]) 941 | + "-" 942 | + str(rotate[2].page[0]) 943 | + "-" 944 | + str(rotate[2].order_or_annocolor[0]) 945 | ) 946 | return (rotate, True if all(value_check) else False) 947 | 948 | 949 | def scal_check(scal): 950 | if type(scal) == list and len(scal) == 3: 951 | value_check = [True] if scal[0] == "sc" else [False] 952 | value_check.append( 953 | True if type(scal[1]) is list and len(scal[1]) == 2 else False 954 | ) 955 | value_check.append(True if type(scal[2]) == extrracted_anno else False) 956 | else: 957 | value_check = [False] 958 | if value_check[-1]: 959 | scal[2] = ( 960 | str(scal[2].doc[0]) 961 | + "-" 962 | + str(scal[2].page[0]) 963 | + "-" 964 | + str(scal[2].order_or_annocolor[0]) 965 | ) 966 | return (scal, True if all(value_check) else False) 967 | 968 | 969 | class project_basic(basic): 970 | def __init__( 971 | self, 972 | doc: any = "Unknown", 973 | pagenumber: any = "Unknown", 974 | clip: any = "Unknown", 975 | move=None, 976 | rotation=None, 977 | scal=None, 978 | ): 979 | super().__init__( 980 | doc, 981 | pagenumber, 982 | ) 983 | self.rect, self.rect_index = rect_track(self.doc, clip) 984 | self.move = (move, move_check(move)) 985 | self.rotation = (rotation, rotate_check(rotation)) 986 | self.scal = (scal, scal_check(scal)) 987 | self.project_block = { 988 | "operation": "instantiate_projector", 989 | "arguments_value": [ 990 | ("doc", self.doc.fileblock["filepath"]), 991 | ("page", self.page), 992 | ("clip", self.rect), 993 | ("move", self.move), 994 | ("rotation", self.rotation), 995 | ("scal", self.scal), 996 | ], 997 | "necessary_arguments": [ 998 | "doc", 999 | "page", 1000 | "clip", 1001 | ["move", "rotation", "scal"], 1002 | ], 1003 | "transfered_arguments": ["doc", "page", "clip"], 1004 | "first_defined_arguments": ["move", "rotation", "scal"], 1005 | "vaguly_defined_arguments": ["move", "rotation", "scal"], 1006 | "tool_callings": [], 1007 | } 1008 | 1009 | def project(self): 1010 | tool_calling_update("project", self.project_block) 1011 | return self.doc 1012 | 1013 | 1014 | class Projector(project_basic): 1015 | def __init__( 1016 | self, 1017 | doc: any = "Unknown", 1018 | pagenumber: any = "Unknown", 1019 | clip: any = "Unknown", 1020 | move=None, 1021 | rotation=None, 1022 | scal=None, 1023 | ): 1024 | super().__init__(doc, pagenumber, clip, move, rotation, scal) 1025 | self.project_block = class_track(self.doc, self.project_block, "change_maker") 1026 | 1027 | 1028 | class project_draw(project_basic): 1029 | def __init__( 1030 | self, 1031 | doc: any = "Unknown", 1032 | pagenumber: any = "Unknown", 1033 | clip: any = "Unknown", 1034 | sel_drawings=None, 1035 | cdrawings=None, 1036 | move=None, 1037 | rotation=None, 1038 | scal=None, 1039 | ): 1040 | super().__init__(doc, pagenumber, clip, move, rotation, scal) 1041 | self.sel_drawings = drawing_list_check(sel_drawings) 1042 | self.cdrawings = drawing_list_check(cdrawings) 1043 | self.project_block = { 1044 | "operation": "instantiate_draw_projector", 1045 | "arguments_value": [ 1046 | ("doc", self.doc.fileblock["filepath"]), 1047 | ("page", self.page), 1048 | ("clip", self.rect), 1049 | ("move", self.move), 1050 | ("rotation", self.rotation), 1051 | ("scal", self.scal), 1052 | ("sel_drawings", self.sel_drawings), 1053 | ("cdrawings", self.cdrawings), 1054 | ], 1055 | "necessary_arguments": [ 1056 | "doc", 1057 | "page", 1058 | "clip", 1059 | ["move", "rotation", "scal"], 1060 | "sel_drawings", 1061 | "cdrawings", 1062 | ], 1063 | "transfered_arguments": [ 1064 | "doc", 1065 | "page", 1066 | "clip", 1067 | "sel_drawings", 1068 | "cdrawings", 1069 | ], 1070 | "first_defined_arguments": ["move", "rotation", "scal"], 1071 | "vaguly_defined_arguments": ["move", "rotation", "scal"], 1072 | "tool_callings": [], 1073 | } 1074 | self.project_block = class_track(self.doc, self.project_block, "change_maker") 1075 | --------------------------------------------------------------------------------