├── core
│   ├── __init__.py
│   ├── interface.py
│   └── backend.py
├── evaluate.sh
├── run.sh
├── roles
│   ├── __init__.py
│   ├── instruction.py
│   ├── analyst.py
│   ├── tester.py
│   ├── rule_descriptions_actc.py
│   ├── rule_descriptions_act.py
│   ├── coder.py
│   ├── project_tester.py
│   ├── ui_designer.py
│   ├── project_roles.py
│   ├── web_visualization_specialist.py
│   ├── enhanced_role.py
│   └── project_architect.py
├── tools
│   ├── __init__.py
│   ├── global_tool_orchestrator.py
│   └── enhanced_tools.py
├── LICENSE
├── README.md
├── evaluate
│   ├── all_evaluate.py
│   ├── execute
│   │   ├── execution.py
│   │   └── _execution.py
│   └── evaluation.py
├── run_project.sh
├── project_main.py
├── utils.py
├── main.py
├── session.py
└── project_session.py


/core/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/evaluate.sh:
--------------------------------------------------------------------------------
1 | python evaluate/all_evaluate.py --input_path humaneval_output_0301.jsonl
2 | 
--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | python main.py --dataset humaneval --signature --model gpt-3.5-turbo --output_path humaneval_output.jsonl
2 | 
--------------------------------------------------------------------------------
/roles/__init__.py:
--------------------------------------------------------------------------------
1 | from .analyst import Analyst
2 | from .coder import Coder
3 | from .tester import Tester
4 | # from .reviewer import Reviewer
5 | 
6 | 
--------------------------------------------------------------------------------
/roles/instruction.py:
--------------------------------------------------------------------------------
1 | INSTRUCTPLAN = "The plan from the requirement analyst is as follows:\n{report}"
2 | INSTRUCTREPORT = "The report from the tester is as follows:\n{report}"
3 | INSTRUCTCODE = "Please implement the following code. Use ```python to put the Python code in a markdown code block:\n{requirement}"
4 | INSTRUCTEST = "The code provided by the developer is as follows:\n{code}\n"
5 | 
6 | 
--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
1 | # Simple tools module for project mode
2 | from .global_tool_orchestrator import GlobalToolOrchestrator
3 | # NOTE: the repository tree ships enhanced_tools.py (there is no simple_tools.py),
4 | # so the shared tool classes and instances are imported from there.
5 | from .enhanced_tools import (
6 |     CodeAnalyzer, FileManager, QualityChecker,
7 |     APIIntegrationTool, AutomatedTester,
8 |     code_analyzer, file_manager, quality_checker,
9 |     api_integration_tool, automated_tester
10 | )
11 | 
12 | # Create global instance
13 | global_tool_orchestrator = GlobalToolOrchestrator()
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 YihongDong
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /roles/analyst.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import json 4 | import argparse 5 | import tqdm 6 | import time 7 | 8 | from core import interface 9 | from utils import code_truncate, construct_system_message 10 | 11 | 12 | class Analyst(object): 13 | def __init__(self, TEAM, ANALYST, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512, 14 | temperature=0.0, top_p=0.95): 15 | self.model = model 16 | self.majority = majority 17 | self.max_tokens = max_tokens 18 | self.temperature = temperature 19 | self.top_p = top_p 20 | self.history_message = [] 21 | 22 | self.itf = interface.ProgramInterface( 23 | stop='', 24 | verbose=False, 25 | model = self.model, 26 | ) 27 | 28 | system_message = construct_system_message(requirement, ANALYST, TEAM) 29 | self.history_message_append(system_message) 30 | 31 | 32 | def analyze(self): 33 | try: 34 | responses = self.itf.run(prompt=self.history_message, majority_at = self.majority, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 35 | except Exception as e: 36 | print(e) 37 | print("analyze fail") 38 | time.sleep(5) 39 | return "error" 40 | 41 | plan = responses[0] 42 | 43 | self.history_message_append(plan, "assistant") 44 | 45 | return plan 46 | 47 | def history_message_append(self, system_message, role="user"): 48 | self.history_message.append({ 49 | "role": role, 50 | "content": system_message 51 | }) 52 | 53 | 54 | -------------------------------------------------------------------------------- /roles/tester.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import json 4 | import argparse 5 | import tqdm 6 | import time 7 | 8 | from core import interface 9 | from utils import code_truncate, construct_system_message 10 | from roles.instruction import INSTRUCTEST 11 | 12 | 13 | class Tester(object): 14 | def __init__(self, TEAM, TESTER, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512, 15 | temperature=0.0, top_p=0.95): 16 | self.model = model 17 | self.majority = majority 18 | self.max_tokens = max_tokens 19 | self.temperature = temperature 20 | self.top_p = top_p 21 | self.history_message = [] 22 | 23 | self.itf = interface.ProgramInterface( 24 | stop='', 25 | verbose=False, 26 | model = self.model, 27 | ) 28 | 29 | system_message = construct_system_message(requirement, TESTER, TEAM) 30 | self.history_message_append(system_message) 31 | 32 | 33 | def test(self, code): 34 | instruction = INSTRUCTEST.format(code=code) 35 | self.history_message.append({ 36 | "role": "user", 37 | "content": instruction 38 | }) 39 | 40 | try: 41 | responses = self.itf.run(prompt=self.history_message, majority_at = self.majority, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 42 | except Exception as e: 43 | print(e) 44 | print("test fail") 45 | time.sleep(5) 46 | return "error" 47 | 48 | report = responses[0] 49 | 50 | self.history_message_append(report, "assistant") 51 | 52 | return report 53 | 54 | def history_message_append(self, system_message, role="user"): 55 | self.history_message.append({ 56 | "role": role, 57 | "content": system_message 58 | }) 59 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Self-collaboration 
Code Generation via ChatGPT
2 | [![arXiv](https://img.shields.io/badge/arXiv-2304.07590-b31b1b.svg)](https://arxiv.org/abs/2304.07590)
3 | 
4 | Self-collaboration is the first LLM-based agent framework for function-level and project-level code generation in software engineering. First released in April 2023, the work has been accepted to TOSEM.
5 | 
6 | ### Function-Level Generation
7 | ```bash
8 | # Generate function-level code
9 | bash run.sh
10 | # Evaluate results
11 | bash evaluate.sh
12 | ```
13 | 
14 | ### Project-Level Generation
15 | ```bash
16 | # Generate project-level code
17 | bash run_project.sh
18 | 
19 | ```
20 | 
21 | #### 📋 Usage Examples
22 | 
23 | ##### 1. Portfolio Website
24 | ```bash
25 | python main.py --mode project \
26 |     --project_type web_visualization \
27 |     --requirement "Create a personal portfolio with project showcase and contact form" \
28 |     --output_dir "my_portfolio"
29 | ```
30 | 
31 | ##### 2. Interactive Dashboard
32 | ```bash
33 | python main.py --mode project \
34 |     --project_type web_visualization \
35 |     --requirement "Create a sales analytics dashboard with charts, filters, and real-time updates" \
36 |     --output_dir "sales_dashboard"
37 | ```
38 | 
39 | ##### 3. Data Visualization App
40 | ```bash
41 | python main.py --mode project \
42 |     --project_type web_visualization \
43 |     --requirement "Create an interactive data explorer with multiple chart types" \
44 |     --output_dir "data_explorer"
45 | ```
46 | 
47 | ### Citation
48 | ```
49 | @article{Self-collaboration,
50 |     author = {Dong, Yihong and Jiang, Xue and Jin, Zhi and Li, Ge},
51 |     title = {Self-collaboration Code Generation via ChatGPT},
52 |     journal = {{ACM} Trans. Softw. Eng. Methodol.},
53 |     volume = {33},
54 |     number = {7},
55 |     pages = {189:1--189:38},
56 |     year = {2024},
57 |     keywords = {Code generation, large language models, multi-agent collaboration, software development, software engineering}
58 | }
59 | ```
--------------------------------------------------------------------------------
/roles/rule_descriptions_actc.py:
--------------------------------------------------------------------------------
1 | ANALYST = '''I want you to act as a requirement analyst on our development team. Given a user requirement, your task is to analyze, decompose, and develop a high-level and concise plan to guide our developer in writing programs. The plan should include the following information:
2 | 1. Decompose the requirement into several easy-to-solve subproblems that can be more easily implemented by the developer.
3 | 2. Develop a high-level plan that outlines the major steps of the program.
4 | Remember, you only need to provide the concise plan in JSON.
5 | '''
6 | 
7 | PYTHON_DEVELOPER = '''I want you to act as a Python developer on our development team. You will receive plans from a requirement analyst or test reports from a tester. Your job is split into two parts:
8 | 1. If you receive a plan from a requirement analyst, write code in Python that meets the requirement following the plan. Ensure that the code you write is efficient, readable, and follows best practices.
9 | 2. If you receive a test report from a tester, write the fixed or improved code based on the content of the report. Ensure that any changes made to the code do not introduce new bugs or negatively impact the performance of the code.
10 | Remember, you only need to provide the code in Python and do not need to explain the code you wrote.
11 | '''
12 | 
13 | TESTER = '''I want you to act as a tester on our development team. You will receive the code written by the developer, and your job is as follows:
14 | 1. Write the test code that starts with "def check(candidate):", where candidate is a 'function' object.
15 | 2. Call candidate with different inputs (up to five), each call starting with "print", and do not write assert statements.
16 | Remember, you only need to provide the test code in Python and avoid using assert statements.
17 | '''
18 | 
19 | TEAM = '''There is a development team that includes a requirement analyst, a Python developer, and a tester. The team needs to develop programs that satisfy the requirement of the users. The different roles have different divisions of labor and need to cooperate with each other.
20 | '''
21 | 
--------------------------------------------------------------------------------
/core/interface.py:
--------------------------------------------------------------------------------
1 | import io
2 | import signal
3 | from contextlib import redirect_stdout
4 | from typing import Any, Callable, List, Optional
5 | from collections import Counter
6 | 
7 | from .backend import call_chatgpt
8 | 
9 | 
10 | class timeout:
11 |     def __init__(self, seconds=1, error_message='Timeout'):
12 |         self.seconds = seconds
13 |         self.error_message = error_message
14 |     def timeout_handler(self, signum, frame):
15 |         raise TimeoutError(self.error_message)
16 |     def __enter__(self):
17 |         signal.signal(signal.SIGALRM, self.timeout_handler)
18 |         signal.alarm(self.seconds)
19 |     def __exit__(self, type, value, traceback):
20 |         signal.alarm(0)
21 | 
22 | class ProgramInterface:
23 | 
24 |     def __init__(
25 |         self,
26 |         model: str = 'code-davinci-002',
27 |         stop: str = '\n\n',
28 |         get_answer_symbol: Optional[str] = None,
29 |         get_answer_expr: Optional[str] = None,
30 |         get_answer_from_stdout: bool = False,
31 |         verbose: bool = False
32 |     ) -> None:
33 | 
34 |         self.model = model
35 |         self.history = []
36 |         self.stop = stop
37 |         self.answer_symbol = get_answer_symbol
38 |         self.answer_expr = get_answer_expr
39 |         self.get_answer_from_stdout = get_answer_from_stdout
40 |         self.verbose = verbose
41 | 
42 |     def clear_history(self):
43 |         self.history = []
44 | 
45 |     def process_generation_to_code(self, gens: List[str]):
46 |         return [g.split('\n') for g in gens]
47 | 
48 |     def generate(self, prompt: str, temperature: float = 0.0, top_p: float = 1.0,
49 |                  max_tokens: int = 512, majority_at: Optional[int] = None, echo: bool = False, return_logprobs: bool = False):
50 | 
51 |         if 'davinci' not in self.model:
52 |             gens = call_chatgpt(prompt, model=self.model, stop=self.stop,
53 |                 temperature=temperature, top_p=top_p, max_tokens=max_tokens, echo=echo, majority_at=majority_at)
54 |         else:
55 |             # completion-style davinci models are not wired up to this backend;
56 |             # fail loudly instead of leaving `gens` unbound
57 |             raise NotImplementedError(f"model '{self.model}' is not supported by call_chatgpt")
58 | 
59 |         return gens
60 | 
61 |     def run(self, prompt: str, time_out: float = 10, temperature: float = 0.0, top_p: float = 1.0,
62 |             max_tokens: int = 512, majority_at: Optional[int] = None, echo=False, return_logprobs: bool = False):
63 |         code_snippets = self.generate(prompt, majority_at=majority_at, temperature=temperature, top_p=top_p, max_tokens=max_tokens, echo=echo, return_logprobs=return_logprobs)
64 | 
65 |         return code_snippets
66 | 
--------------------------------------------------------------------------------
/evaluate/all_evaluate.py:
--------------------------------------------------------------------------------
1 | import re
2 | import json
3 | import copy
4 | import argparse
5 | import sys
6 | from pathlib import Path
7 | sys.path.append(str(Path(__file__).resolve().parents[1]))
8 | 
9 | from utils import build_test_method, find_method_name, code_split, prompt_split_humaneval
10 | from execute.execution import evaluate_with_test_code, evaluate_with_test_code_T
11 | from evaluation import pass_at_K, AvgPassRatio
12 | from datasets import load_dataset, load_from_disk
13 | 
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--dataset', type=str, default='humaneval')
16 | parser.add_argument('--lang', type=str, default='python')
17 | parser.add_argument('--input_path', type=str, default='humaneval_output_240415.jsonl')
18 | parser.add_argument('--output_path', type=str, default='outputs/test_eval.jsonl')
19 | args = parser.parse_args()
20 | 
21 | INPUTPATH = args.input_path
22 | OUTPUT_PATH = args.output_path
23 | 
24 | if args.dataset == 'humaneval':
25 |     dataset = load_dataset("openai_humaneval")
26 |     dataset_key = ["test"]
27 | 
28 | 
29 | with open(INPUTPATH, 'r') as f:
30 |     except_list = []
31 |     handled_solutions = [s for s in (json.loads(line) for line in f) if s["task_id"] not in except_list]  # parse each line once
32 | print(len(handled_solutions))
33 | 
34 | for solution in handled_solutions:
35 |     solution["generation"] = solution['prompt'] + solution["completion"]
36 |     solution["prompt"] = ""
37 |     solution["entry_point"] = find_method_name(solution["generation"]) if find_method_name(solution["generation"]) else "candidate"
38 |     solution["completion"] = solution["generation"]
39 | 
40 | print(INPUTPATH)
41 | data_dict = {}
42 | for key in dataset_key:
43 |     for idx, task in enumerate(dataset[key]):
44 |         data_dict[task['task_id']] = task
45 | 
46 | exec_result = evaluate_with_test_code(handled_solutions, timeout=10)
47 | print('pass@1:')
48 | pass_at_K(exec_result, k=[1])
49 | 
50 | if args.dataset == "humaneval":
51 |     test_case_path = 'data/HumanEval_test_case_ET.jsonl'
52 |     with open(test_case_path, 'r') as f:
53 |         test_cases = [json.loads(line) for line in f]
54 | 
55 |     test_cases_dict = {}
56 |     for case in test_cases:
57 |         test = build_test_method(case['test_case_list'], "", case['entry_point'])
58 |         test_cases_dict[case['task_id']] = test
59 | 
60 | 
61 | for solution in handled_solutions:
62 |     solution['test'] = test_cases_dict[solution['task_id']]
63 | 
64 | exec_result_T = evaluate_with_test_code(handled_solutions, timeout=10)
65 | 
66 | print('pass@1 - ET:')
67 | pass_at_K(exec_result_T, k=[1])
--------------------------------------------------------------------------------
/roles/rule_descriptions_act.py:
--------------------------------------------------------------------------------
1 | ANALYST = '''I want you to act as a requirement analyst on our development team. Given a user requirement, your task is to analyze, decompose, and develop a high-level plan to guide our developer in writing programs. The plan should include the following information:
2 | 1. Decompose the requirement into several easy-to-solve subproblems that can be more easily implemented by the developer.
3 | 2. Develop a high-level plan that outlines the major steps of the program.
4 | Remember, your plan should be high-level and focused on guiding the developer in writing code, rather than providing implementation details.
5 | '''
6 | 
7 | DEVELOPER = '''I want you to act as a developer on our development team. You will receive plans from a requirements analyst or test reports from a reviewer. Your job is split into two parts:
8 | 1. If you receive a plan from a requirements analyst, write code in Python that meets the requirements following the plan. Ensure that the code you write is efficient, readable, and follows best practices.
9 | 2. If you receive a test report from a reviewer, fix or improve the code based on the content of the report. Ensure that any changes made to the code do not introduce new bugs or negatively impact the performance of the code.
10 | Remember, you do not need to explain the code you wrote. You should provide well-formed Python code, and your response should start with "```python\n".
11 | '''
12 | 
13 | TESTER = '''I want you to act as a tester in the team. You will receive the code written by the developer, and your job is to complete a report as follows:
14 | {
15 | "Code Review": Evaluate the structure and syntax of the code to ensure that it conforms to the specifications of the programming language, that the APIs used are correct, and that the code does not contain syntax errors or logic holes.
16 | "Code Description": Briefly describe what the code is supposed to do. This helps identify differences between the code implementation and the requirement.
17 | "Satisfying the requirements": True or False. This indicates whether the code satisfies the requirement.
18 | "Edge cases": Edge cases are scenarios where the code might not behave as expected or where inputs are at the extreme ends of what the code should handle.
19 | "Conclusion": "Code Test Passed" or "Code Test Failed". This is a summary of the test results.
20 | }
21 | '''
22 | 
23 | TEAM = '''There is a development team that includes a requirements analyst, a developer, and a quality assurance reviewer. The team needs to develop programs that satisfy the requirements of the users. The different roles have different divisions of labor and need to cooperate with each other.
24 | '''
--------------------------------------------------------------------------------
/roles/coder.py:
--------------------------------------------------------------------------------
1 | import os
2 | import openai
3 | import time
4 | import copy
5 | import json
6 | import argparse
7 | import tqdm
8 | 
9 | from core import interface
10 | from utils import code_truncate, construct_system_message
11 | from roles.instruction import INSTRUCTPLAN, INSTRUCTREPORT, INSTRUCTCODE
12 | 
13 | class Coder(object):
14 |     def __init__(self, TEAM, PYTHON_DEVELOPER, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512,
15 |                  temperature=0.0, top_p=0.95):
16 |         self.model = model
17 |         self.majority = majority
18 |         self.max_tokens = max_tokens
19 |         self.temperature = temperature
20 |         self.top_p = top_p
21 |         self.history_message = []
22 |         self.requirement = requirement
23 | 
24 |         self.itf = interface.ProgramInterface(
25 |             stop='',
26 |             verbose=False,
27 |             model = self.model,
28 |         )
29 | 
30 |         system_message = construct_system_message(requirement, PYTHON_DEVELOPER, TEAM)
31 | 
32 |         self.history_message_append(system_message)
33 | 
34 |     def implement(self, report, is_init=False):
35 |         self.construct_with_report(report, is_init)
36 | 
37 |         try:
38 |             responses = self.itf.run(prompt=self.history_message, majority_at = self.majority, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
39 |         except Exception as e:
40 |             print(e)
41 |             print("implement fail")
42 |             time.sleep(5)
43 |             return "error"
44 | 
45 |         if 'gpt' not in self.model:
46 |             generation = responses[0][responses[0].find("def"):]
47 |             tem = [s for s in generation.split('\n\n') if 'def ' in s or s[:1] == ' ']
48 |             code = '\n\n'.join(tem).strip('```').strip()
49 |         else:
50 |             code = code_truncate(responses[0])
51 | 
52 |         self.history_message = self.history_message[:-1]
53 |         self.history_message_append(code, 
"assistant") 54 | 55 | return code 56 | 57 | def history_message_append(self, system_message, role="user"): 58 | self.history_message.append({ 59 | "role": role, 60 | "content": system_message 61 | }) 62 | 63 | def construct_with_report(self, report, is_init=False): 64 | if report != "": 65 | if is_init: 66 | instruction = INSTRUCTPLAN.format(report=report.strip()) 67 | else: 68 | instruction = INSTRUCTREPORT.format(report=report.strip()) 69 | self.history_message_append(instruction) 70 | self.history_message_append(INSTRUCTCODE.format(requirement=self.requirement)) 71 | -------------------------------------------------------------------------------- /run_project.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Project-level code generation script 4 | 5 | echo "Self-Collaboration Project-Level Code Generation" 6 | echo "==============================================" 7 | 8 | # Example 1: Portfolio Website 9 | echo "Generating personal portfolio website..." 10 | python3 main.py --mode project \ 11 | --project_type web_visualization \ 12 | --requirement "Create a modern personal portfolio website with sections for about me, skills, projects showcase, and contact form. Include smooth scrolling, animations, and responsive design. Use a professional color scheme and modern typography." \ 13 | --output_dir "generated_portfolio" \ 14 | --model "gpt-3.5-turbo" \ 15 | --max_round 2 \ 16 | --max_tokens 4096 \ 17 | --temperature 0 18 | 19 | echo "" 20 | echo "Portfolio generation complete! Check the 'generated_portfolio' folder." 21 | echo "Open generated_portfolio/index.html in your browser to view the result." 22 | echo "" 23 | 24 | # Example 2: Web Visualization Dashboard 25 | echo "Generating interactive data visualization dashboard..." 26 | python3 main.py --mode project \ 27 | --project_type web_visualization \ 28 | --requirement "Create an interactive data visualization dashboard that displays sales data with multiple chart types (bar charts, line charts, pie charts). Include filters for date range and product categories. Make it responsive and visually appealing with modern UI design." \ 29 | --output_dir "generated_dashboard" \ 30 | --model "gpt-3.5-turbo" \ 31 | --max_round 2 \ 32 | --max_tokens 4096 \ 33 | --temperature 0 34 | 35 | echo "" 36 | echo "Dashboard generation complete! Check the 'generated_dashboard' folder." 37 | echo "Open generated_dashboard/index.html in your browser to view the result." 38 | echo "" 39 | 40 | 41 | # Example 3: Real-time Analytics Dashboard 42 | echo "Generating real-time analytics dashboard..." 43 | python3 main.py --mode project \ 44 | --project_type web_visualization \ 45 | --requirement "Create a real-time analytics dashboard for monitoring website traffic and user behavior. Include live charts for page views, user sessions, bounce rate, and geographic distribution. Add real-time notifications and customizable widgets." \ 46 | --output_dir "generated_analytics" \ 47 | --model "gpt-3.5-turbo" \ 48 | --max_round 2 \ 49 | --max_tokens 4096 \ 50 | --temperature 0 51 | 52 | echo "" 53 | echo "Analytics dashboard generation complete! Check the 'generated_analytics' folder." 54 | echo "Open generated_analytics/index.html in your browser to view the result." 55 | echo "" 56 | 57 | echo "==============================================" 58 | echo "All project generations completed!" 
59 | echo "You can also run custom projects with:"
60 | echo "python main.py --mode project --requirement 'Your custom requirement' --output_dir 'your_output_folder'"
--------------------------------------------------------------------------------
/project_main.py:
--------------------------------------------------------------------------------
1 | import os
2 | import copy
3 | import json
4 | import argparse
5 | import tqdm
6 | 
7 | from project_session import ProjectSession
8 | from utils import construct_system_message
9 | 
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--project_type', type=str, default='web_visualization',
12 |                     choices=['web_visualization', 'data_analysis', 'api_service', 'desktop_app'])
13 | parser.add_argument('--requirement', type=str, required=True, help='Project requirement description')
14 | parser.add_argument('--output_dir', type=str, default='generated_project')
15 | parser.add_argument('--model', type=str, default='gpt-3.5-turbo')
16 | parser.add_argument('--max_round', type=int, default=3)
17 | parser.add_argument('--max_tokens', type=int, default=1024)
18 | parser.add_argument('--majority', type=int, default=1)
19 | parser.add_argument('--temperature', type=float, default=0.2)
20 | parser.add_argument('--top_p', type=float, default=0.95)
21 | parser.add_argument('--verbose', action='store_true')
22 | args = parser.parse_args()
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     from roles.project_roles import PROJECT_TEAM, PROJECT_ARCHITECT, PROJECT_DEVELOPER, PROJECT_TESTER, UI_DESIGNER
27 | 
28 |     OUTPUT_DIR = args.output_dir
29 | 
30 |     # Create output directory
31 |     os.makedirs(OUTPUT_DIR, exist_ok=True)
32 | 
33 |     try:
34 |         # Initialize project session with enhanced roles
35 |         session = ProjectSession(
36 |             team_description=PROJECT_TEAM,
37 |             architect_description=PROJECT_ARCHITECT,
38 |             developer_description=PROJECT_DEVELOPER,
39 |             tester_description=PROJECT_TESTER,
40 |             ui_designer_description=UI_DESIGNER,
41 |             requirement=args.requirement,
42 |             project_type=args.project_type,
43 |             model=args.model,
44 |             majority=args.majority,
45 |             max_tokens=args.max_tokens,
46 |             temperature=args.temperature,
47 |             top_p=args.top_p,
48 |             max_round=args.max_round,
49 |             output_dir=OUTPUT_DIR
50 |         )
51 | 
52 |         # Run project generation session
53 |         project_files, session_history = session.run_project_session()
54 | 
55 |         # Save session history
56 |         with open(os.path.join(OUTPUT_DIR, 'session_history.json'), 'w', encoding='utf-8') as f:
57 |             json.dump(session_history, f, indent=2, ensure_ascii=False)
58 | 
59 |         print(f"Project generated successfully in: {OUTPUT_DIR}")
60 |         print(f"Generated files: {list(project_files.keys())}")
61 | 
62 |         # If web project, provide instructions for running
63 |         if args.project_type == 'web_visualization' and 'index.html' in project_files:
64 |             print("\nTo view the web application:")
65 |             print(f"Open {os.path.join(OUTPUT_DIR, 'index.html')} in your browser")
66 | 
67 |     except Exception as e:
68 |         print(f"Project generation failed: {str(e)}")
69 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | import ast
4 | import time
5 | import difflib
6 | import copy
7 | 
8 | 
9 | def code_truncate_regex(code):
10 |     code_regex = r"```(.*?|)\n(?P<code>.*?)```"
11 |     match = re.search(code_regex, code, re.DOTALL)
12 |     code = match.group("code") if match else ""
13 |     return code
14 | 
15 | def code_truncate(response):
16 |     code = code_truncate_regex(response)
17 |     if code == "":
18 |         generation = response[response.find("def"):]
19 |         tem = [s for s in generation.split('\n\n') if 'def ' in s or s[:1] == ' ']
20 |         code = '\n\n'.join(tem).strip('```').strip()
21 |     return code
22 | 
23 | def prompt_split_humaneval(prompt, method_name):
24 |     prompt = prompt.strip()
25 |     prompt = prompt.replace("\r\n", "\n")
26 |     before_func = prompt[:prompt.rfind("def ")]
27 |     code = prompt[prompt.rfind("def "):]
28 | 
29 |     comment_start_1 = re.search("\"\"\"", code)
30 |     comment_start_2 = re.search("\'\'\'", code)
31 |     if comment_start_1:
32 |         comment_start = comment_start_1.end()
33 |     elif comment_start_2:
34 |         comment_start = comment_start_2.end()
35 |     else:
36 |         comment_start = 0  # no docstring delimiter found; scan from the top
37 |     example_start_1 = re.search("[eE]xample(:)?", code)
38 |     example_start_2 = re.search("[fF]or [eE]xample(:)?", code)
39 |     example_start_3 = re.search(">>>", code)
40 |     example_start_4 = re.search(method_name + r"\(.+\)", code[comment_start:])
41 | 
42 | 
43 |     if example_start_1:
44 |         comment = code[comment_start:example_start_1.start()]
45 |         example = code[example_start_1.start():-4]
46 |     elif example_start_2:
47 |         comment = code[comment_start:example_start_2.start()]
48 |         example = code[example_start_2.start():-4]
49 |     elif example_start_3:
50 |         comment = code[comment_start:example_start_3.start()]
51 |         example = "Example:\n" + code[example_start_3.start():-4]
52 |     elif example_start_4:
53 |         comment = code[comment_start:example_start_4.start()+comment_start]
54 |         example = "Example:\n" + code[example_start_4.start()+comment_start:-4]
55 |     else:
56 |         comment = code[comment_start:-4]
57 |         example = ""
58 |     comment = comment.strip().replace("\n", " ")
59 |     comment = re.sub(r"\s+", " ", comment)
60 | 
61 |     example = re.sub(r"\n(\s)*", "\n\t", example)
62 |     test_case = "\t" + example.strip()
63 |     signature = code[:code.index("\n")+1]
64 | 
65 |     return before_func, signature, comment, test_case
66 | 
67 | def build_test_method(test_list, test_imports, method_name):
68 |     if test_imports:
69 |         test_imports = "\n".join(test_imports)
70 |         test_method = test_imports + "\n"
71 |     else:
72 |         test_method = ""
73 |     test_method += "def check(" + method_name + "):\n"  # append, so the imports above are kept
74 |     if len(test_list) == 0:
75 |         return test_method + "\treturn True" + "\n"
76 |     for test in test_list:
77 |         test_method += '\t' + test + "\n"
78 |     return test_method.strip("\n")
79 | 
80 | def find_method_name(code, lang="python"):
81 |     try:
82 |         parsed = ast.parse(code)
83 |         function_defs = [node for node in parsed.body if isinstance(node, ast.FunctionDef)]
84 |         if function_defs:
85 |             if len(function_defs) == 1:
86 |                 method_name = function_defs[0].name
87 |             else:
88 |                 method_name = function_defs[-1].name if function_defs[-1].name != "main" else function_defs[-2].name
89 |         else:
90 |             method_name = None
91 |     except Exception:
92 |         method_name = None
93 | 
94 |     return method_name
95 | 
96 | 
97 | def code_split(func):
98 |     '''
99 |     Split code into signature, comment and function body
100 |     '''
101 |     func = func.replace("\r\n", "\n")
102 |     before_func = func[:func.rfind("def ")]
103 |     code = func[func.rfind("def "):]
104 | 
105 |     is_comment = False
106 |     comments = []
107 | 
108 |     statements = code.split("\n")
109 |     for s_idx, s in enumerate(statements):
110 |         s = s.strip()
111 |         if s.startswith("def"):
112 |             signature = statements[:s_idx+1]
113 |             method_name = s.split("def ")[1].split("(")[0]
114 |             func_body_idx = s_idx+1
115 |             tmp_statement = statements[func_body_idx].strip()
116 |             if not tmp_statement.startswith("'''"):
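                # the first statement after the signature is not a docstring,
                # so the function has no comment block; stop scanning here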
break 118 | elif s.startswith("'''") and not is_comment: 119 | is_comment = True 120 | 121 | elif is_comment: 122 | if s.startswith("'''"): 123 | is_comment = False 124 | func_body_idx = s_idx+1 125 | break 126 | comments.append(s) 127 | func_body = statements[func_body_idx:] 128 | return method_name, "\n".join(signature), "\n".join(comments), "\n".join(func_body), before_func 129 | 130 | def construct_system_message(requirement, role, team=''): 131 | if team == '': 132 | system_message = "The requirement from users is: \n{'requirement':\n" + "'"+ requirement.replace('\n\n','\n').strip(".") + "'\n}\n\n" + role 133 | else: 134 | system_message = team + '\n '+ \ 135 | "The requirement from users is: \n{'requirement':\n" + "'"+ requirement.replace('\n\n','\n').strip(".") + "'\n}\n\n" + \ 136 | role 137 | 138 | return system_message 139 | -------------------------------------------------------------------------------- /evaluate/execute/execution.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | import ctypes 5 | libgcc_s = ctypes.CDLL('libgcc_s.so.1') 6 | 7 | from collections import defaultdict 8 | from concurrent.futures import as_completed, ProcessPoolExecutor 9 | import logging 10 | 11 | from execute._execution import check_correctness, check_correctness_with_test_cases, check_correctness_T 12 | 13 | logging.basicConfig( 14 | format="SystemLog: [%(asctime)s][%(name)s][%(levelname)s] - %(message)s", 15 | datefmt="%Y-%m-%d %H:%M:%S", 16 | level=logging.INFO, 17 | ) 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | def evaluate_with_test_code( 22 | samples, 23 | timeout 24 | ): 25 | # logger.info(f'Start evaluation with test code, timeout={timeout}') 26 | # Check the generated samples against test suites. 27 | with ProcessPoolExecutor() as executor: 28 | 29 | futures = [] 30 | existed_completion = defaultdict(set) 31 | results = defaultdict(defaultdict) 32 | 33 | for sample in samples: 34 | task_id = sample["task_id"] 35 | prompt = sample['prompt'] 36 | test = sample['test'] 37 | entry_point = sample['entry_point'] 38 | completion = sample["completion"] 39 | if completion in existed_completion[task_id]: 40 | continue 41 | existed_completion[task_id].add(completion) 42 | args = (task_id, prompt, completion, test, entry_point, timeout) 43 | future = executor.submit(check_correctness, *args) 44 | futures.append(future) 45 | logger.info(f'{len(futures)} execution requests are submitted') 46 | 47 | for idx, future in enumerate(as_completed(futures)): 48 | # logger.info('[{}/{}] execution completed'.format(idx+1, len(futures))) 49 | result = future.result() 50 | results[result["task_id"]][result["completion"]] = result 51 | 52 | # logger.info('execution finished! start parsing results') 53 | samples_with_result = [] 54 | for sample in samples: 55 | task_id = sample["task_id"] 56 | completion = sample["completion"] 57 | result = results[task_id][completion] 58 | sample["result"] = result["result"] 59 | sample["passed"] = result["passed"] 60 | samples_with_result.append(sample) 61 | 62 | assert len(samples_with_result) == len(samples), "Some problems are not attempted." 63 | 64 | return samples_with_result 65 | 66 | def evaluate_with_test_cases( 67 | solutions, 68 | test_cases_dict, 69 | timeout, 70 | limit 71 | ): 72 | # logger.info(f'Start evaluation with test cases, timeout={timeout}, limit={limit}') 73 | # Check the generated solutions against test suites. 
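    # Completions are de-duplicated per task, and each sample's test-case list
    # is cut to `limit` entries before the de-duplicated union is executed.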
74 | with ProcessPoolExecutor() as executor: 75 | futures = [] 76 | results_list = [] 77 | existed_completion = defaultdict(set) 78 | 79 | for solution in solutions: 80 | task_id = solution['task_id'] 81 | prompt = solution['prompt'] 82 | completion = solution['completion'] 83 | if completion in existed_completion[task_id]: 84 | continue 85 | existed_completion[task_id].add(completion) 86 | task_test_cases = test_cases_dict[task_id] 87 | if not task_test_cases: 88 | continue 89 | # get limited test cases 90 | limited_task_test_cases = [cases_per_sample[:limit] for cases_per_sample in task_test_cases] 91 | limited_task_test_cases = sum(limited_task_test_cases, []) 92 | 93 | args = (task_id, prompt, completion, list(set(limited_task_test_cases)), timeout) 94 | future = executor.submit(check_correctness_with_test_cases, *args) 95 | futures.append(future) 96 | 97 | # logger.info(f'{len(futures)} execution requests are submitted') 98 | for idx, future in enumerate(as_completed(futures)): 99 | # logger.info('[{}/{}] execution completed'.format(idx+1, len(futures))) 100 | result = future.result() 101 | results_list.append(result) 102 | 103 | # logger.info('execution finished!') 104 | return results_list 105 | 106 | def evaluate_with_test_code_T( 107 | samples, 108 | timeout 109 | ): 110 | # logger.info(f'Start evaluation with test code, timeout={timeout}') 111 | # Check the generated samples against test suites. 112 | with ProcessPoolExecutor() as executor: 113 | 114 | futures = [] 115 | existed_completion = defaultdict(set) 116 | results = defaultdict(defaultdict) 117 | 118 | for sample in samples: 119 | task_id = sample["task_id"] 120 | prompt = sample['prompt'] 121 | test = sample['test_case_list'] 122 | entry_point = sample['entry_point'] 123 | completion = sample["completion"] 124 | if completion in existed_completion[task_id]: 125 | continue 126 | existed_completion[task_id].add(completion) 127 | args = (task_id, prompt, completion, test, entry_point, timeout) 128 | future = executor.submit(check_correctness_T, *args) 129 | futures.append(future) 130 | # logger.info(f'{len(futures)} execution requests are submitted') 131 | 132 | for idx, future in enumerate(as_completed(futures)): 133 | # logger.info('[{}/{}] execution completed'.format(idx+1, len(futures))) 134 | result = future.result() 135 | results[result["task_id"]][result["completion"]] = result 136 | 137 | # logger.info('execution finished! start parsing results') 138 | samples_with_result = [] 139 | for sample in samples: 140 | task_id = sample["task_id"] 141 | completion = sample["completion"] 142 | result = results[task_id][completion] 143 | sample["result"] = result["result"] 144 | sample["passed"] = result["passed"] 145 | samples_with_result.append(sample) 146 | 147 | assert len(samples_with_result) == len(samples), "Some problems are not attempted." 148 | 149 | return samples_with_result -------------------------------------------------------------------------------- /evaluate/evaluation.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 
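# pass@k below uses the unbiased estimator from the HumanEval/Codex evaluation:
# with n samples per task of which c pass, pass@k = 1 - C(n-c, k) / C(n, k).
# Worked example: n = 5, c = 2, k = 1 gives 1 - C(3,1)/C(5,1) = 1 - 3/5 = 0.4,
# matching the intuitive pass@1 = c/n; see _estimator and _estimate_pass_at_k.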
3 | 
4 | import statistics
5 | import numpy as np
6 | from collections import defaultdict
7 | import logging
8 | from typing import List, Union
9 | import itertools
10 | from utils import build_test_method
11 | 
12 | logging.basicConfig(
13 |     format="SystemLog: [%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
14 |     datefmt="%Y-%m-%d %H:%M:%S",
15 |     level=logging.INFO,
16 | )
17 | 
18 | logger = logging.getLogger(__name__)
19 | 
20 | def _dictionized_ground_truth_results(ground_truth_exec_results):
21 |     ground_truth_results_by_task_and_solution = defaultdict(defaultdict)
22 |     for result in ground_truth_exec_results:
23 |         ground_truth_results_by_task_and_solution[result['task_id']][result['completion']] = result['passed']
24 |     return ground_truth_results_by_task_and_solution
25 | 
26 | def _turn_solution_scores_into_choose_count(sorted_solution_scores, topk):
27 |     wrapped = isinstance(sorted_solution_scores[0][0], list)
28 |     result = []
29 |     if wrapped:
30 |         last_score = sorted_solution_scores[0][1]
31 |         merged_solutions_and_score = [sorted_solution_scores[0]]
32 |         for solutions, score in sorted_solution_scores[1:]:
33 |             if score == last_score:
34 |                 last_solutions = merged_solutions_and_score[-1][0]
35 |                 merged_solutions_and_score[-1] = (last_solutions + solutions, score)
36 |             else:
37 |                 merged_solutions_and_score.append((solutions, score))
38 |                 last_score = score
39 |         for solutions_and_score in merged_solutions_and_score:
40 |             result.append((solutions_and_score[0], 1))  # choose one from solutions_and_score
41 |     else:
42 |         topk_scores = sorted(list(set([i[1] for i in sorted_solution_scores])), reverse=True)
43 |         for score in topk_scores:
44 |             solutions = [s[0] for s in sorted_solution_scores if s[1] == score]
45 |             result.append((solutions, 1))
46 | 
47 |     if len(result) >= topk:
48 |         return result[:topk]
49 |     else:
50 |         initial_choose_count = [1]*len(result)
51 |         for i in range(topk-len(result)):
52 |             initial_choose_count[i%len(result)] += 1
53 |         for i, choose_count in enumerate(initial_choose_count):
54 |             result[i] = (result[i][0], choose_count)
55 |         return result
56 | 
57 | 
58 | def get_result_of_sorted_solutions(ground_truth_results_list, sorted_solutions_by_task, topks=[1,2,10]):
59 |     # sorted_solutions_by_task {task_id: [([solutions], score), ...]}
60 |     def _count_correct(solutions: list, ground_truth_results: dict) -> int:
61 |         return sum([ground_truth_results[s] for s in solutions])
62 | 
63 |     ground_truth_results = _dictionized_ground_truth_results(ground_truth_results_list)
64 |     topk_results = dict()
65 |     for topk in topks:
66 |         random_pass_at_k_by_task = pass_at_K_by_task(ground_truth_results_list, k=topk)
67 |         pass_rates = []
68 |         for task_id in ground_truth_results.keys():
69 |             all_wrong_probability = 1
70 |             if task_id in sorted_solutions_by_task and sorted_solutions_by_task[task_id]:
71 |                 solutions_and_probability = _turn_solution_scores_into_choose_count(sorted_solutions_by_task[task_id], topk)
72 |                 for solutions, choose_count in solutions_and_probability:
73 |                     current_wrong_prob = _estimator(len(solutions), _count_correct(solutions, ground_truth_results[task_id]), 1)
74 |                     repeat_current_wrong_prob = pow(current_wrong_prob, choose_count)
75 |                     all_wrong_probability *= repeat_current_wrong_prob
76 |                 pass_rates.append(1-all_wrong_probability)
77 |             else:
78 |                 pass_rates.append(random_pass_at_k_by_task[task_id])
79 | 
80 |         # the avg rate of all tasks
81 |         topk_results[f'pass@{topk}'] = round(statistics.mean(pass_rates), 4)
82 |     logger.info(topk_results)
83 | 
84 | def 
pass_at_K_by_task(results, k): 85 | result_dict = defaultdict(list) 86 | for line in results: 87 | result_dict[line['task_id']].append(line['passed']) 88 | result = dict() 89 | for task_id in result_dict.keys(): 90 | total = len(result_dict[task_id]) 91 | correct = sum(result_dict[task_id]) 92 | score = _estimate_pass_at_k(total, [correct], k)[0] 93 | result[task_id] = score 94 | return result 95 | 96 | def pass_at_K(results, k = [1, 10, 100]): 97 | def _turn_list_into_dict(result_lines): 98 | result_dict = defaultdict(list) 99 | for line in result_lines: 100 | result_dict[line['task_id']].append(line['passed']) 101 | return result_dict 102 | 103 | # Calculate pass@k. 104 | total, correct = [], [] 105 | for passed in _turn_list_into_dict(results).values(): 106 | total.append(len(passed)) 107 | correct.append(sum(passed)) 108 | 109 | total = np.array(total) 110 | correct = np.array(correct) 111 | 112 | ks = k 113 | pass_at_k = {f"pass@{k}": round(_estimate_pass_at_k(total, correct, k).mean(), 4) 114 | for k in ks if (total >= k).all()} 115 | logger.info(pass_at_k) 116 | 117 | def _estimator(n: int, c: int, k: int) -> float: 118 | """ 119 | Calculates comb(n - c, k) / comb(n, k). 120 | """ 121 | if n - c < k: 122 | return 0 123 | return np.prod(1.0 - k / np.arange(n - c + 1, n + 1)) 124 | 125 | def _estimate_pass_at_k( 126 | num_samples: Union[int, List[int], np.ndarray], 127 | num_correct: Union[List[int], np.ndarray], 128 | k: int 129 | ) -> np.ndarray: 130 | """ 131 | Estimates pass@k of each problem and returns them in an array. 132 | """ 133 | if isinstance(num_samples, int): 134 | num_samples_it = itertools.repeat(num_samples, len(num_correct)) 135 | else: 136 | assert len(num_samples) == len(num_correct) 137 | num_samples_it = iter(num_samples) 138 | 139 | return np.array([1.0 - _estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)]) 140 | 141 | 142 | 143 | 144 | def AvgPassRatio(handled_solutions): 145 | total = len(handled_solutions) 146 | correct = sum([1 for s in handled_solutions if s['passed']]) 147 | return correct/total 148 | 149 | 150 | -------------------------------------------------------------------------------- /core/backend.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from openai import OpenAI 3 | import time 4 | import os 5 | import httpx 6 | import json 7 | import tiktoken 8 | 9 | 10 | def count_tokens(messages, model="gpt-3.5-turbo"): 11 | """Calculate the number of tokens in messages""" 12 | try: 13 | encoding = tiktoken.encoding_for_model(model) 14 | except KeyError: 15 | encoding = tiktoken.get_encoding("cl100k_base") 16 | 17 | num_tokens = 0 18 | for message in messages: 19 | if isinstance(message, dict): 20 | # ChatGPT format 21 | num_tokens += 4 # every message follows {role/name}\n{content}\n 22 | for key, value in message.items(): 23 | if isinstance(value, str): 24 | num_tokens += len(encoding.encode(value)) 25 | if key == "name": # if there's a name, the role is omitted 26 | num_tokens += -1 # role is always required and always 1 token 27 | elif isinstance(message, str): 28 | num_tokens += len(encoding.encode(message)) 29 | 30 | num_tokens += 2 # every reply is primed with assistant 31 | return num_tokens 32 | 33 | 34 | def adjust_max_tokens(messages, model='gpt-3.5-turbo', desired_max_tokens=4096): 35 | """Intelligently adjust max_tokens to avoid exceeding model limits""" 36 | 37 | # Context length limits for different models 38 | model_limits = { 39 | 'gpt-3.5-turbo': 16385, 
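        # NOTE (assumption): the context-window sizes in this table reflect the
        # model snapshots available when this was written; newer releases may
        # differ, so treat these values as defaults to verify.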
40 |         'gpt-3.5-turbo-0301': 4096,
41 |         'gpt-3.5-turbo-0613': 4096,
42 |         'gpt-3.5-turbo-16k': 16385,
43 |         'gpt-4': 8192,
44 |         'gpt-4-0314': 8192,
45 |         'gpt-4-0613': 8192,
46 |         'gpt-4-32k': 32768,
47 |         'gpt-4-1106-preview': 128000,
48 |         'gpt-4-turbo': 128000,
49 |     }
50 | 
51 |     # Get the maximum context length for the model
52 |     max_context_length = model_limits.get(model, 16385)  # Default to gpt-3.5-turbo limit
53 | 
54 |     # Calculate input message token count
55 |     input_tokens = count_tokens(messages, model)
56 | 
57 |     # Calculate available tokens, leaving some margin
58 |     available_tokens = max_context_length - input_tokens - 100  # Leave 100 tokens margin
59 | 
60 |     # Adjust max_tokens
61 |     if available_tokens <= 0:
62 |         print(f"⚠️ Warning: Input too long ({input_tokens} tokens), truncating...")
63 |         # If input is too long, truncate message history
64 |         adjusted_max_tokens = min(512, max_context_length // 4)  # Use minimum output tokens
65 |         truncated_messages = truncate_messages(messages, max_context_length - adjusted_max_tokens - 100)
66 |         return truncated_messages, adjusted_max_tokens
67 |     else:
68 |         # Use smaller value: desired max_tokens or available token count
69 |         adjusted_max_tokens = min(desired_max_tokens, available_tokens)
70 |         print(f"📊 Token info: Input={input_tokens}, Available={available_tokens}, Using={adjusted_max_tokens}")
71 |         return messages, max(adjusted_max_tokens, 256)  # Use at least 256 tokens
72 | 
73 | 
74 | def truncate_messages(messages, max_tokens):
75 |     """Truncate message history to fit token limit"""
76 |     if not messages:
77 |         return messages
78 | 
79 |     # Keep system messages and latest user messages
80 |     truncated = []
81 | 
82 |     # If first message is system message, keep it
83 |     if messages and isinstance(messages[0], dict) and messages[0].get('role') == 'system':
84 |         truncated.append(messages[0])
85 |         remaining_messages = messages[1:]
86 |     else:
87 |         remaining_messages = messages
88 | 
89 |     # Start from latest messages, add messages forward until token limit reached
90 |     current_tokens = count_tokens(truncated)
91 |     for message in reversed(remaining_messages):
92 |         message_tokens = count_tokens([message])
93 |         if current_tokens + message_tokens <= max_tokens:
94 |             truncated.insert(1 if truncated and truncated[0].get('role') == 'system' else 0, message)  # keep the system message first; newest messages end up last
95 |             current_tokens += message_tokens
96 |         else:
97 |             break
98 | 
99 |     print(f"📝 Truncated messages: {len(messages)} -> {len(truncated)} messages")
100 |     return truncated
101 | 
102 | 
103 | def call_chatgpt(prompt, model='gpt-3.5-turbo', stop=None, temperature=0., top_p=0.95,
104 |                  max_tokens=128, echo=False, majority_at=None):
105 | 
106 |     client = OpenAI()
107 | 
108 |     # Intelligently adjust token count
109 |     adjusted_prompt, adjusted_max_tokens = adjust_max_tokens(prompt, model, max_tokens)
110 | 
111 |     num_completions = majority_at if majority_at is not None else 1
112 |     num_completions_batch_size = 10
113 | 
114 |     completions = []
115 |     for i in range(20 * (num_completions // num_completions_batch_size + 1)):
116 |         try:
117 |             requested_completions = min(num_completions_batch_size, num_completions - len(completions))
118 | 
119 |             response = client.chat.completions.create(
120 |                 model=model,
121 |                 messages=adjusted_prompt,
122 |                 max_tokens=adjusted_max_tokens,
123 |                 temperature=temperature,
124 |                 top_p=top_p,
125 |                 n=requested_completions
126 |             )
127 |             completions.extend([choice.message.content for choice in response.choices])
128 |             if len(completions) >= num_completions:
129 |                 return completions[:num_completions]
130 | 
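        # Retry policy implemented by the handlers below: context-length errors
        # halve max_tokens and re-truncate the prompt; rate limits back off
        # quadratically (capped at 60s); other errors retry with reduced tokens
        # during the first three loop iterations, then re-raise.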
131 | except openai.BadRequestError as e: 132 | error_message = str(e) 133 | if "context_length_exceeded" in error_message or "maximum context length" in error_message: 134 | print(f"🔄 Context length exceeded, reducing max_tokens from {adjusted_max_tokens} to {adjusted_max_tokens // 2}") 135 | adjusted_max_tokens = max(adjusted_max_tokens // 2, 256) 136 | # Further truncate messages 137 | adjusted_prompt, adjusted_max_tokens = adjust_max_tokens( 138 | adjusted_prompt, model, adjusted_max_tokens 139 | ) 140 | continue 141 | else: 142 | print(f"❌ API Error: {error_message}") 143 | raise e 144 | 145 | except openai.RateLimitError as e: 146 | print(f"⏳ Rate limit hit, waiting {min(i**2, 60)} seconds...") 147 | time.sleep(min(i**2, 60)) 148 | 149 | except Exception as e: 150 | print(f"❌ Unexpected error: {str(e)}") 151 | if i < 3: # First 3 attempts try reducing token count 152 | adjusted_max_tokens = max(adjusted_max_tokens // 2, 256) 153 | print(f"🔄 Retrying with reduced max_tokens: {adjusted_max_tokens}") 154 | continue 155 | else: 156 | raise e 157 | 158 | raise RuntimeError('Failed to call GPT API after multiple attempts') -------------------------------------------------------------------------------- /roles/project_tester.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import json 4 | import time 5 | 6 | from core import interface 7 | from utils import construct_system_message 8 | from tools import global_tool_orchestrator 9 | 10 | 11 | class ProjectTester(object): 12 | def __init__(self, team_description, tester_description, requirement, project_type, 13 | model='gpt-3.5-turbo', majority=1, max_tokens=1024, temperature=0.2, top_p=0.95): 14 | self.model = model 15 | self.majority = majority 16 | self.max_tokens = max_tokens 17 | self.temperature = temperature 18 | self.top_p = top_p 19 | self.history_message = [] 20 | self.requirement = requirement 21 | self.project_type = project_type 22 | 23 | # Add tool orchestrator reference 24 | self.tool_orchestrator = global_tool_orchestrator 25 | 26 | self.itf = interface.ProgramInterface( 27 | stop='', 28 | verbose=False, 29 | model=self.model, 30 | ) 31 | 32 | system_message = construct_system_message(requirement, tester_description, team_description) 33 | self.history_message_append(system_message) 34 | 35 | def test_project(self, project_files, architecture_plan): 36 | """Test the complete project implementation with tool assistance""" 37 | 38 | print("🧪 Enhanced testing with automated tools...") 39 | 40 | # Use automated tester for initial validation 41 | automated_tester = self.tool_orchestrator.get_tool("automated_tester") 42 | quality_checker = self.tool_orchestrator.get_tool("quality_checker") 43 | 44 | tool_results = {} 45 | if automated_tester: 46 | automated_results = automated_tester.execute("full_suite", 47 | files=project_files, 48 | project_type=self.project_type) 49 | tool_results["automated_tests"] = automated_results 50 | print(f"🤖 Automated tests: {automated_results.get('status', 'completed')}") 51 | 52 | if quality_checker: 53 | quality_results = quality_checker.execute("check_quality", 54 | files=project_files, 55 | project_type=self.project_type) 56 | tool_results["quality_check"] = quality_results 57 | print(f"🔍 Quality check: {quality_results.get('overall_score', 'N/A')}/10") 58 | 59 | files_summary = self._create_files_summary(project_files) 60 | 61 | # Create enhanced testing prompt with tool results 62 | tool_summary = "" 63 | if tool_results: 64 | 
tool_summary = f""" 65 | 66 | Tool-based Testing Results: 67 | {json.dumps(tool_results, indent=2)} 68 | """ 69 | 70 | testing_prompt = f""" 71 | Please test the following project implementation comprehensively, considering both manual review and automated tool results. 72 | 73 | Project Type: {self.project_type} 74 | Requirements: {self.requirement} 75 | Architecture Plan: {architecture_plan} 76 | 77 | Project Files: 78 | {files_summary} 79 | {tool_summary} 80 | 81 | Please perform the following types of testing: 82 | 83 | 1. **Code Quality Analysis** (Enhanced with tools): 84 | - Check for syntax errors 85 | - Verify proper HTML structure and semantic markup 86 | - Validate CSS syntax and modern practices 87 | - Review JavaScript functionality and ES6+ usage 88 | - Consider automated tool findings 89 | 90 | 2. **Functionality Testing**: 91 | - Verify all required features are implemented 92 | - Check if the project meets the stated requirements 93 | - Test user interactions and interface elements 94 | - Validate data flow and API integration 95 | 96 | 3. **Design and UX Testing**: 97 | - Evaluate visual design and modern UI principles 98 | - Check responsive design implementation 99 | - Assess user experience and accessibility (WCAG compliance) 100 | - Review color schemes, typography, and spacing 101 | 102 | 4. **Performance and Best Practices**: 103 | - Review code organization and structure 104 | - Check for performance optimizations 105 | - Verify modern web development practices 106 | - Assess browser compatibility 107 | - Review security considerations 108 | 109 | 5. **Integration and Compatibility**: 110 | - Test cross-browser functionality 111 | - Check mobile responsiveness 112 | - Validate external dependencies 113 | - Assess loading performance 114 | 115 | Provide a detailed test report with: 116 | - Issues found (categorized by severity) 117 | - Suggestions for improvement (prioritized) 118 | - Overall assessment with scoring 119 | - Specific areas that need attention 120 | - Validation of tool-based findings 121 | 122 | If everything looks good, clearly state "All tests passed - no issues found." 123 | Include a final recommendation for deployment readiness. 124 | """ 125 | 126 | self.history_message_append(testing_prompt) 127 | 128 | try: 129 | responses = self.itf.run(prompt=self.history_message, majority_at=self.majority, 130 | max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 131 | except Exception as e: 132 | print(f"Project testing failed: {e}") 133 | time.sleep(5) 134 | return "error" 135 | 136 | test_report = responses[0] 137 | self.history_message_append(test_report, "assistant") 138 | 139 | return test_report 140 | 141 | def _create_files_summary(self, project_files): 142 | """Create a summary of project files for testing""" 143 | summary = "" 144 | 145 | for file_path, content in project_files.items(): 146 | summary += f"\n--- {file_path} ---\n" 147 | # Include first 20 lines or 1000 characters, whichever is shorter 148 | lines = content.split('\n') 149 | if len(lines) > 20: 150 | preview = '\n'.join(lines[:20]) + '\n... (truncated)' 151 | else: 152 | preview = content[:1000] 153 | if len(content) > 1000: 154 | preview += "... 
(truncated)" 155 | summary += preview + "\n" 156 | 157 | return summary 158 | 159 | def history_message_append(self, message, role="user"): 160 | self.history_message.append({ 161 | "role": role, 162 | "content": message 163 | }) 164 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import json 4 | import argparse 5 | import tqdm 6 | 7 | from session import Session 8 | from project_session import ProjectSession 9 | from datasets import load_dataset, load_from_disk 10 | from utils import prompt_split_humaneval, find_method_name, code_split, build_test_method 11 | 12 | parser = argparse.ArgumentParser() 13 | parser.add_argument('--mode', type=str, default='function', choices=['function', 'project'], 14 | help='Generation mode: function-level or project-level') 15 | 16 | # Function-level arguments 17 | parser.add_argument('--dataset', type=str, default='humaneval') 18 | parser.add_argument('--lang', type=str, default='python') 19 | parser.add_argument('--output_path', type=str, default='output.jsonl') 20 | parser.add_argument('--signature', action='store_true') 21 | 22 | # Project-level arguments 23 | parser.add_argument('--project_type', type=str, default='web_visualization', 24 | choices=['web_visualization', 'data_analysis', 'api_service', 'desktop_app']) 25 | parser.add_argument('--requirement', type=str, help='Project requirement description (for project mode)') 26 | parser.add_argument('--output_dir', type=str, default='generated_project') 27 | 28 | # Common arguments 29 | parser.add_argument('--model', type=str, default='gpt-3.5-turbo') 30 | parser.add_argument('--max_round', type=int, default=2) 31 | parser.add_argument('--max_tokens', type=int, default=512) 32 | parser.add_argument('--majority', type=int, default=1) 33 | parser.add_argument('--temperature', type=float, default=0.0) 34 | parser.add_argument('--top_p', type=float, default=0.95) 35 | 36 | parser.add_argument('--fail_list', type=list, default=[]) 37 | parser.add_argument('--append', action='store_true') 38 | parser.add_argument('--verbose', action='store_true') 39 | parser.add_argument("--timeout", type=float, default=10, help="how many seconds to wait during execution for each test case") 40 | args = parser.parse_args() 41 | 42 | 43 | if __name__ == '__main__': 44 | if args.mode == 'project': 45 | # Project-level code generation 46 | from roles.project_roles import (PROJECT_TEAM, PROJECT_ARCHITECT, PROJECT_DEVELOPER, 47 | PROJECT_TESTER, UI_DESIGNER) 48 | 49 | if not args.requirement: 50 | print("Error: --requirement is required for project mode") 51 | exit(1) 52 | 53 | OUTPUT_DIR = args.output_dir 54 | os.makedirs(OUTPUT_DIR, exist_ok=True) 55 | 56 | try: 57 | # Initialize project session 58 | session = ProjectSession( 59 | team_description=PROJECT_TEAM, 60 | architect_description=PROJECT_ARCHITECT, 61 | developer_description=PROJECT_DEVELOPER, 62 | tester_description=PROJECT_TESTER, 63 | ui_designer_description=UI_DESIGNER, 64 | requirement=args.requirement, 65 | project_type=args.project_type, 66 | model=args.model, 67 | majority=args.majority, 68 | max_tokens=max(args.max_tokens, 1024), # Use larger tokens for project mode 69 | temperature=max(args.temperature, 0.2), # Use higher temperature for creativity 70 | top_p=args.top_p, 71 | max_round=args.max_round, 72 | output_dir=OUTPUT_DIR 73 | ) 74 | 75 | # Run project generation session 76 | project_files, 
session_history = session.run_project_session() 77 | 78 | # Save session history 79 | with open(os.path.join(OUTPUT_DIR, 'session_history.json'), 'w', encoding='utf-8') as f: 80 | json.dump(session_history, f, indent=2, ensure_ascii=False) 81 | 82 | print(f"Project generated successfully in: {OUTPUT_DIR}") 83 | print(f"Generated files: {list(project_files.keys())}") 84 | 85 | # If web project, provide instructions for running 86 | if args.project_type == 'web_visualization' and 'index.html' in project_files: 87 | print("\nTo view the web application:") 88 | print(f"Open {os.path.join(OUTPUT_DIR, 'index.html')} in your browser") 89 | 90 | except Exception as e: 91 | print(f"Project generation failed: {str(e)}") 92 | 93 | else: 94 | # Original function-level code generation 95 | from roles.rule_descriptions_actc import TEAM, ANALYST, PYTHON_DEVELOPER, TESTER 96 | 97 | OUTPUT_PATH = args.output_path 98 | fail_list = args.fail_list 99 | 100 | # load dataset 101 | if args.dataset == 'humaneval': 102 | if args.lang == 'python': 103 | dataset = load_dataset("openai_humaneval") 104 | dataset_key = ["test"] 105 | 106 | with open(OUTPUT_PATH, 'w+') as f: 107 | for key in dataset_key: 108 | pbar = tqdm.tqdm(dataset[key], total=len(dataset[key])) 109 | for idx, task in enumerate(pbar): 110 | 111 | if args.dataset == 'humaneval': 112 | method_name = task['entry_point'] 113 | before_func, signature, intent, public_test_case = prompt_split_humaneval(task['prompt'], method_name) 114 | args.signature = True # HumanEval always uses the full prompt as the intent 115 | if args.signature: 116 | intent = task['prompt'] 117 | 118 | test = task['test'] 119 | 120 | try: 121 | session = Session(TEAM, ANALYST, PYTHON_DEVELOPER, TESTER, requirement=intent, model=args.model, majority=args.majority, 122 | max_tokens=args.max_tokens, temperature=args.temperature, 123 | top_p=args.top_p, max_round=args.max_round, before_func=before_func) 124 | 125 | code, session_history = session.run_session() 126 | 127 | except RuntimeError as e: 128 | print(str(e)) 129 | print("task-%s fail" % (task['task_id'])) # task_id is a string such as "HumanEval/0" 130 | fail_list.append(task['task_id']) 131 | continue 132 | 133 | if code == "error": 134 | continue 135 | 136 | entry_point = find_method_name(code) 137 | solution = { 138 | 'task_id': task['task_id'], 139 | 'prompt': before_func+"\n", 140 | 'test': test, 141 | 'entry_point': entry_point, 142 | 'completion': code, 143 | 'session_history': session_history, 144 | } 145 | f.write(json.dumps(solution) + '\n') 146 | f.flush() 147 | -------------------------------------------------------------------------------- /roles/ui_designer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import json 4 | import time 5 | 6 | from core import interface 7 | from utils import construct_system_message 8 | from .enhanced_role import EnhancedRole 9 | 10 | 11 | class UIDesigner(EnhancedRole): 12 | def __init__(self, team_description, designer_description, requirement, project_type, 13 | model='gpt-3.5-turbo', majority=1, max_tokens=1024, temperature=0.2, top_p=0.95): 14 | # Initialize base class 15 | super().__init__() 16 | 17 | self.model = model 18 | self.majority = majority 19 | self.max_tokens = max_tokens 20 | self.temperature = temperature 21 | self.top_p = top_p 22 | self.history_message = [] 23 | self.requirement = requirement 24 | self.project_type = project_type 25 | 26 | self.itf = interface.ProgramInterface( 27 | stop='', 28 | verbose=False, 29 | model=self.model, 30 | ) 31 | 32 | system_message = 
construct_system_message(requirement, designer_description, team_description) 33 | self.history_message_append(system_message) 34 | 35 | def design_ui(self, architecture_plan): 36 | """Design the user interface for the project""" 37 | 38 | ui_design_prompt = f""" 39 | Based on the following architecture plan and project requirements, create a comprehensive UI design. 40 | 41 | Project Type: {self.project_type} 42 | Requirements: {self.requirement} 43 | Architecture Plan: {architecture_plan} 44 | 45 | Please provide a detailed UI design specification in JSON format: 46 | 47 | {{ 48 | "design_system": {{ 49 | "colors": {{ 50 | "primary": "#007bff", 51 | "secondary": "#6c757d", 52 | "background": "#f8f9fa", 53 | "text": "#333333" 54 | }}, 55 | "typography": {{ 56 | "font_family": "Arial, sans-serif", 57 | "headings": "bold", 58 | "body_size": "16px" 59 | }}, 60 | "spacing": {{ 61 | "base_unit": "8px", 62 | "container_padding": "20px", 63 | "section_margin": "40px" 64 | }} 65 | }}, 66 | "layout": {{ 67 | "type": "responsive", 68 | "grid_system": "CSS Grid / Flexbox", 69 | "breakpoints": {{ 70 | "mobile": "768px", 71 | "tablet": "992px", 72 | "desktop": "1200px" 73 | }} 74 | }}, 75 | "components": [ 76 | {{ 77 | "name": "header", 78 | "description": "Main navigation and branding", 79 | "styling": "Modern, clean design with navigation menu" 80 | }}, 81 | {{ 82 | "name": "main_content", 83 | "description": "Primary content area", 84 | "styling": "Card-based layout with proper spacing" 85 | }}, 86 | {{ 87 | "name": "footer", 88 | "description": "Footer information", 89 | "styling": "Minimal, informational" 90 | }} 91 | ], 92 | "interactions": [ 93 | {{ 94 | "element": "buttons", 95 | "behavior": "Hover effects with smooth transitions" 96 | }}, 97 | {{ 98 | "element": "forms", 99 | "behavior": "Real-time validation with clear feedback" 100 | }} 101 | ], 102 | "accessibility": [ 103 | "ARIA labels for interactive elements", 104 | "Keyboard navigation support", 105 | "High contrast color ratios", 106 | "Responsive text sizing" 107 | ] 108 | }} 109 | 110 | For web visualization projects, emphasize: 111 | - Modern, clean aesthetic 112 | - Interactive data visualization elements 113 | - Responsive design for all devices 114 | - Professional color scheme 115 | - Clear typography hierarchy 116 | - Intuitive user interactions 117 | """ 118 | 119 | self.history_message_append(ui_design_prompt) 120 | 121 | try: 122 | responses = self.itf.run(prompt=self.history_message, majority_at=self.majority, 123 | max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 124 | except Exception as e: 125 | print(f"UI design failed: {e}") 126 | time.sleep(5) 127 | return "error" 128 | 129 | ui_design = responses[0] 130 | self.history_message_append(ui_design, "assistant") 131 | 132 | return ui_design 133 | 134 | def _adapt_to_frontend(self): 135 | """Adapt to frontend projects""" 136 | self.design_priorities = [ 137 | "responsive_design", 138 | "modern_ui_components", 139 | "cross_browser_compatibility", 140 | "performance_optimization", 141 | "accessibility" 142 | ] 143 | 144 | def _adapt_to_backend(self): 145 | """Adapt to backend projects""" 146 | self.design_priorities = [ 147 | "admin_interface", 148 | "api_documentation_ui", 149 | "monitoring_dashboard", 150 | "simple_clean_design" 151 | ] 152 | 153 | def _adapt_to_data_science(self): 154 | """Adapt to data science projects""" 155 | self.design_priorities = [ 156 | "data_visualization", 157 | "interactive_charts", 158 | "dashboard_layout", 159 | 
"analytical_ui_components", 160 | "filtering_controls" 161 | ] 162 | 163 | def _adapt_to_mobile(self): 164 | """Adapt to mobile application projects""" 165 | self.design_priorities = [ 166 | "mobile_first_design", 167 | "touch_friendly_interface", 168 | "native_app_feel", 169 | "gesture_support", 170 | "offline_ui_states" 171 | ] 172 | 173 | def _adapt_to_desktop(self): 174 | """Adapt to desktop application projects""" 175 | self.design_priorities = [ 176 | "desktop_conventions", 177 | "keyboard_shortcuts", 178 | "menu_systems", 179 | "toolbar_design", 180 | "window_management" 181 | ] 182 | 183 | def _adapt_to_fullstack(self): 184 | """Adapt to fullstack projects""" 185 | self.design_priorities = [ 186 | "unified_design_system", 187 | "admin_and_user_interfaces", 188 | "responsive_components", 189 | "consistent_branding" 190 | ] 191 | 192 | def history_message_append(self, message, role="user"): 193 | self.history_message.append({ 194 | "role": role, 195 | "content": message 196 | }) 197 | -------------------------------------------------------------------------------- /roles/project_roles.py: -------------------------------------------------------------------------------- 1 | PROJECT_TEAM = '''There is a development team that includes a project architect, a full-stack developer, a tester, a UI designer, and a web visualization specialist. The team needs to develop complete projects that satisfy the requirements of users. Each role has different responsibilities and they need to collaborate with each other to create high-quality, functional applications with rich data visualizations and modern web interfaces. 2 | ''' 3 | 4 | PROJECT_ARCHITECT = '''I want you to act as a project architect on our development team. Given a user requirement, your task is to: 5 | 1. Analyze the overall project requirements and break them down into components 6 | 2. Design the project structure including file organization, dependencies, and architecture 7 | 3. Create a detailed implementation plan with specific files and their purposes 8 | 4. Define the technology stack and frameworks to be used 9 | 5. Specify the data flow and component interactions 10 | 6. Plan for scalability, performance, and modern web standards 11 | 7. Consider visualization requirements and data handling strategies 12 | For web visualization projects, prioritize: 13 | - Modern build tools and development workflow 14 | - Component-based architecture 15 | - Efficient data loading and caching strategies 16 | - Responsive design considerations 17 | - Performance optimization for large datasets 18 | Remember, provide a comprehensive project plan in JSON format that includes: 19 | - project_structure: detailed file tree with descriptions 20 | - technology_stack: frameworks, libraries, and tools 21 | - implementation_phases: step-by-step development plan 22 | - component_interactions: how different parts connect 23 | - data_flow: how data moves through the application 24 | - performance_considerations: optimization strategies 25 | ''' 26 | 27 | PROJECT_DEVELOPER = '''I want you to act as a full-stack developer on our development team. You will receive: 28 | 1. Project architecture plans from the architect 29 | 2. UI designs from the designer 30 | 3. Test reports from the tester 31 | 4. Visualization specifications from the visualization specialist 32 | Your responsibilities include: 33 | 1. Implementing backend logic and APIs 34 | 2. Creating frontend components and interfaces 35 | 3. Integrating different system components 36 | 4. 
Writing clean, efficient, and maintainable code 37 | 5. Following the project architecture and design guidelines 38 | 6. Fixing issues based on test feedback 39 | 7. Implementing interactive data visualizations with modern libraries 40 | 8. Creating responsive and accessible web applications 41 | For web visualization projects, focus on: 42 | - Modern ES6+ JavaScript with async/await patterns 43 | - Multiple visualization libraries (Chart.js, D3.js, Plotly, ECharts) 44 | - Advanced CSS3 features (Grid, Flexbox, Animations, Custom Properties) 45 | - Interactive features (filters, real-time updates, responsive charts) 46 | - Performance optimization for large datasets 47 | - Cross-browser compatibility and mobile responsiveness 48 | - Progressive Web App features when applicable 49 | - WebGL and Canvas optimization for complex visualizations 50 | Remember, provide complete, functional code files with rich interactivity and beautiful design. 51 | ''' 52 | 53 | PROJECT_TESTER = '''I want you to act as a project tester on our development team. Your responsibilities include: 54 | 1. Creating comprehensive test plans for the entire project 55 | 2. Writing unit tests for individual components 56 | 3. Performing integration testing 57 | 4. Testing user interfaces and user experience 58 | 5. Identifying bugs, performance issues, and usability problems 59 | 6. Providing detailed test reports with specific feedback 60 | 7. Testing data visualization functionality and interactivity 61 | 8. Validating responsive design across different devices 62 | 9. Testing accessibility compliance (WCAG 2.1) 63 | 10. Performance testing for data-heavy operations 64 | For web visualization projects, focus on: 65 | - Chart rendering accuracy and performance 66 | - Interactive elements functionality 67 | - Data loading and error handling 68 | - Cross-browser compatibility 69 | - Mobile touch interactions 70 | - Accessibility features for visualizations 71 | Remember, provide: 72 | - Test cases that cover main functionality 73 | - Test code when applicable 74 | - Detailed bug reports with reproduction steps 75 | - Performance and usability feedback 76 | - Accessibility audit results 77 | - Cross-browser compatibility reports 78 | ''' 79 | 80 | UI_DESIGNER = '''I want you to act as a UI/UX designer on our development team. Your responsibilities include: 81 | 1. Creating user interface designs based on project requirements 82 | 2. Designing user experience flows and interactions 83 | 3. Choosing appropriate color schemes, fonts, and layouts 84 | 4. Creating responsive designs that work on different devices 85 | 5. Ensuring accessibility and usability best practices 86 | 6. Providing CSS styling and frontend design specifications 87 | 7. Designing data visualization aesthetics and interaction patterns 88 | 8. 
Creating modern design systems with consistent components 89 | For web visualization projects, focus on: 90 | - Modern design trends (glassmorphism, neumorphism, gradient overlays) 91 | - Advanced CSS techniques (CSS Grid, Flexbox, animations, transitions) 92 | - Color schemes optimized for data visualization (accessible contrasts) 93 | - Typography that enhances readability of data and metrics 94 | - Interactive UI patterns (hover states, loading animations, micro-interactions) 95 | - Dark mode and light mode support 96 | - Mobile-first responsive design approach 97 | - Design systems with CSS custom properties 98 | Remember, provide: 99 | - Detailed UI specifications with CSS custom properties 100 | - Component-based design systems 101 | - Animation and transition specifications 102 | - Accessibility guidelines (WCAG 2.1 compliance) 103 | - Interactive prototyping guidance 104 | - Responsive design breakpoints and strategies 105 | ''' 106 | 107 | WEB_VISUALIZATION_SPECIALIST = '''I want you to act as a web visualization specialist with deep expertise in data visualization and interactive web applications. Your comprehensive responsibilities include: 108 | 109 | **Core Visualization Expertise:** 110 | 1. Master-level proficiency in multiple visualization libraries: 111 | - Chart.js (for standard charts with excellent performance) 112 | - D3.js (for custom, complex visualizations) 113 | - Plotly.js (for scientific and statistical visualizations) 114 | - ECharts (for enterprise-grade dashboards) 115 | - Three.js (for 3D visualizations) 116 | - Leaflet/Mapbox (for geospatial data) 117 | 118 | **Advanced Chart Types & Techniques:** 119 | 2. Implement diverse visualization types: 120 | - Standard: Bar, Line, Pie, Scatter, Area charts 121 | - Advanced: Heatmaps, Treemaps, Sunburst, Sankey diagrams 122 | - Statistical: Box plots, Violin plots, Regression lines 123 | - Time-series: Candlestick, Stream graphs, Timeline charts 124 | - Geospatial: Choropleth maps, Heat maps, Marker clustering 125 | - 3D: Surface plots, 3D scatter, WebGL-accelerated charts 126 | 127 | **Interactive Features:** 128 | 3. Create rich interactivity: 129 | - Real-time data updates with WebSockets 130 | - Advanced filtering and drill-down capabilities 131 | - Brush and zoom functionality 132 | - Cross-chart filtering and linking 133 | - Animation and smooth transitions 134 | - Touch and gesture support for mobile 135 | - Keyboard navigation for accessibility 136 | 137 | **Performance & Optimization:** 138 | 4. Optimize for large datasets: 139 | - Data virtualization and pagination 140 | - Canvas rendering for performance 141 | - WebGL acceleration when needed 142 | - Lazy loading and progressive enhancement 143 | - Memory management and garbage collection 144 | - Efficient data structures and algorithms 145 | 146 | **Modern Web Technologies:** 147 | 5. Leverage cutting-edge technologies: 148 | - Web Workers for heavy computations 149 | - WebAssembly for performance-critical operations 150 | - Progressive Web App features 151 | - Service Workers for offline functionality 152 | - Modern JavaScript (ES2023+ features) 153 | - TypeScript for type safety 154 | 155 | **Data Integration:** 156 | 6. Handle diverse data sources: 157 | - REST APIs and GraphQL 158 | - CSV, JSON, XML parsing 159 | - Real-time streams and WebSockets 160 | - Database connections (when applicable) 161 | - File uploads and drag-drop functionality 162 | 163 | **Design & UX Excellence:** 164 | 7. 
Create exceptional user experiences: 165 | - Responsive design for all devices 166 | - Accessible visualizations (WCAG 2.1) 167 | - Intuitive interaction patterns 168 | - Progressive disclosure of complexity 169 | - Error handling and loading states 170 | - Contextual help and tooltips 171 | 172 | Remember to always: 173 | - Provide complete, production-ready code 174 | - Include comprehensive error handling 175 | - Implement responsive design patterns 176 | - Add accessibility features 177 | - Optimize for performance 178 | - Include detailed code comments 179 | - Create modular, reusable components 180 | - Follow modern web development best practices 181 | 182 | For each project, create visually stunning, highly interactive, and performant web applications that showcase the full potential of modern data visualization. 183 | ''' 184 | -------------------------------------------------------------------------------- /session.py: -------------------------------------------------------------------------------- 1 | from roles import Analyst, Coder, Tester 2 | from utils import find_method_name 3 | import time 4 | from utils import code_truncate 5 | 6 | 7 | class Session(object): 8 | def __init__(self, TEAM, ANALYST, PYTHON_DEVELOPER, TESTER, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512, 9 | temperature=0.0, top_p=0.95, max_round=4, before_func=''): 10 | 11 | self.session_history = {} 12 | self.max_round = max_round 13 | self.before_func = before_func 14 | self.requirement = requirement 15 | self.analyst = Analyst(TEAM, ANALYST, requirement, model, majority, max_tokens, temperature, top_p) 16 | self.coder = Coder(TEAM, PYTHON_DEVELOPER, requirement, model, majority, max_tokens, temperature, top_p) 17 | self.tester = Tester(TEAM, TESTER, requirement, model, majority, max_tokens, temperature, top_p) 18 | 19 | def run_session(self): 20 | plan = self.analyst.analyze() 21 | report = plan 22 | is_init=True 23 | self.session_history["plan"] = plan 24 | code = "" 25 | 26 | for i in range(self.max_round): 27 | 28 | naivecode = self.coder.implement(report, is_init) 29 | method_name = find_method_name(naivecode) 30 | if method_name: 31 | code = naivecode 32 | 33 | if code.strip() == "": 34 | if i == 0: 35 | code = "error" 36 | else: 37 | code = self.session_history['Round_{}'.format(i-1)]["code"] 38 | break 39 | 40 | if i == self.max_round-1: 41 | self.session_history['Round_{}'.format(i)] = {"code": code} 42 | break 43 | 44 | tests = self.tester.test(code) 45 | test_report = code_truncate(tests) 46 | answer_report = unsafe_execute(self.before_func+code+'\n'+test_report+'\n'+f'check({method_name})', '') 47 | report = f'The compilation output of the preceding code is: {answer_report}' 48 | 49 | is_init = False 50 | self.session_history['Round_{}'.format(i)] = {"code": code, "report": report} 51 | 52 | if (plan == "error") or (code == "error") or (report == "error"): 53 | code = "error" 54 | break 55 | 56 | if answer_report == "Code Test Passed.": 57 | break 58 | 59 | self.analyst.itf.clear_history() 60 | self.coder.itf.clear_history() 61 | self.tester.itf.clear_history() 62 | 63 | return code, self.session_history 64 | 65 | def run_analyst_coder(self): 66 | plan = self.analyst.analyze() 67 | is_init=True 68 | self.session_history["plan"] = plan 69 | code = self.coder.implement(plan, is_init) 70 | 71 | if (plan == "error") or (code == "error"): 72 | code = "error" 73 | 74 | self.analyst.itf.clear_history() 75 | self.coder.itf.clear_history() 76 | self.tester.itf.clear_history() 77 | 78 | 
return code, self.session_history 79 | 80 | 81 | def run_coder_tester(self): 82 | report = "" 83 | is_init=True 84 | code = "" 85 | 86 | for i in range(self.max_round): 87 | 88 | naivecode = self.coder.implement(report, is_init) 89 | if (method_name := find_method_name(naivecode)): # remember the entry point for the check(...) call below 90 | code = naivecode 91 | 92 | if code.strip() == "": 93 | if i == 0: 94 | code = self.coder.implement(report, is_init=True) 95 | else: 96 | code = self.session_history['Round_{}'.format(i-1)]["code"] 97 | break 98 | 99 | if i == self.max_round-1: 100 | self.session_history['Round_{}'.format(i)] = {"code": code} 101 | break 102 | tests = self.tester.test(code) 103 | test_report = code_truncate(tests) 104 | answer_report = unsafe_execute(self.before_func+code+'\n'+test_report+'\n'+f'check({method_name})', '') 105 | report = f'The compilation output of the preceding code is: {answer_report}' 106 | 107 | is_init = False 108 | self.session_history['Round_{}'.format(i)] = {"code": code, "report": report} 109 | 110 | if (code == "error") or (report == "error"): 111 | code = "error" 112 | break 113 | 114 | if answer_report == "Code Test Passed.": 115 | break 116 | 117 | self.analyst.itf.clear_history() 118 | self.coder.itf.clear_history() 119 | self.tester.itf.clear_history() 120 | 121 | return code, self.session_history 122 | 123 | def run_coder_only(self): 124 | plan = "" 125 | code = self.coder.implement(plan, is_init=True) 126 | self.coder.itf.clear_history() 127 | return code, self.session_history 128 | 129 | 130 | import contextlib 131 | import faulthandler 132 | import io 133 | import os 134 | import platform 135 | import signal 136 | import tempfile 137 | 138 | def unsafe_execute(code, report): 139 | 140 | with create_tempdir(): 141 | 142 | # These system calls are needed when cleaning up tempdir. 143 | import os 144 | import shutil 145 | rmtree = shutil.rmtree 146 | rmdir = os.rmdir 147 | chdir = os.chdir 148 | 149 | # Disable functionalities that can make destructive changes to the test. 150 | reliability_guard() 151 | 152 | # Construct the check program and run it. 153 | check_program = ( 154 | code + report 155 | ) 156 | 157 | try: 158 | exec_globals = {} 159 | with swallow_io(): 160 | timeout = 10 161 | with time_limit(timeout): 162 | exec(check_program, exec_globals) 163 | result = "Code Test Passed." 164 | except AssertionError as e: 165 | result = f"failed with AssertionError. {e}" 166 | except TimeoutException: 167 | result = "timed out" 168 | except BaseException as e: 169 | result = f"{e}" 170 | 171 | 172 | # Needed for cleaning up. 173 | shutil.rmtree = rmtree 174 | os.rmdir = rmdir 175 | os.chdir = chdir 176 | return result 177 | 178 | 179 | def reliability_guard(maximum_memory_bytes=None): 180 | """ 181 | This disables various destructive functions and prevents the generated code 182 | from interfering with the test (e.g. fork bomb, killing other processes, 183 | removing filesystem files, etc.) 184 | 185 | WARNING 186 | This function is NOT a security sandbox. Untrusted code, including, model- 187 | generated code, should not be blindly executed outside of one. See the 188 | Codex paper for more information about OpenAI's code sandbox, and proceed 189 | with caution. 
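Note that unsafe_execute above saves shutil.rmtree, os.rmdir and os.chdir before invoking this guard and restores them afterwards, so the temporary working directory can still be cleaned up.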
190 | """ 191 | 192 | if maximum_memory_bytes is not None: 193 | import resource 194 | resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes)) 195 | resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes)) 196 | if not platform.uname().system == 'Darwin': 197 | resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes)) 198 | 199 | faulthandler.disable() 200 | 201 | import builtins 202 | builtins.exit = None 203 | builtins.quit = None 204 | 205 | import os 206 | os.environ['OMP_NUM_THREADS'] = '1' 207 | 208 | os.rmdir = None 209 | os.chdir = None 210 | 211 | import shutil 212 | shutil.rmtree = None 213 | shutil.move = None 214 | shutil.chown = None 215 | 216 | import subprocess 217 | subprocess.Popen = None # type: ignore 218 | 219 | __builtins__['help'] = None 220 | 221 | import sys 222 | sys.modules['ipdb'] = None 223 | sys.modules['joblib'] = None 224 | sys.modules['resource'] = None 225 | sys.modules['psutil'] = None 226 | sys.modules['tkinter'] = None 227 | 228 | @contextlib.contextmanager 229 | def time_limit(seconds: float): 230 | def signal_handler(signum, frame): 231 | raise TimeoutException("Timed out!") 232 | signal.setitimer(signal.ITIMER_REAL, seconds) 233 | signal.signal(signal.SIGALRM, signal_handler) 234 | try: 235 | yield 236 | finally: 237 | signal.setitimer(signal.ITIMER_REAL, 0) 238 | 239 | 240 | @contextlib.contextmanager 241 | def swallow_io(): 242 | stream = WriteOnlyStringIO() 243 | with contextlib.redirect_stdout(stream): 244 | with contextlib.redirect_stderr(stream): 245 | with redirect_stdin(stream): 246 | yield 247 | 248 | 249 | @contextlib.contextmanager 250 | def create_tempdir(): 251 | with tempfile.TemporaryDirectory() as dirname: 252 | with chdir(dirname): 253 | yield dirname 254 | 255 | class TimeoutException(Exception): 256 | pass 257 | 258 | 259 | class WriteOnlyStringIO(io.StringIO): 260 | """ StringIO that throws an exception when it's read from """ 261 | 262 | def read(self, *args, **kwargs): 263 | raise IOError 264 | 265 | def readline(self, *args, **kwargs): 266 | raise IOError 267 | 268 | def readlines(self, *args, **kwargs): 269 | raise IOError 270 | 271 | def readable(self, *args, **kwargs): 272 | """ Returns True if the IO object can be read. 
""" 273 | return False 274 | 275 | 276 | class redirect_stdin(contextlib._RedirectStream): # type: ignore 277 | _stream = 'stdin' 278 | 279 | 280 | @contextlib.contextmanager 281 | def chdir(root): 282 | if root == ".": 283 | yield 284 | return 285 | cwd = os.getcwd() 286 | os.chdir(root) 287 | try: 288 | yield 289 | except BaseException as exc: 290 | raise exc 291 | finally: 292 | os.chdir(cwd) -------------------------------------------------------------------------------- /roles/web_visualization_specialist.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import json 4 | import time 5 | import re 6 | 7 | from core import interface 8 | from utils import construct_system_message 9 | 10 | 11 | class WebVisualizationSpecialist(object): 12 | def __init__(self, team_description, specialist_description, requirement, project_type, 13 | model='gpt-3.5-turbo', majority=1, max_tokens=2048, temperature=0.3, top_p=0.95): 14 | self.model = model 15 | self.majority = majority 16 | self.max_tokens = max_tokens 17 | self.temperature = temperature 18 | self.top_p = top_p 19 | self.history_message = [] 20 | self.requirement = requirement 21 | self.project_type = project_type 22 | 23 | self.itf = interface.ProgramInterface( 24 | stop='', 25 | verbose=False, 26 | model=self.model, 27 | ) 28 | 29 | system_message = construct_system_message(requirement, specialist_description, team_description) 30 | self.history_message_append(system_message) 31 | 32 | def create_visualization_plan(self, architecture_plan): 33 | """Create detailed visualization specifications""" 34 | 35 | visualization_prompt = f""" 36 | Based on the project requirements and architecture plan, create a comprehensive visualization specification. 
37 | 38 | Project Type: {self.project_type} 39 | Requirements: {self.requirement} 40 | Architecture Plan: {architecture_plan} 41 | 42 | Please provide a detailed JSON specification that includes: 43 | 44 | {{ 45 | "visualization_strategy": {{ 46 | "primary_library": "Chart.js|D3.js|Plotly|ECharts", 47 | "secondary_libraries": ["Three.js", "Leaflet"], 48 | "data_processing": "client-side|server-side|hybrid", 49 | "performance_approach": "canvas|svg|webgl" 50 | }}, 51 | "chart_specifications": [ 52 | {{ 53 | "chart_type": "bar|line|pie|scatter|heatmap|treemap|3d", 54 | "library": "Chart.js|D3.js|Plotly|ECharts", 55 | "data_source": "static|api|realtime|uploaded", 56 | "interactivity": ["hover", "click", "zoom", "brush", "filter"], 57 | "animations": ["entrance", "update", "transition"], 58 | "responsive": true, 59 | "accessibility": ["aria-labels", "keyboard-nav", "screen-reader"] 60 | }} 61 | ], 62 | "interactive_features": {{ 63 | "real_time_updates": true|false, 64 | "data_filters": ["date-range", "category", "search"], 65 | "cross_chart_interactions": true|false, 66 | "export_functionality": ["png", "pdf", "csv", "json"], 67 | "drill_down_capabilities": true|false 68 | }}, 69 | "performance_optimizations": {{ 70 | "lazy_loading": true|false, 71 | "data_virtualization": true|false, 72 | "web_workers": true|false, 73 | "caching_strategy": "memory|localStorage|sessionStorage", 74 | "progressive_loading": true|false 75 | }}, 76 | "modern_features": {{ 77 | "pwa_support": true|false, 78 | "offline_functionality": true|false, 79 | "web_components": true|false, 80 | "module_system": "es6|webpack|rollup", 81 | "typescript_support": true|false 82 | }}, 83 | "sample_data_structure": {{ 84 | "format": "json|csv|api_response", 85 | "schema": "describe expected data structure", 86 | "sample_size": "number of records for demo" 87 | }} 88 | }} 89 | 90 | Focus on creating specifications that will result in: 91 | 1. High-performance visualizations 92 | 2. Rich interactivity and user engagement 93 | 3. Beautiful, modern aesthetics 94 | 4. Responsive design for all devices 95 | 5. Accessibility compliance 96 | 6. Scalability for large datasets 97 | """ 98 | 99 | self.history_message_append(visualization_prompt) 100 | 101 | try: 102 | responses = self.itf.run(prompt=self.history_message, majority_at=self.majority, 103 | max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 104 | except Exception as e: 105 | print(f"Visualization planning failed: {e}") 106 | time.sleep(5) 107 | return "error" 108 | 109 | visualization_plan = responses[0] 110 | self.history_message_append(visualization_plan, "assistant") 111 | 112 | return visualization_plan 113 | 114 | def generate_advanced_code_templates(self, visualization_plan, ui_design): 115 | """Generate advanced code templates with modern visualization libraries""" 116 | 117 | template_prompt = f""" 118 | Based on the visualization plan and UI design, generate comprehensive code templates. 
119 | 120 | Visualization Plan: {visualization_plan} 121 | UI Design: {ui_design} 122 | 123 | Please provide complete code templates for the following files: 124 | 125 | === TEMPLATE: js/visualization-engine.js === 126 | // Advanced visualization engine with multiple library support 127 | // Include: Chart.js, D3.js, and Plotly integration 128 | // Features: Dynamic chart switching, real-time updates, responsive design 129 | 130 | === TEMPLATE: js/data-manager.js === 131 | // Data management and processing utilities 132 | // Include: API integration, data transformation, caching, real-time updates 133 | 134 | === TEMPLATE: js/interaction-controller.js === 135 | // Advanced interaction handling 136 | // Include: Cross-chart filtering, brush-zoom, touch support, keyboard navigation 137 | 138 | === TEMPLATE: css/visualization-styles.css === 139 | // Modern CSS for visualizations 140 | // Include: CSS Grid layouts, animations, dark/light themes, responsive design 141 | 142 | === TEMPLATE: js/performance-optimizer.js === 143 | // Performance optimization utilities 144 | // Include: Data virtualization, lazy loading, Web Workers integration 145 | 146 | Requirements for each template: 147 | 1. Use modern ES6+ JavaScript features 148 | 2. Include comprehensive error handling 149 | 3. Implement responsive design patterns 150 | 4. Add accessibility features (ARIA labels, keyboard navigation) 151 | 5. Include performance optimizations 152 | 6. Use modular, reusable code structure 153 | 7. Add detailed comments explaining functionality 154 | 8. Support multiple visualization libraries 155 | 9. Include sample data and demo functionality 156 | 10. Implement modern web standards (Progressive Web App features) 157 | 158 | Each template should be production-ready and demonstrate best practices. 159 | """ 160 | 161 | self.history_message_append(template_prompt) 162 | 163 | try: 164 | responses = self.itf.run(prompt=self.history_message, majority_at=self.majority, 165 | max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 166 | except Exception as e: 167 | print(f"Template generation failed: {e}") 168 | time.sleep(5) 169 | return {} 170 | 171 | templates_response = responses[0] 172 | self.history_message_append(templates_response, "assistant") 173 | 174 | # Parse the templates from the response 175 | templates = self._parse_templates(templates_response) 176 | 177 | return templates 178 | 179 | def _parse_templates(self, templates_response): 180 | """Parse the templates response into separate files""" 181 | templates = {} 182 | 183 | # Pattern to match template sections 184 | template_pattern = r'=== TEMPLATE: (.+?) ===\n(.*?)(?=\n=== TEMPLATE:|$)' 185 | matches = re.findall(template_pattern, templates_response, re.DOTALL) 186 | 187 | for file_path, content in matches: 188 | file_path = file_path.strip() 189 | content = content.strip() 190 | templates[file_path] = content 191 | 192 | return templates 193 | 194 | def optimize_for_performance(self, project_files): 195 | """Provide performance optimization suggestions""" 196 | 197 | files_summary = "\n".join([f"{path}: {len(content)} characters" 198 | for path, content in project_files.items()]) 199 | 200 | optimization_prompt = f""" 201 | Analyze the following project files and provide performance optimization recommendations. 202 | 203 | Project Files Summary: 204 | {files_summary} 205 | 206 | Please provide specific recommendations for: 207 | 1. JavaScript performance optimizations 208 | 2. CSS optimization strategies 209 | 3. 
Data loading and caching improvements 210 | 4. Visualization rendering optimizations 211 | 5. Mobile performance considerations 212 | 6. Bundle size reduction techniques 213 | 7. Progressive loading strategies 214 | 8. Memory management improvements 215 | 216 | Focus on modern web performance best practices and visualization-specific optimizations. 217 | """ 218 | 219 | self.history_message_append(optimization_prompt) 220 | 221 | try: 222 | responses = self.itf.run(prompt=self.history_message, majority_at=self.majority, 223 | max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 224 | except Exception as e: 225 | print(f"Performance optimization failed: {e}") 226 | time.sleep(5) 227 | return "error" 228 | 229 | optimization_suggestions = responses[0] 230 | self.history_message_append(optimization_suggestions, "assistant") 231 | 232 | return optimization_suggestions 233 | 234 | def history_message_append(self, message, role="user"): 235 | self.history_message.append({ 236 | "role": role, 237 | "content": message 238 | }) -------------------------------------------------------------------------------- /roles/enhanced_role.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Enhanced role base class - provides common tool integration capabilities for all roles 4 | """ 5 | 6 | import sys 7 | import os 8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) 9 | 10 | # Use new simplified tool system 11 | from tools import global_tool_orchestrator, CodeAnalyzer, FileManager, QualityChecker 12 | 13 | class EnhancedRole: 14 | """Enhanced role base class - provides common tool integration capabilities for all roles""" 15 | 16 | def __init__(self): 17 | self.tools = [] 18 | self.project_specification = None 19 | self.quality_gates = [] 20 | self.feedback_history = [] 21 | self.role_name = self.__class__.__name__ 22 | 23 | # Add universal tools 24 | self._add_universal_tools() 25 | 26 | def _add_universal_tools(self): 27 | """Add universal tools""" 28 | self.tools.extend([ 29 | CodeAnalyzer(), 30 | FileManager(), 31 | QualityChecker() 32 | ]) 33 | 34 | def set_project_specification(self, project_spec): 35 | """Set project specification that all roles need to follow""" 36 | self.project_specification = project_spec 37 | self._adapt_to_project_type(project_spec["type"]) 38 | print(f"📋 {self.role_name} adapted to project type: {project_spec['type']}") 39 | 40 | def _adapt_to_project_type(self, project_type): 41 | """Adapt role behavior based on project type - subclasses implement specific adaptation logic""" 42 | # Base class provides default implementation, subclasses can override 43 | adaptation_strategies = { 44 | "web_frontend": self._adapt_to_frontend, 45 | "web_backend": self._adapt_to_backend, 46 | "data_science": self._adapt_to_data_science, 47 | "mobile": self._adapt_to_mobile, 48 | "desktop": self._adapt_to_desktop, 49 | "fullstack": self._adapt_to_fullstack 50 | } 51 | 52 | strategy = adaptation_strategies.get(project_type, self._adapt_to_generic) 53 | strategy() 54 | 55 | def _adapt_to_frontend(self): 56 | """Adapt for frontend projects""" 57 | print(f"🎨 {self.role_name} adapted for frontend development") 58 | 59 | def _adapt_to_backend(self): 60 | """Adapt for backend projects""" 61 | print(f"⚙️ {self.role_name} adapted for backend development") 62 | 63 | def _adapt_to_data_science(self): 64 | """Adapt for data science projects""" 65 | print(f"📊 {self.role_name} adapted for 
data science project") 66 | 67 | def _adapt_to_mobile(self): 68 | """Adapt for mobile projects""" 69 | print(f"📱 {self.role_name} adapted for mobile development") 70 | 71 | def _adapt_to_desktop(self): 72 | """Adapt for desktop projects""" 73 | print(f"🖥️ {self.role_name} adapted for desktop development") 74 | 75 | def _adapt_to_fullstack(self): 76 | """Adapt for fullstack projects""" 77 | print(f"🌐 {self.role_name} adapted for fullstack development") 78 | 79 | def _adapt_to_generic(self): 80 | """Adapt for generic projects""" 81 | print(f"🔧 {self.role_name} adapted for generic project") 82 | 83 | def execute_with_quality_gate(self, phase, task_func, *args, **kwargs): 84 | """Execute task under quality gate protection""" 85 | print(f"🚪 {self.role_name} executing {phase} with quality gate protection...") 86 | 87 | # Execute task 88 | result = task_func(*args, **kwargs) 89 | 90 | # Quality gate check 91 | gatekeeper = self.get_tool("QualityGatekeeper") 92 | if gatekeeper and self.project_specification: 93 | print(f"🔍 Running quality gate for {phase}...") 94 | gate_result = gatekeeper.execute(phase, result, self.project_specification) 95 | 96 | if not gate_result["gate_passed"]: 97 | print(f"❌ Quality gate failed for {phase}") 98 | print(f"🔧 Attempting to fix issues: {gate_result['blocking_issues']}") 99 | # Quality gate failed, attempt to fix 100 | result = self._handle_quality_gate_failure(result, gate_result, phase) 101 | else: 102 | print(f"✅ Quality gate passed for {phase} (score: {gate_result['overall_score']:.1f}/10)") 103 | 104 | # Record quality gate history 105 | self.quality_gates.append({ 106 | "phase": phase, 107 | "result": gate_result, 108 | "timestamp": self._get_timestamp() 109 | }) 110 | 111 | return result 112 | 113 | def _handle_quality_gate_failure(self, result, gate_result, phase): 114 | """Handle quality gate failure""" 115 | print(f"🛠️ {self.role_name} handling quality gate failure in {phase}...") 116 | 117 | # Attempt to fix based on failure reasons 118 | blocking_issues = gate_result.get("blocking_issues", []) 119 | recommendations = gate_result.get("recommendations", []) 120 | 121 | # Record feedback 122 | self.feedback_history.append({ 123 | "phase": phase, 124 | "issues": blocking_issues, 125 | "recommendations": recommendations, 126 | "timestamp": self._get_timestamp() 127 | }) 128 | 129 | # Attempt to improve result based on recommendations 130 | improved_result = self._apply_quality_improvements(result, recommendations, phase) 131 | 132 | return improved_result 133 | 134 | def _apply_quality_improvements(self, result, recommendations, phase): 135 | """Apply quality improvement recommendations""" 136 | print(f"💡 Applying {len(recommendations)} quality improvements...") 137 | 138 | # Implementation of specific improvement logic here 139 | # Base class provides default implementation, subclasses can override for more specific improvements 140 | 141 | if not result: 142 | result = {} 143 | 144 | # Add improvement markers 145 | if "quality_improvements" not in result: 146 | result["quality_improvements"] = [] 147 | 148 | for rec in recommendations: 149 | result["quality_improvements"].append({ 150 | "criterion": rec.get("criterion", "unknown"), 151 | "priority": rec.get("priority", "medium"), 152 | "suggestion": rec.get("suggestion", "No specific suggestion"), 153 | "applied": True 154 | }) 155 | 156 | return result 157 | 158 | def get_tool(self, tool_name): 159 | """Get tool instance""" 160 | for tool in self.tools: 161 | if tool.name == tool_name: 162 | return 
tool 163 | return None 164 | 165 | def get_contextual_tools(self, phase): 166 | """Get relevant tools based on current phase""" 167 | if not self.project_specification: 168 | return self.tools 169 | 170 | project_type = self.project_specification["type"] 171 | return [tool for tool in self.tools if self._is_tool_relevant(tool, phase, project_type)] 172 | 173 | def _is_tool_relevant(self, tool, phase, project_type): 174 | """Determine if tool is relevant to current phase and project type""" 175 | # Universal tools are always relevant 176 | universal_tools = ["ProjectSpecificationCoordinator", "UniversalValidator", "QualityGatekeeper"] 177 | if tool.name in universal_tools: 178 | return True 179 | 180 | # Other tools determined by phase and project type 181 | relevance_map = { 182 | "architecture_design": ["ProjectSpecificationCoordinator", "UniversalValidator"], 183 | "design_modeling": ["UniversalValidator", "QualityGatekeeper"], 184 | "implementation": ["UniversalValidator", "QualityGatekeeper"], 185 | "testing_validation": ["UniversalValidator", "QualityGatekeeper"] 186 | } 187 | 188 | return tool.name in relevance_map.get(phase, []) 189 | 190 | def generate_role_report(self): 191 | """Generate role work report""" 192 | return { 193 | "role": self.role_name, 194 | "project_type": self.project_specification.get("type") if self.project_specification else "unknown", 195 | "tools_used": [tool.name for tool in self.tools], 196 | "quality_gates": len(self.quality_gates), 197 | "feedback_received": len(self.feedback_history), 198 | "overall_performance": self._calculate_role_performance() 199 | } 200 | 201 | def _calculate_role_performance(self): 202 | """Calculate role performance""" 203 | if not self.quality_gates: 204 | return 0 205 | 206 | total_score = sum(gate["result"]["overall_score"] for gate in self.quality_gates) 207 | return total_score / len(self.quality_gates) 208 | 209 | def _get_timestamp(self): 210 | """Get timestamp""" 211 | from datetime import datetime 212 | return datetime.now().isoformat() 213 | 214 | def add_custom_tool(self, tool): 215 | """Add custom tool""" 216 | self.tools.append(tool) 217 | print(f"🔧 Added custom tool {tool.name} to {self.role_name}") 218 | 219 | def remove_tool(self, tool_name): 220 | """Remove tool""" 221 | self.tools = [tool for tool in self.tools if tool.name != tool_name] 222 | print(f"🗑️ Removed tool {tool_name} from {self.role_name}") 223 | 224 | def get_feedback_summary(self): 225 | """Get feedback summary""" 226 | if not self.feedback_history: 227 | return "No feedback received" 228 | 229 | total_issues = sum(len(feedback["issues"]) for feedback in self.feedback_history) 230 | phases_with_issues = len(set(feedback["phase"] for feedback in self.feedback_history)) 231 | 232 | return { 233 | "total_feedback_sessions": len(self.feedback_history), 234 | "total_issues_identified": total_issues, 235 | "phases_with_issues": phases_with_issues, 236 | "improvement_rate": self._calculate_improvement_rate() 237 | } 238 | 239 | def _calculate_improvement_rate(self): 240 | """Calculate improvement rate""" 241 | if len(self.quality_gates) < 2: 242 | return 0 243 | 244 | # Compare scores of first and last quality gates 245 | first_score = self.quality_gates[0]["result"]["overall_score"] 246 | last_score = self.quality_gates[-1]["result"]["overall_score"] 247 | 248 | return ((last_score - first_score) / first_score) * 100 if first_score > 0 else 0 249 | -------------------------------------------------------------------------------- 
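A minimal usage sketch of the EnhancedRole base class above (illustrative only, not a file in this repository; the DocRole subclass, its priorities, and the sample inputs are hypothetical, and it assumes the tools package exports the classes imported by enhanced_role.py):

from roles.enhanced_role import EnhancedRole

class DocRole(EnhancedRole):  # hypothetical subclass for illustration
    def _adapt_to_frontend(self):
        # Override the base hook with role-specific priorities.
        self.design_priorities = ["style_guide", "component_docs"]

    def summarize(self, files):
        # Run the real work inside the base class's quality-gate wrapper.
        return self.execute_with_quality_gate(
            "implementation", lambda: {"files_documented": len(files)})

role = DocRole()
role.set_project_specification({"type": "web_frontend"})
print(role.summarize({"index.html": "<html></html>"}))
print(role.generate_role_report())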
/evaluate/execute/_execution.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft Corporation. 2 | # Licensed under the MIT license. 3 | 4 | from typing import Optional, Dict 5 | import contextlib 6 | import faulthandler 7 | import io 8 | import os 9 | import multiprocessing 10 | import platform 11 | import signal 12 | import tempfile 13 | 14 | def _pack_test_cases(test_cases, timeout): 15 | blank_4 = ' ' * 4 16 | blank_8 = ' ' * 8 17 | blank_12 = ' ' * 12 18 | result = f'def check():\n pass_result = []\n' 19 | for idx, tc in enumerate(test_cases): 20 | multi_line_assertion = tc.strip().replace('\n', f'\n{blank_12}') 21 | result += f'\n{blank_4}try:\n{blank_8}with time_limit({timeout}):\n{blank_12}{multi_line_assertion}\ 22 | \n{blank_12}pass_result.append(True)\n{blank_4}except Exception as e:\n{blank_8}pass_result.append(False)\n' 23 | result += '\n return pass_result\n' 24 | result += f'\nglobal final_result\nfinal_result = check()' 25 | return result 26 | 27 | 28 | def check_correctness_with_test_cases(task_id, prompt, completion, test_cases, timeout): 29 | """ 30 | Evaluates the functional correctness of a solution_content by running the test 31 | suite provided in the problem. 32 | """ 33 | extend_timeout = timeout*len(test_cases) 34 | 35 | def unsafe_execute(): 36 | 37 | with create_tempdir(): 38 | 39 | # These system calls are needed when cleaning up tempdir. 40 | import os 41 | import shutil 42 | rmtree = shutil.rmtree 43 | rmdir = os.rmdir 44 | chdir = os.chdir 45 | 46 | # Disable functionalities that can make destructive changes to the test. 47 | reliability_guard() 48 | 49 | # Construct the check program and run it. 50 | check_program = ( 51 | prompt + completion + "\n" + 52 | _pack_test_cases(test_cases, timeout) 53 | ) 54 | 55 | try: 56 | exec_globals = {'time_limit': time_limit} 57 | with swallow_io(): 58 | exec(check_program, exec_globals) 59 | result.append(exec_globals['final_result']) 60 | except TimeoutException: 61 | result.append("timed out") 62 | except BaseException as e: 63 | result.append(f"failed: {e}") 64 | 65 | # Needed for cleaning up. 66 | shutil.rmtree = rmtree 67 | os.rmdir = rmdir 68 | os.chdir = chdir 69 | 70 | manager = multiprocessing.Manager() 71 | result = manager.list() 72 | 73 | p = multiprocessing.Process(target=unsafe_execute) 74 | p.start() 75 | p.join(timeout=extend_timeout + 0.1) 76 | if p.is_alive(): 77 | p.kill() 78 | 79 | if not result: 80 | result.append("timed out") 81 | 82 | return dict( 83 | task_id=task_id, 84 | test_cases=test_cases, 85 | completion=completion, 86 | passed=(type(result[0]) == list) and len(result[0]) > 0, 87 | result=result[0] 88 | ) 89 | 90 | def check_correctness_T(task_id: str, prompt: str, completion: str, test: list, entry_point: str, timeout: float) -> Dict: 91 | """ 92 | Evaluates the functional correctness of a completion by running the test 93 | suite provided in the problem. 94 | """ 95 | 96 | def unsafe_execute(): 97 | 98 | with create_tempdir(): 99 | 100 | # These system calls are needed when cleaning up tempdir. 101 | import os 102 | import shutil 103 | rmtree = shutil.rmtree 104 | rmdir = os.rmdir 105 | chdir = os.chdir 106 | 107 | # Disable functionalities that can make destructive changes to the test. 108 | reliability_guard() 109 | 110 | # Construct the check program and run it. 
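# Illustrative example: for test == ['assert candidate(1) == 2',
# 'assert candidate(2) == 3'] the program assembled below is the dataset
# prompt, the model completion, then those assert lines in sequence; an
# AssertionError maps to the "assertion" result so a failing test is
# distinguishable from a crash or a timeout.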
111 | check_program = ( 112 | prompt + completion + "\n" + '\n'.join(test) 113 | ) 114 | 115 | try: 116 | exec_globals = {} 117 | with swallow_io(): 118 | with time_limit(timeout): 119 | exec(check_program, exec_globals) 120 | result.append("passed") 121 | except AssertionError: 122 | result.append("assertion") 123 | except TimeoutException: 124 | result.append("timed out") 125 | except BaseException as e: 126 | result.append(f"failed: {e}") 127 | 128 | # Needed for cleaning up. 129 | shutil.rmtree = rmtree 130 | os.rmdir = rmdir 131 | os.chdir = chdir 132 | 133 | manager = multiprocessing.Manager() 134 | result = manager.list() 135 | 136 | p = multiprocessing.Process(target=unsafe_execute) 137 | p.start() 138 | p.join(timeout=timeout+1) 139 | if p.is_alive(): 140 | p.kill() 141 | 142 | if not result: 143 | result.append("timed out") 144 | 145 | return dict( 146 | task_id=task_id, 147 | passed=result[0] == "passed", 148 | result=result[0], 149 | completion=completion, 150 | ) 151 | 152 | 153 | def check_correctness(task_id: str, prompt: str, completion: str, test: str, entry_point: str, timeout: float) -> Dict: 154 | """ 155 | Evaluates the functional correctness of a completion by running the test 156 | suite provided in the problem. 157 | """ 158 | 159 | def unsafe_execute(): 160 | 161 | with create_tempdir(): 162 | 163 | # These system calls are needed when cleaning up tempdir. 164 | import os 165 | import shutil 166 | rmtree = shutil.rmtree 167 | rmdir = os.rmdir 168 | chdir = os.chdir 169 | 170 | # Disable functionalities that can make destructive changes to the test. 171 | reliability_guard() 172 | 173 | # Construct the check program and run it. 174 | check_program = ( 175 | prompt + completion + "\n" + test + "\n" + f'check({entry_point})' 176 | ) 177 | 178 | try: 179 | exec_globals = {} 180 | with swallow_io(): 181 | with time_limit(timeout): 182 | exec(check_program, exec_globals) 183 | result.append("passed") 184 | except TimeoutException: 185 | result.append("timed out") 186 | except BaseException as e: 187 | result.append(f"failed: {e}") 188 | 189 | # Needed for cleaning up. 
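# reliability_guard() set shutil.rmtree, os.rmdir and os.chdir to None;
# restoring the saved references lets TemporaryDirectory and the chdir()
# context manager clean up once the sandboxed program has finished.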
190 | shutil.rmtree = rmtree 191 | os.rmdir = rmdir 192 | os.chdir = chdir 193 | 194 | manager = multiprocessing.Manager() 195 | result = manager.list() 196 | 197 | p = multiprocessing.Process(target=unsafe_execute) 198 | p.start() 199 | p.join(timeout=timeout+1) 200 | if p.is_alive(): 201 | p.kill() 202 | 203 | if not result: 204 | result.append("timed out") 205 | 206 | return dict( 207 | task_id=task_id, 208 | passed=result[0] == "passed", 209 | result=result[0], 210 | completion=completion, 211 | ) 212 | 213 | @contextlib.contextmanager 214 | def time_limit(seconds: float): 215 | def signal_handler(signum, frame): 216 | raise TimeoutException("Timed out!") 217 | signal.setitimer(signal.ITIMER_REAL, seconds) 218 | signal.signal(signal.SIGALRM, signal_handler) 219 | try: 220 | yield 221 | finally: 222 | signal.setitimer(signal.ITIMER_REAL, 0) 223 | 224 | 225 | @contextlib.contextmanager 226 | def swallow_io(): 227 | stream = WriteOnlyStringIO() 228 | with contextlib.redirect_stdout(stream): 229 | with contextlib.redirect_stderr(stream): 230 | with redirect_stdin(stream): 231 | yield 232 | 233 | 234 | @contextlib.contextmanager 235 | def create_tempdir(): 236 | with tempfile.TemporaryDirectory() as dirname: 237 | with chdir(dirname): 238 | yield dirname 239 | 240 | 241 | class TimeoutException(Exception): 242 | pass 243 | 244 | 245 | class WriteOnlyStringIO(io.StringIO): 246 | """ StringIO that throws an exception when it's read from """ 247 | 248 | def read(self, *args, **kwargs): 249 | raise IOError 250 | 251 | def readline(self, *args, **kwargs): 252 | raise IOError 253 | 254 | def readlines(self, *args, **kwargs): 255 | raise IOError 256 | 257 | def readable(self, *args, **kwargs): 258 | """ Returns True if the IO object can be read. """ 259 | return False 260 | 261 | 262 | class redirect_stdin(contextlib._RedirectStream): # type: ignore 263 | _stream = 'stdin' 264 | 265 | 266 | @contextlib.contextmanager 267 | def chdir(root): 268 | if root == ".": 269 | yield 270 | return 271 | cwd = os.getcwd() 272 | os.chdir(root) 273 | try: 274 | yield 275 | except BaseException as exc: 276 | raise exc 277 | finally: 278 | os.chdir(cwd) 279 | 280 | 281 | def reliability_guard(maximum_memory_bytes: Optional[int] = None): 282 | """ 283 | This disables various destructive functions and prevents the generated code 284 | from interfering with the test (e.g. fork bomb, killing other processes, 285 | removing filesystem files, etc.) 286 | 287 | WARNING 288 | This function is NOT a security sandbox. Untrusted code, including, model- 289 | generated code, should not be blindly executed outside of one. See the 290 | Codex paper for more information about OpenAI's code sandbox, and proceed 291 | with caution. 
292 | """ 293 | 294 | if maximum_memory_bytes is not None: 295 | import resource 296 | resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes)) 297 | resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes)) 298 | if not platform.uname().system == 'Darwin': 299 | resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes)) 300 | 301 | faulthandler.disable() 302 | 303 | import builtins 304 | builtins.exit = None 305 | builtins.quit = None 306 | 307 | import os 308 | os.environ['OMP_NUM_THREADS'] = '1' 309 | 310 | os.kill = None 311 | os.system = None 312 | os.putenv = None 313 | os.remove = None 314 | os.removedirs = None 315 | os.rmdir = None 316 | os.fchdir = None 317 | os.setuid = None 318 | os.fork = None 319 | os.forkpty = None 320 | os.killpg = None 321 | os.rename = None 322 | os.renames = None 323 | os.truncate = None 324 | os.replace = None 325 | os.unlink = None 326 | os.fchmod = None 327 | os.fchown = None 328 | os.chmod = None 329 | os.chown = None 330 | os.chroot = None 331 | os.fchdir = None 332 | os.lchflags = None 333 | os.lchmod = None 334 | os.lchown = None 335 | os.getcwd = None 336 | os.chdir = None 337 | 338 | import shutil 339 | shutil.rmtree = None 340 | shutil.move = None 341 | shutil.chown = None 342 | 343 | import subprocess 344 | subprocess.Popen = None # type: ignore 345 | 346 | __builtins__['help'] = None 347 | 348 | import sys 349 | sys.modules['ipdb'] = None 350 | sys.modules['joblib'] = None 351 | sys.modules['resource'] = None 352 | sys.modules['psutil'] = None 353 | sys.modules['tkinter'] = None 354 | -------------------------------------------------------------------------------- /tools/global_tool_orchestrator.py: -------------------------------------------------------------------------------- 1 | """ 2 | Practical tool orchestrator - for project mode 3 | """ 4 | import os 5 | import json 6 | import re 7 | from datetime import datetime 8 | 9 | class GlobalToolOrchestrator: 10 | """Global tool orchestrator - manages and coordinates various development tools""" 11 | 12 | def __init__(self): 13 | self.tools = {} 14 | self.execution_log = [] 15 | self.start_time = datetime.now() 16 | 17 | def execute(self, action, *args, **kwargs): 18 | """Execute specified tool operation""" 19 | log_entry = { 20 | "timestamp": datetime.now().isoformat(), 21 | "action": action, 22 | "args": args, 23 | "kwargs": kwargs 24 | } 25 | 26 | try: 27 | if action == "organize_files": 28 | result = self._organize_files(kwargs.get("files", {}), kwargs.get("output_dir", "")) 29 | elif action == "backup_existing": 30 | result = self._backup_existing(kwargs.get("output_dir", "")) 31 | elif action == "fetch_color_palette": 32 | result = self._fetch_color_palette(kwargs.get("theme", "modern")) 33 | elif action == "get_web_fonts": 34 | result = self._get_web_fonts(kwargs.get("font_name", "Open Sans")) 35 | elif action == "fetch_external_libraries": 36 | result = self._fetch_external_libraries(kwargs.get("project_type", "web")) 37 | elif action == "automated_tester_check": 38 | result = self._run_automated_tests(kwargs.get("files", {})) 39 | elif action == "quality_check": 40 | result = self._run_quality_check(kwargs.get("files", {}), kwargs.get("project_type", "web")) 41 | else: 42 | result = {"status": "success", "result": f"Tool action '{action}' executed"} 43 | 44 | log_entry["result"] = result 45 | log_entry["status"] = "success" 46 | 47 | except Exception as e: 48 | result = {"status": "error", "error": 
str(e)} 49 | log_entry["result"] = result 50 | log_entry["status"] = "error" 51 | 52 | self.execution_log.append(log_entry) 53 | return result 54 | 55 | def _organize_files(self, files, output_dir): 56 | """Organize file structure""" 57 | organized = {} 58 | for file_path, content in files.items(): 59 | # Ensure file path normalization 60 | clean_path = file_path.replace("\\", "/").strip("/") 61 | organized[clean_path] = content 62 | 63 | return { 64 | "status": "success", 65 | "organized_files": len(organized), 66 | "structure": list(organized.keys()) 67 | } 68 | 69 | def _backup_existing(self, output_dir): 70 | """Backup existing files""" 71 | if not os.path.exists(output_dir): 72 | return {"backup_created": False, "reason": "Output directory does not exist"} 73 | 74 | files = os.listdir(output_dir) 75 | if not files: 76 | return {"backup_created": False, "reason": "No files to backup"} 77 | 78 | backup_dir = f"{output_dir}_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}" 79 | # This is just simulation backup logic, real projects can implement actual backup 80 | return { 81 | "backup_created": True, 82 | "backup_path": backup_dir, 83 | "files_backed_up": len(files) 84 | } 85 | 86 | def _fetch_color_palette(self, theme): 87 | """Get color palette""" 88 | palettes = { 89 | "modern": { 90 | "primary": "#3498db", 91 | "secondary": "#2ecc71", 92 | "accent": "#e74c3c", 93 | "background": "#f8f9fa", 94 | "text": "#2c3e50" 95 | }, 96 | "dark": { 97 | "primary": "#0d1117", 98 | "secondary": "#21262d", 99 | "accent": "#58a6ff", 100 | "background": "#010409", 101 | "text": "#f0f6fc" 102 | }, 103 | "minimal": { 104 | "primary": "#000000", 105 | "secondary": "#ffffff", 106 | "accent": "#6c757d", 107 | "background": "#f8f9fa", 108 | "text": "#212529" 109 | } 110 | } 111 | 112 | return { 113 | "status": "success", 114 | "theme": theme, 115 | "palette": palettes.get(theme, palettes["modern"]) 116 | } 117 | 118 | def _get_web_fonts(self, font_name): 119 | """Get web font information""" 120 | fonts = { 121 | "Open Sans": { 122 | "url": "https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700&display=swap", 123 | "family": "'Open Sans', sans-serif" 124 | }, 125 | "Roboto": { 126 | "url": "https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap", 127 | "family": "'Roboto', sans-serif" 128 | }, 129 | "Inter": { 130 | "url": "https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap", 131 | "family": "'Inter', sans-serif" 132 | } 133 | } 134 | 135 | return { 136 | "status": "success", 137 | "font": fonts.get(font_name, fonts["Open Sans"]) 138 | } 139 | 140 | def _fetch_external_libraries(self, project_type): 141 | """Get external library information""" 142 | libraries = { 143 | "web": [ 144 | {"name": "Chart.js", "url": "https://cdn.jsdelivr.net/npm/chart.js", "type": "js"}, 145 | {"name": "D3.js", "url": "https://d3js.org/d3.v7.min.js", "type": "js"}, 146 | {"name": "Bootstrap", "url": "https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css", "type": "css"} 147 | ], 148 | "data_analysis": [ 149 | {"name": "Plotly", "url": "https://cdn.plot.ly/plotly-latest.min.js", "type": "js"}, 150 | {"name": "NumJS", "url": "https://unpkg.com/numjs@latest/dist/numjs.min.js", "type": "js"} 151 | ] 152 | } 153 | 154 | return { 155 | "status": "success", 156 | "libraries": libraries.get(project_type, libraries["web"]) 157 | } 158 | 159 | def validate_code(self, code, files=None): 160 | """Validate code quality""" 161 | issues = [] 162 | 
warnings = [] 163 | suggestions = [] 164 | 165 | if not code or len(code.strip()) < 10: 166 | issues.append("Code is too short or empty") 167 | 168 | # Check HTML structure 169 | if "<html" in code: 170 | if "<title>" not in code: 171 | warnings.append("Missing page title") 172 | if 'lang="' not in code: 173 | suggestions.append("Consider adding language attribute to HTML tag") 174 | 175 | # Check CSS 176 | if code.strip().startswith(".") or code.strip().startswith("#"): 177 | if "color:" not in code and "background" not in code: 178 | suggestions.append("Consider adding color scheme to CSS") 179 | 180 | # Check JavaScript 181 | if "function" in code or "const " in code or "let " in code: 182 | if "console.log" in code: 183 | warnings.append("Remove console.log statements in production") 184 | 185 | return { 186 | "is_valid": len(issues) == 0, 187 | "errors": issues, 188 | "warnings": warnings, 189 | "suggestions": suggestions 190 | } 191 | 192 | def organize_files(self, files): 193 | """Organize file structure""" 194 | return self._organize_files(files, "") 195 | 196 | def generate_report(self): 197 | """Generate tool usage report""" 198 | execution_time = (datetime.now() - self.start_time).total_seconds() 199 | 200 | successful_operations = len([log for log in self.execution_log if log.get("status") == "success"]) 201 | total_operations = len(self.execution_log) 202 | success_rate = (successful_operations / total_operations * 100) if total_operations > 0 else 100 203 | 204 | return { 205 | "tools_used": list(set([log.get("action", "unknown") for log in self.execution_log])), 206 | "execution_time": f"{execution_time:.2f}s", 207 | "success_rate": f"{success_rate:.1f}%", 208 | "total_operations": total_operations, 209 | "successful_operations": successful_operations, 210 | "detailed_log": self.execution_log[-5:] # Last 5 records 211 | } 212 | 213 | def get_tool(self, tool_name): 214 | """Get specific tool instance""" 215 | # For compatibility, return self; all tool functions are integrated on this class 216 | return self 217 | 218 | def _run_automated_tests(self, files): 219 | """Run automated tests""" 220 | issues = [] 221 | test_results = {} 222 | 223 | for file_path, content in files.items(): 224 | file_issues = [] 225 | 226 | # Basic file checks 227 | if not content.strip(): 228 | file_issues.append("File is empty") 229 | 230 | # HTML checks 231 | if file_path.endswith('.html'): 232 | if '<!DOCTYPE HTML>' not in content.upper(): 233 | file_issues.append("Missing DOCTYPE declaration") 234 | if '<html' not in content: 235 | file_issues.append("Missing html tag") 236 | if '<head>' not in content or '<body>' not in content: 237 | file_issues.append("Missing head or body tags") 238 | 239 | # CSS checks 240 | elif file_path.endswith('.css'): 241 | open_braces = content.count('{') 242 | close_braces = content.count('}') 243 | if open_braces != close_braces: 244 | file_issues.append("Unbalanced CSS braces") 245 | 246 | # JavaScript checks 247 | elif file_path.endswith('.js'): 248 | if content.count('(') != content.count(')'): 249 | file_issues.append("Unbalanced parentheses") 250 | if content.count('{') != content.count('}'): 251 | file_issues.append("Unbalanced braces") 252 | 253 | test_results[file_path] = { 254 | "passed": len(file_issues) == 0, 255 | "issues": file_issues 256 | } 257 | issues.extend(file_issues) 258 | 259 | return { 260 | "status": "success", 261 | "overall_passed": len(issues) == 0, 262 | "total_issues": len(issues), 263 | "file_results": test_results, 264 | "summary": f"Tested 
{len(files)} files, found {len(issues)} issues" 265 | } 266 | 267 | def _run_quality_check(self, files, project_type): 268 | """Run quality check""" 269 | quality_score = 0 270 | total_checks = 0 271 | quality_details = {} 272 | 273 | for file_path, content in files.items(): 274 | file_score = 0 275 | file_checks = 0 276 | 277 | # Basic quality checks 278 | if content.strip(): 279 | file_score += 1 280 | file_checks += 1 281 | 282 | # Code length check 283 | if len(content) > 100: # Basic content check 284 | file_score += 1 285 | file_checks += 1 286 | 287 | # File-specific checks 288 | if file_path.endswith('.html'): 289 | if 'class=' in content or 'id=' in content: 290 | file_score += 1 # Has CSS selectors 291 | file_checks += 1 292 | 293 | if '<title>' in content: 294 | file_score += 1 # Has title 295 | file_checks += 1 296 | 297 | elif file_path.endswith('.css'): 298 | if ':' in content and '{' in content: 299 | file_score += 1 # Has CSS rules 300 | file_checks += 1 301 | 302 | if 'color:' in content or 'background:' in content: 303 | file_score += 1 # Has style definitions 304 | file_checks += 1 305 | 306 | quality_details[file_path] = { 307 | "score": file_score, 308 | "max_score": file_checks, 309 | "percentage": (file_score / file_checks * 100) if file_checks > 0 else 0 310 | } 311 | 312 | quality_score += file_score 313 | total_checks += file_checks 314 | 315 | overall_percentage = (quality_score / total_checks * 100) if total_checks > 0 else 0 316 | 317 | return { 318 | "status": "success", 319 | "overall_score": quality_score, 320 | "max_score": total_checks, 321 | "percentage": overall_percentage, 322 | "grade": "A" if overall_percentage >= 90 else "B" if overall_percentage >= 80 else "C" if overall_percentage >= 70 else "D", 323 | "file_details": quality_details, 324 | "summary": f"Quality score: {quality_score}/{total_checks} ({overall_percentage:.1f}%)" 325 | } 326 | -------------------------------------------------------------------------------- /project_session.py: -------------------------------------------------------------------------------- 1 | from roles.analyst import Analyst 2 | from roles.coder import Coder 3 | from roles.tester import Tester 4 | from roles.project_architect import ProjectArchitect 5 | from roles.project_developer import ProjectDeveloper 6 | from roles.project_tester import ProjectTester 7 | from roles.ui_designer import UIDesigner 8 | from utils import find_method_name, construct_system_message 9 | from tools import global_tool_orchestrator, CodeAnalyzer, FileManager, QualityChecker, APIIntegrationTool, AutomatedTester 10 | import time 11 | import os 12 | import json 13 | import re 14 | 15 | 16 | class ProjectSession(object): 17 | def __init__(self, team_description, architect_description, developer_description, 18 | tester_description, ui_designer_description, requirement, project_type='web_visualization', 19 | model='gpt-3.5-turbo', majority=1, max_tokens=1024, 20 | temperature=0.2, top_p=0.95, max_round=3, output_dir='generated_project'): 21 | 22 | self.session_history = {} 23 | self.max_round = max_round 24 | self.requirement = requirement 25 | self.project_type = project_type 26 | self.output_dir = output_dir 27 | self.project_files = {} 28 | 29 | # Intelligently adjust max_tokens to avoid context length issues 30 | self.base_max_tokens = max_tokens 31 | self.current_max_tokens = max_tokens 32 | self.model = model 33 | 34 | # Dynamically adjust tokens based on model and rounds 35 | model_limits = { 36 | 'gpt-3.5-turbo': 16385, 37 | 'gpt-4': 
8192, 38 | } 39 | self.model_limit = model_limits.get(model, 16385) 40 | 41 | # Initial token allocation: reserve space for multi-round iteration 42 | if max_round > 1: 43 | # Use smaller tokens for multi-round iteration, reserving space for history 44 | adjusted_max_tokens = min(max_tokens, self.model_limit // (max_round + 1)) 45 | else: 46 | adjusted_max_tokens = max_tokens 47 | 48 | print(f"🔧 Token management: Model={model}, Limit={self.model_limit}, Base={max_tokens}, Adjusted={adjusted_max_tokens}") 49 | 50 | # Initialize tools 51 | self.tool_orchestrator = global_tool_orchestrator 52 | self.code_analyzer = CodeAnalyzer() 53 | self.file_manager = FileManager() 54 | self.quality_checker = QualityChecker() 55 | self.api_tool = APIIntegrationTool() 56 | self.automated_tester = AutomatedTester() 57 | 58 | # Initialize project roles with adjusted tokens 59 | self.architect = ProjectArchitect(team_description, architect_description, requirement, 60 | project_type, model, majority, adjusted_max_tokens, temperature, top_p) 61 | self.developer = ProjectDeveloper(team_description, developer_description, requirement, 62 | project_type, model, majority, adjusted_max_tokens, temperature, top_p) 63 | self.tester = ProjectTester(team_description, tester_description, requirement, 64 | project_type, model, majority, adjusted_max_tokens, temperature, top_p) 65 | self.ui_designer = UIDesigner(team_description, ui_designer_description, requirement, 66 | project_type, model, majority, adjusted_max_tokens, temperature, top_p) 67 | 68 | def _adjust_tokens_for_round(self, round_num): 69 | """Dynamically adjust token allocation based on round number""" 70 | # As rounds increase, token allocation could be reduced to reserve space for conversation history 71 | reduction_factor = 1.0  # reduction currently disabled; use 1.0 - (round_num * 0.1) to shed 10% per round 72 | new_max_tokens = max(int(self.base_max_tokens * reduction_factor), 512) # Minimum 512 tokens 73 | 74 | if new_max_tokens != self.current_max_tokens: 75 | print(f"🔄 Round {round_num + 1}: Adjusting max_tokens from {self.current_max_tokens} to {new_max_tokens}") 76 | self.current_max_tokens = new_max_tokens 77 | 78 | # Update token settings for all roles 79 | for role in [self.architect, self.developer, self.tester, self.ui_designer]: 80 | if hasattr(role, 'max_tokens'): 81 | role.max_tokens = new_max_tokens 82 | 83 | return new_max_tokens 84 | 85 | def run_project_session(self): 86 | """Run the complete project generation session with integrated tools""" 87 | 88 | print("🔧 Phase 0: Initializing resources...") 89 | # Fetch external resources based on project type 90 | if self.project_type == 'web_visualization': 91 | external_resources = self.api_tool.execute( 92 | "fetch_cdn_libraries", 93 | libraries=["chart.js", "d3.js", "bootstrap", "jquery"] 94 | ) 95 | self.session_history["external_resources"] = external_resources 96 | print(f"✅ Fetched external resources: {len(external_resources.get('libraries', []))} libraries") 97 | 98 | # Phase 1: Planning and Architecture Design 99 | print("🏗️ Phase 1: Planning and architecture design...") 100 | architecture_plan = self.architect.design_architecture() 101 | self.session_history["architecture"] = architecture_plan 102 | 103 | if architecture_plan == "error": 104 | raise RuntimeError("Architecture design failed") 105 | 106 | # Use code analyzer to analyze architecture plan 107 | if isinstance(architecture_plan, str): 108 | arch_analysis = self.code_analyzer.execute( 109 | "analyze", 110 | code=architecture_plan, 111 | 
project_type=self.project_type 112 | ) 113 | print(f"📊 Architecture analysis: {arch_analysis.get('summary', 'Analysis completed')}") 114 | 115 | # Phase 2: UI Design (for web projects) 116 | ui_design = None 117 | if self.project_type in ['web_visualization', 'desktop_app']: 118 | print("🎨 Phase 2: Creating UI design...") 119 | ui_design = self.ui_designer.design_ui(architecture_plan) 120 | self.session_history["ui_design"] = ui_design 121 | 122 | # Fetch color palette and fonts for better design 123 | if ui_design != "error": 124 | design_resources = self.api_tool.execute("fetch_color_palette", theme="modern") 125 | fonts = self.api_tool.execute("get_web_fonts", font_name="Open Sans") 126 | self.session_history["design_resources"] = { 127 | "colors": design_resources, 128 | "fonts": fonts 129 | } 130 | print("🎨 Enhanced UI design with external resources") 131 | 132 | # Phase 3: Development with iterative improvement 133 | print("⚡ Phase 3: Implementing project iteratively...") 134 | for round_num in range(self.max_round): 135 | print(f"🔄 Development round {round_num + 1}/{self.max_round}") 136 | 137 | # Dynamically adjust token allocation 138 | self._adjust_tokens_for_round(round_num) 139 | 140 | # Development with file management 141 | try: 142 | project_files = self.developer.implement_project(architecture_plan, ui_design, 143 | self.project_files, round_num == 0) 144 | except Exception as e: 145 | error_str = str(e) 146 | if "context_length_exceeded" in error_str or "maximum context length" in error_str: 147 | print(f"⚠️ Context length exceeded in round {round_num + 1}, reducing complexity...") 148 | # Clear developer's message history 149 | if hasattr(self.developer, 'itf') and hasattr(self.developer.itf, 'clear_history'): 150 | self.developer.itf.clear_history() 151 | # Retry once with fewer tokens 152 | self.current_max_tokens = max(self.current_max_tokens // 2, 256) 153 | self.developer.max_tokens = self.current_max_tokens 154 | try: 155 | project_files = self.developer.implement_project(architecture_plan, ui_design, 156 | self.project_files, round_num == 0) 157 | except Exception as e2: 158 | print(f"❌ Failed after retry: {e2}") 159 | if round_num == 0: 160 | raise RuntimeError("Initial development failed") 161 | else: 162 | project_files = self.project_files 163 | break 164 | else: 165 | raise e 166 | 167 | if project_files == "error": 168 | if round_num == 0: 169 | raise RuntimeError("Initial development failed") 170 | else: 171 | # Use files from previous round 172 | project_files = self.project_files 173 | break 174 | 175 | # Use file manager to organize and validate files 176 | file_validation = self.file_manager.execute( 177 | "validate_structure", 178 | files=project_files, 179 | project_type=self.project_type 180 | ) 181 | print(f"📁 File structure validation: {file_validation.get('status', 'completed')}") 182 | 183 | # Save generated files with backup 184 | self._save_project_files_with_tools(project_files) 185 | self.project_files = project_files 186 | 187 | # Quality checking with enhanced tools 188 | quality_report = self.quality_checker.execute( 189 | "comprehensive_check", 190 | files=project_files, 191 | project_type=self.project_type 192 | ) 193 | print(f"🔍 Quality check score: {quality_report.get('overall_score', 'N/A')}") 194 | 195 | # Testing and feedback with automated tools (except last round) 196 | if round_num < self.max_round - 1: 197 | print("🧪 Testing with automated tools...") 198 | 199 | # Traditional testing 200 | test_report = 
self.tester.test_project(project_files, architecture_plan) 201 | 202 | # Enhanced automated testing 203 | automated_test_results = self.automated_tester.execute( 204 | "full_suite", 205 | files=project_files, 206 | project_type=self.project_type 207 | ) 208 | 209 | combined_test_report = { 210 | "traditional_tests": test_report, 211 | "automated_tests": automated_test_results, 212 | "quality_metrics": quality_report 213 | } 214 | 215 | self.session_history[f'round_{round_num}'] = { 216 | "files": list(project_files.keys()), 217 | "test_report": combined_test_report, 218 | "tool_reports": { 219 | "file_validation": file_validation, 220 | "quality_check": quality_report 221 | } 222 | } 223 | 224 | if test_report == "error": 225 | print("⚠️ Testing failed, continuing with current implementation") 226 | break 227 | 228 | # Enhanced success criteria 229 | traditional_passed = ("all tests passed" in test_report.lower() or 230 | "no issues found" in test_report.lower()) 231 | automated_passed = automated_test_results.get("overall_status") == "passed" 232 | quality_good = quality_report.get("overall_score", 0) >= 7.0 233 | 234 | if traditional_passed and automated_passed and quality_good: 235 | print("✅ All tests passed with high quality! Project completed successfully.") 236 | break 237 | 238 | # Provide enhanced feedback combining all reports 239 | enhanced_feedback = self._generate_enhanced_feedback( 240 | test_report, automated_test_results, quality_report 241 | ) 242 | self.developer.receive_feedback(enhanced_feedback) 243 | 244 | # Adjust tokens for next round 245 | self._adjust_tokens_for_round(round_num) 246 | 247 | # Final tool report generation 248 | print("📊 Generating final tool usage report...") 249 | tool_usage_report = self.tool_orchestrator.generate_report() 250 | self.session_history["tool_usage_report"] = tool_usage_report 251 | 252 | # Clean up interfaces 253 | self.architect.itf.clear_history() 254 | self.developer.itf.clear_history() 255 | self.tester.itf.clear_history() 256 | if hasattr(self.ui_designer, 'itf'): 257 | self.ui_designer.itf.clear_history() 258 | 259 | return self.project_files, self.session_history 260 | 261 | def _save_project_files(self, project_files): 262 | """Save generated project files to disk""" 263 | for file_path, content in project_files.items(): 264 | full_path = os.path.join(self.output_dir, file_path) 265 | 266 | # Create directory if it doesn't exist 267 | dir_path = os.path.dirname(full_path) 268 | if dir_path: 269 | os.makedirs(dir_path, exist_ok=True) 270 | 271 | # Write file content 272 | try: 273 | with open(full_path, 'w', encoding='utf-8') as f: 274 | f.write(content) 275 | print(f"Saved: {file_path}") 276 | except Exception as e: 277 | print(f"Error saving {file_path}: {e}") 278 | 279 | def _save_project_files_with_tools(self, project_files): 280 | """Save project files with tool assistance and backup""" 281 | # Use file manager to organize files before saving 282 | organization_result = self.file_manager.execute( 283 | "organize_files", 284 | files=project_files, 285 | output_dir=self.output_dir 286 | ) 287 | 288 | # Create backup if files already exist 289 | backup_result = self.file_manager.execute( 290 | "backup_existing", 291 | output_dir=self.output_dir 292 | ) 293 | 294 | # Save files with the original method 295 | self._save_project_files(project_files) 296 | 297 | print(f"📁 File organization: {organization_result.get('status', 'completed')}") 298 | if backup_result.get('backup_created'): 299 | print(f"💾 Backup created: 
{backup_result.get('backup_path', 'N/A')}") 300 | 301 | def _generate_enhanced_feedback(self, traditional_report, automated_report, quality_report): 302 | """Generate simple, actionable feedback for the developer""" 303 | 304 | # Start with a simple structure 305 | issues = [] 306 | 307 | # Check for test failures 308 | if automated_report.get("issues"): 309 | issues.extend(automated_report["issues"]) 310 | 311 | # Traditional test issues 312 | if traditional_report and "error" in traditional_report.lower(): 313 | issues.append("Fix syntax errors and runtime issues") 314 | 315 | # Generate simple feedback 316 | if not issues: 317 | return "✅ Good! Continue with current implementation approach." 318 | 319 | # Create actionable feedback 320 | feedback = "Please fix these issues:\n" 321 | for i, issue in enumerate(issues[:3], 1): # Limit to 3 most important issues 322 | feedback += f"{i}. {issue}\n" 323 | 324 | feedback += "\nFocus on fixing issues for better results." 325 | return feedback 326 | 327 | 328 | class FunctionSession(object): 329 | """Original function-level session for backwards compatibility""" 330 | def __init__(self, TEAM, ANALYST, PYTHON_DEVELOPER, TESTER, requirement, model='gpt-3.5-turbo', 331 | majority=1, max_tokens=512, temperature=0.0, top_p=0.95, max_round=4, before_func=''): 332 | 333 | self.session_history = {} 334 | self.max_round = max_round 335 | self.before_func = before_func 336 | self.requirement = requirement 337 | self.analyst = Analyst(TEAM, ANALYST, requirement, model, majority, max_tokens, temperature, top_p) 338 | self.coder = Coder(TEAM, PYTHON_DEVELOPER, requirement, model, majority, max_tokens, temperature, top_p) 339 | self.tester = Tester(TEAM, TESTER, requirement, model, majority, max_tokens, temperature, top_p) 340 | 341 | def run_session(self): 342 | # ... 
(keep original implementation from session.py) 343 | from session import Session 344 | original_session = Session(None, None, None, None, self.requirement, 345 | model=self.analyst.model, majority=self.analyst.majority, 346 | max_tokens=self.analyst.max_tokens, temperature=self.analyst.temperature, 347 | top_p=self.analyst.top_p, max_round=self.max_round, 348 | before_func=self.before_func) 349 | original_session.analyst = self.analyst 350 | original_session.coder = self.coder 351 | original_session.tester = self.tester 352 | return original_session.run_session() 353 | -------------------------------------------------------------------------------- /roles/project_architect.py: -------------------------------------------------------------------------------- 1 | import os 2 | import copy 3 | import json 4 | import time 5 | 6 | from core import interface 7 | from utils import construct_system_message 8 | from roles.enhanced_role import EnhancedRole 9 | 10 | 11 | class ProjectArchitect(EnhancedRole): 12 | def __init__(self, team_description, architect_description, requirement, project_type, 13 | model='gpt-3.5-turbo', majority=1, max_tokens=1024, temperature=0.2, top_p=0.95): 14 | # First call enhanced base class initialization 15 | super().__init__() 16 | 17 | self.model = model 18 | self.majority = majority 19 | self.max_tokens = max_tokens 20 | self.temperature = temperature 21 | self.top_p = top_p 22 | self.history_message = [] 23 | self.requirement = requirement 24 | self.project_type = project_type 25 | 26 | self.itf = interface.ProgramInterface( 27 | stop='', 28 | verbose=False, 29 | model=self.model, 30 | ) 31 | 32 | system_message = construct_system_message(requirement, architect_description, team_description) 33 | self.history_message_append(system_message) 34 | 35 | def _adapt_to_project_type(self, project_type): 36 | """Adjust architect behavior based on project type""" 37 | super()._adapt_to_project_type(project_type) 38 | 39 | # Architect-specific adaptation logic 40 | adaptation_strategies = { 41 | "web_frontend": self._adapt_to_frontend_architecture, 42 | "web_backend": self._adapt_to_backend_architecture, 43 | "data_science": self._adapt_to_data_science_architecture, 44 | "mobile": self._adapt_to_mobile_architecture, 45 | "desktop": self._adapt_to_desktop_architecture, 46 | "fullstack": self._adapt_to_fullstack_architecture 47 | } 48 | 49 | strategy = adaptation_strategies.get(project_type, self._adapt_to_generic_architecture) 50 | strategy() 51 | 52 | def _adapt_to_frontend_architecture(self): 53 | """Adapt to frontend architecture design""" 54 | print("🎨 ProjectArchitect: Focusing on component-based frontend architecture") 55 | 56 | def _adapt_to_backend_architecture(self): 57 | """Adapt to backend architecture design""" 58 | print("⚙️ ProjectArchitect: Focusing on API and service architecture") 59 | 60 | def _adapt_to_data_science_architecture(self): 61 | """Adapt to data science architecture design""" 62 | print("📊 ProjectArchitect: Focusing on data pipeline and model architecture") 63 | 64 | def _adapt_to_mobile_architecture(self): 65 | """Adapt to mobile architecture design""" 66 | print("📱 ProjectArchitect: Focusing on mobile app architecture") 67 | 68 | def _adapt_to_desktop_architecture(self): 69 | """Adapt to desktop architecture design""" 70 | print("🖥️ ProjectArchitect: Focusing on desktop application architecture") 71 | 72 | def _adapt_to_fullstack_architecture(self): 73 | """Adapt to fullstack architecture design""" 74 | print("🌐 ProjectArchitect: Focusing on 
end-to-end system architecture") 75 | 76 | def _adapt_to_generic_architecture(self): 77 | """Adapt to generic architecture design""" 78 | print("🔧 ProjectArchitect: Using generic architecture patterns") 79 | 80 | def design_system_architecture(self, requirements=None): 81 | """Design system architecture - using enhanced quality gate process""" 82 | if requirements is None: 83 | requirements = {"description": self.requirement, "project_type": self.project_type} 84 | 85 | return self.execute_with_quality_gate( 86 | "architecture_design", 87 | self._design_architecture_internal, 88 | requirements 89 | ) 90 | 91 | def _design_architecture_internal(self, requirements): 92 | """Internal architecture design logic""" 93 | # First determine project specification (if not already done) 94 | if not self.project_specification: 95 | spec_coordinator = self.get_tool("ProjectSpecificationCoordinator") 96 | project_spec_result = spec_coordinator.execute( 97 | requirements.get("project_type", self.project_type), 98 | requirements 99 | ) 100 | self.set_project_specification(project_spec_result["project_specification"]) 101 | 102 | # Design architecture based on project specification 103 | architecture = self._design_architecture_for_type( 104 | self.project_specification["type"], 105 | requirements, 106 | self.project_specification 107 | ) 108 | 109 | return { 110 | "architecture": architecture, 111 | "project_specification": self.project_specification, 112 | "design_rationale": self._generate_design_rationale(architecture), 113 | "implementation_guidance": self._generate_implementation_guidance(architecture) 114 | } 115 | 116 | def _design_architecture_for_type(self, project_type, requirements, project_spec): 117 | """Design architecture based on project type""" 118 | architecture_designers = { 119 | "web_frontend": self._design_frontend_architecture, 120 | "web_backend": self._design_backend_architecture, 121 | "data_science": self._design_data_science_architecture, 122 | "mobile": self._design_mobile_architecture, 123 | "desktop": self._design_desktop_architecture, 124 | "fullstack": self._design_fullstack_architecture 125 | } 126 | 127 | designer = architecture_designers.get(project_type, self._design_generic_architecture) 128 | return designer(requirements, project_spec) 129 | 130 | def _design_frontend_architecture(self, requirements, project_spec): 131 | """Design frontend architecture""" 132 | tech_stack = project_spec["technology_stack"] 133 | 134 | return { 135 | "type": "frontend", 136 | "components": [ 137 | {"name": "App", "responsibility": "Main application component", "type": "container"}, 138 | {"name": "Header", "responsibility": "Navigation and branding", "type": "presentational"}, 139 | {"name": "Sidebar", "responsibility": "Secondary navigation", "type": "presentational"}, 140 | {"name": "MainContent", "responsibility": "Primary content display", "type": "container"}, 141 | {"name": "Footer", "responsibility": "Footer information", "type": "presentational"} 142 | ], 143 | "state_management": { 144 | "pattern": "Component State" if "React" in tech_stack.get("frameworks", []) else "Global State", 145 | "tools": tech_stack.get("state_management", ["Context API"]) 146 | }, 147 | "routing": { 148 | "type": "client-side", 149 | "routes": [ 150 | {"path": "/", "component": "Home", "description": "Landing page"}, 151 | {"path": "/dashboard", "component": "Dashboard", "description": "Main dashboard"}, 152 | {"path": "/settings", "component": "Settings", "description": "User settings"} 153 | ] 154 
| }, 155 | "data_flow": "Component Props -> State -> UI Updates", 156 | "file_structure": { 157 | "src/": "Source code directory", 158 | "src/components/": "React/Vue components", 159 | "src/styles/": "CSS/SCSS files", 160 | "src/utils/": "Utility functions", 161 | "public/": "Static assets" 162 | } 163 | } 164 | 165 | def _design_backend_architecture(self, requirements, project_spec): 166 | """Design backend architecture""" 167 | tech_stack = project_spec["technology_stack"] 168 | 169 | return { 170 | "type": "backend", 171 | "services": [ 172 | {"name": "UserService", "responsibility": "User management", "endpoints": ["/users", "/auth"]}, 173 | {"name": "DataService", "responsibility": "Data operations", "endpoints": ["/data", "/analytics"]}, 174 | {"name": "NotificationService", "responsibility": "Notifications", "endpoints": ["/notifications"]} 175 | ], 176 | "api": { 177 | "style": "REST", 178 | "version": "v1", 179 | "endpoints": [ 180 | {"method": "GET", "path": "/api/v1/users", "description": "Get all users"}, 181 | {"method": "POST", "path": "/api/v1/users", "description": "Create user"}, 182 | {"method": "GET", "path": "/api/v1/data", "description": "Get data"} 183 | ] 184 | }, 185 | "data_model": { 186 | "entities": [ 187 | {"name": "User", "fields": ["id", "name", "email", "created_at"]}, 188 | {"name": "DataRecord", "fields": ["id", "user_id", "data", "timestamp"]} 189 | ] 190 | }, 191 | "middleware": ["Authentication", "CORS", "Rate Limiting", "Logging"], 192 | "file_structure": { 193 | "app/": "Main application code", 194 | "app/models/": "Data models", 195 | "app/services/": "Business logic services", 196 | "app/controllers/": "Request handlers", 197 | "config/": "Configuration files" 198 | } 199 | } 200 | 201 | def _design_data_science_architecture(self, requirements, project_spec): 202 | """Design data science architecture""" 203 | return { 204 | "type": "data_science", 205 | "pipeline": [ 206 | {"stage": "Data Ingestion", "tools": ["pandas", "requests"], "responsibility": "Collect raw data"}, 207 | {"stage": "Data Cleaning", "tools": ["pandas", "numpy"], "responsibility": "Clean and validate data"}, 208 | {"stage": "Feature Engineering", "tools": ["scikit-learn"], "responsibility": "Create features"}, 209 | {"stage": "Model Training", "tools": ["scikit-learn", "tensorflow"], "responsibility": "Train models"}, 210 | {"stage": "Model Evaluation", "tools": ["scikit-learn", "matplotlib"], "responsibility": "Evaluate performance"}, 211 | {"stage": "Deployment", "tools": ["flask", "docker"], "responsibility": "Deploy model"} 212 | ], 213 | "data_flow": "Raw Data -> Cleaned Data -> Features -> Model -> Predictions", 214 | "file_structure": { 215 | "data/": "Data files (raw, processed)", 216 | "notebooks/": "Jupyter notebooks for exploration", 217 | "src/": "Python modules", 218 | "models/": "Trained model files", 219 | "config/": "Configuration files" 220 | } 221 | } 222 | 223 | def _design_mobile_architecture(self, requirements, project_spec): 224 | """Design mobile architecture""" 225 | return { 226 | "type": "mobile", 227 | "navigation": { 228 | "type": "Stack Navigation", 229 | "screens": [ 230 | {"name": "Home", "description": "Main screen"}, 231 | {"name": "Profile", "description": "User profile"}, 232 | {"name": "Settings", "description": "App settings"} 233 | ] 234 | }, 235 | "components": [ 236 | {"name": "AppNavigator", "responsibility": "Navigation container"}, 237 | {"name": "HomeScreen", "responsibility": "Main application screen"}, 238 | {"name": "CustomButton", 
"responsibility": "Reusable button component"} 239 | ], 240 | "file_structure": { 241 | "src/screens/": "Screen components", 242 | "src/components/": "Reusable components", 243 | "src/navigation/": "Navigation configuration", 244 | "src/services/": "API and data services" 245 | } 246 | } 247 | 248 | def _design_desktop_architecture(self, requirements, project_spec): 249 | """Design desktop architecture""" 250 | return { 251 | "type": "desktop", 252 | "windows": [ 253 | {"name": "MainWindow", "description": "Primary application window"}, 254 | {"name": "SettingsDialog", "description": "Settings configuration"}, 255 | {"name": "AboutDialog", "description": "About information"} 256 | ], 257 | "components": [ 258 | {"name": "MenuBar", "responsibility": "Application menu"}, 259 | {"name": "StatusBar", "responsibility": "Status information"}, 260 | {"name": "ContentArea", "responsibility": "Main content display"} 261 | ], 262 | "file_structure": { 263 | "src/": "Source code", 264 | "resources/": "Images, icons, etc.", 265 | "config/": "Configuration files" 266 | } 267 | } 268 | 269 | def _design_fullstack_architecture(self, requirements, project_spec): 270 | """Design fullstack architecture""" 271 | frontend_arch = self._design_frontend_architecture(requirements, project_spec) 272 | backend_arch = self._design_backend_architecture(requirements, project_spec) 273 | 274 | return { 275 | "type": "fullstack", 276 | "frontend": frontend_arch, 277 | "backend": backend_arch, 278 | "integration": { 279 | "api_communication": "REST API calls from frontend to backend", 280 | "authentication": "JWT tokens", 281 | "data_synchronization": "Real-time updates via WebSocket" 282 | }, 283 | "deployment": { 284 | "frontend": "Static hosting (Vercel, Netlify)", 285 | "backend": "Container deployment (Docker + K8s)", 286 | "database": "Managed database service" 287 | } 288 | } 289 | 290 | def _design_generic_architecture(self, requirements, project_spec): 291 | """Design generic architecture""" 292 | return { 293 | "type": "generic", 294 | "components": [ 295 | {"name": "Core", "responsibility": "Main application logic"}, 296 | {"name": "Interface", "responsibility": "User interface"}, 297 | {"name": "Data", "responsibility": "Data management"} 298 | ], 299 | "patterns": ["MVC", "Observer", "Factory"], 300 | "file_structure": { 301 | "src/": "Source code", 302 | "docs/": "Documentation", 303 | "tests/": "Test files" 304 | } 305 | } 306 | 307 | def _generate_design_rationale(self, architecture): 308 | """Generate design rationale""" 309 | return { 310 | "architectural_decisions": [ 311 | f"Chose {architecture.get('type', 'generic')} architecture for project requirements", 312 | "Component-based design for maintainability", 313 | "Clear separation of concerns", 314 | "Scalable file structure" 315 | ], 316 | "trade_offs": [ 317 | "Flexibility vs. Simplicity", 318 | "Performance vs. Maintainability", 319 | "Development Speed vs. Code Quality" 320 | ] 321 | } 322 | 323 | def _generate_implementation_guidance(self, architecture): 324 | """Generate implementation guidance""" 325 | return { 326 | "development_phases": [ 327 | "1. Set up project structure", 328 | "2. Implement core components", 329 | "3. Add data layer", 330 | "4. Implement user interface", 331 | "5. Add testing", 332 | "6. 
Performance optimization" 333 | ], 334 | "best_practices": [ 335 | "Follow naming conventions", 336 | "Write clean, readable code", 337 | "Include comprehensive tests", 338 | "Document API endpoints", 339 | "Use version control effectively" 340 | ] 341 | } 342 | 343 | def design_architecture(self): 344 | """Design the overall project architecture""" 345 | architecture_prompt = f""" 346 | Please design a complete project architecture for a {self.project_type} project. 347 | 348 | Project Requirements: {self.requirement} 349 | 350 | Provide a detailed JSON response with the following structure: 351 | {{ 352 | "project_structure": {{ 353 | "files": [ 354 | {{"path": "index.html", "description": "Main HTML file", "priority": 1}}, 355 | {{"path": "css/style.css", "description": "Main stylesheet", "priority": 2}}, 356 | {{"path": "js/main.js", "description": "Main JavaScript file", "priority": 2}} 357 | ] 358 | }}, 359 | "technology_stack": {{ 360 | "frontend": ["HTML5", "CSS3", "JavaScript"], 361 | "visualization": ["Chart.js", "D3.js"], 362 | "styling": ["Bootstrap", "CSS Grid", "Flexbox"] 363 | }}, 364 | "implementation_phases": [ 365 | {{"phase": 1, "description": "Create basic HTML structure and layout"}}, 366 | {{"phase": 2, "description": "Implement styling and responsive design"}}, 367 | {{"phase": 3, "description": "Add interactive visualizations and functionality"}} 368 | ], 369 | "component_interactions": [ 370 | {{"source": "main.js", "target": "index.html", "description": "Dynamic content updates"}}, 371 | {{"source": "style.css", "target": "index.html", "description": "Visual styling"}} 372 | ] 373 | }} 374 | 375 | For web visualization projects, prioritize creating interactive charts, beautiful UI, and responsive design. 376 | """ 377 | 378 | self.history_message_append(architecture_prompt) 379 | 380 | try: 381 | responses = self.itf.run(prompt=self.history_message, majority_at=self.majority, 382 | max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p) 383 | except Exception as e: 384 | print(f"Architecture design failed: {e}") 385 | time.sleep(5) 386 | return "error" 387 | 388 | architecture = responses[0] 389 | self.history_message_append(architecture, "assistant") 390 | 391 | return architecture 392 | 393 | def history_message_append(self, message, role="user"): 394 | self.history_message.append({ 395 | "role": role, 396 | "content": message 397 | }) 398 | -------------------------------------------------------------------------------- /tools/enhanced_tools.py: -------------------------------------------------------------------------------- 1 | """ 2 | Enhanced Tool System for Project-Level Code Generation 3 | Provides powerful tool invocation capabilities to enhance project code generation quality and functionality 4 | """ 5 | 6 | import os 7 | import re 8 | import json 9 | import subprocess 10 | import requests 11 | import ast 12 | import time 13 | from typing import Dict, List, Any, Optional, Tuple 14 | from abc import ABC, abstractmethod 15 | from pathlib import Path 16 | 17 | 18 | class BaseTool(ABC): 19 | """Base tool class""" 20 | 21 | def __init__(self, name: str, description: str): 22 | self.name = name 23 | self.description = description 24 | self.usage_count = 0 25 | self.last_used = None 26 | 27 | @abstractmethod 28 | def execute(self, *args, **kwargs) -> Dict[str, Any]: 29 | """Execute tool function""" 30 | pass 31 | 32 | def log_usage(self): 33 | """Log tool usage""" 34 | self.usage_count += 1 35 | self.last_used = time.time() 36 | 37 | def 
get_info(self) -> Dict[str, Any]: 38 | """Get tool information""" 39 | return { 40 | "name": self.name, 41 | "description": self.description, 42 | "usage_count": self.usage_count, 43 | "last_used": self.last_used 44 | } 45 | 46 | 47 | class CodeAnalyzer(BaseTool): 48 | """Code analysis tool""" 49 | 50 | def __init__(self): 51 | super().__init__( 52 | "code_analyzer", 53 | "Analyze code quality, complexity and potential issues" 54 | ) 55 | 56 | def execute(self, code: str, language: str = "javascript") -> Dict[str, Any]: 57 | """Analyze code quality""" 58 | self.log_usage() 59 | 60 | result = { 61 | "language": language, 62 | "metrics": {}, 63 | "issues": [], 64 | "suggestions": [] 65 | } 66 | 67 | if language.lower() in ["javascript", "js"]: 68 | result.update(self._analyze_javascript(code)) 69 | elif language.lower() in ["python", "py"]: 70 | result.update(self._analyze_python(code)) 71 | elif language.lower() in ["html"]: 72 | result.update(self._analyze_html(code)) 73 | elif language.lower() in ["css"]: 74 | result.update(self._analyze_css(code)) 75 | 76 | return result 77 | 78 | def _analyze_javascript(self, code: str) -> Dict[str, Any]: 79 | """Analyze JavaScript code""" 80 | metrics = { 81 | "lines_of_code": len(code.splitlines()), 82 | "functions_count": len(re.findall(r'function\s+\w+|=>\s*{|\w+\s*:\s*function', code)), 83 | "classes_count": len(re.findall(r'class\s+\w+', code)), 84 | "complexity_score": self._calculate_complexity(code) 85 | } 86 | 87 | issues = [] 88 | suggestions = [] 89 | 90 | # Check common issues 91 | if 'var ' in code: 92 | issues.append({ 93 | "type": "style", 94 | "message": "Recommend using 'let' or 'const' instead of 'var'", 95 | "severity": "warning" 96 | }) 97 | suggestions.append("Use modern ES6+ syntax, replace var with let/const") 98 | 99 | if '==' in code and '===' not in code: 100 | issues.append({ 101 | "type": "quality", 102 | "message": "Recommend using strict equality '===' instead of '=='", 103 | "severity": "warning" 104 | }) 105 | 106 | if 'console.log' in code: 107 | issues.append({ 108 | "type": "production", 109 | "message": "Should remove console.log in production code", 110 | "severity": "info" 111 | }) 112 | 113 | # Check for modern features usage 114 | modern_features = ['async', 'await', '=>', 'const', 'let', 'destructuring'] 115 | used_features = [f for f in modern_features if f in code] 116 | if used_features: 117 | suggestions.append(f"Uses modern JavaScript features: {', '.join(used_features)}") 118 | 119 | return { 120 | "metrics": metrics, 121 | "issues": issues, 122 | "suggestions": suggestions 123 | } 124 | 125 | def _analyze_python(self, code: str) -> Dict[str, Any]: 126 | """Analyze Python code""" 127 | try: 128 | tree = ast.parse(code) 129 | 130 | metrics = { 131 | "lines_of_code": len(code.splitlines()), 132 | "functions_count": len([n for n in ast.walk(tree) if isinstance(n, ast.FunctionDef)]), 133 | "classes_count": len([n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)]), 134 | "imports_count": len([n for n in ast.walk(tree) if isinstance(n, (ast.Import, ast.ImportFrom))]) 135 | } 136 | 137 | return { 138 | "metrics": metrics, 139 | "issues": [], 140 | "suggestions": ["Python code syntax is correct"] 141 | } 142 | except SyntaxError as e: 143 | return { 144 | "metrics": {"lines_of_code": len(code.splitlines())}, 145 | "issues": [{ 146 | "type": "syntax", 147 | "message": f"Syntax error: {str(e)}", 148 | "severity": "error" 149 | }], 150 | "suggestions": ["Fix syntax errors"] 151 | } 152 | 153 | def 
_analyze_html(self, code: str) -> Dict[str, Any]: 154 | """Analyze HTML code""" 155 | metrics = { 156 | "lines_of_code": len(code.splitlines()), 157 | "elements_count": len(re.findall(r'<\w+', code)), 158 | "semantic_elements": len(re.findall(r'<(header|nav|main|section|article|aside|footer)', code)), 159 | "images_count": len(re.findall(r'<img', code)) 160 | } 161 | 162 | issues = [] 163 | suggestions = [] 164 | 165 | # Check semantics 166 | if metrics["semantic_elements"] > 0: 167 | suggestions.append("Uses HTML5 semantic elements") 168 | else: 169 | issues.append({ 170 | "type": "accessibility", 171 | "message": "Recommend using HTML5 semantic elements", 172 | "severity": "warning" 173 | }) 174 | 175 | # Check accessibility 176 | if 'alt=' not in code and '<img' in code: 177 | issues.append({ 178 | "type": "accessibility", 179 | "message": "Images missing alt attributes", 180 | "severity": "warning" 181 | }) 182 | 183 | if 'aria-' in code: 184 | suggestions.append("Uses ARIA attributes for better accessibility") 185 | 186 | return { 187 | "metrics": metrics, 188 | "issues": issues, 189 | "suggestions": suggestions 190 | } 191 | 192 | def _analyze_css(self, code: str) -> Dict[str, Any]: 193 | """Analyze CSS code""" 194 | metrics = { 195 | "lines_of_code": len(code.splitlines()), 196 | "rules_count": len(re.findall(r'{[^}]*}', code)), 197 | "custom_properties": len(re.findall(r'--[\w-]+:', code)), 198 | "media_queries": len(re.findall(r'@media', code)) 199 | } 200 | 201 | issues = [] 202 | suggestions = [] 203 | 204 | # Check modern CSS features 205 | if metrics["custom_properties"] > 0: 206 | suggestions.append("Uses CSS custom properties (variables)") 207 | 208 | if 'grid' in code or 'flex' in code: 209 | suggestions.append("Uses modern CSS layout (Grid/Flexbox)") 210 | 211 | if metrics["media_queries"] > 0: 212 | suggestions.append("Implements responsive design") 213 | 214 | return { 215 | "metrics": metrics, 216 | "issues": issues, 217 | "suggestions": suggestions 218 | } 219 | 220 | def _calculate_complexity(self, code: str) -> int: 221 | """Calculate code complexity""" 222 | complexity = 1 # Base complexity 223 | 224 | # Add complexity for decision points (word boundaries for keywords, plain counts for operators) 225 | decision_points = ['if', 'else', 'while', 'for', 'case', 'catch', '&&', '||', '?'] 226 | for point in decision_points: 227 | complexity += len(re.findall(rf'\b{point}\b', code)) if point.isalpha() else code.count(point) 228 | 229 | return complexity 230 | 231 | 232 | class FileManager(BaseTool): 233 | """File system management tool""" 234 | 235 | def __init__(self): 236 | super().__init__( 237 | "file_manager", 238 | "Manage project file structure and operations" 239 | ) 240 | 241 | def execute(self, action: str, **kwargs) -> Dict[str, Any]: 242 | """Execute file operations""" 243 | self.log_usage() 244 | 245 | try: 246 | if action == "create_structure": 247 | return self._create_project_structure(kwargs.get("structure", {})) 248 | elif action == "validate_structure": 249 | return self._validate_structure(kwargs.get("path", ".")) 250 | elif action == "optimize_structure": 251 | return self._optimize_structure(kwargs.get("files", {})) 252 | elif action == "generate_tree": 253 | return self._generate_directory_tree(kwargs.get("path", ".")) 254 | else: 255 | return {"error": f"Unknown operation: {action}"} 256 | except Exception as e: 257 | return {"error": str(e)} 258 | 259 | def _create_project_structure(self, structure: Dict[str, str]) -> Dict[str, Any]: 260 | """Create project file structure""" 261 | created_dirs = [] 262 | created_files = [] 263 | 264 | for 
path, content in structure.items(): 265 | full_path = Path(path) 266 | 267 | # Create directories 268 | if full_path.suffix == "": # Directory 269 | full_path.mkdir(parents=True, exist_ok=True) 270 | created_dirs.append(str(full_path)) 271 | else: # File 272 | full_path.parent.mkdir(parents=True, exist_ok=True) 273 | with open(full_path, 'w', encoding='utf-8') as f: 274 | f.write(content if content else "") 275 | created_files.append(str(full_path)) 276 | 277 | return { 278 | "success": True, 279 | "created_directories": created_dirs, 280 | "created_files": created_files 281 | } 282 | 283 | def _validate_structure(self, path: str) -> Dict[str, Any]: 284 | """Validate project structure""" 285 | path_obj = Path(path) 286 | if not path_obj.exists(): 287 | return {"error": "Path does not exist"} 288 | 289 | structure_info = { 290 | "is_valid_project": False, 291 | "has_index_html": False, 292 | "has_css_folder": False, 293 | "has_js_folder": False, 294 | "has_manifest": False, 295 | "suggestions": [] 296 | } 297 | 298 | # Check key files and directories 299 | files_and_dirs = list(path_obj.iterdir()) 300 | file_names = [f.name for f in files_and_dirs] 301 | 302 | structure_info["has_index_html"] = "index.html" in file_names 303 | structure_info["has_css_folder"] = any(f.name == "css" and f.is_dir() for f in files_and_dirs) 304 | structure_info["has_js_folder"] = any(f.name == "js" and f.is_dir() for f in files_and_dirs) 305 | structure_info["has_manifest"] = "manifest.json" in file_names 306 | 307 | # Generate suggestions 308 | if not structure_info["has_index_html"]: 309 | structure_info["suggestions"].append("Recommend adding index.html as the main page") 310 | 311 | if not structure_info["has_css_folder"]: 312 | structure_info["suggestions"].append("Recommend creating css folder to organize style files") 313 | 314 | if not structure_info["has_js_folder"]: 315 | structure_info["suggestions"].append("Recommend creating js folder to organize script files") 316 | 317 | if not structure_info["has_manifest"]: 318 | structure_info["suggestions"].append("Recommend adding manifest.json for PWA support") 319 | 320 | structure_info["is_valid_project"] = all([ 321 | structure_info["has_index_html"], 322 | structure_info["has_css_folder"], 323 | structure_info["has_js_folder"] 324 | ]) 325 | 326 | return structure_info 327 | 328 | def _optimize_structure(self, files: Dict[str, str]) -> Dict[str, Any]: 329 | """Optimize file structure""" 330 | optimized_structure = {} 331 | recommendations = [] 332 | 333 | # Reorganize by file type 334 | for file_path, content in files.items(): 335 | path_obj = Path(file_path) 336 | extension = path_obj.suffix.lower() 337 | 338 | if extension == '.html': 339 | if path_obj.name == 'index.html': 340 | optimized_structure['index.html'] = content 341 | else: 342 | optimized_structure[f'pages/{path_obj.name}'] = content 343 | elif extension == '.css': 344 | optimized_structure[f'css/{path_obj.name}'] = content 345 | elif extension in ['.js', '.ts']: 346 | optimized_structure[f'js/{path_obj.name}'] = content 347 | elif extension in ['.png', '.jpg', '.jpeg', '.svg', '.ico']: 348 | optimized_structure[f'assets/images/{path_obj.name}'] = content 349 | elif extension == '.json': 350 | if 'manifest' in path_obj.name: 351 | optimized_structure['manifest.json'] = content 352 | else: 353 | optimized_structure[f'data/{path_obj.name}'] = content 354 | else: 355 | optimized_structure[file_path] = content 356 | 357 | # Generate optimization suggestions 358 | if len([f for f in 
files.keys() if f.endswith('.css')]) > 3: 359 | recommendations.append("Consider splitting CSS files further into main.css, components.css, utilities.css") 360 | 361 | if len([f for f in files.keys() if f.endswith('.js')]) > 5: 362 | recommendations.append("Consider using modular JavaScript, split files by functionality") 363 | 364 | return { 365 | "optimized_structure": optimized_structure, 366 | "recommendations": recommendations, 367 | "original_files_count": len(files), 368 | "optimized_files_count": len(optimized_structure) 369 | } 370 | 371 | def _generate_directory_tree(self, path: str) -> Dict[str, Any]: 372 | """Generate directory tree""" 373 | def build_tree(dir_path: Path, prefix: str = "") -> List[str]: 374 | tree_lines = [] 375 | if not dir_path.is_dir(): 376 | return tree_lines 377 | 378 | items = sorted(dir_path.iterdir(), key=lambda x: (not x.is_dir(), x.name)) 379 | 380 | for i, item in enumerate(items): 381 | is_last = i == len(items) - 1 382 | current_prefix = "└── " if is_last else "├── " 383 | tree_lines.append(f"{prefix}{current_prefix}{item.name}") 384 | 385 | if item.is_dir(): 386 | extension_prefix = " " if is_last else "│ " 387 | tree_lines.extend(build_tree(item, prefix + extension_prefix)) 388 | 389 | return tree_lines 390 | 391 | path_obj = Path(path) 392 | if not path_obj.exists(): 393 | return {"error": "Path does not exist"} 394 | 395 | tree = build_tree(path_obj) 396 | 397 | return { 398 | "tree": tree, 399 | "tree_string": "\n".join([path_obj.name + "/"] + tree) 400 | } 401 | 402 | 403 | class QualityChecker(BaseTool): 404 | """Code quality checking tool""" 405 | 406 | def __init__(self): 407 | super().__init__( 408 | "quality_checker", 409 | "Check code quality, security and best practices" 410 | ) 411 | 412 | def execute(self, files: Dict[str, str], project_type: str = "web_visualization") -> Dict[str, Any]: 413 | """Execute quality check""" 414 | self.log_usage() 415 | 416 | results = { 417 | "overall_score": 0, 418 | "file_scores": {}, 419 | "security_issues": [], 420 | "performance_issues": [], 421 | "best_practices": [], 422 | "accessibility_score": 0, 423 | "recommendations": [] 424 | } 425 | 426 | total_score = 0 427 | file_count = 0 428 | 429 | for file_path, content in files.items(): 430 | file_result = self._check_file_quality(file_path, content) 431 | results["file_scores"][file_path] = file_result 432 | total_score += file_result.get("score", 0) 433 | file_count += 1 434 | 435 | # Collect various issues 436 | results["security_issues"].extend(file_result.get("security_issues", [])) 437 | results["performance_issues"].extend(file_result.get("performance_issues", [])) 438 | results["best_practices"].extend(file_result.get("best_practices", [])) 439 | 440 | # Calculate overall score 441 | results["overall_score"] = total_score / file_count if file_count > 0 else 0 442 | 443 | # Calculate accessibility score 444 | results["accessibility_score"] = self._calculate_accessibility_score(files) 445 | 446 | # Generate recommendations 447 | results["recommendations"] = self._generate_quality_recommendations(results) 448 | 449 | return results 450 | 451 | def _check_file_quality(self, file_path: str, content: str) -> Dict[str, Any]: 452 | """Check individual file quality""" 453 | path_obj = Path(file_path) 454 | extension = path_obj.suffix.lower() 455 | 456 | if extension == '.html': 457 | return self._check_html_quality(content) 458 | elif extension == '.css': 459 | return self._check_css_quality(content) 460 | elif extension in ['.js', '.ts']: 461 | 
return self._check_js_quality(content) 462 | else: 463 | return {"score": 70, "issues": [], "suggestions": []} 464 | 465 | def _check_html_quality(self, content: str) -> Dict[str, Any]: 466 | """Check HTML quality""" 467 | score = 100 468 | issues = [] 469 | best_practices = [] 470 | security_issues = [] 471 | performance_issues = [] 472 | 473 | # Check DOCTYPE 474 | if '<!DOCTYPE html>' not in content: 475 | score -= 10 476 | issues.append("Missing HTML5 DOCTYPE declaration") 477 | else: 478 | best_practices.append("Uses HTML5 DOCTYPE") 479 | 480 | # Check language declaration 481 | if 'lang=' not in content: 482 | score -= 5 483 | issues.append("Missing language declaration (lang attribute)") 484 | 485 | # Check meta viewport 486 | if 'viewport' not in content: 487 | score -= 10 488 | issues.append("Missing viewport meta tag") 489 | 490 | # Check semantics 491 | semantic_elements = ['header', 'nav', 'main', 'section', 'article', 'aside', 'footer'] 492 | used_semantic = [elem for elem in semantic_elements if f'<{elem}' in content] 493 | if len(used_semantic) >= 3: 494 | best_practices.append("Uses semantic HTML5 elements") 495 | elif len(used_semantic) > 0: 496 | best_practices.append(f"Uses some semantic elements: {', '.join(used_semantic)}") 497 | else: 498 | score -= 15 499 | issues.append("Recommend using semantic HTML5 elements") 500 | 501 | # Check accessibility 502 | if 'aria-' in content: 503 | best_practices.append("Uses ARIA attributes") 504 | score += 5 505 | 506 | if 'alt=' in content: 507 | best_practices.append("Images include alt attributes") 508 | elif '<img' in content: 509 | score -= 10 510 | issues.append("Images missing alt attributes") 511 | 512 | # Check security 513 | if 'javascript:' in content: 514 | security_issues.append("Avoid using javascript: protocol") 515 | score -= 20 516 | 517 | # Check performance 518 | if 'preload' in content or 'preconnect' in content: 519 | performance_issues.append("Uses resource preloading optimization") 520 | score += 5 521 | 522 | return { 523 | "score": max(0, score), 524 | "issues": issues, 525 | "best_practices": best_practices, 526 | "security_issues": security_issues, 527 | "performance_issues": performance_issues 528 | } 529 | 530 | def _check_css_quality(self, content: str) -> Dict[str, Any]: 531 | """Check CSS quality""" 532 | score = 100 533 | issues = [] 534 | best_practices = [] 535 | performance_issues = [] 536 | 537 | # Check CSS custom properties 538 | if '--' in content and ':root' in content: 539 | best_practices.append("Uses CSS custom properties (variables)") 540 | score += 10 541 | 542 | # Check modern layout 543 | if 'display: grid' in content or 'display: flex' in content: 544 | best_practices.append("Uses modern CSS layout") 545 | score += 5 546 | 547 | # Check responsive design 548 | if '@media' in content: 549 | best_practices.append("Implements responsive design") 550 | score += 10 551 | 552 | # Check performance issues 553 | if '*' in content and 'box-sizing' in content: 554 | performance_issues.append("Uses universal selector for box-sizing reset") 555 | 556 | # Check maintainability 557 | if len(content.splitlines()) > 500: 558 | issues.append("CSS file too long, recommend splitting into multiple files") 559 | score -= 10 560 | 561 | return { 562 | "score": max(0, score), 563 | "issues": issues, 564 | "best_practices": best_practices, 565 | "performance_issues": performance_issues 566 | } 567 | 568 | def _check_js_quality(self, content: str) -> Dict[str, Any]: 569 | """Check JavaScript quality""" 570 | 
score = 100 571 | issues = [] 572 | best_practices = [] 573 | security_issues = [] 574 | performance_issues = [] 575 | 576 | # Check modern JavaScript features 577 | modern_features = ['const ', 'let ', '=>', 'async ', 'await ', '...'] 578 | used_features = [f.strip() for f in modern_features if f in content] 579 | if len(used_features) >= 3: 580 | best_practices.append(f"Uses modern ES6+ features: {', '.join(used_features)}") 581 | score += 10 582 | 583 | # Check bad practices 584 | if 'var ' in content: 585 | issues.append("Recommend using let/const instead of var") 586 | score -= 5 587 | 588 | if 'eval(' in content: 589 | security_issues.append("Avoid using eval() function") 590 | score -= 20 591 | 592 | if '==' in content and '===' not in content: 593 | issues.append("Recommend using strict equality operator (===)") 594 | score -= 5 595 | 596 | # Check error handling 597 | if 'try' in content and 'catch' in content: 598 | best_practices.append("Implements error handling") 599 | score += 5 600 | 601 | # Check performance 602 | if 'addEventListener' in content: 603 | best_practices.append("Uses event listeners") 604 | 605 | if 'querySelector' in content: 606 | best_practices.append("Uses modern DOM query methods") 607 | 608 | return { 609 | "score": max(0, score), 610 | "issues": issues, 611 | "best_practices": best_practices, 612 | "security_issues": security_issues, 613 | "performance_issues": performance_issues 614 | } 615 | 616 | def _calculate_accessibility_score(self, files: Dict[str, str]) -> int: 617 | """Calculate accessibility score""" 618 | score = 0 619 | total_checks = 0 620 | 621 | for file_path, content in files.items(): 622 | if file_path.endswith('.html'): 623 | total_checks += 10 624 | 625 | # ARIA attributes 626 | if 'aria-' in content: 627 | score += 2 628 | 629 | # Semantic elements 630 | semantic_count = len(re.findall(r'<(header|nav|main|section|article|aside|footer)', content)) 631 | if semantic_count >= 3: 632 | score += 2 633 | 634 | # Image alt attributes 635 | if '<img' in content: 636 | if 'alt=' in content: 637 | score += 1 638 | else: 639 | score += 1 # No images also passes 640 | 641 | # Form labels 642 | if '<input' in content: 643 | if '<label' in content: 644 | score += 1 645 | else: 646 | score += 1 647 | 648 | # Skip links 649 | if 'skip' in content.lower(): 650 | score += 1 651 | 652 | # Role attributes 653 | if 'role=' in content: 654 | score += 1 655 | 656 | # Color contrast indication 657 | if 'color:' in content and '#' in content: 658 | score += 1 # Simplified check 659 | 660 | # Keyboard navigation 661 | if 'tabindex' in content: 662 | score += 1 663 | 664 | return int((score / max(total_checks, 1)) * 100) if total_checks > 0 else 80 665 | 666 | def _generate_quality_recommendations(self, results: Dict[str, Any]) -> List[str]: 667 | """Generate quality improvement recommendations""" 668 | recommendations = [] 669 | 670 | overall_score = results["overall_score"] 671 | 672 | if overall_score < 70: 673 | recommendations.append("Overall code quality needs improvement, focus on basic standards") 674 | elif overall_score < 85: 675 | recommendations.append("Code quality is good, can further optimize performance and accessibility") 676 | else: 677 | recommendations.append("Code quality is excellent, continue maintaining best practices") 678 | 679 | # Security recommendations 680 | if results["security_issues"]: 681 | recommendations.append("Fix discovered security issues to ensure application security") 682 | 683 | # Performance recommendations 684 | 
if results["performance_issues"]: 685 | recommendations.append("Optimize performance-related issues to improve user experience") 686 | 687 | # Accessibility recommendations 688 | accessibility_score = results["accessibility_score"] 689 | if accessibility_score < 70: 690 | recommendations.append("Significantly improve accessibility, add ARIA attributes and semantic tags") 691 | elif accessibility_score < 90: 692 | recommendations.append("Further improve accessibility features") 693 | else: 694 | recommendations.append("Excellent accessibility performance") 695 | 696 | return recommendations 697 | 698 | 699 | class ToolOrchestrator: 700 | """Tool orchestrator - manage and coordinate all tool usage""" 701 | 702 | def __init__(self): 703 | self.tools = { 704 | "code_analyzer": CodeAnalyzer(), 705 | "file_manager": FileManager(), 706 | "quality_checker": QualityChecker() 707 | } 708 | self.execution_history = [] 709 | 710 | def get_available_tools(self) -> Dict[str, str]: 711 | """Get available tools list""" 712 | return {name: tool.description for name, tool in self.tools.items()} 713 | 714 | def get_tool(self, tool_name: str): 715 | """Get specified tool instance""" 716 | return self.tools.get(tool_name) 717 | 718 | def execute_tool(self, tool_name: str, *args, **kwargs) -> Dict[str, Any]: 719 | """Execute specified tool""" 720 | if tool_name not in self.tools: 721 | return {"error": f"Tool {tool_name} does not exist"} 722 | 723 | try: 724 | result = self.tools[tool_name].execute(*args, **kwargs) 725 | 726 | # Record execution history 727 | self.execution_history.append({ 728 | "tool": tool_name, 729 | "timestamp": time.time(), 730 | "args": args, 731 | "kwargs": kwargs, 732 | "success": "error" not in result 733 | }) 734 | 735 | return result 736 | except Exception as e: 737 | error_result = {"error": f"Tool execution failed: {str(e)}"} 738 | self.execution_history.append({ 739 | "tool": tool_name, 740 | "timestamp": time.time(), 741 | "args": args, 742 | "kwargs": kwargs, 743 | "success": False, 744 | "error": str(e) 745 | }) 746 | return error_result 747 | 748 | def get_tool_usage_stats(self) -> Dict[str, Any]: 749 | """Get tool usage statistics""" 750 | stats = {} 751 | for name, tool in self.tools.items(): 752 | stats[name] = tool.get_info() 753 | 754 | return { 755 | "tools": stats, 756 | "total_executions": len(self.execution_history), 757 | "recent_executions": self.execution_history[-10:] # Last 10 executions 758 | } 759 | 760 | def recommend_tools(self, context: str) -> List[str]: 761 | """Recommend suitable tools based on context""" 762 | recommendations = [] 763 | 764 | context_lower = context.lower() 765 | 766 | if any(keyword in context_lower for keyword in ["analyze", "check", "quality", "complexity"]): 767 | recommendations.append("code_analyzer") 768 | 769 | if any(keyword in context_lower for keyword in ["file", "structure", "directory", "organize"]): 770 | recommendations.append("file_manager") 771 | 772 | if any(keyword in context_lower for keyword in ["quality", "security", "performance", "best practices"]): 773 | recommendations.append("quality_checker") 774 | 775 | return recommendations if recommendations else ["code_analyzer", "file_manager", "quality_checker"] 776 | 777 | def generate_report(self) -> Dict[str, Any]: 778 | """Generate tool usage report""" 779 | report = { 780 | "timestamp": time.time(), 781 | "tools_stats": self.get_tool_usage_stats(), 782 | "execution_summary": { 783 | "total_executions": len(self.execution_history), 784 | "successful_executions": 

    def generate_report(self) -> Dict[str, Any]:
        """Generate a tool usage report"""
        report = {
            "timestamp": time.time(),
            "tools_stats": self.get_tool_usage_stats(),
            "execution_summary": {
                "total_executions": len(self.execution_history),
                "successful_executions": len([h for h in self.execution_history if h["success"]]),
                "failed_executions": len([h for h in self.execution_history if not h["success"]]),
            },
            "most_used_tools": self._get_most_used_tools(),
            "recommendations": self._generate_usage_recommendations()
        }
        return report

    def _get_most_used_tools(self) -> List[Tuple[str, int]]:
        """Get the most used tools"""
        usage_count = {}
        for tool_name, tool in self.tools.items():
            usage_count[tool_name] = tool.usage_count
        return sorted(usage_count.items(), key=lambda x: x[1], reverse=True)

    def _generate_usage_recommendations(self) -> List[str]:
        """Generate usage recommendations"""
        recommendations = []
        total_executions = len(self.execution_history)

        if total_executions > 0:
            success_rate = len([h for h in self.execution_history if h["success"]]) / total_executions
            if success_rate < 0.8:
                recommendations.append("Tool execution success rate is low; check the input parameters")
            else:
                recommendations.append("Tool usage is effective; keep the current workflow")

        most_used = self._get_most_used_tools()
        if most_used and most_used[0][1] > total_executions * 0.6:
            recommendations.append(f"Over-reliance on the {most_used[0][0]} tool; consider balancing usage across the other tools")

        return recommendations


# Global tool orchestrator instance
global_tool_orchestrator = ToolOrchestrator()
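

# Usage sketch (illustrative; not part of the original module). It only calls
# orchestrator methods defined above; running this file directly as a script is
# an assumption of the demo, not something the module itself requires.
if __name__ == "__main__":
    # List registered tools and their descriptions
    print(global_tool_orchestrator.get_available_tools())

    # Keyword-based recommendation: "check" and "quality" match two tool keyword sets
    print(global_tool_orchestrator.recommend_tools("check the quality of this code"))

    # Dispatch by name; unknown tool names return an {"error": ...} dict instead of raising
    print(global_tool_orchestrator.execute_tool("nonexistent_tool"))

    # Aggregate stats, most-used tools, and usage recommendations
    print(global_tool_orchestrator.generate_report())
--------------------------------------------------------------------------------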