├── core
    ├── __init__.py
    ├── interface.py
    └── backend.py
├── evaluate.sh
├── run.sh
├── roles
    ├── __init__.py
    ├── instruction.py
    ├── analyst.py
    ├── tester.py
    ├── rule_descriptions_actc.py
    ├── rule_descriptions_act.py
    ├── coder.py
    ├── project_tester.py
    ├── ui_designer.py
    ├── project_roles.py
    ├── web_visualization_specialist.py
    ├── enhanced_role.py
    └── project_architect.py
├── tools
    ├── __init__.py
    ├── global_tool_orchestrator.py
    └── enhanced_tools.py
├── LICENSE
├── README.md
├── evaluate
    ├── all_evaluate.py
    ├── execute
    │   ├── execution.py
    │   └── _execution.py
    └── evaluation.py
├── run_project.sh
├── project_main.py
├── utils.py
├── main.py
├── session.py
└── project_session.py


/core/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluate.sh:
--------------------------------------------------------------------------------
1 | python evaluate/all_evaluate.py --input_path humaneval_output_0301.jsonl
2 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | python main.py --dataset humaneval --signature --model gpt-3.5-turbo --output_path humaneval_output.jsonl
2 | 


--------------------------------------------------------------------------------
/roles/__init__.py:
--------------------------------------------------------------------------------
1 | from .analyst import Analyst
2 | from .coder import Coder
3 | from .tester import Tester
4 | # from .reviewer import Reviewer
5 | 
6 | 


--------------------------------------------------------------------------------
/roles/instruction.py:
--------------------------------------------------------------------------------
1 | INSTRUCTPLAN = "The plan from the requirement analyst is as following:\n{report}"
2 | INSTRUCTREPORT = "The report from the tester is as following:\n{report}"
3 | INSTRUCTCODE = "Please implement the following code. Use ```python to put the Python code in markdown quotes:\n{requirement}"
4 | INSTRUCTEST = "The code provided by developer is as follows:\n{code}\n"
5 | 
6 | 


--------------------------------------------------------------------------------
/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | # Simple tools module for project mode
 2 | from .global_tool_orchestrator import GlobalToolOrchestrator
 3 | from .simple_tools import (
 4 |     CodeAnalyzer, FileManager, QualityChecker, 
 5 |     APIIntegrationTool, AutomatedTester,
 6 |     code_analyzer, file_manager, quality_checker,
 7 |     api_integration_tool, automated_tester
 8 | )
 9 | 
10 | # Create global instance
11 | global_tool_orchestrator = GlobalToolOrchestrator()
12 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 YihongDong
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/roles/analyst.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import copy
 3 | import json
 4 | import argparse
 5 | import tqdm
 6 | import time
 7 | 
 8 | from core import interface
 9 | from utils import code_truncate, construct_system_message
10 | 
11 | 
12 | class Analyst(object):
13 |     def __init__(self, TEAM, ANALYST, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512,
14 |                                 temperature=0.0, top_p=0.95):
15 |         self.model = model
16 |         self.majority = majority
17 |         self.max_tokens = max_tokens
18 |         self.temperature = temperature
19 |         self.top_p = top_p
20 |         self.history_message = []
21 | 
22 |         self.itf = interface.ProgramInterface(
23 |             stop='',
24 |             verbose=False,
25 |             model = self.model,
26 |         )
27 | 
28 |         system_message = construct_system_message(requirement, ANALYST, TEAM)
29 |         self.history_message_append(system_message)
30 | 
31 | 
32 |     def analyze(self):
33 |         try:
34 |             responses = self.itf.run(prompt=self.history_message, majority_at = self.majority, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
35 |         except Exception as e:
36 |             print(e)
37 |             print("analyze fail")
38 |             time.sleep(5)
39 |             return "error"
40 | 
41 |         plan = responses[0]
42 | 
43 |         self.history_message_append(plan, "assistant")
44 |     
45 |         return plan
46 |     
47 |     def history_message_append(self, system_message, role="user"):
48 |         self.history_message.append({
49 |             "role": role,
50 |             "content": system_message
51 |         })
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/roles/tester.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import copy
 3 | import json
 4 | import argparse
 5 | import tqdm
 6 | import time
 7 | 
 8 | from core import interface
 9 | from utils import code_truncate, construct_system_message
10 | from roles.instruction import INSTRUCTEST
11 | 
12 | 
13 | class Tester(object):
14 |     def __init__(self, TEAM, TESTER, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512,
15 |                                 temperature=0.0, top_p=0.95):
16 |         self.model = model
17 |         self.majority = majority
18 |         self.max_tokens = max_tokens
19 |         self.temperature = temperature
20 |         self.top_p = top_p
21 |         self.history_message = []
22 | 
23 |         self.itf = interface.ProgramInterface(
24 |             stop='',
25 |             verbose=False,
26 |             model = self.model,
27 |         )
28 | 
29 |         system_message = construct_system_message(requirement, TESTER, TEAM)
30 |         self.history_message_append(system_message)
31 |     
32 | 
33 |     def test(self, code): 
34 |         instruction = INSTRUCTEST.format(code=code)
35 |         self.history_message.append({
36 |             "role": "user",
37 |             "content": instruction
38 |         })
39 | 
40 |         try:
41 |             responses = self.itf.run(prompt=self.history_message, majority_at = self.majority, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
42 |         except Exception as e:
43 |             print(e)
44 |             print("test fail")
45 |             time.sleep(5)
46 |             return "error"
47 | 
48 |         report = responses[0]
49 | 
50 |         self.history_message_append(report, "assistant")
51 | 
52 |         return report
53 |     
54 |     def history_message_append(self, system_message, role="user"):
55 |         self.history_message.append({
56 |             "role": role,
57 |             "content": system_message
58 |         })
59 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Self-collaboration Code Generation via ChatGPT
 2 | [![arXiv](https://img.shields.io/badge/arXiv-2304.07590-b31b1b.svg)](https://arxiv.org/abs/2304.07590)
 3 | 
 4 | The first LLM-based agent for (Function-Level and Project-Level) code generation in software engineering, released in April 2023, has been accepted to TOSEM.
 5 | 
 6 | ### Function-Level Generation 
 7 | ```bash
 8 | # Generate function-level code
 9 | bash run.sh
10 | # Evaluate results
11 | bash evaluate.sh
12 | ```
13 | 
14 | ### Project-Level Generation
15 | ```bash
16 | # Generate project-level code
17 | bash run_project.sh
18 | 
19 | ```
20 | 
21 | #### 📋 Usage Examples
22 | 
23 | ##### 1. Portfolio Website
24 | ```bash
25 | python main.py --mode project \
26 |     --project_type web_visualization \
27 |     --requirement "Create a personal portfolio with project showcase and contact form" \
28 |     --output_dir "my_portfolio"
29 | ```
30 | 
31 | ##### 2. Interactive Dashboard
32 | ```bash
33 | python main.py --mode project \
34 |     --project_type web_visualization \
35 |     --requirement "Create a sales analytics dashboard with charts, filters, and real-time updates" \
36 |     --output_dir "sales_dashboard"
37 | ```
38 | 
39 | ##### 3. Data Visualization App
40 | ```bash
41 | python main.py --mode project \
42 |     --project_type web_visualization \
43 |     --requirement "Create an interactive data explorer with multiple chart types" \
44 |     --output_dir "data_explorer"
45 | ```
46 | 
47 | ### Citation
48 | ```
49 | @article{Self-collaboration,
50 |   author={Dong, Yihong and Jiang, Xue and Jin, Zhi and Li, Ge},
51 |   title        = {Self-collaboration Code Generation via ChatGPT},
52 |   journal      = {{ACM} Trans. Softw. Eng. Methodol.},
53 |   volume       = {33},
54 |   number       = {7},
55 |   pages        = {189:1--189:38},
56 |   year         = {2024}
57 |   keywords     = {Code generation, large language models, multi-agent collaboration, software development, software engineering}
58 | }
59 | ```
60 | 


--------------------------------------------------------------------------------
/roles/rule_descriptions_actc.py:
--------------------------------------------------------------------------------
 1 | ANALYST = '''I want you to act as a requirement analyst on our development team. Given a user requirement, your task is to analyze, decompose, and develop a high-level and concise plan to guide our developer in writing programs. The plan should include the following information:
 2 | 1. Decompose the requirement into several easy-to-solve subproblems that can be more easily implemented by the developer.
 3 | 2. Develop a high-level plan that outlines the major steps of the program.
 4 | Remember, you only need to provide the concise plan in json.
 5 | '''
 6 | 
 7 | PYTHON_DEVELOPER = '''I want you to act as a Python developer on our development team. You will receive plans from a requirement analyst or test reports from a tester. Your job is split into two parts:
 8 | 1. If you receive a plan from a requirement analyst, write code in Python that meets the requirement following the plan. Ensure that the code you write is efficient, readable, and follows best practices.
 9 | 2. If you receive a test report from a tester, write the fixed or improved code based on the content of the report. Ensure that any changes made to the code do not introduce new bugs or negatively impact the performance of the code.
10 | Remember, you only need to provide the code in Python and do not need to explain the code you wrote.
11 | '''
12 | 
13 | TESTER = '''I want you to act as a tester on our development team. You will receive the code written by the developer, and your job is as follows:
14 | 1. Write the test code that starts with "def check(candidate):" and candidate is a 'function' object.
15 | 2. Call candidate with different inputs (up to five) that starts with "print", and do not write assert statements.
16 | Remember, you only need to provide the test code in Python and avoid using assert statements.
17 | '''
18 | 
19 | TEAM = '''There is a development team that includes a requirement analyst, a Python developer, and a tester. The team needs to develop programs that satisfy the requirement of the users. The different roles have different divisions of labor and need to cooperate with each others.
20 | '''
21 | 


--------------------------------------------------------------------------------
/core/interface.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import signal
 3 | from contextlib import redirect_stdout
 4 | from typing import Any, Callable, List, Optional
 5 | from collections import Counter
 6 | 
 7 | from .backend import call_chatgpt
 8 | 
 9 | 
10 | class timeout:
11 |     def __init__(self, seconds=1, error_message='Timeout'):
12 |         self.seconds = seconds
13 |         self.error_message = error_message
14 |     def timeout_handler(self, signum, frame):
15 |         raise TimeoutError(self.error_message)
16 |     def __enter__(self):
17 |         signal.signal(signal.SIGALRM, self.timeout_handler)
18 |         signal.alarm(self.seconds)
19 |     def __exit__(self, type, value, traceback):
20 |         signal.alarm(0)
21 | 
22 | class ProgramInterface:
23 |     
24 |     def __init__(
25 |         self,
26 |         model: str = 'code-davinci-002',
27 |         stop: str = '\n\n',
28 |         get_answer_symbol: Optional[str] = None,
29 |         get_answer_expr: Optional[str] = None,
30 |         get_answer_from_stdout: bool = False,
31 |         verbose: bool = False
32 |     ) -> None:
33 | 
34 |         self.model = model
35 |         self.history = []
36 |         self.stop = stop
37 |         self.answer_symbol = get_answer_symbol
38 |         self.answer_expr = get_answer_expr
39 |         self.get_answer_from_stdout = get_answer_from_stdout
40 |         self.verbose = verbose
41 |         
42 |     def clear_history(self):
43 |         self.history = []
44 |     
45 |     def process_generation_to_code(self, gens: str):
46 |         return [g.split('\n') for g in gens]
47 |     
48 |     def generate(self, prompt: str, temperature: float =0.0, top_p: float =1.0, 
49 |             max_tokens: int =512, majority_at: int =None, echo: bool =False, return_logprobs: bool =False):
50 | 
51 |         if 'davinci' not in self.model:
52 |             gens = call_chatgpt(prompt, model=self.model, stop=self.stop, 
53 |                 temperature=temperature, top_p=top_p, max_tokens=max_tokens, echo=echo, majority_at=majority_at)
54 |             
55 |         return gens
56 |     
57 |     def run(self, prompt: str, time_out: float =10, temperature: float =0.0, top_p: float =1.0, 
58 |             max_tokens: int =512, majority_at: int =None, echo=False, return_logprobs: bool =False):
59 |         code_snippets = self.generate(prompt, majority_at=majority_at, temperature=temperature, top_p=top_p, max_tokens=max_tokens, echo=echo, return_logprobs=return_logprobs)
60 | 
61 |         return code_snippets
62 |     


--------------------------------------------------------------------------------
/evaluate/all_evaluate.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import json
 3 | import copy
 4 | import argparse
 5 | import sys
 6 | from pathlib import Path
 7 | sys.path.append(str(Path(__file__).resolve().parents[1]))
 8 | 
 9 | from utils import build_test_method, find_method_name, code_split, prompt_split_humaneval
10 | from execute.execution import evaluate_with_test_code, evaluate_with_test_code_T
11 | from evaluation import pass_at_K, AvgPassRatio
12 | from datasets import load_dataset, load_from_disk
13 | 
14 | parser = argparse.ArgumentParser()
15 | parser.add_argument('--dataset', type=str, default='humaneval')
16 | parser.add_argument('--lang', type=str, default='python')
17 | parser.add_argument('--input_path', type=str, default='humaneval_output_240415.jsonl')
18 | parser.add_argument('--output_path', type=str, default='outputs/test_eval.jsonl')
19 | args = parser.parse_args()
20 | 
21 | INPUTPATH = args.input_path
22 | OUTPUT_PATH = args.output_path
23 | 
24 | if args.dataset == 'humaneval':
25 |     dataset = load_dataset("openai_humaneval")
26 |     dataset_key = ["test"]
27 | 
28 | 
29 | with open(INPUTPATH, 'r') as f:
30 |     except_list = []
31 |     handled_solutions = [json.loads(line) for line in f if json.loads(line)["task_id"] not in except_list]
32 |     print(len(handled_solutions))
33 |     
34 | for solution in handled_solutions:
35 |     solution["generation"] = solution['prompt'] + solution["completion"]  
36 |     solution["prompt"] = ""
37 |     solution["entry_point"] = find_method_name(solution["generation"]) if find_method_name(solution["generation"]) else "candidate"
38 |     solution["completion"] = solution["generation"]
39 | 
40 | print(INPUTPATH)
41 | data_dict = {}
42 | for key in dataset_key:
43 |     for idx, task in enumerate(dataset[key]):
44 |         data_dict[task['task_id']] = task
45 | 
46 | exec_result = evaluate_with_test_code(handled_solutions, timeout=10)
47 | print('pass@1:')
48 | pass_at_K(exec_result, k=[1])
49 | 
50 | if args.dataset == "humaneval":
51 |     test_case_path= 'data/HumanEval_test_case_ET.jsonl'
52 |     with open(test_case_path, 'r') as f:
53 |         test_cases = [json.loads(line) for line in f]
54 |         
55 |     test_cases_dict = {}
56 |     for case in test_cases:
57 |         test = build_test_method(case['test_case_list'], "", case['entry_point'])
58 |         test_cases_dict[case['task_id']] = test
59 | 
60 | 
61 | for solution in handled_solutions:
62 |     solution['test'] =test_cases_dict[solution['task_id']]
63 | 
64 | exec_result_T = evaluate_with_test_code(handled_solutions, timeout=10)
65 | 
66 | print('pass@1 - ET:')
67 | pass_at_K(exec_result_T, k=[1])


--------------------------------------------------------------------------------
/roles/rule_descriptions_act.py:
--------------------------------------------------------------------------------
 1 | ANALYST = '''I want you to act as a requirement analyst on our development team. Given a user requirement, your task is to analyze, decompose, and develop a high-level plan to guide our developer in writing programs. The plan should include the following information:
 2 | 1. Decompose the requirement into several easy-to-solve subproblems that can be more easily implemented by the developer.
 3 | 2. Develop a high-level plan that outlines the major steps of the program.
 4 | Remember, your plan should be high-level and focused on guiding the developer in writing code, rather than providing implementation details.
 5 | '''
 6 | 
 7 | DEVELOPER = '''I want you to act as a developer on our development team. You will receive plans from a requirements analyst or test reports from a reviewer. Your job is split into two parts:
 8 | 1. If you receive a plan from a requirements analyst, write code in Python that meets the requirements following the plan. Ensure that the code you write is efficient, readable, and follows best practices.
 9 | 2. If you receive a test report from a reviewer, fix or improve the code based on the content of the report. Ensure that any changes made to the code do not introduce new bugs or negatively impact the performance of the code.
10 | Remember, do not need to explain the code you wrote. You should provide a well-formed python code and your response should start with "```python\n".
11 | '''
12 | 
13 | TESTER = '''I want you to act as a tester in the team. You will receive the code written by the developer, and your job is to complete a report as follows:
14 | {
15 | "Code Review": Evaluate the structure and syntax of the code to ensure that it conforms to the specifications of the programming language, that the APIs used are correct, and that the code does not contain syntax errors or logic holes.
16 | "Code Description": Briefly describe what the code is supposed to do. This helps identify differences between the code implementation and the requirement.
17 | "Satisfying the requirements": Ture or False. This indicates whether the code satisfies the requirement.
18 | "Edge cases": Edge cases are scenarios where the code might not behave as expected or where inputs are at the extreme ends of what the code should handle.
19 | "Conclusion": "Code Test Passed" or "Code Test Failed". This is a summary of the test results.
20 | }
21 | '''
22 | 
23 | TEAM = '''There is a development team that includes a requirements analyst, a developer, and a quality assurance reviewer. The team needs to develop programs that satisfy the requirements of the users. The different roles have different divisions of labor and need to cooperate with each others.
24 | '''


--------------------------------------------------------------------------------
/roles/coder.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import openai
 3 | import time
 4 | import copy
 5 | import json
 6 | import argparse
 7 | import tqdm
 8 | 
 9 | from core import interface
10 | from utils import code_truncate, construct_system_message
11 | from roles.instruction import INSTRUCTPLAN, INSTRUCTREPORT, INSTRUCTCODE
12 | 
13 | class Coder(object):
14 |     def __init__(self, TEAM, PYTHON_DEVELOPER, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512,
15 |                                 temperature=0.0, top_p=0.95):
16 |         self.model = model
17 |         self.majority = majority
18 |         self.max_tokens = max_tokens
19 |         self.temperature = temperature
20 |         self.top_p = top_p
21 |         self.history_message = []
22 |         self.requirement = requirement
23 | 
24 |         self.itf = interface.ProgramInterface(
25 |             stop='',
26 |             verbose=False,
27 |             model = self.model,
28 |         )
29 | 
30 |         system_message = construct_system_message(requirement, PYTHON_DEVELOPER, TEAM)
31 | 
32 |         self.history_message_append(system_message)
33 | 
34 |     def implement(self, report, is_init=False):
35 |         self.construct_with_report(report, is_init)
36 |         
37 |         try:
38 |             responses = self.itf.run(prompt=self.history_message, majority_at = self.majority, max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
39 |         except Exception as e:
40 |             print(e)
41 |             print("implement fail")
42 |             time.sleep(5)
43 |             return "error"
44 |         
45 |         if 'gpt' not in self.model:
46 |             generation = responses[0][responses[0].find("def"):]
47 |             tem = [s for s in generation.split('\n\n') if 'def ' in s or s[:1] == ' ']
48 |             code = '\n\n'.join(tem).strip('```').strip()
49 |         else:
50 |             code = code_truncate(responses[0])
51 |         
52 |         self.history_message = self.history_message[:-1]
53 |         self.history_message_append(code, "assistant")
54 |     
55 |         return code
56 |     
57 |     def history_message_append(self, system_message, role="user"):
58 |         self.history_message.append({
59 |             "role": role,
60 |             "content": system_message
61 |         })
62 |         
63 |     def construct_with_report(self, report, is_init=False):
64 |         if report != "":
65 |             if is_init:
66 |                 instruction = INSTRUCTPLAN.format(report=report.strip())
67 |             else:
68 |                 instruction = INSTRUCTREPORT.format(report=report.strip())
69 |             self.history_message_append(instruction)
70 |             self.history_message_append(INSTRUCTCODE.format(requirement=self.requirement))
71 | 


--------------------------------------------------------------------------------
/run_project.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Project-level code generation script
 4 | 
 5 | echo "Self-Collaboration Project-Level Code Generation"
 6 | echo "=============================================="
 7 | 
 8 | # Example 1: Portfolio Website  
 9 | echo "Generating personal portfolio website..."
10 | python3 main.py --mode project \
11 |     --project_type web_visualization \
12 |     --requirement "Create a modern personal portfolio website with sections for about me, skills, projects showcase, and contact form. Include smooth scrolling, animations, and responsive design. Use a professional color scheme and modern typography." \
13 |     --output_dir "generated_portfolio" \
14 |     --model "gpt-3.5-turbo" \
15 |     --max_round 2 \
16 |     --max_tokens 4096 \
17 |     --temperature 0
18 | 
19 | echo ""
20 | echo "Portfolio generation complete! Check the 'generated_portfolio' folder."
21 | echo "Open generated_portfolio/index.html in your browser to view the result."
22 | echo ""
23 | 
24 | # Example 2: Web Visualization Dashboard
25 | echo "Generating interactive data visualization dashboard..."
26 | python3 main.py --mode project \
27 |     --project_type web_visualization \
28 |     --requirement "Create an interactive data visualization dashboard that displays sales data with multiple chart types (bar charts, line charts, pie charts). Include filters for date range and product categories. Make it responsive and visually appealing with modern UI design." \
29 |     --output_dir "generated_dashboard" \
30 |     --model "gpt-3.5-turbo" \
31 |     --max_round 2 \
32 |     --max_tokens 4096 \
33 |     --temperature 0
34 | 
35 | echo ""
36 | echo "Dashboard generation complete! Check the 'generated_dashboard' folder."
37 | echo "Open generated_dashboard/index.html in your browser to view the result."
38 | echo ""
39 | 
40 | 
41 | # Example 3: Real-time Analytics Dashboard
42 | echo "Generating real-time analytics dashboard..."
43 | python3 main.py --mode project \
44 |     --project_type web_visualization \
45 |     --requirement "Create a real-time analytics dashboard for monitoring website traffic and user behavior. Include live charts for page views, user sessions, bounce rate, and geographic distribution. Add real-time notifications and customizable widgets." \
46 |     --output_dir "generated_analytics" \
47 |     --model "gpt-3.5-turbo" \
48 |     --max_round 2 \
49 |     --max_tokens 4096 \
50 |     --temperature 0
51 | 
52 | echo ""
53 | echo "Analytics dashboard generation complete! Check the 'generated_analytics' folder."
54 | echo "Open generated_analytics/index.html in your browser to view the result."
55 | echo ""
56 | 
57 | echo "=============================================="
58 | echo "All project generations completed!"
59 | echo "You can also run custom projects with:"
60 | echo "python main.py --mode universal --requirement 'Your custom requirement' --output_dir 'your_output_folder'"
61 | 


--------------------------------------------------------------------------------
/project_main.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import copy
 3 | import json
 4 | import argparse
 5 | import tqdm
 6 | 
 7 | from project_session import ProjectSession
 8 | from utils import construct_system_message
 9 | 
10 | parser = argparse.ArgumentParser()
11 | parser.add_argument('--project_type', type=str, default='web_visualization', 
12 |                    choices=['web_visualization', 'data_analysis', 'api_service', 'desktop_app'])
13 | parser.add_argument('--requirement', type=str, required=True, help='Project requirement description')
14 | parser.add_argument('--output_dir', type=str, default='generated_project')
15 | parser.add_argument('--model', type=str, default='gpt-3.5-turbo')
16 | parser.add_argument('--max_round', type=int, default=3)
17 | parser.add_argument('--max_tokens', type=int, default=1024) 
18 | parser.add_argument('--majority', type=int, default=1)
19 | parser.add_argument('--temperature', type=float, default=0.2)
20 | parser.add_argument('--top_p', type=float, default=0.95)
21 | parser.add_argument('--verbose', action='store_true')
22 | args = parser.parse_args()
23 | 
24 | 
25 | if __name__ == '__main__':
26 |     from roles.project_roles import PROJECT_TEAM, PROJECT_ARCHITECT, PROJECT_DEVELOPER, PROJECT_TESTER, UI_DESIGNER
27 | 
28 |     OUTPUT_DIR = args.output_dir
29 |     
30 |     # Create output directory
31 |     os.makedirs(OUTPUT_DIR, exist_ok=True)
32 | 
33 |     try:
34 |         # Initialize project session with enhanced roles
35 |         session = ProjectSession(
36 |             team_description=PROJECT_TEAM,
37 |             architect_description=PROJECT_ARCHITECT, 
38 |             developer_description=PROJECT_DEVELOPER,
39 |             tester_description=PROJECT_TESTER,
40 |             ui_designer_description=UI_DESIGNER,
41 |             requirement=args.requirement,
42 |             project_type=args.project_type,
43 |             model=args.model, 
44 |             majority=args.majority,
45 |             max_tokens=args.max_tokens, 
46 |             temperature=args.temperature,
47 |             top_p=args.top_p, 
48 |             max_round=args.max_round,
49 |             output_dir=OUTPUT_DIR
50 |         )
51 |         
52 |         # Run project generation session
53 |         project_files, session_history = session.run_project_session()
54 |         
55 |         # Save session history
56 |         with open(os.path.join(OUTPUT_DIR, 'session_history.json'), 'w', encoding='utf-8') as f:
57 |             json.dump(session_history, f, indent=2, ensure_ascii=False)
58 |             
59 |         print(f"Project generated successfully in: {OUTPUT_DIR}")
60 |         print(f"Generated files: {list(project_files.keys())}")
61 |         
62 |         # If web project, provide instructions for running
63 |         if args.project_type == 'web_visualization' and 'index.html' in project_files:
64 |             print("\nTo view the web application:")
65 |             print(f"Open {os.path.join(OUTPUT_DIR, 'index.html')} in your browser")
66 |             
67 |     except Exception as e:
68 |         print(f"Project generation failed: {str(e)}")
69 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import re
  3 | import ast
  4 | import time
  5 | import difflib
  6 | import copy
  7 | 
  8 | 
  9 | def code_truncate_regex(code):
 10 |     code_regex = r"```(.*?|)\n(?P<code>.*?)```"
 11 |     match = re.search(code_regex, code, re.DOTALL)
 12 |     code = match.group("code") if match else ""
 13 |     return code
 14 |     
 15 | def code_truncate(response):
 16 |     code = code_truncate_regex(response)
 17 |     if code == "":
 18 |         generation = response[response.find("def"):]
 19 |         tem = [s for s in generation.split('\n\n') if 'def ' in s or s[:1] == ' ']
 20 |         code = '\n\n'.join(tem).strip('```').strip()
 21 |     return code
 22 | 
 23 | def prompt_split_humaneval(prompt, mehotd_name):
 24 |     prompt = prompt.strip()
 25 |     prompt = prompt.replace("\r\n", "\n")
 26 |     before_func = prompt[:prompt.rfind("def ")]
 27 |     code = prompt[prompt.rfind("def "):]
 28 | 
 29 |     comment_start_1 = re.search("\"\"\"", code)
 30 |     comment_start_2 = re.search("\'\'\'", code)
 31 |     if comment_start_1:
 32 |         comment_start = comment_start_1.end()
 33 |     elif comment_start_2:
 34 |         comment_start = comment_start_2.end()
 35 | 
 36 | 
 37 |     example_start_1 = re.search("[eE]xample(:)?", code)
 38 |     example_start_2 = re.search("[fF]or [eE]xamble(:)?", code)
 39 |     example_start_3 = re.search(">>>", code)
 40 |     example_start_4 = re.search(mehotd_name+"\(.+\)", code[comment_start:])
 41 | 
 42 | 
 43 |     if example_start_1:
 44 |         comment = code[comment_start:example_start_1.start()]
 45 |         example = code[example_start_1.start():-4]
 46 |     elif example_start_2:
 47 |         comment = code[comment_start:example_start_2.start()]
 48 |         example = code[example_start_2.start():-4]
 49 |     elif example_start_3:
 50 |         comment = code[comment_start:example_start_3.start()]
 51 |         example = "Example:\n"+code[example_start_3.start():-4]
 52 |     elif example_start_4:
 53 |         comment = code[comment_start:example_start_4.start()+comment_start]
 54 |         example = "Example:\n"+code[example_start_4.start()+comment_start:-4]
 55 |     else:
 56 |         comment = code[comment_start:-4]
 57 |         example = ""
 58 |     comment = comment.strip().replace("\n", " ")
 59 |     comment = re.sub("\s+", " ", comment)
 60 | 
 61 |     example = re.sub("\n(\s)*","\n\t",example)
 62 |     test_case = "\t"+example.strip()
 63 |     signature = code[:code.index("\n")+1]
 64 | 
 65 |     return before_func, signature, comment, test_case
 66 | 
 67 | def build_test_method(test_list, test_imports, method_name):
 68 |     if test_imports:
 69 |         test_imports = "\n".join(test_imports)
 70 |         test_method = test_imports + "\n"
 71 |     else:
 72 |         test_method = ""
 73 |     test_method = "def check(" + method_name + "):\n"
 74 |     if len(test_list) == 0:
 75 |         return test_method + "\treturn True" + "\n"
 76 |     for test in test_list:
 77 |         test_method += '\t' + test + "\n"
 78 |     return test_method.strip("\n")
 79 | 
 80 | def find_method_name(code, lang="python"):
 81 |     try:
 82 |         parsed = ast.parse(code)
 83 |         function_defs = [node for node in parsed.body if isinstance(node, ast.FunctionDef)]
 84 |         if function_defs:
 85 |             if len(function_defs) == 1:
 86 |                 method_name = function_defs[0].name
 87 |             else:
 88 |                 method_name = function_defs[-1].name if function_defs[-1].name != "main" else function_defs[-2].name
 89 |         else:
 90 |             method_name = None
 91 |     except:
 92 |         method_name = None
 93 | 
 94 |     return method_name
 95 | 
 96 | 
 97 | def code_split(func):
 98 |     '''
 99 |     Split code into signature, comment and function body
100 |     '''
101 |     func = func.replace("\r\n", "\n")
102 |     before_func = func[:func.rfind("def ")]
103 |     code = func[func.rfind("def "):]
104 | 
105 |     is_comment = False
106 |     comments = []
107 |     
108 |     statements = code.split("\n")
109 |     for s_idx, s in enumerate(statements):
110 |         s = s.strip()
111 |         if s.startswith("def"):
112 |             signature = statements[:s_idx+1]
113 |             method_name = s.split("def ")[1].split("(")[0]
114 |             func_body_idx = s_idx+1
115 |             tmp_statement = statements[func_body_idx].strip()
116 |             if not tmp_statement.startswith("'''"):
117 |                 break
118 |         elif s.startswith("'''") and not is_comment:
119 |             is_comment = True
120 | 
121 |         elif is_comment:
122 |             if s.startswith("'''"):
123 |                 is_comment = False
124 |                 func_body_idx = s_idx+1
125 |                 break
126 |             comments.append(s)
127 |     func_body = statements[func_body_idx:]
128 |     return method_name, "\n".join(signature), "\n".join(comments), "\n".join(func_body), before_func
129 | 
130 | def construct_system_message(requirement, role, team=''):
131 |     if team == '':
132 |         system_message = "The requirement from users is: \n{'requirement':\n"  +  "'"+ requirement.replace('\n\n','\n').strip(".") + "'\n}\n\n" + role
133 |     else:
134 |         system_message = team + '\n '+ \
135 |                     "The requirement from users is: \n{'requirement':\n"  +  "'"+ requirement.replace('\n\n','\n').strip(".") + "'\n}\n\n" + \
136 |                     role
137 |                 
138 |     return system_message
139 |     


--------------------------------------------------------------------------------
/evaluate/execute/execution.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT license.
  3 | 
  4 | import ctypes
  5 | libgcc_s = ctypes.CDLL('libgcc_s.so.1')
  6 | 
  7 | from collections import defaultdict
  8 | from concurrent.futures import as_completed, ProcessPoolExecutor
  9 | import logging
 10 | 
 11 | from execute._execution import check_correctness, check_correctness_with_test_cases, check_correctness_T
 12 | 
 13 | logging.basicConfig(
 14 |     format="SystemLog: [%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
 15 |     datefmt="%Y-%m-%d %H:%M:%S",
 16 |     level=logging.INFO,
 17 | )
 18 | 
 19 | logger = logging.getLogger(__name__)
 20 | 
 21 | def evaluate_with_test_code(
 22 |     samples,
 23 |     timeout
 24 | ):
 25 |     # logger.info(f'Start evaluation with test code, timeout={timeout}')
 26 |     # Check the generated samples against test suites.
 27 |     with ProcessPoolExecutor() as executor:
 28 | 
 29 |         futures = []
 30 |         existed_completion = defaultdict(set)
 31 |         results = defaultdict(defaultdict)
 32 | 
 33 |         for sample in samples:
 34 |             task_id = sample["task_id"]
 35 |             prompt = sample['prompt']
 36 |             test = sample['test']
 37 |             entry_point = sample['entry_point']
 38 |             completion = sample["completion"]
 39 |             if completion in existed_completion[task_id]:
 40 |                 continue
 41 |             existed_completion[task_id].add(completion)
 42 |             args = (task_id, prompt, completion, test, entry_point, timeout)
 43 |             future = executor.submit(check_correctness, *args)
 44 |             futures.append(future)
 45 |         logger.info(f'{len(futures)} execution requests are submitted')
 46 |         
 47 |         for idx, future in enumerate(as_completed(futures)):
 48 |             # logger.info('[{}/{}] execution completed'.format(idx+1, len(futures)))
 49 |             result = future.result()
 50 |             results[result["task_id"]][result["completion"]] = result
 51 | 
 52 |     # logger.info('execution finished! start parsing results')
 53 |     samples_with_result = []
 54 |     for sample in samples:
 55 |         task_id = sample["task_id"]
 56 |         completion = sample["completion"]
 57 |         result = results[task_id][completion]
 58 |         sample["result"] = result["result"]
 59 |         sample["passed"] = result["passed"]
 60 |         samples_with_result.append(sample)
 61 | 
 62 |     assert len(samples_with_result) == len(samples), "Some problems are not attempted."
 63 | 
 64 |     return samples_with_result
 65 | 
 66 | def evaluate_with_test_cases(
 67 |     solutions,
 68 |     test_cases_dict,
 69 |     timeout,
 70 |     limit
 71 | ):
 72 |     # logger.info(f'Start evaluation with test cases, timeout={timeout}, limit={limit}')
 73 |     # Check the generated solutions against test suites.
 74 |     with ProcessPoolExecutor() as executor:
 75 |         futures = []
 76 |         results_list = []
 77 |         existed_completion = defaultdict(set)
 78 | 
 79 |         for solution in solutions:
 80 |             task_id = solution['task_id']
 81 |             prompt = solution['prompt']
 82 |             completion = solution['completion']
 83 |             if completion in existed_completion[task_id]:
 84 |                 continue
 85 |             existed_completion[task_id].add(completion)
 86 |             task_test_cases = test_cases_dict[task_id]
 87 |             if not task_test_cases:
 88 |                 continue
 89 |             # get limited test cases
 90 |             limited_task_test_cases = [cases_per_sample[:limit] for cases_per_sample in task_test_cases]
 91 |             limited_task_test_cases = sum(limited_task_test_cases, [])
 92 |             
 93 |             args = (task_id, prompt, completion, list(set(limited_task_test_cases)), timeout)
 94 |             future = executor.submit(check_correctness_with_test_cases, *args)
 95 |             futures.append(future)
 96 | 
 97 |         # logger.info(f'{len(futures)} execution requests are submitted')
 98 |         for idx, future in enumerate(as_completed(futures)):
 99 |             # logger.info('[{}/{}] execution completed'.format(idx+1, len(futures)))
100 |             result = future.result()
101 |             results_list.append(result)
102 | 
103 |     # logger.info('execution finished!')
104 |     return results_list
105 | 
106 | def evaluate_with_test_code_T(
107 |     samples,
108 |     timeout
109 | ):
110 |     # logger.info(f'Start evaluation with test code, timeout={timeout}')
111 |     # Check the generated samples against test suites.
112 |     with ProcessPoolExecutor() as executor:
113 | 
114 |         futures = []
115 |         existed_completion = defaultdict(set)
116 |         results = defaultdict(defaultdict)
117 | 
118 |         for sample in samples:
119 |             task_id = sample["task_id"]
120 |             prompt = sample['prompt']
121 |             test = sample['test_case_list']
122 |             entry_point = sample['entry_point']
123 |             completion = sample["completion"]
124 |             if completion in existed_completion[task_id]:
125 |                 continue
126 |             existed_completion[task_id].add(completion)
127 |             args = (task_id, prompt, completion, test, entry_point, timeout)
128 |             future = executor.submit(check_correctness_T, *args)
129 |             futures.append(future)
130 |         # logger.info(f'{len(futures)} execution requests are submitted')
131 |         
132 |         for idx, future in enumerate(as_completed(futures)):
133 |             # logger.info('[{}/{}] execution completed'.format(idx+1, len(futures)))
134 |             result = future.result()
135 |             results[result["task_id"]][result["completion"]] = result
136 | 
137 |     # logger.info('execution finished! start parsing results')
138 |     samples_with_result = []
139 |     for sample in samples:
140 |         task_id = sample["task_id"]
141 |         completion = sample["completion"]
142 |         result = results[task_id][completion]
143 |         sample["result"] = result["result"]
144 |         sample["passed"] = result["passed"]
145 |         samples_with_result.append(sample)
146 | 
147 |     assert len(samples_with_result) == len(samples), "Some problems are not attempted."
148 | 
149 |     return samples_with_result


--------------------------------------------------------------------------------
/evaluate/evaluation.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT license.
  3 | 
  4 | import statistics
  5 | import numpy as np
  6 | from collections import defaultdict
  7 | import logging
  8 | from typing import List, Union
  9 | import itertools
 10 | from utils import build_test_method
 11 | 
 12 | logging.basicConfig(
 13 |     format="SystemLog: [%(asctime)s][%(name)s][%(levelname)s] - %(message)s",
 14 |     datefmt="%Y-%m-%d %H:%M:%S",
 15 |     level=logging.INFO,
 16 | )
 17 | 
 18 | logger = logging.getLogger(__name__)
 19 | 
 20 | def _dictionized_ground_truth_results(ground_truth_exec_results):
 21 |     ground_truth_results_by_task_and_solution = defaultdict(defaultdict)
 22 |     for result in ground_truth_exec_results:
 23 |         ground_truth_results_by_task_and_solution[result['task_id']][result['completion']] = result['passed']
 24 |     return ground_truth_results_by_task_and_solution
 25 | 
 26 | def _turn_solution_scores_into_choose_count(sorted_solution_scores, topk):
 27 |     wrapped = True if type(sorted_solution_scores[0][0]) == list else False
 28 |     result = []
 29 |     if wrapped:
 30 |         last_score = sorted_solution_scores[0][1]
 31 |         merged_solutions_and_score = [sorted_solution_scores[0]]
 32 |         for solutions, score in sorted_solution_scores[1:]:
 33 |             if score == last_score:
 34 |                 last_solutions = merged_solutions_and_score[-1][0]
 35 |                 merged_solutions_and_score[-1] = (last_solutions + solutions, score)
 36 |             else:
 37 |                 merged_solutions_and_score.append((solutions, score))
 38 |                 last_score = score
 39 |         for solutions_and_score in merged_solutions_and_score:
 40 |             result.append((solutions_and_score[0], 1))  # choose one from solutions_and_score
 41 |     else:
 42 |         topk_scores = sorted(list(set([i[1] for i in sorted_solution_scores])), reverse=True)
 43 |         for score in topk_scores:
 44 |             solutions = [s[0] for s in sorted_solution_scores if s[1] == score]
 45 |             result.append((solutions, 1))
 46 | 
 47 |     if len(result) >= topk:
 48 |         return result[:topk]
 49 |     else:
 50 |         intial_choose_count = [1]*len(result)
 51 |         for i in range(topk-len(result)):
 52 |             intial_choose_count[i%len(result)] += 1
 53 |         for i, choose_count in enumerate(intial_choose_count):
 54 |             result[i] = (result[i][0], choose_count)
 55 |         return result
 56 |     
 57 | 
 58 | def get_result_of_sorted_solutions(ground_truth_results_list, sorted_solutions_by_task, topks=[1,2,10]):
 59 |     # sorted_solutions_by_task {task_id: [([solutions], score), ...]}
 60 |     def _count_correct(solutions: list, ground_truth_results: dict) -> int:
 61 |         return sum([ground_truth_results[s] for s in solutions])
 62 |     
 63 |     ground_truth_results = _dictionized_ground_truth_results(ground_truth_results_list)
 64 |     topk_results = dict()
 65 |     for topk in topks:
 66 |         random_pass_at_k_by_task = pass_at_K_by_task(ground_truth_results_list, k=topk)
 67 |         pass_rates = []
 68 |         for task_id in ground_truth_results.keys():
 69 |             all_wrong_probability = 1
 70 |             if task_id in sorted_solutions_by_task and sorted_solutions_by_task[task_id]:
 71 |                 solutions_and_probability = _turn_solution_scores_into_choose_count(sorted_solutions_by_task[task_id], topk)
 72 |                 for solutions, choose_count in solutions_and_probability:
 73 |                     current_wrong_prob = _estimator(len(solutions), _count_correct(solutions, ground_truth_results[task_id]), 1)
 74 |                     repeat_current_wrong_prob = pow(current_wrong_prob, choose_count)
 75 |                     all_wrong_probability *= repeat_current_wrong_prob
 76 |                 pass_rates.append(1-all_wrong_probability)
 77 |             else:
 78 |                 pass_rates.append(random_pass_at_k_by_task[task_id])
 79 |         
 80 |         # the avg rate of all tasks
 81 |         topk_results[f'pass@{topk}'] = round(statistics.mean(pass_rates), 4)
 82 |     logger.info(topk_results)
 83 | 
 84 | def pass_at_K_by_task(results, k):
 85 |     result_dict = defaultdict(list)
 86 |     for line in results:
 87 |         result_dict[line['task_id']].append(line['passed'])
 88 |     result = dict()
 89 |     for task_id in result_dict.keys():
 90 |         total = len(result_dict[task_id])
 91 |         correct = sum(result_dict[task_id])
 92 |         score = _estimate_pass_at_k(total, [correct], k)[0]
 93 |         result[task_id] = score
 94 |     return result
 95 | 
 96 | def pass_at_K(results, k = [1, 10, 100]):
 97 |     def _turn_list_into_dict(result_lines):
 98 |         result_dict = defaultdict(list)
 99 |         for line in result_lines:
100 |             result_dict[line['task_id']].append(line['passed'])
101 |         return result_dict
102 | 
103 |     # Calculate pass@k.
104 |     total, correct = [], []
105 |     for passed in _turn_list_into_dict(results).values():
106 |         total.append(len(passed))
107 |         correct.append(sum(passed))
108 | 
109 |     total = np.array(total)
110 |     correct = np.array(correct)
111 | 
112 |     ks = k
113 |     pass_at_k = {f"pass@{k}": round(_estimate_pass_at_k(total, correct, k).mean(), 4)
114 |                  for k in ks if (total >= k).all()}
115 |     logger.info(pass_at_k)
116 | 
117 | def _estimator(n: int, c: int, k: int) -> float:
118 |     """
119 |     Calculates comb(n - c, k) / comb(n, k).
120 |     """
121 |     if n - c < k:
122 |         return 0
123 |     return np.prod(1.0 - k / np.arange(n - c + 1, n + 1))
124 | 
125 | def _estimate_pass_at_k(
126 |     num_samples: Union[int, List[int], np.ndarray],
127 |     num_correct: Union[List[int], np.ndarray],
128 |     k: int
129 | ) -> np.ndarray:
130 |     """
131 |     Estimates pass@k of each problem and returns them in an array.
132 |     """
133 |     if isinstance(num_samples, int):
134 |         num_samples_it = itertools.repeat(num_samples, len(num_correct))
135 |     else:
136 |         assert len(num_samples) == len(num_correct)
137 |         num_samples_it = iter(num_samples)
138 | 
139 |     return np.array([1.0 - _estimator(int(n), int(c), k) for n, c in zip(num_samples_it, num_correct)])
140 | 
141 | 
142 | 
143 | 
144 | def AvgPassRatio(handled_solutions):
145 |     total = len(handled_solutions)
146 |     correct = sum([1 for s in handled_solutions if s['passed']])
147 |     return correct/total
148 |     
149 | 
150 | 


--------------------------------------------------------------------------------
/core/backend.py:
--------------------------------------------------------------------------------
  1 | import openai
  2 | from openai import OpenAI
  3 | import time
  4 | import os
  5 | import httpx
  6 | import json
  7 | import tiktoken
  8 | 
  9 | 
 10 | def count_tokens(messages, model="gpt-3.5-turbo"):
 11 |     """Calculate the number of tokens in messages"""
 12 |     try:
 13 |         encoding = tiktoken.encoding_for_model(model)
 14 |     except KeyError:
 15 |         encoding = tiktoken.get_encoding("cl100k_base")
 16 |     
 17 |     num_tokens = 0
 18 |     for message in messages:
 19 |         if isinstance(message, dict):
 20 |             # ChatGPT format
 21 |             num_tokens += 4  # every message follows <im_start>{role/name}\n{content}<im_end>\n
 22 |             for key, value in message.items():
 23 |                 if isinstance(value, str):
 24 |                     num_tokens += len(encoding.encode(value))
 25 |                 if key == "name":  # if there's a name, the role is omitted
 26 |                     num_tokens += -1  # role is always required and always 1 token
 27 |         elif isinstance(message, str):
 28 |             num_tokens += len(encoding.encode(message))
 29 |     
 30 |     num_tokens += 2  # every reply is primed with <im_start>assistant
 31 |     return num_tokens
 32 | 
 33 | 
 34 | def adjust_max_tokens(messages, model='gpt-3.5-turbo', desired_max_tokens=4096):
 35 |     """Intelligently adjust max_tokens to avoid exceeding model limits"""
 36 |     
 37 |     # Context length limits for different models
 38 |     model_limits = {
 39 |         'gpt-3.5-turbo': 16385,
 40 |         'gpt-3.5-turbo-0301': 4096,
 41 |         'gpt-3.5-turbo-0613': 4096,
 42 |         'gpt-3.5-turbo-16k': 16385,
 43 |         'gpt-4': 8192,
 44 |         'gpt-4-0314': 8192,
 45 |         'gpt-4-0613': 8192,
 46 |         'gpt-4-32k': 32768,
 47 |         'gpt-4-1106-preview': 128000,
 48 |         'gpt-4-turbo': 128000,
 49 |     }
 50 |     
 51 |     # Get the maximum context length for the model
 52 |     max_context_length = model_limits.get(model, 16385)  # Default to gpt-3.5-turbo limit
 53 |     
 54 |     # Calculate input message token count
 55 |     input_tokens = count_tokens(messages, model)
 56 |     
 57 |     # Calculate available tokens, leaving some margin
 58 |     available_tokens = max_context_length - input_tokens - 100  # Leave 100 tokens margin
 59 |     
 60 |     # Adjust max_tokens
 61 |     if available_tokens <= 0:
 62 |         print(f"⚠️ Warning: Input too long ({input_tokens} tokens), truncating...")
 63 |         # If input is too long, truncate message history
 64 |         adjusted_max_tokens = min(512, max_context_length // 4)  # Use minimum output tokens
 65 |         truncated_messages = truncate_messages(messages, max_context_length - adjusted_max_tokens - 100)
 66 |         return truncated_messages, adjusted_max_tokens
 67 |     else:
 68 |         # Use smaller value: desired max_tokens or available token count
 69 |         adjusted_max_tokens = min(desired_max_tokens, available_tokens)
 70 |         print(f"📊 Token info: Input={input_tokens}, Available={available_tokens}, Using={adjusted_max_tokens}")
 71 |         return messages, max(adjusted_max_tokens, 256)  # Use at least 256 tokens
 72 | 
 73 | 
 74 | def truncate_messages(messages, max_tokens):
 75 |     """Truncate message history to fit token limit"""
 76 |     if not messages:
 77 |         return messages
 78 |     
 79 |     # Keep system messages and latest user messages
 80 |     truncated = []
 81 |     
 82 |     # If first message is system message, keep it
 83 |     if messages and isinstance(messages[0], dict) and messages[0].get('role') == 'system':
 84 |         truncated.append(messages[0])
 85 |         remaining_messages = messages[1:]
 86 |     else:
 87 |         remaining_messages = messages
 88 |     
 89 |     # Start from latest messages, add messages forward until token limit reached
 90 |     current_tokens = count_tokens(truncated)
 91 |     for message in reversed(remaining_messages):
 92 |         message_tokens = count_tokens([message])
 93 |         if current_tokens + message_tokens <= max_tokens:
 94 |             truncated.insert(-1 if truncated and truncated[0].get('role') == 'system' else 0, message)
 95 |             current_tokens += message_tokens
 96 |         else:
 97 |             break
 98 |     
 99 |     print(f"📝 Truncated messages: {len(messages)} -> {len(truncated)} messages")
100 |     return truncated
101 | 
102 | 
103 | def call_chatgpt(prompt, model='gpt-3.5-turbo', stop=None, temperature=0., top_p=0.95,
104 |         max_tokens=128, echo=False, majority_at=None):
105 |     
106 |     client = OpenAI()
107 |     
108 |     # Intelligently adjust token count
109 |     adjusted_prompt, adjusted_max_tokens = adjust_max_tokens(prompt, model, max_tokens)
110 |     
111 |     num_completions = majority_at if majority_at is not None else 1
112 |     num_completions_batch_size = 10
113 | 
114 |     completions = []
115 |     for i in range(20 * (num_completions // num_completions_batch_size + 1)):
116 |         try:
117 |             requested_completions = min(num_completions_batch_size, num_completions - len(completions))
118 | 
119 |             response = client.chat.completions.create(
120 |                 model=model,
121 |                 messages=adjusted_prompt,
122 |                 max_tokens=adjusted_max_tokens,
123 |                 temperature=temperature,
124 |                 top_p=top_p,
125 |                 n=requested_completions
126 |             )
127 |             completions.extend([choice.message.content for choice in response.choices])
128 |             if len(completions) >= num_completions:
129 |                 return completions[:num_completions]
130 |         
131 |         except openai.BadRequestError as e:
132 |             error_message = str(e)
133 |             if "context_length_exceeded" in error_message or "maximum context length" in error_message:
134 |                 print(f"🔄 Context length exceeded, reducing max_tokens from {adjusted_max_tokens} to {adjusted_max_tokens // 2}")
135 |                 adjusted_max_tokens = max(adjusted_max_tokens // 2, 256)
136 |                 # Further truncate messages
137 |                 adjusted_prompt, adjusted_max_tokens = adjust_max_tokens(
138 |                     adjusted_prompt, model, adjusted_max_tokens
139 |                 )
140 |                 continue
141 |             else:
142 |                 print(f"❌ API Error: {error_message}")
143 |                 raise e
144 |         
145 |         except openai.RateLimitError as e:
146 |             print(f"⏳ Rate limit hit, waiting {min(i**2, 60)} seconds...")
147 |             time.sleep(min(i**2, 60))
148 |         
149 |         except Exception as e:
150 |             print(f"❌ Unexpected error: {str(e)}")
151 |             if i < 3:  # First 3 attempts try reducing token count
152 |                 adjusted_max_tokens = max(adjusted_max_tokens // 2, 256)
153 |                 print(f"🔄 Retrying with reduced max_tokens: {adjusted_max_tokens}")
154 |                 continue
155 |             else:
156 |                 raise e
157 |     
158 |     raise RuntimeError('Failed to call GPT API after multiple attempts')


--------------------------------------------------------------------------------
/roles/project_tester.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import copy
  3 | import json
  4 | import time
  5 | 
  6 | from core import interface
  7 | from utils import construct_system_message
  8 | from tools import global_tool_orchestrator
  9 | 
 10 | 
 11 | class ProjectTester(object):
 12 |     def __init__(self, team_description, tester_description, requirement, project_type,
 13 |                  model='gpt-3.5-turbo', majority=1, max_tokens=1024, temperature=0.2, top_p=0.95):
 14 |         self.model = model
 15 |         self.majority = majority
 16 |         self.max_tokens = max_tokens
 17 |         self.temperature = temperature
 18 |         self.top_p = top_p
 19 |         self.history_message = []
 20 |         self.requirement = requirement
 21 |         self.project_type = project_type
 22 |         
 23 |         # Add tool orchestrator reference
 24 |         self.tool_orchestrator = global_tool_orchestrator
 25 | 
 26 |         self.itf = interface.ProgramInterface(
 27 |             stop='',
 28 |             verbose=False,
 29 |             model=self.model,
 30 |         )
 31 | 
 32 |         system_message = construct_system_message(requirement, tester_description, team_description)
 33 |         self.history_message_append(system_message)
 34 | 
 35 |     def test_project(self, project_files, architecture_plan):
 36 |         """Test the complete project implementation with tool assistance"""
 37 |         
 38 |         print("🧪 Enhanced testing with automated tools...")
 39 |         
 40 |         # Use automated tester for initial validation
 41 |         automated_tester = self.tool_orchestrator.get_tool("automated_tester")
 42 |         quality_checker = self.tool_orchestrator.get_tool("quality_checker")
 43 |         
 44 |         tool_results = {}
 45 |         if automated_tester:
 46 |             automated_results = automated_tester.execute("full_suite", 
 47 |                                                        files=project_files,
 48 |                                                        project_type=self.project_type)
 49 |             tool_results["automated_tests"] = automated_results
 50 |             print(f"🤖 Automated tests: {automated_results.get('status', 'completed')}")
 51 |         
 52 |         if quality_checker:
 53 |             quality_results = quality_checker.execute("check_quality",
 54 |                                                      files=project_files,
 55 |                                                      project_type=self.project_type)
 56 |             tool_results["quality_check"] = quality_results
 57 |             print(f"🔍 Quality check: {quality_results.get('overall_score', 'N/A')}/10")
 58 |         
 59 |         files_summary = self._create_files_summary(project_files)
 60 |         
 61 |         # Create enhanced testing prompt with tool results
 62 |         tool_summary = ""
 63 |         if tool_results:
 64 |             tool_summary = f"""
 65 |         
 66 |         Tool-based Testing Results:
 67 |         {json.dumps(tool_results, indent=2)}
 68 |         """
 69 |         
 70 |         testing_prompt = f"""
 71 |         Please test the following project implementation comprehensively, considering both manual review and automated tool results.
 72 |         
 73 |         Project Type: {self.project_type}
 74 |         Requirements: {self.requirement}
 75 |         Architecture Plan: {architecture_plan}
 76 |         
 77 |         Project Files:
 78 |         {files_summary}
 79 |         {tool_summary}
 80 |         
 81 |         Please perform the following types of testing:
 82 |         
 83 |         1. **Code Quality Analysis** (Enhanced with tools):
 84 |            - Check for syntax errors
 85 |            - Verify proper HTML structure and semantic markup
 86 |            - Validate CSS syntax and modern practices
 87 |            - Review JavaScript functionality and ES6+ usage
 88 |            - Consider automated tool findings
 89 |         
 90 |         2. **Functionality Testing**:
 91 |            - Verify all required features are implemented
 92 |            - Check if the project meets the stated requirements
 93 |            - Test user interactions and interface elements
 94 |            - Validate data flow and API integration
 95 |         
 96 |         3. **Design and UX Testing**:
 97 |            - Evaluate visual design and modern UI principles
 98 |            - Check responsive design implementation
 99 |            - Assess user experience and accessibility (WCAG compliance)
100 |            - Review color schemes, typography, and spacing
101 |         
102 |         4. **Performance and Best Practices**:
103 |            - Review code organization and structure
104 |            - Check for performance optimizations
105 |            - Verify modern web development practices
106 |            - Assess browser compatibility
107 |            - Review security considerations
108 |         
109 |         5. **Integration and Compatibility**:
110 |            - Test cross-browser functionality
111 |            - Check mobile responsiveness
112 |            - Validate external dependencies
113 |            - Assess loading performance
114 |         
115 |         Provide a detailed test report with:
116 |         - Issues found (categorized by severity)
117 |         - Suggestions for improvement (prioritized)
118 |         - Overall assessment with scoring
119 |         - Specific areas that need attention
120 |         - Validation of tool-based findings
121 |         
122 |         If everything looks good, clearly state "All tests passed - no issues found."
123 |         Include a final recommendation for deployment readiness.
124 |         """
125 |         
126 |         self.history_message_append(testing_prompt)
127 |         
128 |         try:
129 |             responses = self.itf.run(prompt=self.history_message, majority_at=self.majority,
130 |                                    max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
131 |         except Exception as e:
132 |             print(f"Project testing failed: {e}")
133 |             time.sleep(5)
134 |             return "error"
135 | 
136 |         test_report = responses[0]
137 |         self.history_message_append(test_report, "assistant")
138 |         
139 |         return test_report
140 |     
141 |     def _create_files_summary(self, project_files):
142 |         """Create a summary of project files for testing"""
143 |         summary = ""
144 |         
145 |         for file_path, content in project_files.items():
146 |             summary += f"\n--- {file_path} ---\n"
147 |             # Include first 20 lines or 1000 characters, whichever is shorter
148 |             lines = content.split('\n')
149 |             if len(lines) > 20:
150 |                 preview = '\n'.join(lines[:20]) + '\n... (truncated)'
151 |             else:
152 |                 preview = content[:1000]
153 |                 if len(content) > 1000:
154 |                     preview += "... (truncated)"
155 |             summary += preview + "\n"
156 |         
157 |         return summary
158 |     
159 |     def history_message_append(self, message, role="user"):
160 |         self.history_message.append({
161 |             "role": role,
162 |             "content": message
163 |         })
164 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import copy
  3 | import json
  4 | import argparse
  5 | import tqdm
  6 | 
  7 | from session import Session
  8 | from project_session import ProjectSession
  9 | from datasets import load_dataset, load_from_disk
 10 | from utils import prompt_split_humaneval, find_method_name, code_split, build_test_method
 11 | 
 12 | parser = argparse.ArgumentParser()
 13 | parser.add_argument('--mode', type=str, default='function', choices=['function', 'project'], 
 14 |                    help='Generation mode: function-level or project-level')
 15 | 
 16 | # Function-level arguments
 17 | parser.add_argument('--dataset', type=str, default='humaneval')
 18 | parser.add_argument('--lang', type=str, default='python')
 19 | parser.add_argument('--output_path', type=str, default='output.jsonl')
 20 | parser.add_argument('--signature', action='store_true')
 21 | 
 22 | # Project-level arguments  
 23 | parser.add_argument('--project_type', type=str, default='web_visualization',
 24 |                    choices=['web_visualization', 'data_analysis', 'api_service', 'desktop_app'])
 25 | parser.add_argument('--requirement', type=str, help='Project requirement description (for project mode)')
 26 | parser.add_argument('--output_dir', type=str, default='generated_project')
 27 | 
 28 | # Common arguments
 29 | parser.add_argument('--model', type=str, default='gpt-3.5-turbo')
 30 | parser.add_argument('--max_round', type=int, default=2)
 31 | parser.add_argument('--max_tokens', type=int, default=512) 
 32 | parser.add_argument('--majority', type=int, default=1)
 33 | parser.add_argument('--temperature', type=float, default=0.0)
 34 | parser.add_argument('--top_p', type=float, default=0.95)
 35 | 
 36 | parser.add_argument('--fail_list', type=list, default=[])
 37 | parser.add_argument('--append', action='store_true')
 38 | parser.add_argument('--verbose', action='store_true')
 39 | parser.add_argument("--timeout", type=float, default=10, help="how many seconds to wait during execution for each test case")
 40 | args = parser.parse_args()
 41 | 
 42 | 
 43 | if __name__ == '__main__':
 44 |     if args.mode == 'project':
 45 |         # Project-level code generation
 46 |         from roles.project_roles import (PROJECT_TEAM, PROJECT_ARCHITECT, PROJECT_DEVELOPER, 
 47 |                                        PROJECT_TESTER, UI_DESIGNER)
 48 |         
 49 |         if not args.requirement:
 50 |             print("Error: --requirement is required for project mode")
 51 |             exit(1)
 52 |         
 53 |         OUTPUT_DIR = args.output_dir
 54 |         os.makedirs(OUTPUT_DIR, exist_ok=True)
 55 | 
 56 |         try:
 57 |             # Initialize project session
 58 |             session = ProjectSession(
 59 |                 team_description=PROJECT_TEAM,
 60 |                 architect_description=PROJECT_ARCHITECT, 
 61 |                 developer_description=PROJECT_DEVELOPER,
 62 |                 tester_description=PROJECT_TESTER,
 63 |                 ui_designer_description=UI_DESIGNER,
 64 |                 requirement=args.requirement,
 65 |                 project_type=args.project_type,
 66 |                 model=args.model, 
 67 |                 majority=args.majority,
 68 |                 max_tokens=max(args.max_tokens, 1024),  # Use larger tokens for project mode
 69 |                 temperature=max(args.temperature, 0.2),  # Use higher temperature for creativity
 70 |                 top_p=args.top_p, 
 71 |                 max_round=args.max_round,
 72 |                 output_dir=OUTPUT_DIR
 73 |             )
 74 |             
 75 |             # Run project generation session
 76 |             project_files, session_history = session.run_project_session()
 77 |             
 78 |             # Save session history
 79 |             with open(os.path.join(OUTPUT_DIR, 'session_history.json'), 'w', encoding='utf-8') as f:
 80 |                 json.dump(session_history, f, indent=2, ensure_ascii=False)
 81 |                 
 82 |             print(f"Project generated successfully in: {OUTPUT_DIR}")
 83 |             print(f"Generated files: {list(project_files.keys())}")
 84 |             
 85 |             # If web project, provide instructions for running
 86 |             if args.project_type == 'web_visualization' and 'index.html' in project_files:
 87 |                 print("\nTo view the web application:")
 88 |                 print(f"Open {os.path.join(OUTPUT_DIR, 'index.html')} in your browser")
 89 |                 
 90 |         except Exception as e:
 91 |             print(f"Project generation failed: {str(e)}")
 92 |     
 93 |     else:
 94 |         # Original function-level code generation
 95 |         from roles.rule_descriptions_actc import TEAM, ANALYST, PYTHON_DEVELOPER, TESTER
 96 | 
 97 |         OUTPUT_PATH = args.output_path
 98 |         fail_list = args.fail_list
 99 | 
100 |         # load dataset
101 |         if args.dataset == 'humaneval':
102 |             if args.lang == 'python':
103 |                 dataset = load_dataset("openai_humaneval")
104 |                 dataset_key = ["test"]
105 | 
106 |         with open(OUTPUT_PATH, 'w+') as f:
107 |             for key in dataset_key:
108 |                 pbar = tqdm.tqdm(dataset[key], total=len(dataset[key]))
109 |                 for idx, task in enumerate(pbar):
110 |                     
111 |                     if args.dataset == 'humaneval':
112 |                         method_name = task['entry_point']
113 |                         before_func, signature, intent, public_test_case = prompt_split_humaneval(task['prompt'],method_name)
114 |                         args.signature = True
115 |                         if args.signature:
116 |                             intent = task['prompt']
117 |                         
118 |                         test = task['test']
119 | 
120 |                     try:
121 |                         session = Session(TEAM, ANALYST, PYTHON_DEVELOPER, TESTER,requirement=intent, model=args.model, majority=args.majority, 
122 |                                         max_tokens=args.max_tokens, temperature=args.temperature, 
123 |                                         top_p=args.top_p, max_round=args.max_round, before_func=before_func)
124 |                         
125 |                         code, session_history = session.run_session()
126 | 
127 |                     except RuntimeError as e:
128 |                         print(str(e))
129 |                         print("task-%d fail"%(task['task_id']))
130 |                         fail_list.append(task['task_id'])
131 |                         continue
132 | 
133 |                     if  code == "error":
134 |                         continue
135 | 
136 |                     entry_point = find_method_name(code)
137 |                     solution = {
138 |                         'task_id': task['task_id'],
139 |                         'prompt': before_func+"\n",
140 |                         'test': test,
141 |                         'entry_point': entry_point,
142 |                         'completion': code,
143 |                         'session_history': session_history,
144 |                     }
145 |                     f.write(json.dumps(solution) + '\n')
146 |                     f.flush()
147 | 


--------------------------------------------------------------------------------
/roles/ui_designer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import copy
  3 | import json
  4 | import time
  5 | 
  6 | from core import interface
  7 | from utils import construct_system_message
  8 | from .enhanced_role import EnhancedRole
  9 | 
 10 | 
 11 | class UIDesigner(EnhancedRole):
 12 |     def __init__(self, team_description, designer_description, requirement, project_type,
 13 |                  model='gpt-3.5-turbo', majority=1, max_tokens=1024, temperature=0.2, top_p=0.95):
 14 |         # Initialize base class
 15 |         super().__init__()
 16 |         
 17 |         self.model = model
 18 |         self.majority = majority
 19 |         self.max_tokens = max_tokens
 20 |         self.temperature = temperature
 21 |         self.top_p = top_p
 22 |         self.history_message = []
 23 |         self.requirement = requirement
 24 |         self.project_type = project_type
 25 | 
 26 |         self.itf = interface.ProgramInterface(
 27 |             stop='',
 28 |             verbose=False,
 29 |             model=self.model,
 30 |         )
 31 | 
 32 |         system_message = construct_system_message(requirement, designer_description, team_description)
 33 |         self.history_message_append(system_message)
 34 | 
 35 |     def design_ui(self, architecture_plan):
 36 |         """Design the user interface for the project"""
 37 |         
 38 |         ui_design_prompt = f"""
 39 |         Based on the following architecture plan and project requirements, create a comprehensive UI design.
 40 |         
 41 |         Project Type: {self.project_type}
 42 |         Requirements: {self.requirement}
 43 |         Architecture Plan: {architecture_plan}
 44 |         
 45 |         Please provide a detailed UI design specification in JSON format:
 46 |         
 47 |         {{
 48 |             "design_system": {{
 49 |                 "colors": {{
 50 |                     "primary": "#007bff",
 51 |                     "secondary": "#6c757d",
 52 |                     "background": "#f8f9fa",
 53 |                     "text": "#333333"
 54 |                 }},
 55 |                 "typography": {{
 56 |                     "font_family": "Arial, sans-serif",
 57 |                     "headings": "bold",
 58 |                     "body_size": "16px"
 59 |                 }},
 60 |                 "spacing": {{
 61 |                     "base_unit": "8px",
 62 |                     "container_padding": "20px",
 63 |                     "section_margin": "40px"
 64 |                 }}
 65 |             }},
 66 |             "layout": {{
 67 |                 "type": "responsive",
 68 |                 "grid_system": "CSS Grid / Flexbox",
 69 |                 "breakpoints": {{
 70 |                     "mobile": "768px",
 71 |                     "tablet": "992px",
 72 |                     "desktop": "1200px"
 73 |                 }}
 74 |             }},
 75 |             "components": [
 76 |                 {{
 77 |                     "name": "header",
 78 |                     "description": "Main navigation and branding",
 79 |                     "styling": "Modern, clean design with navigation menu"
 80 |                 }},
 81 |                 {{
 82 |                     "name": "main_content",
 83 |                     "description": "Primary content area",
 84 |                     "styling": "Card-based layout with proper spacing"
 85 |                 }},
 86 |                 {{
 87 |                     "name": "footer",
 88 |                     "description": "Footer information",
 89 |                     "styling": "Minimal, informational"
 90 |                 }}
 91 |             ],
 92 |             "interactions": [
 93 |                 {{
 94 |                     "element": "buttons",
 95 |                     "behavior": "Hover effects with smooth transitions"
 96 |                 }},
 97 |                 {{
 98 |                     "element": "forms",
 99 |                     "behavior": "Real-time validation with clear feedback"
100 |                 }}
101 |             ],
102 |             "accessibility": [
103 |                 "ARIA labels for interactive elements",
104 |                 "Keyboard navigation support",
105 |                 "High contrast color ratios",
106 |                 "Responsive text sizing"
107 |             ]
108 |         }}
109 |         
110 |         For web visualization projects, emphasize:
111 |         - Modern, clean aesthetic
112 |         - Interactive data visualization elements
113 |         - Responsive design for all devices
114 |         - Professional color scheme
115 |         - Clear typography hierarchy
116 |         - Intuitive user interactions
117 |         """
118 |         
119 |         self.history_message_append(ui_design_prompt)
120 |         
121 |         try:
122 |             responses = self.itf.run(prompt=self.history_message, majority_at=self.majority,
123 |                                    max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
124 |         except Exception as e:
125 |             print(f"UI design failed: {e}")
126 |             time.sleep(5)
127 |             return "error"
128 | 
129 |         ui_design = responses[0]
130 |         self.history_message_append(ui_design, "assistant")
131 |         
132 |         return ui_design
133 |     
134 |     def _adapt_to_frontend(self):
135 |         """Adapt to frontend projects"""
136 |         self.design_priorities = [
137 |             "responsive_design",
138 |             "modern_ui_components", 
139 |             "cross_browser_compatibility",
140 |             "performance_optimization",
141 |             "accessibility"
142 |         ]
143 |         
144 |     def _adapt_to_backend(self):
145 |         """Adapt to backend projects"""
146 |         self.design_priorities = [
147 |             "admin_interface",
148 |             "api_documentation_ui",
149 |             "monitoring_dashboard",
150 |             "simple_clean_design"
151 |         ]
152 |         
153 |     def _adapt_to_data_science(self):
154 |         """Adapt to data science projects"""
155 |         self.design_priorities = [
156 |             "data_visualization",
157 |             "interactive_charts",
158 |             "dashboard_layout",
159 |             "analytical_ui_components",
160 |             "filtering_controls"
161 |         ]
162 |         
163 |     def _adapt_to_mobile(self):
164 |         """Adapt to mobile application projects"""
165 |         self.design_priorities = [
166 |             "mobile_first_design",
167 |             "touch_friendly_interface",
168 |             "native_app_feel",
169 |             "gesture_support",
170 |             "offline_ui_states"
171 |         ]
172 |         
173 |     def _adapt_to_desktop(self):
174 |         """Adapt to desktop application projects"""
175 |         self.design_priorities = [
176 |             "desktop_conventions",
177 |             "keyboard_shortcuts",
178 |             "menu_systems",
179 |             "toolbar_design",
180 |             "window_management"
181 |         ]
182 |         
183 |     def _adapt_to_fullstack(self):
184 |         """Adapt to fullstack projects"""
185 |         self.design_priorities = [
186 |             "unified_design_system",
187 |             "admin_and_user_interfaces",
188 |             "responsive_components",
189 |             "consistent_branding"
190 |         ]
191 | 
192 |     def history_message_append(self, message, role="user"):
193 |         self.history_message.append({
194 |             "role": role,
195 |             "content": message
196 |         })
197 | 


--------------------------------------------------------------------------------
/roles/project_roles.py:
--------------------------------------------------------------------------------
  1 | PROJECT_TEAM = '''There is a development team that includes a project architect, a full-stack developer, a tester, a UI designer, and a web visualization specialist. The team needs to develop complete projects that satisfy the requirements of users. Each role has different responsibilities and they need to collaborate with each other to create high-quality, functional applications with rich data visualizations and modern web interfaces.
  2 | '''
  3 | 
  4 | PROJECT_ARCHITECT = '''I want you to act as a project architect on our development team. Given a user requirement, your task is to:
  5 | 1. Analyze the overall project requirements and break them down into components
  6 | 2. Design the project structure including file organization, dependencies, and architecture
  7 | 3. Create a detailed implementation plan with specific files and their purposes
  8 | 4. Define the technology stack and frameworks to be used
  9 | 5. Specify the data flow and component interactions
 10 | 6. Plan for scalability, performance, and modern web standards
 11 | 7. Consider visualization requirements and data handling strategies
 12 | For web visualization projects, prioritize:
 13 | - Modern build tools and development workflow
 14 | - Component-based architecture
 15 | - Efficient data loading and caching strategies
 16 | - Responsive design considerations
 17 | - Performance optimization for large datasets
 18 | Remember, provide a comprehensive project plan in JSON format that includes:
 19 | - project_structure: detailed file tree with descriptions
 20 | - technology_stack: frameworks, libraries, and tools
 21 | - implementation_phases: step-by-step development plan
 22 | - component_interactions: how different parts connect
 23 | - data_flow: how data moves through the application
 24 | - performance_considerations: optimization strategies
 25 | '''
 26 | 
 27 | PROJECT_DEVELOPER = '''I want you to act as a full-stack developer on our development team. You will receive:
 28 | 1. Project architecture plans from the architect
 29 | 2. UI designs from the designer
 30 | 3. Test reports from the tester
 31 | 4. Visualization specifications from the visualization specialist
 32 | Your responsibilities include:
 33 | 1. Implementing backend logic and APIs
 34 | 2. Creating frontend components and interfaces
 35 | 3. Integrating different system components
 36 | 4. Writing clean, efficient, and maintainable code
 37 | 5. Following the project architecture and design guidelines
 38 | 6. Fixing issues based on test feedback
 39 | 7. Implementing interactive data visualizations with modern libraries
 40 | 8. Creating responsive and accessible web applications
 41 | For web visualization projects, focus on:
 42 | - Modern ES6+ JavaScript with async/await patterns
 43 | - Multiple visualization libraries (Chart.js, D3.js, Plotly, ECharts)
 44 | - Advanced CSS3 features (Grid, Flexbox, Animations, Custom Properties)
 45 | - Interactive features (filters, real-time updates, responsive charts)
 46 | - Performance optimization for large datasets
 47 | - Cross-browser compatibility and mobile responsiveness
 48 | - Progressive Web App features when applicable
 49 | - WebGL and Canvas optimization for complex visualizations
 50 | Remember, provide complete, functional code files with rich interactivity and beautiful design.
 51 | '''
 52 | 
 53 | PROJECT_TESTER = '''I want you to act as a project tester on our development team. Your responsibilities include:
 54 | 1. Creating comprehensive test plans for the entire project
 55 | 2. Writing unit tests for individual components
 56 | 3. Performing integration testing
 57 | 4. Testing user interfaces and user experience
 58 | 5. Identifying bugs, performance issues, and usability problems
 59 | 6. Providing detailed test reports with specific feedback
 60 | 7. Testing data visualization functionality and interactivity
 61 | 8. Validating responsive design across different devices
 62 | 9. Testing accessibility compliance (WCAG 2.1)
 63 | 10. Performance testing for data-heavy operations
 64 | For web visualization projects, focus on:
 65 | - Chart rendering accuracy and performance
 66 | - Interactive elements functionality
 67 | - Data loading and error handling
 68 | - Cross-browser compatibility
 69 | - Mobile touch interactions
 70 | - Accessibility features for visualizations
 71 | Remember, provide:
 72 | - Test cases that cover main functionality
 73 | - Test code when applicable
 74 | - Detailed bug reports with reproduction steps
 75 | - Performance and usability feedback
 76 | - Accessibility audit results
 77 | - Cross-browser compatibility reports
 78 | '''
 79 | 
 80 | UI_DESIGNER = '''I want you to act as a UI/UX designer on our development team. Your responsibilities include:
 81 | 1. Creating user interface designs based on project requirements
 82 | 2. Designing user experience flows and interactions
 83 | 3. Choosing appropriate color schemes, fonts, and layouts
 84 | 4. Creating responsive designs that work on different devices
 85 | 5. Ensuring accessibility and usability best practices
 86 | 6. Providing CSS styling and frontend design specifications
 87 | 7. Designing data visualization aesthetics and interaction patterns
 88 | 8. Creating modern design systems with consistent components
 89 | For web visualization projects, focus on:
 90 | - Modern design trends (glassmorphism, neumorphism, gradient overlays)
 91 | - Advanced CSS techniques (CSS Grid, Flexbox, animations, transitions)
 92 | - Color schemes optimized for data visualization (accessible contrasts)
 93 | - Typography that enhances readability of data and metrics
 94 | - Interactive UI patterns (hover states, loading animations, micro-interactions)
 95 | - Dark mode and light mode support
 96 | - Mobile-first responsive design approach
 97 | - Design systems with CSS custom properties
 98 | Remember, provide:
 99 | - Detailed UI specifications with CSS custom properties
100 | - Component-based design systems
101 | - Animation and transition specifications
102 | - Accessibility guidelines (WCAG 2.1 compliance)
103 | - Interactive prototyping guidance
104 | - Responsive design breakpoints and strategies
105 | '''
106 | 
107 | WEB_VISUALIZATION_SPECIALIST = '''I want you to act as a web visualization specialist with deep expertise in data visualization and interactive web applications. Your comprehensive responsibilities include:
108 | 
109 | **Core Visualization Expertise:**
110 | 1. Master-level proficiency in multiple visualization libraries:
111 |    - Chart.js (for standard charts with excellent performance)
112 |    - D3.js (for custom, complex visualizations)
113 |    - Plotly.js (for scientific and statistical visualizations)
114 |    - ECharts (for enterprise-grade dashboards)
115 |    - Three.js (for 3D visualizations)
116 |    - Leaflet/Mapbox (for geospatial data)
117 | 
118 | **Advanced Chart Types & Techniques:**
119 | 2. Implement diverse visualization types:
120 |    - Standard: Bar, Line, Pie, Scatter, Area charts
121 |    - Advanced: Heatmaps, Treemaps, Sunburst, Sankey diagrams
122 |    - Statistical: Box plots, Violin plots, Regression lines
123 |    - Time-series: Candlestick, Stream graphs, Timeline charts
124 |    - Geospatial: Choropleth maps, Heat maps, Marker clustering
125 |    - 3D: Surface plots, 3D scatter, WebGL-accelerated charts
126 | 
127 | **Interactive Features:**
128 | 3. Create rich interactivity:
129 |    - Real-time data updates with WebSockets
130 |    - Advanced filtering and drill-down capabilities
131 |    - Brush and zoom functionality
132 |    - Cross-chart filtering and linking
133 |    - Animation and smooth transitions
134 |    - Touch and gesture support for mobile
135 |    - Keyboard navigation for accessibility
136 | 
137 | **Performance & Optimization:**
138 | 4. Optimize for large datasets:
139 |    - Data virtualization and pagination
140 |    - Canvas rendering for performance
141 |    - WebGL acceleration when needed
142 |    - Lazy loading and progressive enhancement
143 |    - Memory management and garbage collection
144 |    - Efficient data structures and algorithms
145 | 
146 | **Modern Web Technologies:**
147 | 5. Leverage cutting-edge technologies:
148 |    - Web Workers for heavy computations
149 |    - WebAssembly for performance-critical operations
150 |    - Progressive Web App features
151 |    - Service Workers for offline functionality
152 |    - Modern JavaScript (ES2023+ features)
153 |    - TypeScript for type safety
154 | 
155 | **Data Integration:**
156 | 6. Handle diverse data sources:
157 |    - REST APIs and GraphQL
158 |    - CSV, JSON, XML parsing
159 |    - Real-time streams and WebSockets
160 |    - Database connections (when applicable)
161 |    - File uploads and drag-drop functionality
162 | 
163 | **Design & UX Excellence:**
164 | 7. Create exceptional user experiences:
165 |    - Responsive design for all devices
166 |    - Accessible visualizations (WCAG 2.1)
167 |    - Intuitive interaction patterns
168 |    - Progressive disclosure of complexity
169 |    - Error handling and loading states
170 |    - Contextual help and tooltips
171 | 
172 | Remember to always:
173 | - Provide complete, production-ready code
174 | - Include comprehensive error handling
175 | - Implement responsive design patterns
176 | - Add accessibility features
177 | - Optimize for performance
178 | - Include detailed code comments
179 | - Create modular, reusable components
180 | - Follow modern web development best practices
181 | 
182 | For each project, create visually stunning, highly interactive, and performant web applications that showcase the full potential of modern data visualization.
183 | '''
184 | 


--------------------------------------------------------------------------------
/session.py:
--------------------------------------------------------------------------------
  1 | from roles import Analyst, Coder, Tester
  2 | from utils import find_method_name
  3 | import time
  4 | from utils import code_truncate
  5 | 
  6 | 
  7 | class Session(object):
  8 |     def __init__(self, TEAM, ANALYST, PYTHON_DEVELOPER, TESTER, requirement, model='gpt-3.5-turbo', majority=1, max_tokens=512,
  9 |                                 temperature=0.0, top_p=0.95, max_round=4, before_func=''):
 10 | 
 11 |         self.session_history = {}
 12 |         self.max_round = max_round
 13 |         self.before_func = before_func
 14 |         self.requirement = requirement
 15 |         self.analyst = Analyst(TEAM, ANALYST, requirement, model, majority, max_tokens, temperature, top_p)
 16 |         self.coder = Coder(TEAM, PYTHON_DEVELOPER, requirement, model, majority, max_tokens, temperature, top_p)
 17 |         self.tester = Tester(TEAM, TESTER, requirement, model, majority, max_tokens, temperature, top_p)
 18 |     
 19 |     def run_session(self):
 20 |         plan = self.analyst.analyze()
 21 |         report = plan
 22 |         is_init=True
 23 |         self.session_history["plan"] = plan
 24 |         code = ""
 25 | 
 26 |         for i in range(self.max_round):
 27 | 
 28 |             naivecode = self.coder.implement(report, is_init)
 29 |             method_name = find_method_name(naivecode)
 30 |             if method_name:
 31 |                 code = naivecode
 32 |                 
 33 |             if code.strip() == "":
 34 |                 if i == 0:
 35 |                     code = "error"
 36 |                 else:
 37 |                     code = self.session_history['Round_{}'.format(i-1)]["code"]
 38 |                 break
 39 |             
 40 |             if i == self.max_round-1:
 41 |                 self.session_history['Round_{}'.format(i)] = {"code": code}
 42 |                 break
 43 |             
 44 |             tests = self.tester.test(code)
 45 |             test_report = code_truncate(tests)
 46 |             answer_report = unsafe_execute(self.before_func+code+'\n'+test_report+'\n'+f'check({method_name})', '')
 47 |             report = f'The compilation output of the preceding code is: {answer_report}'
 48 | 
 49 |             is_init = False
 50 |             self.session_history['Round_{}'.format(i)] = {"code": code, "report": report}
 51 | 
 52 |             if (plan == "error") or (code == "error") or (report == "error"):
 53 |                 code = "error"
 54 |                 break
 55 |             
 56 |             if answer_report == "Code Test Passed.":
 57 |                 break
 58 | 
 59 |         self.analyst.itf.clear_history()
 60 |         self.coder.itf.clear_history()
 61 |         self.tester.itf.clear_history()
 62 | 
 63 |         return code, self.session_history
 64 | 
 65 |     def run_analyst_coder(self):
 66 |         plan = self.analyst.analyze()
 67 |         is_init=True
 68 |         self.session_history["plan"] = plan
 69 |         code = self.coder.implement(plan, is_init)
 70 | 
 71 |         if (plan == "error") or (code == "error"):
 72 |             code = "error"
 73 | 
 74 |         self.analyst.itf.clear_history()
 75 |         self.coder.itf.clear_history()
 76 |         self.tester.itf.clear_history()
 77 | 
 78 |         return code, self.session_history
 79 | 
 80 | 
 81 |     def run_coder_tester(self):
 82 |         report = ""
 83 |         is_init=True
 84 |         code = ""
 85 |         
 86 |         for i in range(self.max_round):
 87 | 
 88 |             naivecode = self.coder.implement(report, is_init)
 89 |             if find_method_name(naivecode):
 90 |                 code = naivecode
 91 | 
 92 |             if code.strip() == "":
 93 |                 if i == 0:
 94 |                     code = self.coder.implement(report, is_init=True)
 95 |                 else:
 96 |                     code = self.session_history['Round_{}'.format(i-1)]["code"]
 97 |                 break
 98 |             
 99 |             if i == self.max_round-1:
100 |                 self.session_history['Round_{}'.format(i)] = {"code": code}
101 |                 break
102 |             tests = self.tester.test(code)
103 |             test_report = code_truncate(tests)
104 |             answer_report = unsafe_execute(self.before_func+code+'\n'+test_report+'\n'+f'check({method_name})', '')
105 |             report = f'The compilation output of the preceding code is: {answer_report}'
106 | 
107 |             is_init = False
108 |             self.session_history['Round_{}'.format(i)] = {"code": code, "report": report}
109 | 
110 |             if (code == "error") or (report == "error"):
111 |                 code = "error"
112 |                 break
113 |             
114 |             if report == "Code Test Passed.":
115 |                 break
116 | 
117 |         self.analyst.itf.clear_history()
118 |         self.coder.itf.clear_history()
119 |         self.tester.itf.clear_history()
120 | 
121 |         return code, self.session_history
122 | 
123 |     def run_coder_only(self):
124 |         plan = ""
125 |         code = self.coder.implement(plan, is_init=True)
126 |         self.coder.itf.clear_history()
127 |         return code, self.session_history
128 | 
129 | 
130 | import contextlib
131 | import faulthandler
132 | import io
133 | import os
134 | import platform
135 | import signal
136 | import tempfile 
137 | 
138 | def unsafe_execute(code, report):
139 | 
140 |         with create_tempdir():
141 | 
142 |             # These system calls are needed when cleaning up tempdir.
143 |             import os
144 |             import shutil
145 |             rmtree = shutil.rmtree
146 |             rmdir = os.rmdir
147 |             chdir = os.chdir 
148 | 
149 |             # Disable functionalities that can make destructive changes to the test.
150 |             reliability_guard()
151 | 
152 |             # Construct the check program and run it.
153 |             check_program = (
154 |                 code + report
155 |             )
156 | 
157 |             try:
158 |                 exec_globals = {}
159 |                 with swallow_io():
160 |                     timeout = 10
161 |                     with time_limit(timeout):
162 |                         exec(check_program, exec_globals)
163 |                 result = "Code Test Passed."
164 |             except AssertionError as e:
165 |                 result = f"failed with AssertionError. {e}"
166 |             except TimeoutException:
167 |                 result = "timed out"
168 |             except BaseException as e:
169 |                 result = f"{e}"
170 | 
171 | 
172 |             # Needed for cleaning up.
173 |             shutil.rmtree = rmtree
174 |             os.rmdir = rmdir
175 |             os.chdir = chdir
176 |             return result
177 | 
178 | 
179 | def reliability_guard(maximum_memory_bytes = None):
180 |     """
181 |     This disables various destructive functions and prevents the generated code
182 |     from interfering with the test (e.g. fork bomb, killing other processes,
183 |     removing filesystem files, etc.)
184 | 
185 |     WARNING
186 |     This function is NOT a security sandbox. Untrusted code, including, model-
187 |     generated code, should not be blindly executed outside of one. See the 
188 |     Codex paper for more information about OpenAI's code sandbox, and proceed
189 |     with caution.
190 |     """
191 | 
192 |     if maximum_memory_bytes is not None:
193 |         import resource
194 |         resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
195 |         resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
196 |         if not platform.uname().system == 'Darwin':
197 |             resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
198 | 
199 |     faulthandler.disable()
200 | 
201 |     import builtins
202 |     builtins.exit = None
203 |     builtins.quit = None
204 | 
205 |     import os
206 |     os.environ['OMP_NUM_THREADS'] = '1'
207 | 
208 |     os.rmdir = None
209 |     os.chdir = None
210 | 
211 |     import shutil
212 |     shutil.rmtree = None
213 |     shutil.move = None
214 |     shutil.chown = None
215 | 
216 |     import subprocess
217 |     subprocess.Popen = None  # type: ignore
218 | 
219 |     __builtins__['help'] = None
220 | 
221 |     import sys
222 |     sys.modules['ipdb'] = None
223 |     sys.modules['joblib'] = None
224 |     sys.modules['resource'] = None
225 |     sys.modules['psutil'] = None
226 |     sys.modules['tkinter'] = None
227 |     
228 | @contextlib.contextmanager
229 | def time_limit(seconds: float):
230 |     def signal_handler(signum, frame):
231 |         raise TimeoutException("Timed out!")
232 |     signal.setitimer(signal.ITIMER_REAL, seconds)
233 |     signal.signal(signal.SIGALRM, signal_handler)
234 |     try:
235 |         yield
236 |     finally:
237 |         signal.setitimer(signal.ITIMER_REAL, 0)
238 | 
239 | 
240 | @contextlib.contextmanager
241 | def swallow_io():
242 |     stream = WriteOnlyStringIO()
243 |     with contextlib.redirect_stdout(stream):
244 |         with contextlib.redirect_stderr(stream):
245 |             with redirect_stdin(stream):
246 |                 yield
247 | 
248 | 
249 | @contextlib.contextmanager
250 | def create_tempdir():
251 |     with tempfile.TemporaryDirectory() as dirname:
252 |         with chdir(dirname):
253 |             yield dirname
254 |             
255 | class TimeoutException(Exception):
256 |     pass
257 | 
258 | 
259 | class WriteOnlyStringIO(io.StringIO):
260 |     """ StringIO that throws an exception when it's read from """
261 | 
262 |     def read(self, *args, **kwargs):
263 |         raise IOError
264 | 
265 |     def readline(self, *args, **kwargs):
266 |         raise IOError
267 | 
268 |     def readlines(self, *args, **kwargs):
269 |         raise IOError
270 | 
271 |     def readable(self, *args, **kwargs):
272 |         """ Returns True if the IO object can be read. """
273 |         return False
274 | 
275 | 
276 | class redirect_stdin(contextlib._RedirectStream):  # type: ignore
277 |     _stream = 'stdin'
278 | 
279 | 
280 | @contextlib.contextmanager
281 | def chdir(root):
282 |     if root == ".":
283 |         yield
284 |         return
285 |     cwd = os.getcwd()
286 |     os.chdir(root)
287 |     try:
288 |         yield
289 |     except BaseException as exc:
290 |         raise exc
291 |     finally:
292 |         os.chdir(cwd)


--------------------------------------------------------------------------------
/roles/web_visualization_specialist.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import copy
  3 | import json
  4 | import time
  5 | import re
  6 | 
  7 | from core import interface
  8 | from utils import construct_system_message
  9 | 
 10 | 
 11 | class WebVisualizationSpecialist(object):
 12 |     def __init__(self, team_description, specialist_description, requirement, project_type,
 13 |                  model='gpt-3.5-turbo', majority=1, max_tokens=2048, temperature=0.3, top_p=0.95):
 14 |         self.model = model
 15 |         self.majority = majority
 16 |         self.max_tokens = max_tokens
 17 |         self.temperature = temperature
 18 |         self.top_p = top_p
 19 |         self.history_message = []
 20 |         self.requirement = requirement
 21 |         self.project_type = project_type
 22 | 
 23 |         self.itf = interface.ProgramInterface(
 24 |             stop='',
 25 |             verbose=False,
 26 |             model=self.model,
 27 |         )
 28 | 
 29 |         system_message = construct_system_message(requirement, specialist_description, team_description)
 30 |         self.history_message_append(system_message)
 31 | 
 32 |     def create_visualization_plan(self, architecture_plan):
 33 |         """Create detailed visualization specifications"""
 34 |         
 35 |         visualization_prompt = f"""
 36 |         Based on the project requirements and architecture plan, create a comprehensive visualization specification.
 37 |         
 38 |         Project Type: {self.project_type}
 39 |         Requirements: {self.requirement}
 40 |         Architecture Plan: {architecture_plan}
 41 |         
 42 |         Please provide a detailed JSON specification that includes:
 43 |         
 44 |         {{
 45 |             "visualization_strategy": {{
 46 |                 "primary_library": "Chart.js|D3.js|Plotly|ECharts",
 47 |                 "secondary_libraries": ["Three.js", "Leaflet"],
 48 |                 "data_processing": "client-side|server-side|hybrid",
 49 |                 "performance_approach": "canvas|svg|webgl"
 50 |             }},
 51 |             "chart_specifications": [
 52 |                 {{
 53 |                     "chart_type": "bar|line|pie|scatter|heatmap|treemap|3d",
 54 |                     "library": "Chart.js|D3.js|Plotly|ECharts",
 55 |                     "data_source": "static|api|realtime|uploaded",
 56 |                     "interactivity": ["hover", "click", "zoom", "brush", "filter"],
 57 |                     "animations": ["entrance", "update", "transition"],
 58 |                     "responsive": true,
 59 |                     "accessibility": ["aria-labels", "keyboard-nav", "screen-reader"]
 60 |                 }}
 61 |             ],
 62 |             "interactive_features": {{
 63 |                 "real_time_updates": true|false,
 64 |                 "data_filters": ["date-range", "category", "search"],
 65 |                 "cross_chart_interactions": true|false,
 66 |                 "export_functionality": ["png", "pdf", "csv", "json"],
 67 |                 "drill_down_capabilities": true|false
 68 |             }},
 69 |             "performance_optimizations": {{
 70 |                 "lazy_loading": true|false,
 71 |                 "data_virtualization": true|false,
 72 |                 "web_workers": true|false,
 73 |                 "caching_strategy": "memory|localStorage|sessionStorage",
 74 |                 "progressive_loading": true|false
 75 |             }},
 76 |             "modern_features": {{
 77 |                 "pwa_support": true|false,
 78 |                 "offline_functionality": true|false,
 79 |                 "web_components": true|false,
 80 |                 "module_system": "es6|webpack|rollup",
 81 |                 "typescript_support": true|false
 82 |             }},
 83 |             "sample_data_structure": {{
 84 |                 "format": "json|csv|api_response",
 85 |                 "schema": "describe expected data structure",
 86 |                 "sample_size": "number of records for demo"
 87 |             }}
 88 |         }}
 89 |         
 90 |         Focus on creating specifications that will result in:
 91 |         1. High-performance visualizations
 92 |         2. Rich interactivity and user engagement
 93 |         3. Beautiful, modern aesthetics
 94 |         4. Responsive design for all devices
 95 |         5. Accessibility compliance
 96 |         6. Scalability for large datasets
 97 |         """
 98 |         
 99 |         self.history_message_append(visualization_prompt)
100 |         
101 |         try:
102 |             responses = self.itf.run(prompt=self.history_message, majority_at=self.majority,
103 |                                    max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
104 |         except Exception as e:
105 |             print(f"Visualization planning failed: {e}")
106 |             time.sleep(5)
107 |             return "error"
108 | 
109 |         visualization_plan = responses[0]
110 |         self.history_message_append(visualization_plan, "assistant")
111 |         
112 |         return visualization_plan
113 | 
114 |     def generate_advanced_code_templates(self, visualization_plan, ui_design):
115 |         """Generate advanced code templates with modern visualization libraries"""
116 |         
117 |         template_prompt = f"""
118 |         Based on the visualization plan and UI design, generate comprehensive code templates.
119 |         
120 |         Visualization Plan: {visualization_plan}
121 |         UI Design: {ui_design}
122 |         
123 |         Please provide complete code templates for the following files:
124 |         
125 |         === TEMPLATE: js/visualization-engine.js ===
126 |         // Advanced visualization engine with multiple library support
127 |         // Include: Chart.js, D3.js, and Plotly integration
128 |         // Features: Dynamic chart switching, real-time updates, responsive design
129 |         
130 |         === TEMPLATE: js/data-manager.js ===  
131 |         // Data management and processing utilities
132 |         // Include: API integration, data transformation, caching, real-time updates
133 |         
134 |         === TEMPLATE: js/interaction-controller.js ===
135 |         // Advanced interaction handling
136 |         // Include: Cross-chart filtering, brush-zoom, touch support, keyboard navigation
137 |         
138 |         === TEMPLATE: css/visualization-styles.css ===
139 |         // Modern CSS for visualizations
140 |         // Include: CSS Grid layouts, animations, dark/light themes, responsive design
141 |         
142 |         === TEMPLATE: js/performance-optimizer.js ===
143 |         // Performance optimization utilities
144 |         // Include: Data virtualization, lazy loading, Web Workers integration
145 |         
146 |         Requirements for each template:
147 |         1. Use modern ES6+ JavaScript features
148 |         2. Include comprehensive error handling
149 |         3. Implement responsive design patterns
150 |         4. Add accessibility features (ARIA labels, keyboard navigation)
151 |         5. Include performance optimizations
152 |         6. Use modular, reusable code structure
153 |         7. Add detailed comments explaining functionality
154 |         8. Support multiple visualization libraries
155 |         9. Include sample data and demo functionality
156 |         10. Implement modern web standards (Progressive Web App features)
157 |         
158 |         Each template should be production-ready and demonstrate best practices.
159 |         """
160 |         
161 |         self.history_message_append(template_prompt)
162 |         
163 |         try:
164 |             responses = self.itf.run(prompt=self.history_message, majority_at=self.majority,
165 |                                    max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
166 |         except Exception as e:
167 |             print(f"Template generation failed: {e}")
168 |             time.sleep(5)
169 |             return {}
170 | 
171 |         templates_response = responses[0]
172 |         self.history_message_append(templates_response, "assistant")
173 |         
174 |         # Parse the templates from the response
175 |         templates = self._parse_templates(templates_response)
176 |         
177 |         return templates
178 | 
179 |     def _parse_templates(self, templates_response):
180 |         """Parse the templates response into separate files"""
181 |         templates = {}
182 |         
183 |         # Pattern to match template sections
184 |         template_pattern = r'=== TEMPLATE: (.+?) ===\n(.*?)(?=\n=== TEMPLATE:|$)'
185 |         matches = re.findall(template_pattern, templates_response, re.DOTALL)
186 |         
187 |         for file_path, content in matches:
188 |             file_path = file_path.strip()
189 |             content = content.strip()
190 |             templates[file_path] = content
191 |         
192 |         return templates
193 | 
194 |     def optimize_for_performance(self, project_files):
195 |         """Provide performance optimization suggestions"""
196 |         
197 |         files_summary = "\n".join([f"{path}: {len(content)} characters" 
198 |                                   for path, content in project_files.items()])
199 |         
200 |         optimization_prompt = f"""
201 |         Analyze the following project files and provide performance optimization recommendations.
202 |         
203 |         Project Files Summary:
204 |         {files_summary}
205 |         
206 |         Please provide specific recommendations for:
207 |         1. JavaScript performance optimizations
208 |         2. CSS optimization strategies
209 |         3. Data loading and caching improvements
210 |         4. Visualization rendering optimizations
211 |         5. Mobile performance considerations
212 |         6. Bundle size reduction techniques
213 |         7. Progressive loading strategies
214 |         8. Memory management improvements
215 |         
216 |         Focus on modern web performance best practices and visualization-specific optimizations.
217 |         """
218 |         
219 |         self.history_message_append(optimization_prompt)
220 |         
221 |         try:
222 |             responses = self.itf.run(prompt=self.history_message, majority_at=self.majority,
223 |                                    max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
224 |         except Exception as e:
225 |             print(f"Performance optimization failed: {e}")
226 |             time.sleep(5)
227 |             return "error"
228 | 
229 |         optimization_suggestions = responses[0]
230 |         self.history_message_append(optimization_suggestions, "assistant")
231 |         
232 |         return optimization_suggestions
233 | 
234 |     def history_message_append(self, message, role="user"):
235 |         self.history_message.append({
236 |             "role": role,
237 |             "content": message
238 |         })


--------------------------------------------------------------------------------
/roles/enhanced_role.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | """
  3 | Enhanced role base class - provides common tool integration capabilities for all roles
  4 | """
  5 | 
  6 | import sys
  7 | import os
  8 | sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  9 | 
 10 | # Use new simplified tool system
 11 | from tools import global_tool_orchestrator, CodeAnalyzer, FileManager, QualityChecker
 12 | 
 13 | class EnhancedRole:
 14 |     """Enhanced role base class - provides common tool integration capabilities for all roles"""
 15 |     
 16 |     def __init__(self):
 17 |         self.tools = []
 18 |         self.project_specification = None
 19 |         self.quality_gates = []
 20 |         self.feedback_history = []
 21 |         self.role_name = self.__class__.__name__
 22 |         
 23 |         # Add universal tools
 24 |         self._add_universal_tools()
 25 |     
 26 |     def _add_universal_tools(self):
 27 |         """Add universal tools"""
 28 |         self.tools.extend([
 29 |             CodeAnalyzer(),
 30 |             FileManager(), 
 31 |             QualityChecker()
 32 |         ])
 33 |     
 34 |     def set_project_specification(self, project_spec):
 35 |         """Set project specification that all roles need to follow"""
 36 |         self.project_specification = project_spec
 37 |         self._adapt_to_project_type(project_spec["type"])
 38 |         print(f"📋 {self.role_name} adapted to project type: {project_spec['type']}")
 39 |     
 40 |     def _adapt_to_project_type(self, project_type):
 41 |         """Adapt role behavior based on project type - subclasses implement specific adaptation logic"""
 42 |         # Base class provides default implementation, subclasses can override
 43 |         adaptation_strategies = {
 44 |             "web_frontend": self._adapt_to_frontend,
 45 |             "web_backend": self._adapt_to_backend,
 46 |             "data_science": self._adapt_to_data_science,
 47 |             "mobile": self._adapt_to_mobile,
 48 |             "desktop": self._adapt_to_desktop,
 49 |             "fullstack": self._adapt_to_fullstack
 50 |         }
 51 |         
 52 |         strategy = adaptation_strategies.get(project_type, self._adapt_to_generic)
 53 |         strategy()
 54 |     
 55 |     def _adapt_to_frontend(self):
 56 |         """Adapt for frontend projects"""
 57 |         print(f"🎨 {self.role_name} adapted for frontend development")
 58 |     
 59 |     def _adapt_to_backend(self):
 60 |         """Adapt for backend projects"""
 61 |         print(f"⚙️ {self.role_name} adapted for backend development")
 62 |     
 63 |     def _adapt_to_data_science(self):
 64 |         """Adapt for data science projects"""
 65 |         print(f"📊 {self.role_name} adapted for data science project")
 66 |     
 67 |     def _adapt_to_mobile(self):
 68 |         """Adapt for mobile projects"""
 69 |         print(f"📱 {self.role_name} adapted for mobile development")
 70 |     
 71 |     def _adapt_to_desktop(self):
 72 |         """Adapt for desktop projects"""
 73 |         print(f"🖥️ {self.role_name} adapted for desktop development")
 74 |     
 75 |     def _adapt_to_fullstack(self):
 76 |         """Adapt for fullstack projects"""
 77 |         print(f"🌐 {self.role_name} adapted for fullstack development")
 78 |     
 79 |     def _adapt_to_generic(self):
 80 |         """Adapt for generic projects"""
 81 |         print(f"🔧 {self.role_name} adapted for generic project")
 82 |     
 83 |     def execute_with_quality_gate(self, phase, task_func, *args, **kwargs):
 84 |         """Execute task under quality gate protection"""
 85 |         print(f"🚪 {self.role_name} executing {phase} with quality gate protection...")
 86 |         
 87 |         # Execute task
 88 |         result = task_func(*args, **kwargs)
 89 |         
 90 |         # Quality gate check
 91 |         gatekeeper = self.get_tool("QualityGatekeeper")
 92 |         if gatekeeper and self.project_specification:
 93 |             print(f"🔍 Running quality gate for {phase}...")
 94 |             gate_result = gatekeeper.execute(phase, result, self.project_specification)
 95 |             
 96 |             if not gate_result["gate_passed"]:
 97 |                 print(f"❌ Quality gate failed for {phase}")
 98 |                 print(f"🔧 Attempting to fix issues: {gate_result['blocking_issues']}")
 99 |                 # Quality gate failed, attempt to fix
100 |                 result = self._handle_quality_gate_failure(result, gate_result, phase)
101 |             else:
102 |                 print(f"✅ Quality gate passed for {phase} (score: {gate_result['overall_score']:.1f}/10)")
103 |             
104 |             # Record quality gate history
105 |             self.quality_gates.append({
106 |                 "phase": phase,
107 |                 "result": gate_result,
108 |                 "timestamp": self._get_timestamp()
109 |             })
110 |         
111 |         return result
112 |     
113 |     def _handle_quality_gate_failure(self, result, gate_result, phase):
114 |         """Handle quality gate failure"""
115 |         print(f"🛠️ {self.role_name} handling quality gate failure in {phase}...")
116 |         
117 |         # Attempt to fix based on failure reasons
118 |         blocking_issues = gate_result.get("blocking_issues", [])
119 |         recommendations = gate_result.get("recommendations", [])
120 |         
121 |         # Record feedback
122 |         self.feedback_history.append({
123 |             "phase": phase,
124 |             "issues": blocking_issues,
125 |             "recommendations": recommendations,
126 |             "timestamp": self._get_timestamp()
127 |         })
128 |         
129 |         # Attempt to improve result based on recommendations
130 |         improved_result = self._apply_quality_improvements(result, recommendations, phase)
131 |         
132 |         return improved_result
133 |     
134 |     def _apply_quality_improvements(self, result, recommendations, phase):
135 |         """Apply quality improvement recommendations"""
136 |         print(f"💡 Applying {len(recommendations)} quality improvements...")
137 |         
138 |         # Implementation of specific improvement logic here
139 |         # Base class provides default implementation, subclasses can override for more specific improvements
140 |         
141 |         if not result:
142 |             result = {}
143 |         
144 |         # Add improvement markers
145 |         if "quality_improvements" not in result:
146 |             result["quality_improvements"] = []
147 |         
148 |         for rec in recommendations:
149 |             result["quality_improvements"].append({
150 |                 "criterion": rec.get("criterion", "unknown"),
151 |                 "priority": rec.get("priority", "medium"),
152 |                 "suggestion": rec.get("suggestion", "No specific suggestion"),
153 |                 "applied": True
154 |             })
155 |         
156 |         return result
157 |     
158 |     def get_tool(self, tool_name):
159 |         """Get tool instance"""
160 |         for tool in self.tools:
161 |             if tool.name == tool_name:
162 |                 return tool
163 |         return None
164 |     
165 |     def get_contextual_tools(self, phase):
166 |         """Get relevant tools based on current phase"""
167 |         if not self.project_specification:
168 |             return self.tools
169 |         
170 |         project_type = self.project_specification["type"]
171 |         return [tool for tool in self.tools if self._is_tool_relevant(tool, phase, project_type)]
172 |     
173 |     def _is_tool_relevant(self, tool, phase, project_type):
174 |         """Determine if tool is relevant to current phase and project type"""
175 |         # Universal tools are always relevant
176 |         universal_tools = ["ProjectSpecificationCoordinator", "UniversalValidator", "QualityGatekeeper"]
177 |         if tool.name in universal_tools:
178 |             return True
179 |         
180 |         # Other tools determined by phase and project type
181 |         relevance_map = {
182 |             "architecture_design": ["ProjectSpecificationCoordinator", "UniversalValidator"],
183 |             "design_modeling": ["UniversalValidator", "QualityGatekeeper"],
184 |             "implementation": ["UniversalValidator", "QualityGatekeeper"],
185 |             "testing_validation": ["UniversalValidator", "QualityGatekeeper"]
186 |         }
187 |         
188 |         return tool.name in relevance_map.get(phase, [])
189 |     
190 |     def generate_role_report(self):
191 |         """Generate role work report"""
192 |         return {
193 |             "role": self.role_name,
194 |             "project_type": self.project_specification.get("type") if self.project_specification else "unknown",
195 |             "tools_used": [tool.name for tool in self.tools],
196 |             "quality_gates": len(self.quality_gates),
197 |             "feedback_received": len(self.feedback_history),
198 |             "overall_performance": self._calculate_role_performance()
199 |         }
200 |     
201 |     def _calculate_role_performance(self):
202 |         """Calculate role performance"""
203 |         if not self.quality_gates:
204 |             return 0
205 |         
206 |         total_score = sum(gate["result"]["overall_score"] for gate in self.quality_gates)
207 |         return total_score / len(self.quality_gates)
208 |     
209 |     def _get_timestamp(self):
210 |         """Get timestamp"""
211 |         from datetime import datetime
212 |         return datetime.now().isoformat()
213 |     
214 |     def add_custom_tool(self, tool):
215 |         """Add custom tool"""
216 |         self.tools.append(tool)
217 |         print(f"🔧 Added custom tool {tool.name} to {self.role_name}")
218 |     
219 |     def remove_tool(self, tool_name):
220 |         """Remove tool"""
221 |         self.tools = [tool for tool in self.tools if tool.name != tool_name]
222 |         print(f"🗑️ Removed tool {tool_name} from {self.role_name}")
223 |     
224 |     def get_feedback_summary(self):
225 |         """Get feedback summary"""
226 |         if not self.feedback_history:
227 |             return "No feedback received"
228 |         
229 |         total_issues = sum(len(feedback["issues"]) for feedback in self.feedback_history)
230 |         phases_with_issues = len(set(feedback["phase"] for feedback in self.feedback_history))
231 |         
232 |         return {
233 |             "total_feedback_sessions": len(self.feedback_history),
234 |             "total_issues_identified": total_issues,
235 |             "phases_with_issues": phases_with_issues,
236 |             "improvement_rate": self._calculate_improvement_rate()
237 |         }
238 |     
239 |     def _calculate_improvement_rate(self):
240 |         """Calculate improvement rate"""
241 |         if len(self.quality_gates) < 2:
242 |             return 0
243 |         
244 |         # Compare scores of first and last quality gates
245 |         first_score = self.quality_gates[0]["result"]["overall_score"]
246 |         last_score = self.quality_gates[-1]["result"]["overall_score"]
247 |         
248 |         return ((last_score - first_score) / first_score) * 100 if first_score > 0 else 0
249 | 


--------------------------------------------------------------------------------
/evaluate/execute/_execution.py:
--------------------------------------------------------------------------------
  1 | # Copyright (c) Microsoft Corporation.
  2 | # Licensed under the MIT license.
  3 | 
  4 | from typing import Optional, Dict
  5 | import contextlib
  6 | import faulthandler
  7 | import io
  8 | import os
  9 | import multiprocessing
 10 | import platform
 11 | import signal
 12 | import tempfile
 13 | 
 14 | def _pack_test_cases(test_cases, timeout):
 15 |     blank_4 = ' ' * 4
 16 |     blank_8 = ' ' * 8
 17 |     blank_12 = ' ' * 12
 18 |     result = f'def check():\n    pass_result = []\n'
 19 |     for idx, tc in enumerate(test_cases):
 20 |         multi_line_assertion = tc.strip().replace('\n', f'\n{blank_12}')
 21 |         result += f'\n{blank_4}try:\n{blank_8}with time_limit({timeout}):\n{blank_12}{multi_line_assertion}\
 22 |                     \n{blank_12}pass_result.append(True)\n{blank_4}except Exception as e:\n{blank_8}pass_result.append(False)\n'
 23 |     result += '\n    return pass_result\n'
 24 |     result += f'\nglobal final_result\nfinal_result = check()'
 25 |     return result
 26 | 
 27 | 
 28 | def check_correctness_with_test_cases(task_id, prompt, completion, test_cases, timeout):
 29 |     """
 30 |     Evaluates the functional correctness of a solution_content by running the test
 31 |     suite provided in the problem. 
 32 |     """
 33 |     extend_timeout = timeout*len(test_cases)
 34 | 
 35 |     def unsafe_execute():
 36 | 
 37 |         with create_tempdir():
 38 | 
 39 |             # These system calls are needed when cleaning up tempdir.
 40 |             import os
 41 |             import shutil
 42 |             rmtree = shutil.rmtree
 43 |             rmdir = os.rmdir
 44 |             chdir = os.chdir
 45 | 
 46 |             # Disable functionalities that can make destructive changes to the test.
 47 |             reliability_guard()
 48 | 
 49 |             # Construct the check program and run it.
 50 |             check_program = (
 51 |                 prompt + completion + "\n" +
 52 |                 _pack_test_cases(test_cases, timeout)
 53 |             )
 54 | 
 55 |             try:
 56 |                 exec_globals = {'time_limit': time_limit}
 57 |                 with swallow_io():
 58 |                     exec(check_program, exec_globals)
 59 |                 result.append(exec_globals['final_result'])
 60 |             except TimeoutException:
 61 |                 result.append("timed out")
 62 |             except BaseException as e:
 63 |                 result.append(f"failed: {e}")
 64 | 
 65 |             # Needed for cleaning up.
 66 |             shutil.rmtree = rmtree
 67 |             os.rmdir = rmdir
 68 |             os.chdir = chdir
 69 | 
 70 |     manager = multiprocessing.Manager()
 71 |     result = manager.list()
 72 | 
 73 |     p = multiprocessing.Process(target=unsafe_execute)
 74 |     p.start()
 75 |     p.join(timeout=extend_timeout + 0.1)
 76 |     if p.is_alive():
 77 |         p.kill()
 78 | 
 79 |     if not result:
 80 |         result.append("timed out")
 81 | 
 82 |     return dict(
 83 |         task_id=task_id,
 84 |         test_cases=test_cases,
 85 |         completion=completion,
 86 |         passed=(type(result[0]) == list) and len(result[0]) > 0,
 87 |         result=result[0]
 88 |     )
 89 | 
 90 | def check_correctness_T(task_id: str, prompt: str, completion: str, test: list, entry_point: str, timeout: float) -> Dict:
 91 |     """
 92 |     Evaluates the functional correctness of a completion by running the test
 93 |     suite provided in the problem. 
 94 |     """
 95 | 
 96 |     def unsafe_execute():
 97 | 
 98 |         with create_tempdir():
 99 | 
100 |             # These system calls are needed when cleaning up tempdir.
101 |             import os
102 |             import shutil
103 |             rmtree = shutil.rmtree
104 |             rmdir = os.rmdir
105 |             chdir = os.chdir
106 | 
107 |             # Disable functionalities that can make destructive changes to the test.
108 |             reliability_guard()
109 | 
110 |             # Construct the check program and run it.
111 |             check_program = (
112 |                 prompt + completion + "\n" + '\n'.join(test)
113 |             )
114 | 
115 |             try:
116 |                 exec_globals = {}
117 |                 with swallow_io():
118 |                     with time_limit(timeout):
119 |                         exec(check_program, exec_globals)
120 |                 result.append("passed")
121 |             except AssertionError:
122 |                 result.append("assertion")
123 |             except TimeoutException:
124 |                 result.append("timed out")
125 |             except BaseException as e:
126 |                 result.append(f"failed: {e}")
127 | 
128 |             # Needed for cleaning up.
129 |             shutil.rmtree = rmtree
130 |             os.rmdir = rmdir
131 |             os.chdir = chdir
132 | 
133 |     manager = multiprocessing.Manager()
134 |     result = manager.list()
135 | 
136 |     p = multiprocessing.Process(target=unsafe_execute)
137 |     p.start()
138 |     p.join(timeout=timeout+1)
139 |     if p.is_alive():
140 |         p.kill()
141 | 
142 |     if not result:
143 |         result.append("timed out")
144 | 
145 |     return dict(
146 |         task_id=task_id,
147 |         passed=result[0] == "passed",
148 |         result=result[0],
149 |         completion=completion,
150 |     )
151 | 
152 | 
153 | def check_correctness(task_id: str, prompt: str, completion: str, test: str, entry_point: str, timeout: float) -> Dict:
154 |     """
155 |     Evaluates the functional correctness of a completion by running the test
156 |     suite provided in the problem. 
157 |     """
158 | 
159 |     def unsafe_execute():
160 | 
161 |         with create_tempdir():
162 | 
163 |             # These system calls are needed when cleaning up tempdir.
164 |             import os
165 |             import shutil
166 |             rmtree = shutil.rmtree
167 |             rmdir = os.rmdir
168 |             chdir = os.chdir
169 | 
170 |             # Disable functionalities that can make destructive changes to the test.
171 |             reliability_guard()
172 | 
173 |             # Construct the check program and run it.
174 |             check_program = (
175 |                 prompt + completion + "\n" + test + "\n" + f'check({entry_point})'
176 |             )
177 | 
178 |             try:
179 |                 exec_globals = {}
180 |                 with swallow_io():
181 |                     with time_limit(timeout):
182 |                         exec(check_program, exec_globals)
183 |                 result.append("passed")
184 |             except TimeoutException:
185 |                 result.append("timed out")
186 |             except BaseException as e:
187 |                 result.append(f"failed: {e}")
188 | 
189 |             # Needed for cleaning up.
190 |             shutil.rmtree = rmtree
191 |             os.rmdir = rmdir
192 |             os.chdir = chdir
193 | 
194 |     manager = multiprocessing.Manager()
195 |     result = manager.list()
196 | 
197 |     p = multiprocessing.Process(target=unsafe_execute)
198 |     p.start()
199 |     p.join(timeout=timeout+1)
200 |     if p.is_alive():
201 |         p.kill()
202 | 
203 |     if not result:
204 |         result.append("timed out")
205 | 
206 |     return dict(
207 |         task_id=task_id,
208 |         passed=result[0] == "passed",
209 |         result=result[0],
210 |         completion=completion,
211 |     )
212 | 
213 | @contextlib.contextmanager
214 | def time_limit(seconds: float):
215 |     def signal_handler(signum, frame):
216 |         raise TimeoutException("Timed out!")
217 |     signal.setitimer(signal.ITIMER_REAL, seconds)
218 |     signal.signal(signal.SIGALRM, signal_handler)
219 |     try:
220 |         yield
221 |     finally:
222 |         signal.setitimer(signal.ITIMER_REAL, 0)
223 | 
224 | 
225 | @contextlib.contextmanager
226 | def swallow_io():
227 |     stream = WriteOnlyStringIO()
228 |     with contextlib.redirect_stdout(stream):
229 |         with contextlib.redirect_stderr(stream):
230 |             with redirect_stdin(stream):
231 |                 yield
232 | 
233 | 
234 | @contextlib.contextmanager
235 | def create_tempdir():
236 |     with tempfile.TemporaryDirectory() as dirname:
237 |         with chdir(dirname):
238 |             yield dirname
239 | 
240 | 
241 | class TimeoutException(Exception):
242 |     pass
243 | 
244 | 
245 | class WriteOnlyStringIO(io.StringIO):
246 |     """ StringIO that throws an exception when it's read from """
247 | 
248 |     def read(self, *args, **kwargs):
249 |         raise IOError
250 | 
251 |     def readline(self, *args, **kwargs):
252 |         raise IOError
253 | 
254 |     def readlines(self, *args, **kwargs):
255 |         raise IOError
256 | 
257 |     def readable(self, *args, **kwargs):
258 |         """ Returns True if the IO object can be read. """
259 |         return False
260 | 
261 | 
262 | class redirect_stdin(contextlib._RedirectStream):  # type: ignore
263 |     _stream = 'stdin'
264 | 
265 | 
266 | @contextlib.contextmanager
267 | def chdir(root):
268 |     if root == ".":
269 |         yield
270 |         return
271 |     cwd = os.getcwd()
272 |     os.chdir(root)
273 |     try:
274 |         yield
275 |     except BaseException as exc:
276 |         raise exc
277 |     finally:
278 |         os.chdir(cwd)
279 | 
280 | 
281 | def reliability_guard(maximum_memory_bytes: Optional[int] = None):
282 |     """
283 |     This disables various destructive functions and prevents the generated code
284 |     from interfering with the test (e.g. fork bomb, killing other processes,
285 |     removing filesystem files, etc.)
286 | 
287 |     WARNING
288 |     This function is NOT a security sandbox. Untrusted code, including, model-
289 |     generated code, should not be blindly executed outside of one. See the 
290 |     Codex paper for more information about OpenAI's code sandbox, and proceed
291 |     with caution.
292 |     """
293 | 
294 |     if maximum_memory_bytes is not None:
295 |         import resource
296 |         resource.setrlimit(resource.RLIMIT_AS, (maximum_memory_bytes, maximum_memory_bytes))
297 |         resource.setrlimit(resource.RLIMIT_DATA, (maximum_memory_bytes, maximum_memory_bytes))
298 |         if not platform.uname().system == 'Darwin':
299 |             resource.setrlimit(resource.RLIMIT_STACK, (maximum_memory_bytes, maximum_memory_bytes))
300 | 
301 |     faulthandler.disable()
302 | 
303 |     import builtins
304 |     builtins.exit = None
305 |     builtins.quit = None
306 | 
307 |     import os
308 |     os.environ['OMP_NUM_THREADS'] = '1'
309 | 
310 |     os.kill = None
311 |     os.system = None
312 |     os.putenv = None
313 |     os.remove = None
314 |     os.removedirs = None
315 |     os.rmdir = None
316 |     os.fchdir = None
317 |     os.setuid = None
318 |     os.fork = None
319 |     os.forkpty = None
320 |     os.killpg = None
321 |     os.rename = None
322 |     os.renames = None
323 |     os.truncate = None
324 |     os.replace = None
325 |     os.unlink = None
326 |     os.fchmod = None
327 |     os.fchown = None
328 |     os.chmod = None
329 |     os.chown = None
330 |     os.chroot = None
331 |     os.fchdir = None
332 |     os.lchflags = None
333 |     os.lchmod = None
334 |     os.lchown = None
335 |     os.getcwd = None
336 |     os.chdir = None
337 | 
338 |     import shutil
339 |     shutil.rmtree = None
340 |     shutil.move = None
341 |     shutil.chown = None
342 | 
343 |     import subprocess
344 |     subprocess.Popen = None  # type: ignore
345 | 
346 |     __builtins__['help'] = None
347 | 
348 |     import sys
349 |     sys.modules['ipdb'] = None
350 |     sys.modules['joblib'] = None
351 |     sys.modules['resource'] = None
352 |     sys.modules['psutil'] = None
353 |     sys.modules['tkinter'] = None
354 | 


--------------------------------------------------------------------------------
/tools/global_tool_orchestrator.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Practical tool orchestrator - for project mode
  3 | """
  4 | import os
  5 | import json
  6 | import re
  7 | from datetime import datetime
  8 | 
  9 | class GlobalToolOrchestrator:
 10 |     """Global tool orchestrator - manages and coordinates various development tools"""
 11 |     
 12 |     def __init__(self):
 13 |         self.tools = {}
 14 |         self.execution_log = []
 15 |         self.start_time = datetime.now()
 16 |     
 17 |     def execute(self, action, *args, **kwargs):
 18 |         """Execute specified tool operation"""
 19 |         log_entry = {
 20 |             "timestamp": datetime.now().isoformat(),
 21 |             "action": action,
 22 |             "args": args,
 23 |             "kwargs": kwargs
 24 |         }
 25 |         
 26 |         try:
 27 |             if action == "organize_files":
 28 |                 result = self._organize_files(kwargs.get("files", {}), kwargs.get("output_dir", ""))
 29 |             elif action == "backup_existing":
 30 |                 result = self._backup_existing(kwargs.get("output_dir", ""))
 31 |             elif action == "fetch_color_palette":
 32 |                 result = self._fetch_color_palette(kwargs.get("theme", "modern"))
 33 |             elif action == "get_web_fonts":
 34 |                 result = self._get_web_fonts(kwargs.get("font_name", "Open Sans"))
 35 |             elif action == "fetch_external_libraries":
 36 |                 result = self._fetch_external_libraries(kwargs.get("project_type", "web"))
 37 |             elif action == "automated_tester_check":
 38 |                 result = self._run_automated_tests(kwargs.get("files", {}))
 39 |             elif action == "quality_check":
 40 |                 result = self._run_quality_check(kwargs.get("files", {}), kwargs.get("project_type", "web"))
 41 |             else:
 42 |                 result = {"status": "success", "result": f"Tool action '{action}' executed"}
 43 |             
 44 |             log_entry["result"] = result
 45 |             log_entry["status"] = "success"
 46 |             
 47 |         except Exception as e:
 48 |             result = {"status": "error", "error": str(e)}
 49 |             log_entry["result"] = result
 50 |             log_entry["status"] = "error"
 51 |         
 52 |         self.execution_log.append(log_entry)
 53 |         return result
 54 |     
 55 |     def _organize_files(self, files, output_dir):
 56 |         """Organize file structure"""
 57 |         organized = {}
 58 |         for file_path, content in files.items():
 59 |             # Ensure file path normalization
 60 |             clean_path = file_path.replace("\\", "/").strip("/")
 61 |             organized[clean_path] = content
 62 |         
 63 |         return {
 64 |             "status": "success",
 65 |             "organized_files": len(organized),
 66 |             "structure": list(organized.keys())
 67 |         }
 68 |     
 69 |     def _backup_existing(self, output_dir):
 70 |         """Backup existing files"""
 71 |         if not os.path.exists(output_dir):
 72 |             return {"backup_created": False, "reason": "Output directory does not exist"}
 73 |         
 74 |         files = os.listdir(output_dir)
 75 |         if not files:
 76 |             return {"backup_created": False, "reason": "No files to backup"}
 77 |         
 78 |         backup_dir = f"{output_dir}_backup_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
 79 |         # This is just simulation backup logic, real projects can implement actual backup
 80 |         return {
 81 |             "backup_created": True,
 82 |             "backup_path": backup_dir,
 83 |             "files_backed_up": len(files)
 84 |         }
 85 |     
 86 |     def _fetch_color_palette(self, theme):
 87 |         """Get color palette"""
 88 |         palettes = {
 89 |             "modern": {
 90 |                 "primary": "#3498db",
 91 |                 "secondary": "#2ecc71", 
 92 |                 "accent": "#e74c3c",
 93 |                 "background": "#f8f9fa",
 94 |                 "text": "#2c3e50"
 95 |             },
 96 |             "dark": {
 97 |                 "primary": "#0d1117",
 98 |                 "secondary": "#21262d",
 99 |                 "accent": "#58a6ff",
100 |                 "background": "#010409",
101 |                 "text": "#f0f6fc"
102 |             },
103 |             "minimal": {
104 |                 "primary": "#000000",
105 |                 "secondary": "#ffffff",
106 |                 "accent": "#6c757d",
107 |                 "background": "#f8f9fa",
108 |                 "text": "#212529"
109 |             }
110 |         }
111 |         
112 |         return {
113 |             "status": "success",
114 |             "theme": theme,
115 |             "palette": palettes.get(theme, palettes["modern"])
116 |         }
117 |     
118 |     def _get_web_fonts(self, font_name):
119 |         """Get web font information"""
120 |         fonts = {
121 |             "Open Sans": {
122 |                 "url": "https://fonts.googleapis.com/css2?family=Open+Sans:wght@300;400;600;700&display=swap",
123 |                 "family": "'Open Sans', sans-serif"
124 |             },
125 |             "Roboto": {
126 |                 "url": "https://fonts.googleapis.com/css2?family=Roboto:wght@300;400;500;700&display=swap",
127 |                 "family": "'Roboto', sans-serif"
128 |             },
129 |             "Inter": {
130 |                 "url": "https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap",
131 |                 "family": "'Inter', sans-serif"
132 |             }
133 |         }
134 |         
135 |         return {
136 |             "status": "success",
137 |             "font": fonts.get(font_name, fonts["Open Sans"])
138 |         }
139 |     
140 |     def _fetch_external_libraries(self, project_type):
141 |         """Get external library information"""
142 |         libraries = {
143 |             "web": [
144 |                 {"name": "Chart.js", "url": "https://cdn.jsdelivr.net/npm/chart.js", "type": "js"},
145 |                 {"name": "D3.js", "url": "https://d3js.org/d3.v7.min.js", "type": "js"},
146 |                 {"name": "Bootstrap", "url": "https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css", "type": "css"}
147 |             ],
148 |             "data_analysis": [
149 |                 {"name": "Plotly", "url": "https://cdn.plot.ly/plotly-latest.min.js", "type": "js"},
150 |                 {"name": "NumJS", "url": "https://unpkg.com/numjs@latest/dist/numjs.min.js", "type": "js"}
151 |             ]
152 |         }
153 |         
154 |         return {
155 |             "status": "success",
156 |             "libraries": libraries.get(project_type, libraries["web"])
157 |         }
158 |     
159 |     def validate_code(self, code, files=None):
160 |         """Validate code quality"""
161 |         issues = []
162 |         warnings = []
163 |         suggestions = []
164 |         
165 |         if not code or len(code.strip()) < 10:
166 |             issues.append("Code is too short or empty")
167 |         
168 |         # Check HTML structure
169 |         if "<!DOCTYPE html>" in code:
170 |             if "<title>" not in code:
171 |                 warnings.append("Missing page title")
172 |             if 'lang="' not in code:
173 |                 suggestions.append("Consider adding language attribute to HTML tag")
174 |         
175 |         # Check CSS
176 |         if code.strip().startswith(".") or code.strip().startswith("#"):
177 |             if "color:" not in code and "background" not in code:
178 |                 suggestions.append("Consider adding color scheme to CSS")
179 |         
180 |         # Check JavaScript
181 |         if "function" in code or "const " in code or "let " in code:
182 |             if "console.log" in code:
183 |                 warnings.append("Remove console.log statements in production")
184 |         
185 |         return {
186 |             "is_valid": len(issues) == 0,
187 |             "errors": issues,
188 |             "warnings": warnings,
189 |             "suggestions": suggestions
190 |         }
191 |     
192 |     def organize_files(self, files):
193 |         """Organize file structure"""
194 |         return self._organize_files(files, "")
195 |     
196 |     def generate_report(self):
197 |         """Generate tool usage report"""
198 |         execution_time = (datetime.now() - self.start_time).total_seconds()
199 |         
200 |         successful_operations = len([log for log in self.execution_log if log.get("status") == "success"])
201 |         total_operations = len(self.execution_log)
202 |         success_rate = (successful_operations / total_operations * 100) if total_operations > 0 else 100
203 |         
204 |         return {
205 |             "tools_used": list(set([log.get("action", "unknown") for log in self.execution_log])),
206 |             "execution_time": f"{execution_time:.2f}s",
207 |             "success_rate": f"{success_rate:.1f}%",
208 |             "total_operations": total_operations,
209 |             "successful_operations": successful_operations,
210 |             "detailed_log": self.execution_log[-5:]  # Last 5 records
211 |         }
212 |     
213 |     def get_tool(self, tool_name):
214 |         """Get specific tool instance"""
215 |         # For compatibility, return self, because all tool functions are integrated here
216 |         return self
217 | 
218 |     def _run_automated_tests(self, files):
219 |         """Run automated tests"""
220 |         issues = []
221 |         test_results = {}
222 |         
223 |         for file_path, content in files.items():
224 |             file_issues = []
225 |             
226 |             # Basic file checks
227 |             if not content.strip():
228 |                 file_issues.append("File is empty")
229 |             
230 |             # HTML checks
231 |             if file_path.endswith('.html'):
232 |                 if '<!DOCTYPE html>' not in content.upper():
233 |                     file_issues.append("Missing DOCTYPE declaration")
234 |                 if '<html' not in content:
235 |                     file_issues.append("Missing html tag")
236 |                 if '<head>' not in content or '<body>' not in content:
237 |                     file_issues.append("Missing head or body tags")
238 |             
239 |             # CSS checks
240 |             elif file_path.endswith('.css'):
241 |                 open_braces = content.count('{')
242 |                 close_braces = content.count('}')
243 |                 if open_braces != close_braces:
244 |                     file_issues.append("Unbalanced CSS braces")
245 |             
246 |             # JavaScript checks
247 |             elif file_path.endswith('.js'):
248 |                 if content.count('(') != content.count(')'):
249 |                     file_issues.append("Unbalanced parentheses")
250 |                 if content.count('{') != content.count('}'):
251 |                     file_issues.append("Unbalanced braces")
252 |             
253 |             test_results[file_path] = {
254 |                 "passed": len(file_issues) == 0,
255 |                 "issues": file_issues
256 |             }
257 |             issues.extend(file_issues)
258 |         
259 |         return {
260 |             "status": "success",
261 |             "overall_passed": len(issues) == 0,
262 |             "total_issues": len(issues),
263 |             "file_results": test_results,
264 |             "summary": f"Tested {len(files)} files, found {len(issues)} issues"
265 |         }
266 |     
267 |     def _run_quality_check(self, files, project_type):
268 |         """Run quality check"""
269 |         quality_score = 0
270 |         total_checks = 0
271 |         quality_details = {}
272 |         
273 |         for file_path, content in files.items():
274 |             file_score = 0
275 |             file_checks = 0
276 |             
277 |             # Basic quality checks
278 |             if content.strip():
279 |                 file_score += 1
280 |             file_checks += 1
281 |             
282 |             # Code length check
283 |             if len(content) > 100:  # Basic content check
284 |                 file_score += 1
285 |             file_checks += 1
286 |             
287 |             # File-specific checks
288 |             if file_path.endswith('.html'):
289 |                 if 'class=' in content or 'id=' in content:
290 |                     file_score += 1  # Has CSS selectors
291 |                 file_checks += 1
292 |                 
293 |                 if '<title>' in content:
294 |                     file_score += 1  # Has title
295 |                 file_checks += 1
296 |                 
297 |             elif file_path.endswith('.css'):
298 |                 if ':' in content and '{' in content:
299 |                     file_score += 1  # Has CSS rules
300 |                 file_checks += 1
301 |                 
302 |                 if 'color:' in content or 'background:' in content:
303 |                     file_score += 1  # Has style definitions
304 |                 file_checks += 1
305 |             
306 |             quality_details[file_path] = {
307 |                 "score": file_score,
308 |                 "max_score": file_checks,
309 |                 "percentage": (file_score / file_checks * 100) if file_checks > 0 else 0
310 |             }
311 |             
312 |             quality_score += file_score
313 |             total_checks += file_checks
314 |         
315 |         overall_percentage = (quality_score / total_checks * 100) if total_checks > 0 else 0
316 |         
317 |         return {
318 |             "status": "success",
319 |             "overall_score": quality_score,
320 |             "max_score": total_checks,
321 |             "percentage": overall_percentage,
322 |             "grade": "A" if overall_percentage >= 90 else "B" if overall_percentage >= 80 else "C" if overall_percentage >= 70 else "D",
323 |             "file_details": quality_details,
324 |             "summary": f"Quality score: {quality_score}/{total_checks} ({overall_percentage:.1f}%)"
325 |         }
326 | 


--------------------------------------------------------------------------------
/project_session.py:
--------------------------------------------------------------------------------
  1 | from roles.analyst import Analyst
  2 | from roles.coder import Coder
  3 | from roles.tester import Tester
  4 | from roles.project_architect import ProjectArchitect
  5 | from roles.project_developer import ProjectDeveloper
  6 | from roles.project_tester import ProjectTester
  7 | from roles.ui_designer import UIDesigner
  8 | from utils import find_method_name, construct_system_message
  9 | from tools import global_tool_orchestrator, CodeAnalyzer, FileManager, QualityChecker, APIIntegrationTool, AutomatedTester
 10 | import time
 11 | import os
 12 | import json
 13 | import re
 14 | 
 15 | 
 16 | class ProjectSession(object):
 17 |     def __init__(self, team_description, architect_description, developer_description, 
 18 |                  tester_description, ui_designer_description, requirement, project_type='web_visualization',
 19 |                  model='gpt-3.5-turbo', majority=1, max_tokens=1024,
 20 |                  temperature=0.2, top_p=0.95, max_round=3, output_dir='generated_project'):
 21 | 
 22 |         self.session_history = {}
 23 |         self.max_round = max_round
 24 |         self.requirement = requirement
 25 |         self.project_type = project_type
 26 |         self.output_dir = output_dir
 27 |         self.project_files = {}
 28 |         
 29 |         # Intelligently adjust max_tokens to avoid context length issues
 30 |         self.base_max_tokens = max_tokens
 31 |         self.current_max_tokens = max_tokens
 32 |         self.model = model
 33 |         
 34 |         # Dynamically adjust tokens based on model and rounds
 35 |         model_limits = {
 36 |             'gpt-3.5-turbo': 16385,
 37 |             'gpt-4': 16385,
 38 |         }
 39 |         self.model_limit = model_limits.get(model, 16385)
 40 |         
 41 |         # Initial token allocation: reserve space for multi-round iteration
 42 |         if max_round > 1:
 43 |             # Use smaller tokens for multi-round iteration, reserve space for history
 44 |             adjusted_max_tokens = min(max_tokens, self.model_limit // (max_round + 1))
 45 |         else:
 46 |             adjusted_max_tokens = max_tokens
 47 |         
 48 |         print(f"🔧 Token management: Model={model}, Limit={self.model_limit}, Base={max_tokens}, Adjusted={adjusted_max_tokens}")
 49 |         
 50 |         # Initialize tools
 51 |         self.tool_orchestrator = global_tool_orchestrator
 52 |         self.code_analyzer = CodeAnalyzer()
 53 |         self.file_manager = FileManager()
 54 |         self.quality_checker = QualityChecker()
 55 |         self.api_tool = APIIntegrationTool()
 56 |         self.automated_tester = AutomatedTester()
 57 |         
 58 |         # Initialize project roles with adjusted tokens
 59 |         self.architect = ProjectArchitect(team_description, architect_description, requirement, 
 60 |                                         project_type, model, majority, adjusted_max_tokens, temperature, top_p)
 61 |         self.developer = ProjectDeveloper(team_description, developer_description, requirement,
 62 |                                         project_type, model, majority, adjusted_max_tokens, temperature, top_p)
 63 |         self.tester = ProjectTester(team_description, tester_description, requirement,
 64 |                                   project_type, model, majority, adjusted_max_tokens, temperature, top_p)
 65 |         self.ui_designer = UIDesigner(team_description, ui_designer_description, requirement,
 66 |                                     project_type, model, majority, adjusted_max_tokens, temperature, top_p)
 67 |     
 68 |     def _adjust_tokens_for_round(self, round_num):
 69 |         """Dynamically adjust token allocation based on round number"""
 70 |         # As rounds increase, reduce token allocation to reserve space for conversation history
 71 |         reduction_factor = 1.0 #- (round_num * 0.1)  # Reduce 10% per round
 72 |         new_max_tokens = max(int(self.base_max_tokens * reduction_factor), 512)  # Minimum 512 tokens
 73 |         
 74 |         if new_max_tokens != self.current_max_tokens:
 75 |             print(f"🔄 Round {round_num + 1}: Adjusting max_tokens from {self.current_max_tokens} to {new_max_tokens}")
 76 |             self.current_max_tokens = new_max_tokens
 77 |             
 78 |             # Update token settings for all roles
 79 |             for role in [self.architect, self.developer, self.tester, self.ui_designer]:
 80 |                 if hasattr(role, 'max_tokens'):
 81 |                     role.max_tokens = new_max_tokens
 82 |         
 83 |         return new_max_tokens
 84 |     
 85 |     def run_project_session(self):
 86 |         """Run the complete project generation session with integrated tools"""
 87 |         
 88 |         print("🔧 Phase 0: Initializing resources...")
 89 |         # Fetch external resources based on project type
 90 |         if self.project_type == 'web_visualization':
 91 |             external_resources = self.api_tool.execute(
 92 |                 "fetch_cdn_libraries", 
 93 |                 libraries=["chart.js", "d3.js", "bootstrap", "jquery"]
 94 |             )
 95 |             self.session_history["external_resources"] = external_resources
 96 |             print(f"✅ Fetched external resources: {len(external_resources.get('libraries', []))} libraries")
 97 |         
 98 |         # Phase 1: Planning and Architecture Design
 99 |         print("🏗️ Phase 1: Creating planning and architecture design...")
100 |         architecture_plan = self.architect.design_architecture()
101 |         self.session_history["architecture"] = architecture_plan
102 |         
103 |         if architecture_plan == "error":
104 |             raise RuntimeError("Architecture design failed")
105 |         
106 |         # Use code analyzer to analyze architecture plan
107 |         if isinstance(architecture_plan, str):
108 |             arch_analysis = self.code_analyzer.execute(
109 |                 "analyze",
110 |                 code=architecture_plan,
111 |                 project_type=self.project_type
112 |             )
113 |             print(f"📊 Architecture analysis: {arch_analysis.get('summary', 'Analysis completed')}")
114 |         
115 |         # Phase 2: UI Design (for web projects) 
116 |         ui_design = None
117 |         if self.project_type in ['web_visualization', 'desktop_app']:
118 |             print("🎨 Phase 2: Creating UI design...")
119 |             ui_design = self.ui_designer.design_ui(architecture_plan)
120 |             self.session_history["ui_design"] = ui_design
121 |             
122 |             # Fetch color palette and fonts for better design
123 |             if ui_design != "error":
124 |                 design_resources = self.api_tool.execute("fetch_color_palette", theme="modern")
125 |                 fonts = self.api_tool.execute("get_web_fonts", font_name="Open Sans")
126 |                 self.session_history["design_resources"] = {
127 |                     "colors": design_resources,
128 |                     "fonts": fonts
129 |                 }
130 |                 print("🎨 Enhanced UI design with external resources")
131 |         
132 |         # Phase 3: Development with iterative improvement
133 |         print("⚡ Phase 3: Implementing project iteratively...")
134 |         for round_num in range(self.max_round):
135 |             print(f"🔄 Development round {round_num + 1}/{self.max_round}")
136 |             
137 |             # Dynamically adjust token allocation
138 |             self._adjust_tokens_for_round(round_num)
139 |             
140 |             # Development with file management
141 |             try:
142 |                 project_files = self.developer.implement_project(architecture_plan, ui_design, 
143 |                                                                self.project_files, round_num == 0)
144 |             except Exception as e:
145 |                 error_str = str(e)
146 |                 if "context_length_exceeded" in error_str or "maximum context length" in error_str:
147 |                     print(f"⚠️ Context length exceeded in round {round_num + 1}, reducing complexity...")
148 |                     # Clear developer's message history
149 |                     if hasattr(self.developer, 'itf') and hasattr(self.developer.itf, 'clear_history'):
150 |                         self.developer.itf.clear_history()
151 |                     # Retry once with fewer tokens
152 |                     self.current_max_tokens = max(self.current_max_tokens // 2, 256)
153 |                     self.developer.max_tokens = self.current_max_tokens
154 |                     try:
155 |                         project_files = self.developer.implement_project(architecture_plan, ui_design, 
156 |                                                                        self.project_files, round_num == 0)
157 |                     except Exception as e2:
158 |                         print(f"❌ Failed after retry: {e2}")
159 |                         if round_num == 0:
160 |                             raise RuntimeError("Initial development failed")
161 |                         else:
162 |                             project_files = self.project_files
163 |                             break
164 |                 else:
165 |                     raise e
166 |             
167 |             if project_files == "error":
168 |                 if round_num == 0:
169 |                     raise RuntimeError("Initial development failed")
170 |                 else:
171 |                     # Use files from previous round
172 |                     project_files = self.project_files
173 |                     break
174 |             
175 |             # Use file manager to organize and validate files
176 |             file_validation = self.file_manager.execute(
177 |                 "validate_structure",
178 |                 files=project_files,
179 |                 project_type=self.project_type
180 |             )
181 |             print(f"📁 File structure validation: {file_validation.get('status', 'completed')}")
182 |             
183 |             # Save generated files with backup
184 |             self._save_project_files_with_tools(project_files)
185 |             self.project_files = project_files
186 |             
187 |             # Quality checking with enhanced tools
188 |             quality_report = self.quality_checker.execute(
189 |                 "comprehensive_check",
190 |                 files=project_files,
191 |                 project_type=self.project_type
192 |             )
193 |             print(f"🔍 Quality check score: {quality_report.get('overall_score', 'N/A')}")
194 |             
195 |             # Testing and feedback with automated tools (except last round)
196 |             if round_num < self.max_round - 1:
197 |                 print("🧪 Testing with automated tools...")
198 |                 
199 |                 # Traditional testing
200 |                 test_report = self.tester.test_project(project_files, architecture_plan)
201 |                 
202 |                 # Enhanced automated testing
203 |                 automated_test_results = self.automated_tester.execute(
204 |                     "full_suite",
205 |                     files=project_files,
206 |                     project_type=self.project_type
207 |                 )
208 |                 
209 |                 combined_test_report = {
210 |                     "traditional_tests": test_report,
211 |                     "automated_tests": automated_test_results,
212 |                     "quality_metrics": quality_report
213 |                 }
214 |                 
215 |                 self.session_history[f'round_{round_num}'] = {
216 |                     "files": list(project_files.keys()),
217 |                     "test_report": combined_test_report,
218 |                     "tool_reports": {
219 |                         "file_validation": file_validation,
220 |                         "quality_check": quality_report
221 |                     }
222 |                 }
223 |                 
224 |                 if test_report == "error":
225 |                     print("⚠️ Testing failed, continuing with current implementation")
226 |                     break
227 |                 
228 |                 # Enhanced success criteria
229 |                 traditional_passed = ("all tests passed" in test_report.lower() or 
230 |                                     "no issues found" in test_report.lower())
231 |                 automated_passed = automated_test_results.get("overall_status") == "passed"
232 |                 quality_good = quality_report.get("overall_score", 0) >= 7.0
233 |                 
234 |                 if traditional_passed and automated_passed and quality_good:
235 |                     print("✅ All tests passed with high quality! Project completed successfully.")
236 |                     break
237 |                 
238 |                 # Provide enhanced feedback combining all reports
239 |                 enhanced_feedback = self._generate_enhanced_feedback(
240 |                     test_report, automated_test_results, quality_report
241 |                 )
242 |                 self.developer.receive_feedback(enhanced_feedback)
243 |             
244 |             # Adjust tokens for next round
245 |             self._adjust_tokens_for_round(round_num)
246 |         
247 |         # Final tool report generation
248 |         print("📊 Generating final tool usage report...")
249 |         tool_usage_report = self.tool_orchestrator.generate_report()
250 |         self.session_history["tool_usage_report"] = tool_usage_report
251 |         
252 |         # Clean up interfaces
253 |         self.architect.itf.clear_history()
254 |         self.developer.itf.clear_history() 
255 |         self.tester.itf.clear_history()
256 |         if hasattr(self.ui_designer, 'itf'):
257 |             self.ui_designer.itf.clear_history()
258 | 
259 |         return self.project_files, self.session_history
260 |     
261 |     def _save_project_files(self, project_files):
262 |         """Save generated project files to disk"""
263 |         for file_path, content in project_files.items():
264 |             full_path = os.path.join(self.output_dir, file_path)
265 |             
266 |             # Create directory if it doesn't exist
267 |             dir_path = os.path.dirname(full_path)
268 |             if dir_path:
269 |                 os.makedirs(dir_path, exist_ok=True)
270 |             
271 |             # Write file content
272 |             try:
273 |                 with open(full_path, 'w', encoding='utf-8') as f:
274 |                     f.write(content)
275 |                 print(f"Saved: {file_path}")
276 |             except Exception as e:
277 |                 print(f"Error saving {file_path}: {e}")
278 |     
279 |     def _save_project_files_with_tools(self, project_files):
280 |         """Save project files with tool assistance and backup"""
281 |         # Use file manager to organize files before saving
282 |         organization_result = self.file_manager.execute(
283 |             "organize_files",
284 |             files=project_files,
285 |             output_dir=self.output_dir
286 |         )
287 |         
288 |         # Create backup if files already exist
289 |         backup_result = self.file_manager.execute(
290 |             "backup_existing",
291 |             output_dir=self.output_dir
292 |         )
293 |         
294 |         # Save files with the original method
295 |         self._save_project_files(project_files)
296 |         
297 |         print(f"📁 File organization: {organization_result.get('status', 'completed')}")
298 |         if backup_result.get('backup_created'):
299 |             print(f"💾 Backup created: {backup_result.get('backup_path', 'N/A')}")
300 |     
301 |     def _generate_enhanced_feedback(self, traditional_report, automated_report, quality_report):
302 |         """Generate simple, actionable feedback for the developer"""
303 |         
304 |         # Start with a simple structure
305 |         issues = []
306 |         
307 |         # Check for test failures
308 |         if automated_report.get("issues"):
309 |             issues.extend(automated_report["issues"])
310 |         
311 |         # Traditional test issues
312 |         if traditional_report and "error" in traditional_report.lower():
313 |             issues.append("Fix syntax errors and runtime issues")
314 |         
315 |         # Generate simple feedback
316 |         if not issues:
317 |             return "✅ Good! Continue with current implementation approach."
318 |         
319 |         # Create actionable feedback
320 |         feedback = "Please fix these issues:\n"
321 |         for i, issue in enumerate(issues[:3], 1):  # Limit to 3 most important issues
322 |             feedback += f"{i}. {issue}\n"
323 |         
324 |         feedback += "\nFocus on fixing issues for better results."
325 |         return feedback
326 | 
327 | 
328 | class FunctionSession(object):
329 |     """Original function-level session for backwards compatibility"""
330 |     def __init__(self, TEAM, ANALYST, PYTHON_DEVELOPER, TESTER, requirement, model='gpt-3.5-turbo', 
331 |                  majority=1, max_tokens=512, temperature=0.0, top_p=0.95, max_round=4, before_func=''):
332 | 
333 |         self.session_history = {}
334 |         self.max_round = max_round
335 |         self.before_func = before_func
336 |         self.requirement = requirement
337 |         self.analyst = Analyst(TEAM, ANALYST, requirement, model, majority, max_tokens, temperature, top_p)
338 |         self.coder = Coder(TEAM, PYTHON_DEVELOPER, requirement, model, majority, max_tokens, temperature, top_p)
339 |         self.tester = Tester(TEAM, TESTER, requirement, model, majority, max_tokens, temperature, top_p)
340 |     
341 |     def run_session(self):
342 |         # ... (keep original implementation from session.py)
343 |         from session import Session
344 |         original_session = Session(None, None, None, None, self.requirement, 
345 |                                  model=self.analyst.model, majority=self.analyst.majority,
346 |                                  max_tokens=self.analyst.max_tokens, temperature=self.analyst.temperature,
347 |                                  top_p=self.analyst.top_p, max_round=self.max_round, 
348 |                                  before_func=self.before_func)
349 |         original_session.analyst = self.analyst
350 |         original_session.coder = self.coder  
351 |         original_session.tester = self.tester
352 |         return original_session.run_session()
353 | 


--------------------------------------------------------------------------------
/roles/project_architect.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import copy
  3 | import json
  4 | import time
  5 | 
  6 | from core import interface
  7 | from utils import construct_system_message
  8 | from roles.enhanced_role import EnhancedRole
  9 | 
 10 | 
 11 | class ProjectArchitect(EnhancedRole):
 12 |     def __init__(self, team_description, architect_description, requirement, project_type,
 13 |                  model='gpt-3.5-turbo', majority=1, max_tokens=1024, temperature=0.2, top_p=0.95):
 14 |         # First call enhanced base class initialization
 15 |         super().__init__()
 16 |         
 17 |         self.model = model
 18 |         self.majority = majority
 19 |         self.max_tokens = max_tokens
 20 |         self.temperature = temperature
 21 |         self.top_p = top_p
 22 |         self.history_message = []
 23 |         self.requirement = requirement
 24 |         self.project_type = project_type
 25 | 
 26 |         self.itf = interface.ProgramInterface(
 27 |             stop='',
 28 |             verbose=False,
 29 |             model=self.model,
 30 |         )
 31 | 
 32 |         system_message = construct_system_message(requirement, architect_description, team_description)
 33 |         self.history_message_append(system_message)
 34 |     
 35 |     def _adapt_to_project_type(self, project_type):
 36 |         """Adjust architect behavior based on project type"""
 37 |         super()._adapt_to_project_type(project_type)
 38 |         
 39 |         # Architect-specific adaptation logic
 40 |         adaptation_strategies = {
 41 |             "web_frontend": self._adapt_to_frontend_architecture,
 42 |             "web_backend": self._adapt_to_backend_architecture,
 43 |             "data_science": self._adapt_to_data_science_architecture,
 44 |             "mobile": self._adapt_to_mobile_architecture,
 45 |             "desktop": self._adapt_to_desktop_architecture,
 46 |             "fullstack": self._adapt_to_fullstack_architecture
 47 |         }
 48 |         
 49 |         strategy = adaptation_strategies.get(project_type, self._adapt_to_generic_architecture)
 50 |         strategy()
 51 |     
 52 |     def _adapt_to_frontend_architecture(self):
 53 |         """Adapt to frontend architecture design"""
 54 |         print("🎨 ProjectArchitect: Focusing on component-based frontend architecture")
 55 |     
 56 |     def _adapt_to_backend_architecture(self):
 57 |         """Adapt to backend architecture design"""
 58 |         print("⚙️ ProjectArchitect: Focusing on API and service architecture")
 59 |     
 60 |     def _adapt_to_data_science_architecture(self):
 61 |         """Adapt to data science architecture design"""
 62 |         print("📊 ProjectArchitect: Focusing on data pipeline and model architecture")
 63 |     
 64 |     def _adapt_to_mobile_architecture(self):
 65 |         """Adapt to mobile architecture design"""
 66 |         print("📱 ProjectArchitect: Focusing on mobile app architecture")
 67 |     
 68 |     def _adapt_to_desktop_architecture(self):
 69 |         """Adapt to desktop architecture design"""
 70 |         print("🖥️ ProjectArchitect: Focusing on desktop application architecture")
 71 |     
 72 |     def _adapt_to_fullstack_architecture(self):
 73 |         """Adapt to fullstack architecture design"""
 74 |         print("🌐 ProjectArchitect: Focusing on end-to-end system architecture")
 75 |     
 76 |     def _adapt_to_generic_architecture(self):
 77 |         """Adapt to generic architecture design"""
 78 |         print("🔧 ProjectArchitect: Using generic architecture patterns")
 79 | 
 80 |     def design_system_architecture(self, requirements=None):
 81 |         """Design system architecture - using enhanced quality gate process"""
 82 |         if requirements is None:
 83 |             requirements = {"description": self.requirement, "project_type": self.project_type}
 84 |         
 85 |         return self.execute_with_quality_gate(
 86 |             "architecture_design",
 87 |             self._design_architecture_internal,
 88 |             requirements
 89 |         )
 90 |     
 91 |     def _design_architecture_internal(self, requirements):
 92 |         """Internal architecture design logic"""
 93 |         # First determine project specification (if not already done)
 94 |         if not self.project_specification:
 95 |             spec_coordinator = self.get_tool("ProjectSpecificationCoordinator")
 96 |             project_spec_result = spec_coordinator.execute(
 97 |                 requirements.get("project_type", self.project_type),
 98 |                 requirements
 99 |             )
100 |             self.set_project_specification(project_spec_result["project_specification"])
101 |         
102 |         # Design architecture based on project specification
103 |         architecture = self._design_architecture_for_type(
104 |             self.project_specification["type"],
105 |             requirements,
106 |             self.project_specification
107 |         )
108 |         
109 |         return {
110 |             "architecture": architecture,
111 |             "project_specification": self.project_specification,
112 |             "design_rationale": self._generate_design_rationale(architecture),
113 |             "implementation_guidance": self._generate_implementation_guidance(architecture)
114 |         }
115 |     
116 |     def _design_architecture_for_type(self, project_type, requirements, project_spec):
117 |         """Design architecture based on project type"""
118 |         architecture_designers = {
119 |             "web_frontend": self._design_frontend_architecture,
120 |             "web_backend": self._design_backend_architecture,
121 |             "data_science": self._design_data_science_architecture,
122 |             "mobile": self._design_mobile_architecture,
123 |             "desktop": self._design_desktop_architecture,
124 |             "fullstack": self._design_fullstack_architecture
125 |         }
126 |         
127 |         designer = architecture_designers.get(project_type, self._design_generic_architecture)
128 |         return designer(requirements, project_spec)
129 |     
130 |     def _design_frontend_architecture(self, requirements, project_spec):
131 |         """Design frontend architecture"""
132 |         tech_stack = project_spec["technology_stack"]
133 |         
134 |         return {
135 |             "type": "frontend",
136 |             "components": [
137 |                 {"name": "App", "responsibility": "Main application component", "type": "container"},
138 |                 {"name": "Header", "responsibility": "Navigation and branding", "type": "presentational"},
139 |                 {"name": "Sidebar", "responsibility": "Secondary navigation", "type": "presentational"},
140 |                 {"name": "MainContent", "responsibility": "Primary content display", "type": "container"},
141 |                 {"name": "Footer", "responsibility": "Footer information", "type": "presentational"}
142 |             ],
143 |             "state_management": {
144 |                 "pattern": "Component State" if "React" in tech_stack.get("frameworks", []) else "Global State",
145 |                 "tools": tech_stack.get("state_management", ["Context API"])
146 |             },
147 |             "routing": {
148 |                 "type": "client-side",
149 |                 "routes": [
150 |                     {"path": "/", "component": "Home", "description": "Landing page"},
151 |                     {"path": "/dashboard", "component": "Dashboard", "description": "Main dashboard"},
152 |                     {"path": "/settings", "component": "Settings", "description": "User settings"}
153 |                 ]
154 |             },
155 |             "data_flow": "Component Props -> State -> UI Updates",
156 |             "file_structure": {
157 |                 "src/": "Source code directory",
158 |                 "src/components/": "React/Vue components",
159 |                 "src/styles/": "CSS/SCSS files", 
160 |                 "src/utils/": "Utility functions",
161 |                 "public/": "Static assets"
162 |             }
163 |         }
164 |     
165 |     def _design_backend_architecture(self, requirements, project_spec):
166 |         """Design backend architecture"""
167 |         tech_stack = project_spec["technology_stack"]
168 |         
169 |         return {
170 |             "type": "backend",
171 |             "services": [
172 |                 {"name": "UserService", "responsibility": "User management", "endpoints": ["/users", "/auth"]},
173 |                 {"name": "DataService", "responsibility": "Data operations", "endpoints": ["/data", "/analytics"]},
174 |                 {"name": "NotificationService", "responsibility": "Notifications", "endpoints": ["/notifications"]}
175 |             ],
176 |             "api": {
177 |                 "style": "REST",
178 |                 "version": "v1",
179 |                 "endpoints": [
180 |                     {"method": "GET", "path": "/api/v1/users", "description": "Get all users"},
181 |                     {"method": "POST", "path": "/api/v1/users", "description": "Create user"},
182 |                     {"method": "GET", "path": "/api/v1/data", "description": "Get data"}
183 |                 ]
184 |             },
185 |             "data_model": {
186 |                 "entities": [
187 |                     {"name": "User", "fields": ["id", "name", "email", "created_at"]},
188 |                     {"name": "DataRecord", "fields": ["id", "user_id", "data", "timestamp"]}
189 |                 ]
190 |             },
191 |             "middleware": ["Authentication", "CORS", "Rate Limiting", "Logging"],
192 |             "file_structure": {
193 |                 "app/": "Main application code",
194 |                 "app/models/": "Data models",
195 |                 "app/services/": "Business logic services",
196 |                 "app/controllers/": "Request handlers",
197 |                 "config/": "Configuration files"
198 |             }
199 |         }
200 |     
201 |     def _design_data_science_architecture(self, requirements, project_spec):
202 |         """Design data science architecture"""
203 |         return {
204 |             "type": "data_science",
205 |             "pipeline": [
206 |                 {"stage": "Data Ingestion", "tools": ["pandas", "requests"], "responsibility": "Collect raw data"},
207 |                 {"stage": "Data Cleaning", "tools": ["pandas", "numpy"], "responsibility": "Clean and validate data"},
208 |                 {"stage": "Feature Engineering", "tools": ["scikit-learn"], "responsibility": "Create features"},
209 |                 {"stage": "Model Training", "tools": ["scikit-learn", "tensorflow"], "responsibility": "Train models"},
210 |                 {"stage": "Model Evaluation", "tools": ["scikit-learn", "matplotlib"], "responsibility": "Evaluate performance"},
211 |                 {"stage": "Deployment", "tools": ["flask", "docker"], "responsibility": "Deploy model"}
212 |             ],
213 |             "data_flow": "Raw Data -> Cleaned Data -> Features -> Model -> Predictions",
214 |             "file_structure": {
215 |                 "data/": "Data files (raw, processed)",
216 |                 "notebooks/": "Jupyter notebooks for exploration", 
217 |                 "src/": "Python modules",
218 |                 "models/": "Trained model files",
219 |                 "config/": "Configuration files"
220 |             }
221 |         }
222 |     
223 |     def _design_mobile_architecture(self, requirements, project_spec):
224 |         """Design mobile architecture"""
225 |         return {
226 |             "type": "mobile",
227 |             "navigation": {
228 |                 "type": "Stack Navigation",
229 |                 "screens": [
230 |                     {"name": "Home", "description": "Main screen"},
231 |                     {"name": "Profile", "description": "User profile"},
232 |                     {"name": "Settings", "description": "App settings"}
233 |                 ]
234 |             },
235 |             "components": [
236 |                 {"name": "AppNavigator", "responsibility": "Navigation container"},
237 |                 {"name": "HomeScreen", "responsibility": "Main application screen"},
238 |                 {"name": "CustomButton", "responsibility": "Reusable button component"}
239 |             ],
240 |             "file_structure": {
241 |                 "src/screens/": "Screen components",
242 |                 "src/components/": "Reusable components",
243 |                 "src/navigation/": "Navigation configuration",
244 |                 "src/services/": "API and data services"
245 |             }
246 |         }
247 |     
248 |     def _design_desktop_architecture(self, requirements, project_spec):
249 |         """Design desktop architecture"""
250 |         return {
251 |             "type": "desktop",
252 |             "windows": [
253 |                 {"name": "MainWindow", "description": "Primary application window"},
254 |                 {"name": "SettingsDialog", "description": "Settings configuration"},
255 |                 {"name": "AboutDialog", "description": "About information"}
256 |             ],
257 |             "components": [
258 |                 {"name": "MenuBar", "responsibility": "Application menu"},
259 |                 {"name": "StatusBar", "responsibility": "Status information"},
260 |                 {"name": "ContentArea", "responsibility": "Main content display"}
261 |             ],
262 |             "file_structure": {
263 |                 "src/": "Source code",
264 |                 "resources/": "Images, icons, etc.",
265 |                 "config/": "Configuration files"
266 |             }
267 |         }
268 |     
269 |     def _design_fullstack_architecture(self, requirements, project_spec):
270 |         """Design fullstack architecture"""
271 |         frontend_arch = self._design_frontend_architecture(requirements, project_spec)
272 |         backend_arch = self._design_backend_architecture(requirements, project_spec)
273 |         
274 |         return {
275 |             "type": "fullstack",
276 |             "frontend": frontend_arch,
277 |             "backend": backend_arch,
278 |             "integration": {
279 |                 "api_communication": "REST API calls from frontend to backend",
280 |                 "authentication": "JWT tokens",
281 |                 "data_synchronization": "Real-time updates via WebSocket"
282 |             },
283 |             "deployment": {
284 |                 "frontend": "Static hosting (Vercel, Netlify)",
285 |                 "backend": "Container deployment (Docker + K8s)",
286 |                 "database": "Managed database service"
287 |             }
288 |         }
289 |     
290 |     def _design_generic_architecture(self, requirements, project_spec):
291 |         """Design generic architecture"""
292 |         return {
293 |             "type": "generic",
294 |             "components": [
295 |                 {"name": "Core", "responsibility": "Main application logic"},
296 |                 {"name": "Interface", "responsibility": "User interface"},
297 |                 {"name": "Data", "responsibility": "Data management"}
298 |             ],
299 |             "patterns": ["MVC", "Observer", "Factory"],
300 |             "file_structure": {
301 |                 "src/": "Source code",
302 |                 "docs/": "Documentation",
303 |                 "tests/": "Test files"
304 |             }
305 |         }
306 |     
307 |     def _generate_design_rationale(self, architecture):
308 |         """Generate design rationale"""
309 |         return {
310 |             "architectural_decisions": [
311 |                 f"Chose {architecture.get('type', 'generic')} architecture for project requirements",
312 |                 "Component-based design for maintainability",
313 |                 "Clear separation of concerns",
314 |                 "Scalable file structure"
315 |             ],
316 |             "trade_offs": [
317 |                 "Flexibility vs. Simplicity",
318 |                 "Performance vs. Maintainability",
319 |                 "Development Speed vs. Code Quality"
320 |             ]
321 |         }
322 |     
323 |     def _generate_implementation_guidance(self, architecture):
324 |         """Generate implementation guidance"""
325 |         return {
326 |             "development_phases": [
327 |                 "1. Set up project structure",
328 |                 "2. Implement core components",
329 |                 "3. Add data layer",
330 |                 "4. Implement user interface",
331 |                 "5. Add testing",
332 |                 "6. Performance optimization"
333 |             ],
334 |             "best_practices": [
335 |                 "Follow naming conventions",
336 |                 "Write clean, readable code",
337 |                 "Include comprehensive tests",
338 |                 "Document API endpoints",
339 |                 "Use version control effectively"
340 |             ]
341 |         }
342 | 
343 |     def design_architecture(self):
344 |         """Design the overall project architecture"""
345 |         architecture_prompt = f"""
346 |         Please design a complete project architecture for a {self.project_type} project.
347 |         
348 |         Project Requirements: {self.requirement}
349 |         
350 |         Provide a detailed JSON response with the following structure:
351 |         {{
352 |             "project_structure": {{
353 |                 "files": [
354 |                     {{"path": "index.html", "description": "Main HTML file", "priority": 1}},
355 |                     {{"path": "css/style.css", "description": "Main stylesheet", "priority": 2}},
356 |                     {{"path": "js/main.js", "description": "Main JavaScript file", "priority": 2}}
357 |                 ]
358 |             }},
359 |             "technology_stack": {{
360 |                 "frontend": ["HTML5", "CSS3", "JavaScript"],
361 |                 "visualization": ["Chart.js", "D3.js"],
362 |                 "styling": ["Bootstrap", "CSS Grid", "Flexbox"]
363 |             }},
364 |             "implementation_phases": [
365 |                 {{"phase": 1, "description": "Create basic HTML structure and layout"}},
366 |                 {{"phase": 2, "description": "Implement styling and responsive design"}},
367 |                 {{"phase": 3, "description": "Add interactive visualizations and functionality"}}
368 |             ],
369 |             "component_interactions": [
370 |                 {{"source": "main.js", "target": "index.html", "description": "Dynamic content updates"}},
371 |                 {{"source": "style.css", "target": "index.html", "description": "Visual styling"}}
372 |             ]
373 |         }}
374 |         
375 |         For web visualization projects, prioritize creating interactive charts, beautiful UI, and responsive design.
376 |         """
377 |         
378 |         self.history_message_append(architecture_prompt)
379 |         
380 |         try:
381 |             responses = self.itf.run(prompt=self.history_message, majority_at=self.majority, 
382 |                                    max_tokens=self.max_tokens, temperature=self.temperature, top_p=self.top_p)
383 |         except Exception as e:
384 |             print(f"Architecture design failed: {e}")
385 |             time.sleep(5)
386 |             return "error"
387 | 
388 |         architecture = responses[0]
389 |         self.history_message_append(architecture, "assistant")
390 |         
391 |         return architecture
392 |     
393 |     def history_message_append(self, message, role="user"):
394 |         self.history_message.append({
395 |             "role": role,
396 |             "content": message
397 |         })
398 | 


--------------------------------------------------------------------------------
/tools/enhanced_tools.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Enhanced Tool System for Project-Level Code Generation
  3 | Provides powerful tool invocation capabilities to enhance project code generation quality and functionality
  4 | """
  5 | 
  6 | import os
  7 | import re
  8 | import json
  9 | import subprocess
 10 | import requests
 11 | import ast
 12 | import time
 13 | from typing import Dict, List, Any, Optional, Tuple
 14 | from abc import ABC, abstractmethod
 15 | from pathlib import Path
 16 | 
 17 | 
 18 | class BaseTool(ABC):
 19 |     """Base tool class"""
 20 |     
 21 |     def __init__(self, name: str, description: str):
 22 |         self.name = name
 23 |         self.description = description
 24 |         self.usage_count = 0
 25 |         self.last_used = None
 26 |     
 27 |     @abstractmethod
 28 |     def execute(self, *args, **kwargs) -> Dict[str, Any]:
 29 |         """Execute tool function"""
 30 |         pass
 31 |     
 32 |     def log_usage(self):
 33 |         """Log tool usage"""
 34 |         self.usage_count += 1
 35 |         self.last_used = time.time()
 36 |     
 37 |     def get_info(self) -> Dict[str, Any]:
 38 |         """Get tool information"""
 39 |         return {
 40 |             "name": self.name,
 41 |             "description": self.description,
 42 |             "usage_count": self.usage_count,
 43 |             "last_used": self.last_used
 44 |         }
 45 | 
 46 | 
 47 | class CodeAnalyzer(BaseTool):
 48 |     """Code analysis tool"""
 49 |     
 50 |     def __init__(self):
 51 |         super().__init__(
 52 |             "code_analyzer",
 53 |             "Analyze code quality, complexity and potential issues"
 54 |         )
 55 |     
 56 |     def execute(self, code: str, language: str = "javascript") -> Dict[str, Any]:
 57 |         """Analyze code quality"""
 58 |         self.log_usage()
 59 |         
 60 |         result = {
 61 |             "language": language,
 62 |             "metrics": {},
 63 |             "issues": [],
 64 |             "suggestions": []
 65 |         }
 66 |         
 67 |         if language.lower() in ["javascript", "js"]:
 68 |             result.update(self._analyze_javascript(code))
 69 |         elif language.lower() in ["python", "py"]:
 70 |             result.update(self._analyze_python(code))
 71 |         elif language.lower() in ["html"]:
 72 |             result.update(self._analyze_html(code))
 73 |         elif language.lower() in ["css"]:
 74 |             result.update(self._analyze_css(code))
 75 |         
 76 |         return result
 77 |     
 78 |     def _analyze_javascript(self, code: str) -> Dict[str, Any]:
 79 |         """Analyze JavaScript code"""
 80 |         metrics = {
 81 |             "lines_of_code": len(code.splitlines()),
 82 |             "functions_count": len(re.findall(r'function\s+\w+|=>\s*{|\w+\s*:\s*function', code)),
 83 |             "classes_count": len(re.findall(r'class\s+\w+', code)),
 84 |             "complexity_score": self._calculate_complexity(code)
 85 |         }
 86 |         
 87 |         issues = []
 88 |         suggestions = []
 89 |         
 90 |         # Check common issues
 91 |         if 'var ' in code:
 92 |             issues.append({
 93 |                 "type": "style",
 94 |                 "message": "Recommend using 'let' or 'const' instead of 'var'",
 95 |                 "severity": "warning"
 96 |             })
 97 |             suggestions.append("Use modern ES6+ syntax, replace var with let/const")
 98 |         
 99 |         if '==' in code and '===' not in code:
100 |             issues.append({
101 |                 "type": "quality",
102 |                 "message": "Recommend using strict equality '===' instead of '=='",
103 |                 "severity": "warning"
104 |             })
105 |         
106 |         if 'console.log' in code:
107 |             issues.append({
108 |                 "type": "production",
109 |                 "message": "Should remove console.log in production code",
110 |                 "severity": "info"
111 |             })
112 |         
113 |         # Check for modern features usage
114 |         modern_features = ['async', 'await', '=>', 'const', 'let', 'destructuring']
115 |         used_features = [f for f in modern_features if f in code]
116 |         if used_features:
117 |             suggestions.append(f"Uses modern JavaScript features: {', '.join(used_features)}")
118 |         
119 |         return {
120 |             "metrics": metrics,
121 |             "issues": issues,
122 |             "suggestions": suggestions
123 |         }
124 |     
125 |     def _analyze_python(self, code: str) -> Dict[str, Any]:
126 |         """Analyze Python code"""
127 |         try:
128 |             tree = ast.parse(code)
129 |             
130 |             metrics = {
131 |                 "lines_of_code": len(code.splitlines()),
132 |                 "functions_count": len([n for n in ast.walk(tree) if isinstance(n, ast.FunctionDef)]),
133 |                 "classes_count": len([n for n in ast.walk(tree) if isinstance(n, ast.ClassDef)]),
134 |                 "imports_count": len([n for n in ast.walk(tree) if isinstance(n, (ast.Import, ast.ImportFrom))])
135 |             }
136 |             
137 |             return {
138 |                 "metrics": metrics,
139 |                 "issues": [],
140 |                 "suggestions": ["Python code syntax is correct"]
141 |             }
142 |         except SyntaxError as e:
143 |             return {
144 |                 "metrics": {"lines_of_code": len(code.splitlines())},
145 |                 "issues": [{
146 |                     "type": "syntax",
147 |                     "message": f"Syntax error: {str(e)}",
148 |                     "severity": "error"
149 |                 }],
150 |                 "suggestions": ["Fix syntax errors"]
151 |             }
152 |     
153 |     def _analyze_html(self, code: str) -> Dict[str, Any]:
154 |         """Analyze HTML code"""
155 |         metrics = {
156 |             "lines_of_code": len(code.splitlines()),
157 |             "elements_count": len(re.findall(r'<\w+', code)),
158 |             "semantic_elements": len(re.findall(r'<(header|nav|main|section|article|aside|footer)', code)),
159 |             "images_count": len(re.findall(r'<img', code))
160 |         }
161 |         
162 |         issues = []
163 |         suggestions = []
164 |         
165 |         # Check semantics
166 |         if metrics["semantic_elements"] > 0:
167 |             suggestions.append("Uses HTML5 semantic elements")
168 |         else:
169 |             issues.append({
170 |                 "type": "accessibility",
171 |                 "message": "Recommend using HTML5 semantic elements",
172 |                 "severity": "warning"
173 |             })
174 |         
175 |         # Check accessibility
176 |         if 'alt=' not in code and '<img' in code:
177 |             issues.append({
178 |                 "type": "accessibility",
179 |                 "message": "Images missing alt attributes",
180 |                 "severity": "warning"
181 |             })
182 |         
183 |         if 'aria-' in code:
184 |             suggestions.append("Uses ARIA attributes for better accessibility")
185 |         
186 |         return {
187 |             "metrics": metrics,
188 |             "issues": issues,
189 |             "suggestions": suggestions
190 |         }
191 |     
192 |     def _analyze_css(self, code: str) -> Dict[str, Any]:
193 |         """Analyze CSS code"""
194 |         metrics = {
195 |             "lines_of_code": len(code.splitlines()),
196 |             "rules_count": len(re.findall(r'{[^}]*}', code)),
197 |             "custom_properties": len(re.findall(r'--[\w-]+:', code)),
198 |             "media_queries": len(re.findall(r'@media', code))
199 |         }
200 |         
201 |         issues = []
202 |         suggestions = []
203 |         
204 |         # Check modern CSS features
205 |         if metrics["custom_properties"] > 0:
206 |             suggestions.append("Uses CSS custom properties (variables)")
207 |         
208 |         if 'grid' in code or 'flexbox' in code:
209 |             suggestions.append("Uses modern CSS layout (Grid/Flexbox)")
210 |         
211 |         if metrics["media_queries"] > 0:
212 |             suggestions.append("Implements responsive design")
213 |         
214 |         return {
215 |             "metrics": metrics,
216 |             "issues": issues,
217 |             "suggestions": suggestions
218 |         }
219 |     
220 |     def _calculate_complexity(self, code: str) -> int:
221 |         """Calculate code complexity"""
222 |         complexity = 1  # Base complexity
223 |         
224 |         # Add complexity for decision points
225 |         decision_points = ['if', 'else', 'while', 'for', 'case', 'catch', '&&', '||', '?']
226 |         for point in decision_points:
227 |             complexity += len(re.findall(rf'\b{point}\b', code))
228 |         
229 |         return complexity
230 | 
231 | 
232 | class FileManager(BaseTool):
233 |     """File system management tool"""
234 |     
235 |     def __init__(self):
236 |         super().__init__(
237 |             "file_manager",
238 |             "Manage project file structure and operations"
239 |         )
240 |     
241 |     def execute(self, action: str, **kwargs) -> Dict[str, Any]:
242 |         """Execute file operations"""
243 |         self.log_usage()
244 |         
245 |         try:
246 |             if action == "create_structure":
247 |                 return self._create_project_structure(kwargs.get("structure", {}))
248 |             elif action == "validate_structure":
249 |                 return self._validate_structure(kwargs.get("path", "."))
250 |             elif action == "optimize_structure":
251 |                 return self._optimize_structure(kwargs.get("files", {}))
252 |             elif action == "generate_tree":
253 |                 return self._generate_directory_tree(kwargs.get("path", "."))
254 |             else:
255 |                 return {"error": f"Unknown operation: {action}"}
256 |         except Exception as e:
257 |             return {"error": str(e)}
258 |     
259 |     def _create_project_structure(self, structure: Dict[str, str]) -> Dict[str, Any]:
260 |         """Create project file structure"""
261 |         created_dirs = []
262 |         created_files = []
263 |         
264 |         for path, content in structure.items():
265 |             full_path = Path(path)
266 |             
267 |             # Create directories
268 |             if full_path.suffix == "":  # Directory
269 |                 full_path.mkdir(parents=True, exist_ok=True)
270 |                 created_dirs.append(str(full_path))
271 |             else:  # File
272 |                 full_path.parent.mkdir(parents=True, exist_ok=True)
273 |                 with open(full_path, 'w', encoding='utf-8') as f:
274 |                     f.write(content if content else "")
275 |                 created_files.append(str(full_path))
276 |         
277 |         return {
278 |             "success": True,
279 |             "created_directories": created_dirs,
280 |             "created_files": created_files
281 |         }
282 |     
283 |     def _validate_structure(self, path: str) -> Dict[str, Any]:
284 |         """Validate project structure"""
285 |         path_obj = Path(path)
286 |         if not path_obj.exists():
287 |             return {"error": "Path does not exist"}
288 |         
289 |         structure_info = {
290 |             "is_valid_project": False,
291 |             "has_index_html": False,
292 |             "has_css_folder": False,
293 |             "has_js_folder": False,
294 |             "has_manifest": False,
295 |             "suggestions": []
296 |         }
297 |         
298 |         # Check key files and directories
299 |         files_and_dirs = list(path_obj.iterdir())
300 |         file_names = [f.name for f in files_and_dirs]
301 |         
302 |         structure_info["has_index_html"] = "index.html" in file_names
303 |         structure_info["has_css_folder"] = any(f.name == "css" and f.is_dir() for f in files_and_dirs)
304 |         structure_info["has_js_folder"] = any(f.name == "js" and f.is_dir() for f in files_and_dirs)
305 |         structure_info["has_manifest"] = "manifest.json" in file_names
306 |         
307 |         # Generate suggestions
308 |         if not structure_info["has_index_html"]:
309 |             structure_info["suggestions"].append("Recommend adding index.html as the main page")
310 |         
311 |         if not structure_info["has_css_folder"]:
312 |             structure_info["suggestions"].append("Recommend creating css folder to organize style files")
313 |         
314 |         if not structure_info["has_js_folder"]:
315 |             structure_info["suggestions"].append("Recommend creating js folder to organize script files")
316 |         
317 |         if not structure_info["has_manifest"]:
318 |             structure_info["suggestions"].append("Recommend adding manifest.json for PWA support")
319 |         
320 |         structure_info["is_valid_project"] = all([
321 |             structure_info["has_index_html"],
322 |             structure_info["has_css_folder"],
323 |             structure_info["has_js_folder"]
324 |         ])
325 |         
326 |         return structure_info
327 |     
328 |     def _optimize_structure(self, files: Dict[str, str]) -> Dict[str, Any]:
329 |         """Optimize file structure"""
330 |         optimized_structure = {}
331 |         recommendations = []
332 |         
333 |         # Reorganize by file type
334 |         for file_path, content in files.items():
335 |             path_obj = Path(file_path)
336 |             extension = path_obj.suffix.lower()
337 |             
338 |             if extension == '.html':
339 |                 if path_obj.name == 'index.html':
340 |                     optimized_structure['index.html'] = content
341 |                 else:
342 |                     optimized_structure[f'pages/{path_obj.name}'] = content
343 |             elif extension == '.css':
344 |                 optimized_structure[f'css/{path_obj.name}'] = content
345 |             elif extension in ['.js', '.ts']:
346 |                 optimized_structure[f'js/{path_obj.name}'] = content
347 |             elif extension in ['.png', '.jpg', '.jpeg', '.svg', '.ico']:
348 |                 optimized_structure[f'assets/images/{path_obj.name}'] = content
349 |             elif extension == '.json':
350 |                 if 'manifest' in path_obj.name:
351 |                     optimized_structure['manifest.json'] = content
352 |                 else:
353 |                     optimized_structure[f'data/{path_obj.name}'] = content
354 |             else:
355 |                 optimized_structure[file_path] = content
356 |         
357 |         # Generate optimization suggestions
358 |         if len([f for f in files.keys() if f.endswith('.css')]) > 3:
359 |             recommendations.append("Consider splitting CSS files further into main.css, components.css, utilities.css")
360 |         
361 |         if len([f for f in files.keys() if f.endswith('.js')]) > 5:
362 |             recommendations.append("Consider using modular JavaScript, split files by functionality")
363 |         
364 |         return {
365 |             "optimized_structure": optimized_structure,
366 |             "recommendations": recommendations,
367 |             "original_files_count": len(files),
368 |             "optimized_files_count": len(optimized_structure)
369 |         }
370 |     
371 |     def _generate_directory_tree(self, path: str) -> Dict[str, Any]:
372 |         """Generate directory tree"""
373 |         def build_tree(dir_path: Path, prefix: str = "") -> List[str]:
374 |             tree_lines = []
375 |             if not dir_path.is_dir():
376 |                 return tree_lines
377 |             
378 |             items = sorted(dir_path.iterdir(), key=lambda x: (not x.is_dir(), x.name))
379 |             
380 |             for i, item in enumerate(items):
381 |                 is_last = i == len(items) - 1
382 |                 current_prefix = "└── " if is_last else "├── "
383 |                 tree_lines.append(f"{prefix}{current_prefix}{item.name}")
384 |                 
385 |                 if item.is_dir():
386 |                     extension_prefix = "    " if is_last else "│   "
387 |                     tree_lines.extend(build_tree(item, prefix + extension_prefix))
388 |             
389 |             return tree_lines
390 |         
391 |         path_obj = Path(path)
392 |         if not path_obj.exists():
393 |             return {"error": "Path does not exist"}
394 |         
395 |         tree = build_tree(path_obj)
396 |         
397 |         return {
398 |             "tree": tree,
399 |             "tree_string": "\n".join([path_obj.name + "/"] + tree)
400 |         }
401 | 
402 | 
403 | class QualityChecker(BaseTool):
404 |     """Code quality checking tool"""
405 |     
406 |     def __init__(self):
407 |         super().__init__(
408 |             "quality_checker",
409 |             "Check code quality, security and best practices"
410 |         )
411 |     
412 |     def execute(self, files: Dict[str, str], project_type: str = "web_visualization") -> Dict[str, Any]:
413 |         """Execute quality check"""
414 |         self.log_usage()
415 |         
416 |         results = {
417 |             "overall_score": 0,
418 |             "file_scores": {},
419 |             "security_issues": [],
420 |             "performance_issues": [],
421 |             "best_practices": [],
422 |             "accessibility_score": 0,
423 |             "recommendations": []
424 |         }
425 |         
426 |         total_score = 0
427 |         file_count = 0
428 |         
429 |         for file_path, content in files.items():
430 |             file_result = self._check_file_quality(file_path, content)
431 |             results["file_scores"][file_path] = file_result
432 |             total_score += file_result.get("score", 0)
433 |             file_count += 1
434 |             
435 |             # Collect various issues
436 |             results["security_issues"].extend(file_result.get("security_issues", []))
437 |             results["performance_issues"].extend(file_result.get("performance_issues", []))
438 |             results["best_practices"].extend(file_result.get("best_practices", []))
439 |         
440 |         # Calculate overall score
441 |         results["overall_score"] = total_score / file_count if file_count > 0 else 0
442 |         
443 |         # Calculate accessibility score
444 |         results["accessibility_score"] = self._calculate_accessibility_score(files)
445 |         
446 |         # Generate recommendations
447 |         results["recommendations"] = self._generate_quality_recommendations(results)
448 |         
449 |         return results
450 |     
451 |     def _check_file_quality(self, file_path: str, content: str) -> Dict[str, Any]:
452 |         """Check individual file quality"""
453 |         path_obj = Path(file_path)
454 |         extension = path_obj.suffix.lower()
455 |         
456 |         if extension == '.html':
457 |             return self._check_html_quality(content)
458 |         elif extension == '.css':
459 |             return self._check_css_quality(content)
460 |         elif extension in ['.js', '.ts']:
461 |             return self._check_js_quality(content)
462 |         else:
463 |             return {"score": 70, "issues": [], "suggestions": []}
464 |     
465 |     def _check_html_quality(self, content: str) -> Dict[str, Any]:
466 |         """Check HTML quality"""
467 |         score = 100
468 |         issues = []
469 |         best_practices = []
470 |         security_issues = []
471 |         performance_issues = []
472 |         
473 |         # Check DOCTYPE
474 |         if '<!DOCTYPE html>' not in content:
475 |             score -= 10
476 |             issues.append("Missing HTML5 DOCTYPE declaration")
477 |         else:
478 |             best_practices.append("Uses HTML5 DOCTYPE")
479 |         
480 |         # Check language declaration
481 |         if 'lang=' not in content:
482 |             score -= 5
483 |             issues.append("Missing language declaration (lang attribute)")
484 |         
485 |         # Check meta viewport
486 |         if 'viewport' not in content:
487 |             score -= 10
488 |             issues.append("Missing viewport meta tag")
489 |         
490 |         # Check semantics
491 |         semantic_elements = ['header', 'nav', 'main', 'section', 'article', 'aside', 'footer']
492 |         used_semantic = [elem for elem in semantic_elements if f'<{elem}' in content]
493 |         if len(used_semantic) >= 3:
494 |             best_practices.append("Uses semantic HTML5 elements")
495 |         elif len(used_semantic) > 0:
496 |             best_practices.append(f"Uses some semantic elements: {', '.join(used_semantic)}")
497 |         else:
498 |             score -= 15
499 |             issues.append("Recommend using semantic HTML5 elements")
500 |         
501 |         # Check accessibility
502 |         if 'aria-' in content:
503 |             best_practices.append("Uses ARIA attributes")
504 |             score += 5
505 |         
506 |         if 'alt=' in content:
507 |             best_practices.append("Images include alt attributes")
508 |         elif '<img' in content:
509 |             score -= 10
510 |             issues.append("Images missing alt attributes")
511 |         
512 |         # Check security
513 |         if 'javascript:' in content:
514 |             security_issues.append("Avoid using javascript: protocol")
515 |             score -= 20
516 |         
517 |         # Check performance
518 |         if 'preload' in content or 'preconnect' in content:
519 |             performance_issues.append("Uses resource preloading optimization")
520 |             score += 5
521 |         
522 |         return {
523 |             "score": max(0, score),
524 |             "issues": issues,
525 |             "best_practices": best_practices,
526 |             "security_issues": security_issues,
527 |             "performance_issues": performance_issues
528 |         }
529 |     
530 |     def _check_css_quality(self, content: str) -> Dict[str, Any]:
531 |         """Check CSS quality"""
532 |         score = 100
533 |         issues = []
534 |         best_practices = []
535 |         performance_issues = []
536 |         
537 |         # Check CSS custom properties
538 |         if '--' in content and ':root' in content:
539 |             best_practices.append("Uses CSS custom properties (variables)")
540 |             score += 10
541 |         
542 |         # Check modern layout
543 |         if 'display: grid' in content or 'display: flex' in content:
544 |             best_practices.append("Uses modern CSS layout")
545 |             score += 5
546 |         
547 |         # Check responsive design
548 |         if '@media' in content:
549 |             best_practices.append("Implements responsive design")
550 |             score += 10
551 |         
552 |         # Check performance issues
553 |         if '*' in content and 'box-sizing' in content:
554 |             performance_issues.append("Uses universal selector for box-sizing reset")
555 |         
556 |         # Check maintainability
557 |         if len(content.splitlines()) > 500:
558 |             issues.append("CSS file too long, recommend splitting into multiple files")
559 |             score -= 10
560 |         
561 |         return {
562 |             "score": max(0, score),
563 |             "issues": issues,
564 |             "best_practices": best_practices,
565 |             "performance_issues": performance_issues
566 |         }
567 |     
568 |     def _check_js_quality(self, content: str) -> Dict[str, Any]:
569 |         """Check JavaScript quality"""
570 |         score = 100
571 |         issues = []
572 |         best_practices = []
573 |         security_issues = []
574 |         performance_issues = []
575 |         
576 |         # Check modern JavaScript features
577 |         modern_features = ['const ', 'let ', '=>', 'async ', 'await ', '...']
578 |         used_features = [f.strip() for f in modern_features if f in content]
579 |         if len(used_features) >= 3:
580 |             best_practices.append(f"Uses modern ES6+ features: {', '.join(used_features)}")
581 |             score += 10
582 |         
583 |         # Check bad practices
584 |         if 'var ' in content:
585 |             issues.append("Recommend using let/const instead of var")
586 |             score -= 5
587 |         
588 |         if 'eval(' in content:
589 |             security_issues.append("Avoid using eval() function")
590 |             score -= 20
591 |         
592 |         if '==' in content and '===' not in content:
593 |             issues.append("Recommend using strict equality operator (===)")
594 |             score -= 5
595 |         
596 |         # Check error handling
597 |         if 'try' in content and 'catch' in content:
598 |             best_practices.append("Implements error handling")
599 |             score += 5
600 |         
601 |         # Check performance
602 |         if 'addEventListener' in content:
603 |             best_practices.append("Uses event listeners")
604 |         
605 |         if 'querySelector' in content:
606 |             best_practices.append("Uses modern DOM query methods")
607 |         
608 |         return {
609 |             "score": max(0, score),
610 |             "issues": issues,
611 |             "best_practices": best_practices,
612 |             "security_issues": security_issues,
613 |             "performance_issues": performance_issues
614 |         }
615 |     
616 |     def _calculate_accessibility_score(self, files: Dict[str, str]) -> int:
617 |         """Calculate accessibility score"""
618 |         score = 0
619 |         total_checks = 0
620 |         
621 |         for file_path, content in files.items():
622 |             if file_path.endswith('.html'):
623 |                 total_checks += 10
624 |                 
625 |                 # ARIA attributes
626 |                 if 'aria-' in content:
627 |                     score += 2
628 |                 
629 |                 # Semantic elements
630 |                 semantic_count = len(re.findall(r'<(header|nav|main|section|article|aside|footer)', content))
631 |                 if semantic_count >= 3:
632 |                     score += 2
633 |                 
634 |                 # Image alt attributes
635 |                 if '<img' in content:
636 |                     if 'alt=' in content:
637 |                         score += 1
638 |                 else:
639 |                     score += 1  # No images also passes
640 |                 
641 |                 # Form labels
642 |                 if '<input' in content:
643 |                     if '<label' in content:
644 |                         score += 1
645 |                 else:
646 |                     score += 1
647 |                 
648 |                 # Skip links
649 |                 if 'skip' in content.lower():
650 |                     score += 1
651 |                 
652 |                 # Role attributes
653 |                 if 'role=' in content:
654 |                     score += 1
655 |                 
656 |                 # Color contrast indication
657 |                 if 'color:' in content and '#' in content:
658 |                     score += 1  # Simplified check
659 |                 
660 |                 # Keyboard navigation
661 |                 if 'tabindex' in content:
662 |                     score += 1
663 |         
664 |         return int((score / max(total_checks, 1)) * 100) if total_checks > 0 else 80
665 |     
666 |     def _generate_quality_recommendations(self, results: Dict[str, Any]) -> List[str]:
667 |         """Generate quality improvement recommendations"""
668 |         recommendations = []
669 |         
670 |         overall_score = results["overall_score"]
671 |         
672 |         if overall_score < 70:
673 |             recommendations.append("Overall code quality needs improvement, focus on basic standards")
674 |         elif overall_score < 85:
675 |             recommendations.append("Code quality is good, can further optimize performance and accessibility")
676 |         else:
677 |             recommendations.append("Code quality is excellent, continue maintaining best practices")
678 |         
679 |         # Security recommendations
680 |         if results["security_issues"]:
681 |             recommendations.append("Fix discovered security issues to ensure application security")
682 |         
683 |         # Performance recommendations
684 |         if results["performance_issues"]:
685 |             recommendations.append("Optimize performance-related issues to improve user experience")
686 |         
687 |         # Accessibility recommendations
688 |         accessibility_score = results["accessibility_score"]
689 |         if accessibility_score < 70:
690 |             recommendations.append("Significantly improve accessibility, add ARIA attributes and semantic tags")
691 |         elif accessibility_score < 90:
692 |             recommendations.append("Further improve accessibility features")
693 |         else:
694 |             recommendations.append("Excellent accessibility performance")
695 |         
696 |         return recommendations
697 | 
698 | 
699 | class ToolOrchestrator:
700 |     """Tool orchestrator - manage and coordinate all tool usage"""
701 |     
702 |     def __init__(self):
703 |         self.tools = {
704 |             "code_analyzer": CodeAnalyzer(),
705 |             "file_manager": FileManager(),
706 |             "quality_checker": QualityChecker()
707 |         }
708 |         self.execution_history = []
709 |     
710 |     def get_available_tools(self) -> Dict[str, str]:
711 |         """Get available tools list"""
712 |         return {name: tool.description for name, tool in self.tools.items()}
713 |     
714 |     def get_tool(self, tool_name: str):
715 |         """Get specified tool instance"""
716 |         return self.tools.get(tool_name)
717 |     
718 |     def execute_tool(self, tool_name: str, *args, **kwargs) -> Dict[str, Any]:
719 |         """Execute specified tool"""
720 |         if tool_name not in self.tools:
721 |             return {"error": f"Tool {tool_name} does not exist"}
722 |         
723 |         try:
724 |             result = self.tools[tool_name].execute(*args, **kwargs)
725 |             
726 |             # Record execution history
727 |             self.execution_history.append({
728 |                 "tool": tool_name,
729 |                 "timestamp": time.time(),
730 |                 "args": args,
731 |                 "kwargs": kwargs,
732 |                 "success": "error" not in result
733 |             })
734 |             
735 |             return result
736 |         except Exception as e:
737 |             error_result = {"error": f"Tool execution failed: {str(e)}"}
738 |             self.execution_history.append({
739 |                 "tool": tool_name,
740 |                 "timestamp": time.time(),
741 |                 "args": args,
742 |                 "kwargs": kwargs,
743 |                 "success": False,
744 |                 "error": str(e)
745 |             })
746 |             return error_result
747 |     
748 |     def get_tool_usage_stats(self) -> Dict[str, Any]:
749 |         """Get tool usage statistics"""
750 |         stats = {}
751 |         for name, tool in self.tools.items():
752 |             stats[name] = tool.get_info()
753 |         
754 |         return {
755 |             "tools": stats,
756 |             "total_executions": len(self.execution_history),
757 |             "recent_executions": self.execution_history[-10:]  # Last 10 executions
758 |         }
759 |     
760 |     def recommend_tools(self, context: str) -> List[str]:
761 |         """Recommend suitable tools based on context"""
762 |         recommendations = []
763 |         
764 |         context_lower = context.lower()
765 |         
766 |         if any(keyword in context_lower for keyword in ["analyze", "check", "quality", "complexity"]):
767 |             recommendations.append("code_analyzer")
768 |         
769 |         if any(keyword in context_lower for keyword in ["file", "structure", "directory", "organize"]):
770 |             recommendations.append("file_manager")
771 |         
772 |         if any(keyword in context_lower for keyword in ["quality", "security", "performance", "best practices"]):
773 |             recommendations.append("quality_checker")
774 |         
775 |         return recommendations if recommendations else ["code_analyzer", "file_manager", "quality_checker"]
776 |     
777 |     def generate_report(self) -> Dict[str, Any]:
778 |         """Generate tool usage report"""
779 |         report = {
780 |             "timestamp": time.time(),
781 |             "tools_stats": self.get_tool_usage_stats(),
782 |             "execution_summary": {
783 |                 "total_executions": len(self.execution_history),
784 |                 "successful_executions": len([h for h in self.execution_history if h["success"]]),
785 |                 "failed_executions": len([h for h in self.execution_history if not h["success"]]),
786 |             },
787 |             "most_used_tools": self._get_most_used_tools(),
788 |             "recommendations": self._generate_usage_recommendations()
789 |         }
790 |         return report
791 |     
792 |     def _get_most_used_tools(self) -> List[Tuple[str, int]]:
793 |         """Get most used tools"""
794 |         usage_count = {}
795 |         for tool_name, tool in self.tools.items():
796 |             usage_count[tool_name] = tool.usage_count
797 |         return sorted(usage_count.items(), key=lambda x: x[1], reverse=True)
798 |     
799 |     def _generate_usage_recommendations(self) -> List[str]:
800 |         """Generate usage recommendations"""
801 |         recommendations = []
802 |         total_executions = len(self.execution_history)
803 |         
804 |         if total_executions > 0:
805 |             successful_rate = len([h for h in self.execution_history if h["success"]]) / total_executions
806 |             if successful_rate < 0.8:
807 |                 recommendations.append("Tool execution success rate is low, recommend checking input parameters")
808 |             else:
809 |                 recommendations.append("Tool usage is effective, continue maintaining")
810 |         
811 |         most_used = self._get_most_used_tools()
812 |         if most_used and most_used[0][1] > total_executions * 0.6:
813 |             recommendations.append(f"Over-reliance on {most_used[0][0]} tool, recommend balanced use of other tools")
814 |         
815 |         return recommendations
816 | 
817 | 
818 | # Global tool orchestrator instance
819 | global_tool_orchestrator = ToolOrchestrator()


--------------------------------------------------------------------------------