├── README.md ├── extractFullReport.py ├── extract_embedding.py ├── main.py ├── main_functions.py └── requirements.txt /README.md: -------------------------------------------------------------------------------- 1 | # Automatic Documentation Generator for Python Projects 2 | 3 | ##Overview 4 | 5 | This project is an innovative automated documentation tool that uses static code analysis and artificial intelligence to generate comprehensive technical documents for Python projects. 6 | 7 | ##Main Features 8 | 9 | ### Advanced Code Analysis 10 | - Full scan of Python projects 11 | - Specific extraction of code structure 12 | - Identification of classes, functions, imports and doctrines 13 | - Calculation of code complexity 14 | 15 | ### AI-powered Documentation Generation 16 | - Use of advanced language models (Ollama/LLaMA) 17 | - Automatic generation of: 18 | - Project overview 19 | - Summaries of individual files 20 | - Description of interactions between modules 21 | 22 | ### Multiple Output Formats 23 | - JSON documentation for programmatic processing 24 | - Markdown file for human reading 25 | - HTML page for viewing in the browser 26 | 27 | ## Technologies Used 28 | 29 | - **Code Analysis**: Python AST (Abstract Syntax Tree) 30 | - **AI**: Ollama with LLaMA model 31 | - **Processing**: 32 | - Colorama (colored logs) 33 | - tqdm (progress bars) 34 | - markdown (document conversion) 35 | 36 | ## Benefits 37 | 38 | 1. **Time Saving**: Automatic documentation in minutes 39 | 2. **Consistency**: Standardized documentation 40 | 3. **Completeness**: Deep code analysis 41 | 4. **Flexibility**: Adaptable to different project structures 42 | 43 | ## Use Cases 44 | 45 | - Academic project documentation 46 | - Developer onboarding 47 | - Legacy code maintenance 48 | - Software architecture analysis 49 | 50 | ## Next Steps 51 | 52 | - Support for more programming languages 53 | - Integration with CI/CD tools 54 | - Refinement of AI models 55 | - Advanced documentation customization 56 | 57 | ## Contribution 58 | 59 | Contributions are welcome! Please review the contribution guidelines before submitting pull requests. 60 | 61 | ## License 62 | 63 | MIT License 64 | -------------------------------------------------------------------------------- /extractFullReport.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ast 3 | import json 4 | import markdown 5 | import time 6 | from typing import List, Dict 7 | from openai import OpenAI 8 | import colorama 9 | from tqdm import tqdm 10 | 11 | # Initialize colorama for terminal colors 12 | colorama.init(autoreset=True) 13 | 14 | # Ollama client configuration 15 | client = OpenAI( 16 | base_url='http://localhost:11434/v1', 17 | api_key='ollama' 18 | ) 19 | 20 | def log_info(message): 21 | """Prints informative messages in blue""" 22 | print(f"{colorama.Fore.CYAN}[INFO] {message}{colorama.Fore.RESET}") 23 | 24 | def log_warning(message): 25 | """Prints warning messages in yellow""" 26 | print(f"{colorama.Fore.YELLOW}[WARN] {message}{colorama.Fore.RESET}") 27 | 28 | def log_error(message): 29 | """Prints error messages in red""" 30 | print(f"{colorama.Fore.RED}[ERROR] {message}{colorama.Fore.RESET}") 31 | 32 | def log_success(message): 33 | """Prints success messages in green""" 34 | print(f"{colorama.Fore.GREEN}[SUCCESS] {message}{colorama.Fore.RESET}") 35 | 36 | def analyze_file(file_path: str) -> Dict: 37 | """Analyzes an individual Python file""" 38 | try: 39 | log_info(f"Analyzing file: {file_path}") 40 | start_time = time.time() 41 | 42 | with open(file_path, "r", encoding="utf-8") as f: 43 | code = f.read() 44 | tree = ast.parse(code) 45 | file_info = { 46 | "file": file_path, 47 | "classes": [], 48 | "functions": [], 49 | "imports": [], 50 | "docstrings": [], 51 | "complexity": 0 52 | } 53 | 54 | # Detailed AST analysis 55 | for node in ast.iter_child_nodes(tree): 56 | if isinstance(node, ast.ClassDef): 57 | file_info["classes"].append({ 58 | "name": node.name, 59 | "methods": [method.name for method in node.body if isinstance(method, ast.FunctionDef)] 60 | }) 61 | elif isinstance(node, ast.FunctionDef): 62 | file_info["functions"].append({ 63 | "name": node.name, 64 | "args": [arg.arg for arg in node.args.args], 65 | "complexity": len(list(ast.walk(node))) 66 | }) 67 | elif isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom): 68 | file_info["imports"].append(ast.unparse(node)) 69 | elif isinstance(node, ast.Expr) and isinstance(node.value, ast.Str): 70 | file_info["docstrings"].append(node.value.s) 71 | 72 | end_time = time.time() 73 | log_success(f"File analysis completed in {end_time - start_time:.2f} seconds") 74 | return file_info 75 | except Exception as e: 76 | log_error(f"Error analyzing {file_path}: {e}") 77 | return {} 78 | 79 | def collect_python_files(directory: str) -> List[str]: 80 | """Collects all Python files in a directory""" 81 | log_info(f"Collecting Python files in: {directory}") 82 | python_files = [] 83 | total_files = 0 84 | for root, _, files in os.walk(directory): 85 | for file in files: 86 | if file.endswith(".py"): 87 | python_files.append(os.path.join(root, file)) 88 | total_files += 1 89 | 90 | log_success(f"Found {total_files} Python files") 91 | return python_files 92 | 93 | def generate_documentation(analysis_results: List[Dict], model: str = "qwen2.5:14b-instruct-q4_K_M") -> Dict: 94 | """Generates documentation using LLM""" 95 | documentation = { 96 | "project_overview": "", 97 | "file_summaries": {}, 98 | "module_interactions": "" 99 | } 100 | 101 | # Project overview analysis 102 | log_info("Generating project overview") 103 | overview_prompt = "Analyze this project structure and provide a comprehensive overview:\n\n" 104 | for result in analysis_results: 105 | overview_prompt += f"File: {result['file']}\n" 106 | overview_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']])}\n" 107 | overview_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']])}\n\n" 108 | 109 | overview_prompt += "Describe the project's purpose, main components, and how they interact." 110 | 111 | try: 112 | overview_completion = client.chat.completions.create( 113 | model=model, 114 | messages=[ 115 | {"role": "system", "content": "You are an expert in machine learning project analysis."}, 116 | {"role": "user", "content": overview_prompt} 117 | ] 118 | ) 119 | documentation["project_overview"] = overview_completion.choices[0].message.content 120 | log_success("Project overview generated") 121 | except Exception as e: 122 | log_error(f"Error generating project overview: {e}") 123 | 124 | # Individual file summaries 125 | log_info("Generating file summaries") 126 | for result in tqdm(analysis_results, desc="Processing files"): 127 | try: 128 | file_summary_prompt = f"Analyze the file {result['file']} and explain its purpose and key components:\n" 129 | file_summary_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']])}\n" 130 | file_summary_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']])}\n" 131 | 132 | file_summary_completion = client.chat.completions.create( 133 | model=model, 134 | messages=[ 135 | {"role": "system", "content": "You are an expert in code analysis."}, 136 | {"role": "user", "content": file_summary_prompt} 137 | ] 138 | ) 139 | 140 | documentation["file_summaries"][result['file']] = { 141 | "summary": file_summary_completion.choices[0].message.content, 142 | "details": result 143 | } 144 | except Exception as e: 145 | log_warning(f"Error generating summary for {result['file']}: {e}") 146 | 147 | # Module interactions 148 | log_info("Generating module interaction description") 149 | try: 150 | interaction_prompt = "Describe how the modules and components in this project interact with each other." 151 | interaction_completion = client.chat.completions.create( 152 | model=model, 153 | messages=[ 154 | {"role": "system", "content": "You are an expert in software architecture."}, 155 | {"role": "user", "content": interaction_prompt} 156 | ] 157 | ) 158 | documentation["module_interactions"] = interaction_completion.choices[0].message.content 159 | log_success("Module interaction description generated") 160 | except Exception as e: 161 | log_error(f"Error generating module interactions: {e}") 162 | 163 | return documentation 164 | 165 | def save_documentation(documentation: Dict, output_dir: str = "project_docs"): 166 | """Saves documentation in multiple formats""" 167 | log_info(f"Saving documentation to directory: {output_dir}") 168 | 169 | # Create output directory 170 | os.makedirs(output_dir, exist_ok=True) 171 | 172 | # Save JSON 173 | json_path = os.path.join(output_dir, "project_documentation.json") 174 | try: 175 | with open(json_path, "w", encoding="utf-8") as f: 176 | json.dump(documentation, f, indent=2) 177 | log_success(f"JSON documentation saved to: {json_path}") 178 | except Exception as e: 179 | log_error(f"Error saving JSON: {e}") 180 | 181 | # Generate Markdown 182 | markdown_content = f"""# Project Documentation 183 | 184 | ## Project Overview 185 | {documentation['project_overview']} 186 | 187 | ## Module Interactions 188 | {documentation['module_interactions']} 189 | 190 | ## File Summaries 191 | """ 192 | 193 | for file_path, file_info in documentation['file_summaries'].items(): 194 | markdown_content += f""" 195 | ### {file_path} 196 | {file_info['summary']} 197 | 198 | #### Detailed Components 199 | - **Classes**: {[cls['name'] for cls in file_info['details']['classes']]} 200 | - **Functions**: {[func['name'] for func in file_info['details']['functions']]} 201 | """ 202 | 203 | # Save Markdown 204 | md_path = os.path.join(output_dir, "project_documentation.md") 205 | try: 206 | with open(md_path, "w", encoding="utf-8") as f: 207 | f.write(markdown_content) 208 | log_success(f"Markdown documentation saved to: {md_path}") 209 | except Exception as e: 210 | log_error(f"Error saving Markdown: {e}") 211 | 212 | # Convert to HTML 213 | try: 214 | html_content = markdown.markdown(markdown_content) 215 | html_path = os.path.join(output_dir, "project_documentation.html") 216 | with open(html_path, "w", encoding="utf-8") as f: 217 | f.write(f""" 218 | 219 | 220 | 221 | Project Documentation 222 | 225 | 226 | 227 | {html_content} 228 | 229 | 230 | """) 231 | log_success(f"HTML documentation saved to: {html_path}") 232 | except Exception as e: 233 | log_error(f"Error saving HTML: {e}") 234 | 235 | if __name__ == "__main__": 236 | # Start of execution 237 | start_total_time = time.time() 238 | log_info("Starting project analysis") 239 | 240 | # Project directory 241 | project_dir = "/home/marcos/projetos_automatizacao/ENTENDER_textgrad/textgrad" 242 | 243 | # File collection 244 | python_files = collect_python_files(project_dir) 245 | 246 | # File analysis 247 | log_info("Starting detailed file analysis") 248 | results = [] 249 | for file in tqdm(python_files, desc="Analyzing files"): 250 | file_result = analyze_file(file) 251 | if file_result: 252 | results.append(file_result) 253 | 254 | # Documentation generation 255 | log_info("Generating documentation with AI assistant") 256 | documentation = generate_documentation(results) 257 | 258 | # Saving documentation 259 | save_documentation(documentation) 260 | 261 | # Total execution time 262 | end_total_time = time.time() 263 | log_success(f"Analysis completed in {end_total_time - start_total_time:.2f} seconds") 264 | print("\nDocumentation generated successfully in 'project_docs' directory!") -------------------------------------------------------------------------------- /extract_embedding.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ollama 3 | import chromadb 4 | 5 | class SemanticSearchChroma: 6 | def __init__(self, collection_name='document_embeddings'): 7 | """ 8 | Inicializa cliente ChromaDB e cria/carrega coleção 9 | """ 10 | self.client = chromadb.PersistentClient(path="./chroma_storage") 11 | self.collection = self.client.get_or_create_collection(name=collection_name) 12 | 13 | def add_documents(self, directory): 14 | """ 15 | Adiciona documentos do diretório à coleção ChromaDB 16 | """ 17 | for filename in os.listdir(directory): 18 | filepath = os.path.join(directory, filename) 19 | 20 | if os.path.isfile(filepath): 21 | with open(filepath, 'r', encoding='utf-8') as file: 22 | content = file.read() 23 | 24 | # Gera embedding 25 | embedding = ollama.embeddings( 26 | model='mxbai-embed-large', 27 | prompt=content 28 | )['embedding'] 29 | 30 | # Adiciona ao ChromaDB 31 | self.collection.add( 32 | embeddings=[embedding], 33 | documents=[content], 34 | ids=[filename] 35 | ) 36 | 37 | def search(self, query, n_results=3): 38 | """ 39 | Busca semântica com ChromaDB 40 | """ 41 | query_embedding = ollama.embeddings( 42 | model='mxbai-embed-large', 43 | prompt=query 44 | )['embedding'] 45 | 46 | results = self.collection.query( 47 | query_embeddings=[query_embedding], 48 | n_results=n_results 49 | ) 50 | 51 | return results 52 | 53 | def main(): 54 | base_dir = '/home/marcos/projetos_automatizacao/meu_primeiro_agent/_relatorios/' 55 | 56 | # Inicializa e popula 57 | searcher = SemanticSearchChroma() 58 | searcher.add_documents(base_dir) 59 | 60 | # Exemplo de busca 61 | query = input("Digite o assunto para busca: ") 62 | results = searcher.search(query) 63 | 64 | # Exibe resultados 65 | for i, doc in enumerate(results['documents'][0], 1): 66 | print(f"Resultado {i}:\n{doc[:500]}...\n") 67 | 68 | if __name__ == '__main__': 69 | main() -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from PyQt5.QtWidgets import ( 4 | QApplication, QMainWindow, QWidget, QVBoxLayout, QHBoxLayout, 5 | QLabel, QLineEdit, QPushButton, QFileDialog, QCheckBox, 6 | QTextEdit, QSplitter, QTreeView, QMenu, QAction 7 | ) 8 | from PyQt5.QtCore import Qt, QDir, pyqtSignal 9 | from PyQt5.QtGui import QStandardItemModel, QStandardItem 10 | from ollama import chat 11 | from ollama import ChatResponse 12 | from extract_embedding import SemanticSearchChroma 13 | from main_functions import ( 14 | collect_python_files, 15 | generate_documentation, 16 | analyze_file, 17 | save_documentation, 18 | log_info, 19 | log_error 20 | ) 21 | 22 | searcher = SemanticSearchChroma() 23 | 24 | class DocumentationApp(QMainWindow): 25 | def __init__(self): 26 | super().__init__() 27 | self.setWindowTitle("Automated Project Documentation") 28 | self.setGeometry(100, 100, 1200, 800) 29 | self.localizacao_da_pasta = None 30 | 31 | # Main central widget 32 | main_widget = QWidget() 33 | self.setCentralWidget(main_widget) 34 | main_layout = QHBoxLayout() 35 | main_widget.setLayout(main_layout) 36 | 37 | # Left Sidebar 38 | sidebar = QWidget() 39 | sidebar_layout = QVBoxLayout() 40 | sidebar.setLayout(sidebar_layout) 41 | sidebar.setMaximumWidth(300) 42 | 43 | # Project Directory Selection 44 | dir_label = QLabel("Select Project Directory:") 45 | self.dir_input = QLineEdit() 46 | browse_button = QPushButton("Browse") 47 | browse_button.clicked.connect(self.select_directory) 48 | 49 | self.partial_report_checkbox = QCheckBox("Partial Report") 50 | 51 | # Partial Report Criteria 52 | self.partial_criteria_input = QLineEdit() 53 | self.partial_criteria_input.setPlaceholderText("Enter partial report criteria") 54 | self.partial_criteria_input.setEnabled(False) 55 | 56 | # Toggle partial report input 57 | self.partial_report_checkbox.toggled.connect( 58 | self.partial_criteria_input.setEnabled 59 | ) 60 | 61 | # Generate Report Button 62 | generate_button = QPushButton("Generate Documentation") 63 | generate_button.clicked.connect(self.generate_documentation) 64 | 65 | # Add widgets to sidebar 66 | sidebar_layout.addWidget(dir_label) 67 | sidebar_layout.addWidget(self.dir_input) 68 | sidebar_layout.addWidget(browse_button) 69 | sidebar_layout.addWidget(self.partial_report_checkbox) 70 | sidebar_layout.addWidget(self.partial_criteria_input) 71 | sidebar_layout.addWidget(generate_button) 72 | 73 | # Generate Individual Reports Button 74 | generate_individual_button = QPushButton("Generate Individual Reports") 75 | generate_individual_button.clicked.connect(self.generate_individual_reports) 76 | sidebar_layout.addWidget(generate_individual_button) 77 | 78 | # Generate Embeddings Button 79 | self.generate_embedding_button = QPushButton("Gerar embeddings") 80 | self.generate_embedding_button.setEnabled(False) 81 | self.generate_embedding_button.clicked.connect(self.generate_embedding) 82 | sidebar_layout.addWidget(self.generate_embedding_button) 83 | sidebar_layout.addStretch(1) 84 | 85 | # Main Content Area 86 | content_area = QWidget() 87 | content_layout = QVBoxLayout() 88 | content_area.setLayout(content_layout) 89 | 90 | # Results Display 91 | self.results_text = QTextEdit() 92 | self.results_text.setReadOnly(True) 93 | 94 | # File Tree View 95 | self.file_tree = QTreeView() 96 | self.file_tree.setContextMenuPolicy(Qt.CustomContextMenu) 97 | self.file_tree.customContextMenuRequested.connect(self.show_context_menu) 98 | 99 | # Add widgets to content area 100 | content_layout.addWidget(QLabel("Documentation Results:")) 101 | content_layout.addWidget(self.results_text) 102 | content_layout.addWidget(self.file_tree) 103 | 104 | # Add components to main layout 105 | main_layout.addWidget(sidebar) 106 | main_layout.addWidget(content_area) 107 | 108 | def enable_new_button(self): 109 | self.generate_embedding_button.setEnabled(True) 110 | 111 | def generate_embedding(self): 112 | if self.localizacao_da_pasta is not None: 113 | searcher.add_documents(self.localizacao_da_pasta) 114 | else: 115 | print("Localização da pasta não definida") 116 | 117 | def select_directory(self): 118 | """Open directory selection dialog""" 119 | dir_path = QFileDialog.getExistingDirectory( 120 | self, 121 | "Select Project Directory", 122 | os.path.expanduser('~') 123 | ) 124 | 125 | if dir_path: 126 | self.dir_input.setText(dir_path) 127 | self.populate_file_tree(dir_path) 128 | 129 | 130 | def populate_file_tree(self, directory): 131 | """Populate file tree with project structure""" 132 | model = QStandardItemModel() 133 | root_item = model.invisibleRootItem() 134 | 135 | try: 136 | log_info(f"Populating file tree for directory: {directory}") 137 | for root, dirs, files in os.walk(directory): 138 | # Criar um item para o diretório atual 139 | dir_item = QStandardItem(os.path.basename(root)) 140 | dir_item.setData(root, Qt.UserRole + 1) # Armazena o caminho completo do diretório 141 | dir_item.setSelectable(False) # Impede seleção de diretórios 142 | 143 | for file in files: 144 | # Adiciona apenas arquivos Python 145 | if file.endswith('.py'): 146 | file_item = QStandardItem(file) 147 | # Armazena o caminho completo do arquivo 148 | full_file_path = os.path.join(root, file) 149 | file_item.setData(full_file_path, Qt.UserRole + 1) 150 | dir_item.appendRow(file_item) 151 | 152 | # Adiciona apenas diretórios que contêm arquivos Python 153 | if dir_item.rowCount() > 0: 154 | root_item.appendRow(dir_item) 155 | 156 | self.file_tree.setModel(model) 157 | self.file_tree.doubleClicked.connect(self.generate_individual_report) 158 | except Exception as e: 159 | log_error(f"Error populating file tree: {e}") 160 | 161 | def generate_individual_report(self, index): 162 | """Generate individual report for a double-clicked file""" 163 | # Recupera o item selecionado 164 | selected_item = index.model().itemFromIndex(index) 165 | 166 | # Recupera o caminho completo do arquivo 167 | file_path = selected_item.data(Qt.UserRole + 1) 168 | 169 | # Verifica se é um arquivo (não um diretório) 170 | if os.path.isfile(file_path): 171 | print(f"Arquivo selecionado: {file_path}") 172 | 173 | try: 174 | file_result = analyze_file(file_path) 175 | if file_result: 176 | report = self.generate_file_report(file_path, file_result) 177 | self.results_text.setText(report) 178 | except Exception as e: 179 | log_error(f"Erro ao gerar relatório para {file_path}: {e}") 180 | self.results_text.setText(f"Erro ao analisar arquivo: {e}") 181 | 182 | def generate_file_report(self, file, file_result): 183 | # Create a prompt for the LLM 184 | prompt = f""" 185 | Extract the most relevant information from the file {file} and generate a concise and informative text describing its content. 186 | The generated text should be optimized for semantic search using embeddings. 187 | 188 | Desired output example: 189 | 190 | Main topics: climate change, agriculture, environmental impact, food security, data analysis, statistical modeling. 191 | Content: This scientific study investigates the effects of climate change on global agricultural production. 192 | By analyzing historical data and future projections, the document demonstrates how extreme climate events, 193 | such as droughts and floods, affect agricultural productivity and food availability. 194 | The authors propose adaptation and mitigation measures to ensure food security in a global warming scenario. 195 | """ 196 | 197 | # Call the LLM 198 | response: ChatResponse = chat(model='qwen2.5:14b-instruct-q4_K_M', messages=[ 199 | { 200 | 'role':'system', 201 | 'content': 'You are an expert in code analysis.' 202 | }, 203 | { 204 | 'role': 'user', 205 | 'content': prompt 206 | } 207 | ]) 208 | 209 | # Return the report generated by the LLM 210 | return response.message.content 211 | 212 | def generate_documentation(self): 213 | project_dir = self.dir_input.text() 214 | if not project_dir: 215 | self.results_text.setText("Please select a project directory") 216 | return 217 | 218 | try: 219 | documentation = generate_documentation(project_dir) 220 | self.display_documentation_results(documentation) 221 | except Exception as e: 222 | log_error(f"Error generating documentation: {e}") 223 | self.results_text.setText(f"An error occurred: {str(e)}") 224 | 225 | def generate_individual_reports(self): 226 | project_dir = self.dir_input.text() 227 | if not project_dir: 228 | self.results_text.setText("Please select a project directory") 229 | return 230 | 231 | try: 232 | # Collect Python files 233 | python_files = collect_python_files(project_dir) 234 | 235 | # Create a separate folder to store individual reports 236 | reports_dir = os.path.join(project_dir, "_relatorios") 237 | self.localizacao_da_pasta = reports_dir 238 | os.makedirs(reports_dir, exist_ok=True) 239 | 240 | # Generate individual reports for each file 241 | for file in python_files: 242 | file_result = analyze_file(file) 243 | if file_result: 244 | # Generate a report for the file 245 | report = self.generate_file_report(file, file_result) 246 | 247 | # Save the report in a separate folder 248 | report_file = os.path.join(reports_dir, f"{os.path.basename(file)}.txt") 249 | with open(report_file, "w") as f: 250 | f.write(report) 251 | 252 | self.results_text.setText("Individual reports generated successfully!") 253 | self.enable_new_button() 254 | except Exception as e: 255 | log_error(f"Error generating individual reports: {e}") 256 | self.results_text.setText(f"An error occurred: {str(e)}") 257 | 258 | def show_context_menu(self, pos): 259 | """Show context menu for selected file in the tree view""" 260 | selected_indexes = self.file_tree.selectedIndexes() 261 | if selected_indexes: 262 | selected_file = selected_indexes[0].data() 263 | context_menu = QMenu() 264 | generate_report_action = QAction(f"Generate Report for {selected_file}", self) 265 | generate_report_action.triggered.connect(lambda: self.generate_individual_report(None, None)) 266 | context_menu.addAction(generate_report_action) 267 | context_menu.exec_(self.file_tree.mapToGlobal(pos)) 268 | 269 | def display_documentation_results(self, documentation): 270 | """Display documentation results in the text area""" 271 | results_text = f""" 272 | Project Overview: 273 | {documentation['project_overview']} 274 | Module Interactions: 275 | {documentation['module_interactions']} 276 | File Summaries: 277 | """ 278 | for file_path, file_info in documentation['file_summaries'].items(): 279 | results_text += f"\n{file_path}:\n{file_info['summary']}\n" 280 | self.results_text.setText(results_text) 281 | 282 | def main(): 283 | app = QApplication(sys.argv) 284 | main_window = DocumentationApp() 285 | main_window.show() 286 | sys.exit(app.exec_()) 287 | 288 | if __name__ == "__main__": 289 | main() -------------------------------------------------------------------------------- /main_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import ast 3 | import json 4 | import time 5 | import numpy as np 6 | from typing import List, Dict 7 | import ollama 8 | from ollama import chat, ChatResponse 9 | import colorama 10 | from tqdm import tqdm 11 | import markdown 12 | 13 | # Configurações existentes mantidas 14 | colorama.init(autoreset=True) 15 | 16 | # Adicionando modelo de embeddings 17 | 18 | def log_info(message): 19 | """Prints informative messages in blue""" 20 | print(f"{colorama.Fore.CYAN}[INFO] {message}{colorama.Fore.RESET}") 21 | 22 | def log_warning(message): 23 | """Prints warning messages in yellow""" 24 | print(f"{colorama.Fore.YELLOW}[WARN] {message}{colorama.Fore.RESET}") 25 | 26 | def log_error(message): 27 | """Prints error messages in red""" 28 | print(f"{colorama.Fore.RED}[ERROR] {message}{colorama.Fore.RESET}") 29 | 30 | def log_success(message): 31 | """Prints success messages in green""" 32 | print(f"{colorama.Fore.GREEN}[SUCCESS] {message}{colorama.Fore.RESET}") 33 | 34 | def analyze_file(file_path: str) -> Dict: 35 | """Analyzes an individual Python file with enhanced metadata""" 36 | try: 37 | log_info(f"Analyzing file: {file_path}") 38 | start_time = time.time() 39 | 40 | with open(file_path, "r", encoding="utf-8") as f: 41 | code = f.read() 42 | tree = ast.parse(code) 43 | file_info = { 44 | "file": file_path, 45 | "filename": os.path.basename(file_path), 46 | "classes": [], 47 | "functions": [], 48 | "imports": [], 49 | "docstrings": [], 50 | "complexity": 0, 51 | "summary": "" # Placeholder for AI-generated summary 52 | } 53 | 54 | # Existing AST analysis logic 55 | for node in ast.iter_child_nodes(tree): 56 | if isinstance(node, ast.ClassDef): 57 | file_info["classes"].append({ 58 | "name": node.name, 59 | "methods": [method.name for method in node.body if isinstance(method, ast.FunctionDef)] 60 | }) 61 | elif isinstance(node, ast.FunctionDef): 62 | file_info["functions"].append({ 63 | "name": node.name, 64 | "args": [arg.arg for arg in node.args.args], 65 | "complexity": len(list(ast.walk(node))) 66 | }) 67 | elif isinstance(node, ast.Import) or isinstance(node, ast.ImportFrom): 68 | file_info["imports"].append(ast.unparse(node)) 69 | elif isinstance(node, ast.Expr) and isinstance(node.value, ast.Str): 70 | file_info["docstrings"].append(node.value.s) 71 | 72 | # AI-generated summary (can be enhanced later) 73 | file_info["summary"] = generate_file_summary(file_info) 74 | 75 | end_time = time.time() 76 | log_success(f"File analysis completed in {end_time - start_time:.2f} seconds") 77 | return file_info 78 | except Exception as e: 79 | log_error(f"Error analyzing {file_path}: {e}") 80 | return {} 81 | 82 | def generate_file_summary(file_info: Dict) -> str: 83 | """Generate a basic summary for a file""" 84 | summary = f"Arquivo: {file_info['filename']}\n" 85 | summary += f"Classes: {', '.join([cls['name'] for cls in file_info['classes']] or ['Nenhuma'])}\n" 86 | summary += f"Funções: {', '.join([func['name'] for func in file_info['functions']] or ['Nenhuma'])}\n" 87 | return summary 88 | 89 | def collect_python_files(directory: str) -> List[str]: 90 | """Collects all Python files in a directory""" 91 | log_info(f"Collecting Python files in: {directory}") 92 | python_files = [] 93 | total_files = 0 94 | for root, _, files in os.walk(directory): 95 | for file in files: 96 | if file.endswith(".py"): 97 | python_files.append(os.path.join(root, file)) 98 | total_files += 1 99 | 100 | log_success(f"Found {total_files} Python files") 101 | return python_files 102 | 103 | def generate_embeddings(descriptions: Dict[str, str]) -> Dict[str, np.ndarray]: 104 | """Generate embeddings for file descriptions""" 105 | log_info("Generating semantic embeddings") 106 | embeddings = {} 107 | 108 | for file_path, description in descriptions.items(): 109 | embedding = ollama.embeddings( 110 | model='mxbai-embed-large', 111 | prompt=description 112 | ) 113 | embeddings[file_path] = embedding 114 | 115 | # Opcional: salvar embeddings 116 | embedding_dir = os.path.join(os.path.dirname(file_path), '.project_docs') 117 | os.makedirs(embedding_dir, exist_ok=True) 118 | np.save(os.path.join(embedding_dir, f"{os.path.basename(file_path)}.embedding.npy"), embedding) 119 | 120 | log_success(f"Generated {len(embeddings)} embeddings") 121 | return embeddings 122 | 123 | def search_project_files(project_dir: str, query: str, top_k: int = 5) -> List[tuple]: 124 | """Semantic search across project files""" 125 | log_info(f"Performing semantic search for query: {query}") 126 | query_embedding = ollama.embeddings( 127 | model='mxbai-embed-large', 128 | prompt=query 129 | ) 130 | similarities = {} 131 | 132 | # Buscar arquivos e embeddings 133 | for root, _, files in os.walk(project_dir): 134 | for file in files: 135 | if file.endswith('.py'): 136 | embedding_path = os.path.join(root, '.project_docs', f"{file}.embedding.npy") 137 | if os.path.exists(embedding_path): 138 | file_embedding = np.load(embedding_path) 139 | # Calcula a similaridade entre os embeddings 140 | # A forma como você calcula a similaridade pode variar dependendo do tipo de embedding 141 | similarity = np.dot(query_embedding, file_embedding) / ( 142 | np.linalg.norm(query_embedding) * np.linalg.norm(file_embedding) 143 | ) 144 | similarities[os.path.join(root, file)] = similarity 145 | 146 | # Ordenar e retornar top k resultados 147 | sorted_results = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:top_k] 148 | log_success(f"Found {len(sorted_results)} relevant files") 149 | return sorted_results 150 | 151 | def generate_documentation(analysis_results: List[Dict], model: str = "qwen2.5:14b-instruct-q4_K_M") -> Dict: 152 | """Generates documentation using LLM""" 153 | documentation = { 154 | "project_overview": "", 155 | "file_summaries": {}, 156 | "module_interactions": "" 157 | } 158 | 159 | # Project overview analysis 160 | log_info("Generating project overview") 161 | overview_prompt = "Analyze this project structure and provide a comprehensive overview:\n\n" 162 | for result in analysis_results: 163 | overview_prompt += f"File: {result['file']}\n" 164 | overview_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']] or ['Nenhuma'])}\n" 165 | overview_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']] or ['Nenhuma'])}\n\n" 166 | 167 | overview_prompt += "Describe the project's purpose, main components, and how they interact." 168 | 169 | try: 170 | response: ChatResponse = chat(model=model, messages=[ 171 | {'role':'system', 'content': 'You are an expert in machine learning project analysis.'}, 172 | {'role': 'user', 'content': overview_prompt} 173 | ]) 174 | documentation["project_overview"] = response.message.content 175 | log_success("Project overview generated") 176 | except Exception as e: 177 | log_error(f"Error generating project overview: {e}") 178 | 179 | # Individual file summaries 180 | log_info("Generating file summaries") 181 | for result in tqdm(analysis_results, desc="Processing files"): 182 | try: 183 | file_summary_prompt = f"Analyze the file {result['file']} and explain its purpose and key components:\n" 184 | file_summary_prompt += f"Classes: {', '.join([cls['name'] for cls in result['classes']] or ['Nenhuma'])}\n" 185 | file_summary_prompt += f"Functions: {', '.join([func['name'] for func in result['functions']] or ['Nenhuma'])}\n" 186 | 187 | response: ChatResponse = chat(model=model, messages=[ 188 | {'role':'system', 'content': 'You are an expert in code analysis.'}, 189 | {'role': 'user', 'content': file_summary_prompt} 190 | ]) 191 | 192 | documentation["file_summaries"][result['file']] = { 193 | "summary": response.message.content, 194 | "details": result 195 | } 196 | except Exception as e: 197 | log_warning(f"Error generating summary for {result['file']}: {e}") 198 | 199 | # Module interactions 200 | log_info("Generating module interaction description") 201 | try: 202 | interaction_prompt = "Describe how the modules and components in this project interact with each other." 203 | response: ChatResponse = chat(model=model, messages=[ 204 | {'role':'system', 'content': 'You are an expert in software architecture.'}, 205 | {'role': 'user', 'content': interaction_prompt} 206 | ]) 207 | documentation["module_interactions"] = response.message.content 208 | log_success("Module interaction description generated") 209 | except Exception as e: 210 | log_error(f"Error generating module interactions: {e}") 211 | 212 | return documentation 213 | 214 | def save_documentation(documentation: Dict, output_dir: str = "project_docs"): 215 | """Saves documentation in multiple formats""" 216 | log_info(f"Saving documentation to directory: {output_dir}") 217 | 218 | # Create output directory 219 | os.makedirs(output_dir, exist_ok=True) 220 | 221 | # Save JSON 222 | json_path = os.path.join(output_dir, "project_documentation.json") 223 | try: 224 | with open(json_path, "w", encoding="utf-8") as f: 225 | json.dump(documentation, f, indent=2) 226 | log_success(f"JSON documentation saved to: {json_path}") 227 | except Exception as e: 228 | log_error(f"Error saving JSON: {e}") 229 | 230 | # Generate Markdown 231 | markdown_content = f"""# Project Documentation 232 | 233 | ## Project Overview 234 | {documentation['project_overview']} 235 | 236 | ## Module Interactions 237 | {documentation['module_interactions']} 238 | 239 | ## File Summaries 240 | """ 241 | 242 | for file_path, file_info in documentation['file_summaries'].items(): 243 | markdown_content += f""" 244 | ### {file_path} 245 | {file_info['summary']} 246 | 247 | #### Detailed Components 248 | - **Classes**: {', '.join([cls['name'] for cls in file_info['details']['classes']])} 249 | - **Functions**: {', '.join([func['name'] for func in file_info['details']['functions']])} 250 | """ 251 | 252 | # Save Markdown 253 | md_path = os.path.join(output_dir, "project_documentation.md") 254 | try: 255 | with open(md_path, "w", encoding="utf-8") as f: 256 | f.write(markdown_content) 257 | log_success(f"Markdown documentation saved to: {md_path}") 258 | except Exception as e: 259 | log_error(f"Error saving Markdown: {e}") 260 | 261 | # Convert to HTML 262 | try: 263 | html_content = markdown.markdown(markdown_content) 264 | html_path = os.path.join(output_dir, "project_documentation.html") 265 | with open(html_path, "w", encoding="utf-8") as f: 266 | f.write(f""" 267 | 268 | 269 | 270 | Project Documentation 271 | 274 | 275 | 276 | {html_content} 277 | 278 | 279 | """) 280 | log_success(f"HTML documentation saved to: {html_path}") 281 | except Exception as e: 282 | log_error(f"Error saving HTML: {e}") 283 | 284 | if __name__ == "__main__": 285 | # Start of execution 286 | start_total_time = time.time() 287 | log_info("Starting project analysis") 288 | 289 | # Project directory 290 | project_dir = "/home/marcos/projetos_automatizacao/ENTENDER_textgrad/textgrad" 291 | 292 | # File collection 293 | python_files = collect_python_files(project_dir) 294 | 295 | # File analysis 296 | log_info("Starting detailed file analysis") 297 | results = [] 298 | for file in tqdm(python_files, desc="Analyzing files"): 299 | print(file) 300 | file_result = analyze_file(file) 301 | if file_result: 302 | results.append(file_result) 303 | 304 | # Documentation generation 305 | log_info("Generating documentation with AI assistant") 306 | documentation = generate_documentation(results) 307 | 308 | # Saving documentation 309 | save_documentation(documentation) 310 | 311 | # Total execution time 312 | end_total_time = time.time() 313 | log_success(f"Analysis completed in {end_total_time - start_total_time:.2f} seconds") 314 | print("\nDocumentation generated successfully in 'project_docs' directory!") -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | annotated-types==0.7.0 2 | anyio==4.7.0 3 | certifi==2024.8.30 4 | colorama==0.4.6 5 | distro==1.9.0 6 | exceptiongroup==1.2.2 7 | h11==0.14.0 8 | httpcore==1.0.7 9 | httpx==0.28.1 10 | idna==3.10 11 | jiter==0.8.2 12 | Markdown==3.7 13 | openai==1.57.4 14 | pydantic==2.10.3 15 | pydantic_core==2.27.1 16 | sniffio==1.3.1 17 | tqdm==4.67.1 18 | typing_extensions==4.12.2 19 | --------------------------------------------------------------------------------